[PATCH v2 2/2] drm/amdgpu: Present amdgpu_task_info in VM_FAULTS.

zhoucm1 zhoucm1 at amd.com
Thu Jul 5 02:05:55 UTC 2018



On 2018年07月04日 23:04, Andrey Grodzovsky wrote:
> Extract and present the responsible process and thread when
> VM_FAULT happens.
>
> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky at amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 12 ++++++++++++
>   drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c  | 10 ++++++++--
>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  |  9 +++++++--
>   3 files changed, 27 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index 7a625f3..1c483ea 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -187,6 +187,18 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
>   	if (p->uf_entry.robj)
>   		p->job->uf_addr = uf_offset;
>   	kfree(chunk_array);
> +
> +	/* Use this opportunity to fill in task info for the vm */
> +	if (!vm->task_info.pid) {
> +		vm->task_info.pid = current->pid;
> +		get_task_comm(vm->task_info.task_name, current);
> +
> +		if (current->group_leader->mm == current->mm) {
> +			vm->task_info.tgid = current->group_leader->pid;
> +			get_task_comm(vm->task_info.process_name, current->group_leader);
> +		}
> +	}
> +
You can wrap this segment into a function like amdgpu_vm_set_task_info.


>   	return 0;
>   
>   free_all_kdata:
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> index 08753e7..7ad19f9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
> @@ -46,6 +46,7 @@
>   
>   #include "ivsrcid/ivsrcid_vislands30.h"
>   
> +#include "amdgpu_vm.h"
>   
>   static void gmc_v8_0_set_gmc_funcs(struct amdgpu_device *adev);
>   static void gmc_v8_0_set_irq_funcs(struct amdgpu_device *adev);
> @@ -1449,8 +1450,13 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev,
>   		gmc_v8_0_set_fault_enable_default(adev, false);
>   
>   	if (printk_ratelimit()) {
> -		dev_err(adev->dev, "GPU fault detected: %d 0x%08x\n",
> -			entry->src_id, entry->src_data[0]);
> +		struct amdgpu_task_info task_info = { 0 };
> +
> +		amdgpu_vm_task_info(adev, entry->pasid, &task_info);
You can rename this function to amdgpu_vm_get_task_info.

In general, it looks very good to me and does what I wanted to do before.

Thanks,
David Zhou
> +
> +		dev_err(adev->dev, "GPU fault detected: %d 0x%08x for process %s pid %d thread %s pid %d\n",
> +			entry->src_id, entry->src_data[0], task_info.process_name,
> +			task_info.tgid, task_info.task_name, task_info.pid);
>   		dev_err(adev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
>   			addr);
>   		dev_err(adev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index 691a659..384a89c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@ -259,11 +259,16 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
>   	}
>   
>   	if (printk_ratelimit()) {
> +		struct amdgpu_task_info task_info = { 0 };
> +
> +		amdgpu_vm_task_info(adev, entry->pasid, &task_info);
> +
>   		dev_err(adev->dev,
> -			"[%s] VMC page fault (src_id:%u ring:%u vmid:%u pasid:%u)\n",
> +			"[%s] VMC page fault (src_id:%u ring:%u vmid:%u pasid:%u, for process %s pid %d thread %s pid %d\n)\n",
>   			entry->vmid_src ? "mmhub" : "gfxhub",
>   			entry->src_id, entry->ring_id, entry->vmid,
> -			entry->pasid);
> +			entry->pasid, task_info.process_name, task_info.tgid,
> +			task_info.task_name, task_info.pid);
>   		dev_err(adev->dev, "  at page 0x%016llx from %d\n",
>   			addr, entry->client_id);
>   		if (!amdgpu_sriov_vf(adev))



More information about the amd-gfx mailing list