[PATCH] drm/amdgpu: Add more page fault info printing for GFX10

Christian König ckoenig.leichtzumerken at gmail.com
Mon Aug 12 19:12:16 UTC 2019


Am 12.08.19 um 21:05 schrieb Zhao, Yong:
> The printing we did for GFX9 was not propogated to GFX10 somehow, so fix
> it now.
>
> Change-Id: Ic0b8381134340b83cd69c3fe186ac7a8a97b1bca
> Signed-off-by: Yong Zhao <Yong.Zhao at amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 33 ++++++++++++++++++++++----
>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  |  5 +++-
>   2 files changed, 32 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> index 4e3ac1084a94..f23be98e9897 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> @@ -140,17 +140,40 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev,
>   	}
>   
>   	if (printk_ratelimit()) {
> +		struct amdgpu_task_info task_info;
> +
> +		memset(&task_info, 0, sizeof(struct amdgpu_task_info));
> +		amdgpu_vm_get_task_info(adev, entry->pasid, &task_info);
> +
>   		dev_err(adev->dev,
> -			"[%s] VMC page fault (src_id:%u ring:%u vmid:%u pasid:%u)\n",
> +			"[%s] page fault (src_id:%u ring:%u vmid:%u pasid:%u, "
> +			"for process:%s pid:%d thread:%s pid:%d)\n",
>   			entry->vmid_src ? "mmhub" : "gfxhub",
>   			entry->src_id, entry->ring_id, entry->vmid,
> -			entry->pasid);
> -		dev_err(adev->dev, "  at page 0x%016llx from %d\n",
> +			entry->pasid, task_info.process_name, task_info.tgid,
> +			task_info.task_name, task_info.pid);
> +		dev_err(adev->dev, "  in page starting at address 0x%016llx from client %d\n",
>   			addr, entry->client_id);
> -		if (!amdgpu_sriov_vf(adev))
> +		if (!amdgpu_sriov_vf(adev)) {
>   			dev_err(adev->dev,
> -				"VM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
> +				"GCVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n",
>   				status);
> +			dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n",
> +				REG_GET_FIELD(status,
> +				GCVM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS));
> +			dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n",
> +				REG_GET_FIELD(status,
> +				GCVM_L2_PROTECTION_FAULT_STATUS, WALKER_ERROR));
> +			dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n",
> +				REG_GET_FIELD(status,
> +				GCVM_L2_PROTECTION_FAULT_STATUS, PERMISSION_FAULTS));
> +			dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n",
> +				REG_GET_FIELD(status,
> +				GCVM_L2_PROTECTION_FAULT_STATUS, MAPPING_ERROR));
> +			dev_err(adev->dev, "\t RW: 0x%lx\n",
> +				REG_GET_FIELD(status,
> +				GCVM_L2_PROTECTION_FAULT_STATUS, RW));
> +		}
>   	}
>   
>   	return 0;
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index 296e2d982578..34c4c2d08550 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@ -364,7 +364,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
>   
>   		dev_err(adev->dev,
>   			"[%s] %s page fault (src_id:%u ring:%u vmid:%u "
> -			"pasid:%u, for process %s pid %d thread %s pid %d)\n",
> +			"pasid:%u, for process:%s pid:%d thread:%s pid:%d)\n",

I think the text actually looks better without the ":".

>   			hub_name, retry_fault ? "retry" : "no-retry",
>   			entry->src_id, entry->ring_id, entry->vmid,
>   			entry->pasid, task_info.process_name, task_info.tgid,
> @@ -387,6 +387,9 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
>   			dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n",
>   				REG_GET_FIELD(status,
>   				VM_L2_PROTECTION_FAULT_STATUS, MAPPING_ERROR));
> +			dev_err(adev->dev, "\t RW: 0x%lx\n",
> +				REG_GET_FIELD(status,
> +				VM_L2_PROTECTION_FAULT_STATUS, RW));

That should probably be a separate patch since it is fixing gfx9.

Apart from that the patch looks good to me,
Christian.

>   
>   		}
>   	}



More information about the amd-gfx mailing list