[PATCH] amd/amdgpu: force to trigger a no-retry-fault after a retry-fault

Sat Nov 16 09:32:11 UTC 2019

On 15.11.19 at 23:38, Alex Sierra wrote:
> After a retry-fault happens, the fault handler will modify the UTCL2 to
> set PTE bits to force a no-retry-fault. This will cause the wave to
> enter the trap handler.

NAK, you can't do this unconditionally, since that behavior is not wanted
for graphics.

Christian.

>
> Change-Id: I177102891f715068f15605957ff23b0cab862603
> Signed-off-by: Alex Sierra <alex.sierra at amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 15 +++++++--------
>   1 file changed, 7 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 3c0bd6472a46..9ad7345d315d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -3167,7 +3167,8 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, unsigned int pasid,
>   			    uint64_t addr)
>   {
>   	struct amdgpu_bo *root;
> -	uint64_t value, flags;
> +	uint64_t value = 0;
> +	uint64_t flags;
>   	struct amdgpu_vm *vm;
>   	long r;
>   
> @@ -3196,17 +3197,15 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, unsigned int pasid,
>   		goto error_unlock;
>   
>   	addr /= AMDGPU_GPU_PAGE_SIZE;
> -	flags = AMDGPU_PTE_VALID | AMDGPU_PTE_SNOOPED |
> -		AMDGPU_PTE_SYSTEM;
>   
>   	if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_NEVER) {
> -		/* Redirect the access to the dummy page */
> -		value = adev->dummy_page_addr;
> -		flags |= AMDGPU_PTE_EXECUTABLE | AMDGPU_PTE_READABLE |
> -			AMDGPU_PTE_WRITEABLE;
> +		/* Setting PTE flags to trigger a no-retry-fault  */
> +		flags = AMDGPU_PTE_EXECUTABLE | AMDGPU_PDE_PTE |
> +			AMDGPU_PTE_TF;
>   	} else {
>   		/* Let the hw retry silently on the PTE */
> -		value = 0;
> +		flags = AMDGPU_PTE_VALID | AMDGPU_PTE_SNOOPED |
> +			AMDGPU_PTE_SYSTEM;
>   	}
>   
>   	r = amdgpu_vm_bo_update_mapping(adev, vm, true, NULL, addr, addr + 1,