[PATCH 3/5] drm/amdgpu: stop using TTMs fault callback

Fri Sep 25 19:08:24 UTC 2020

Tested-by: Nirmoy Das <nirmoy.das at amd.com>

On 9/25/20 4:55 PM, Christian König wrote:
> Implement the fault handler ourselves using the provided TTM functions.
>
> Signed-off-by: Christian König <christian.koenig at amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 20 +++++------
>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.h |  2 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c    | 40 +++++++++++++++++++---
>   3 files changed, 46 insertions(+), 16 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> index 63e9c5793c30..80bc7177cd45 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> @@ -1341,19 +1341,14 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo)
>    * Returns:
>    * 0 for success or a negative error code on failure.
>    */
> -int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
> +vm_fault_t amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
>   {
>   	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
>   	struct ttm_operation_ctx ctx = { false, false };
> -	struct amdgpu_bo *abo;
> +	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
>   	unsigned long offset, size;
>   	int r;
>   
> -	if (!amdgpu_bo_is_amdgpu_bo(bo))
> -		return 0;
> -
> -	abo = ttm_to_amdgpu_bo(bo);
> -
>   	/* Remember that this BO was accessed by the CPU */
>   	abo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
>   
> @@ -1367,7 +1362,7 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
>   
>   	/* Can't move a pinned BO to visible VRAM */
>   	if (abo->tbo.pin_count > 0)
> -		return -EINVAL;
> +		return VM_FAULT_SIGBUS;
>   
>   	/* hurrah the memory is not visible ! */
>   	atomic64_inc(&adev->num_vram_cpu_page_faults);
> @@ -1379,15 +1374,18 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
>   	abo->placement.busy_placement = &abo->placements[1];
>   
>   	r = ttm_bo_validate(bo, &abo->placement, &ctx);
> -	if (unlikely(r != 0))
> -		return r;
> +	if (unlikely(r == -EBUSY || r == -ERESTARTSYS))
> +		return VM_FAULT_NOPAGE;
> +	else if (unlikely(r))
> +		return VM_FAULT_SIGBUS;
>   
>   	offset = bo->mem.start << PAGE_SHIFT;
>   	/* this should never happen */
>   	if (bo->mem.mem_type == TTM_PL_VRAM &&
>   	    (offset + size) > adev->gmc.visible_vram_size)
> -		return -EINVAL;
> +		return VM_FAULT_SIGBUS;
>   
> +	ttm_bo_move_to_lru_tail_unlocked(bo);
>   	return 0;
>   }
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> index e91750e43448..132e5f955180 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> @@ -284,7 +284,7 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
>   			   bool evict,
>   			   struct ttm_resource *new_mem);
>   void amdgpu_bo_release_notify(struct ttm_buffer_object *bo);
> -int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo);
> +vm_fault_t amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo);
>   void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence,
>   		     bool shared);
>   int amdgpu_bo_sync_wait_resv(struct amdgpu_device *adev, struct dma_resv *resv,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index d3bd2fd448be..399961035ae6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -1708,7 +1708,6 @@ static struct ttm_bo_driver amdgpu_bo_driver = {
>   	.verify_access = &amdgpu_verify_access,
>   	.move_notify = &amdgpu_bo_move_notify,
>   	.release_notify = &amdgpu_bo_release_notify,
> -	.fault_reserve_notify = &amdgpu_bo_fault_reserve_notify,
>   	.io_mem_reserve = &amdgpu_ttm_io_mem_reserve,
>   	.io_mem_pfn = amdgpu_ttm_io_mem_pfn,
>   	.access_memory = &amdgpu_ttm_access_memory,
> @@ -2088,15 +2087,48 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
>   	adev->mman.buffer_funcs_enabled = enable;
>   }
>   
> +static vm_fault_t amdgpu_ttm_fault(struct vm_fault *vmf)
> +{
> +	struct ttm_buffer_object *bo = vmf->vma->vm_private_data;
> +	vm_fault_t ret;
> +
> +	ret = ttm_bo_vm_reserve(bo, vmf);
> +	if (ret)
> +		return ret;
> +
> +	ret = amdgpu_bo_fault_reserve_notify(bo);
> +	if (ret)
> +		goto unlock;
> +
> +	ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot,
> +				       TTM_BO_VM_NUM_PREFAULT, 1);
> +	if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
> +		return ret;
> +
> +unlock:
> +	dma_resv_unlock(bo->base.resv);
> +	return ret;
> +}
> +
> +static struct vm_operations_struct amdgpu_ttm_vm_ops = {
> +	.fault = amdgpu_ttm_fault,
> +	.open = ttm_bo_vm_open,
> +	.close = ttm_bo_vm_close,
> +	.access = ttm_bo_vm_access
> +};
> +
>   int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma)
>   {
>   	struct drm_file *file_priv = filp->private_data;
>   	struct amdgpu_device *adev = drm_to_adev(file_priv->minor->dev);
> +	int r;
>   
> -	if (adev == NULL)
> -		return -EINVAL;
> +	r = ttm_bo_mmap(filp, vma, &adev->mman.bdev);
> +	if (unlikely(r != 0))
> +		return r;
>   
> -	return ttm_bo_mmap(filp, vma, &adev->mman.bdev);
> +	vma->vm_ops = &amdgpu_ttm_vm_ops;
> +	return 0;
>   }
>   
>   int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,