[PATCH 5/5] drm/amdgpu: fix shadow BO restoring

Huang Rui ray.huang at amd.com
Mon Sep 17 09:10:00 UTC 2018


On Fri, Sep 14, 2018 at 03:42:57PM +0200, Christian König wrote:
> Don't grab the reservation lock any more and simplify the handling quite
> a bit.
> 
> Signed-off-by: Christian König <christian.koenig at amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 109 ++++++++---------------------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c |  46 ++++--------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_object.h |   8 +--
>  3 files changed, 43 insertions(+), 120 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 899342c6dfad..1cbc372964f8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -2954,54 +2954,6 @@ static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
>  	return 0;
>  }
>  
> -/**
> - * amdgpu_device_recover_vram_from_shadow - restore shadowed VRAM buffers
> - *
> - * @adev: amdgpu_device pointer
> - * @ring: amdgpu_ring for the engine handling the buffer operations
> - * @bo: amdgpu_bo buffer whose shadow is being restored
> - * @fence: dma_fence associated with the operation
> - *
> - * Restores the VRAM buffer contents from the shadow in GTT.  Used to
> - * restore things like GPUVM page tables after a GPU reset where
> - * the contents of VRAM might be lost.
> - * Returns 0 on success, negative error code on failure.
> - */
> -static int amdgpu_device_recover_vram_from_shadow(struct amdgpu_device *adev,
> -						  struct amdgpu_ring *ring,
> -						  struct amdgpu_bo *bo,
> -						  struct dma_fence **fence)
> -{
> -	uint32_t domain;
> -	int r;
> -
> -	if (!bo->shadow)
> -		return 0;
> -
> -	r = amdgpu_bo_reserve(bo, true);
> -	if (r)
> -		return r;
> -	domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);
> -	/* if bo has been evicted, then no need to recover */
> -	if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
> -		r = amdgpu_bo_validate(bo->shadow);
> -		if (r) {
> -			DRM_ERROR("bo validate failed!\n");
> -			goto err;
> -		}
> -
> -		r = amdgpu_bo_restore_from_shadow(adev, ring, bo,
> -						 NULL, fence, true);
> -		if (r) {
> -			DRM_ERROR("recover page table failed!\n");
> -			goto err;
> -		}
> -	}
> -err:
> -	amdgpu_bo_unreserve(bo);
> -	return r;
> -}
> -
>  /**
>   * amdgpu_device_recover_vram - Recover some VRAM contents
>   *
> @@ -3010,16 +2962,15 @@ static int amdgpu_device_recover_vram_from_shadow(struct amdgpu_device *adev,
>   * Restores the contents of VRAM buffers from the shadows in GTT.  Used to
>   * restore things like GPUVM page tables after a GPU reset where
>   * the contents of VRAM might be lost.
> - * Returns 0 on success, 1 on failure.
> + *
> + * Returns:
> + * 0 on success, negative error code on failure.
>   */
>  static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
>  {
> -	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
> -	struct amdgpu_bo *bo, *tmp;
>  	struct dma_fence *fence = NULL, *next = NULL;
> -	long r = 1;
> -	int i = 0;
> -	long tmo;
> +	struct amdgpu_bo *shadow;
> +	long r = 1, tmo;
>  
>  	if (amdgpu_sriov_runtime(adev))
>  		tmo = msecs_to_jiffies(8000);
> @@ -3028,44 +2979,40 @@ static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
>  
>  	DRM_INFO("recover vram bo from shadow start\n");
>  	mutex_lock(&adev->shadow_list_lock);
> -	list_for_each_entry_safe(bo, tmp, &adev->shadow_list, shadow_list) {
> -		next = NULL;
> -		amdgpu_device_recover_vram_from_shadow(adev, ring, bo, &next);
> +	list_for_each_entry(shadow, &adev->shadow_list, shadow_list) {
> +
> +		/* No need to recover an evicted BO */
> +		if (shadow->tbo.mem.mem_type != TTM_PL_TT ||
> +		    shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM)
> +			continue;
> +
> +		r = amdgpu_bo_restore_shadow(shadow, &next);
> +		if (r)
> +			break;
> +
>  		if (fence) {
>  			r = dma_fence_wait_timeout(fence, false, tmo);
> -			if (r == 0)
> -				pr_err("wait fence %p[%d] timeout\n", fence, i);
> -			else if (r < 0)
> -				pr_err("wait fence %p[%d] interrupted\n", fence, i);
> -			if (r < 1) {
> -				dma_fence_put(fence);
> -				fence = next;
> +			dma_fence_put(fence);
> +			fence = next;
> +			if (r <= 0)
>  				break;
> -			}
> -			i++;
> +		} else {
> +			fence = next;
>  		}
> -
> -		dma_fence_put(fence);
> -		fence = next;
>  	}
>  	mutex_unlock(&adev->shadow_list_lock);
>  
> -	if (fence) {
> -		r = dma_fence_wait_timeout(fence, false, tmo);
> -		if (r == 0)
> -			pr_err("wait fence %p[%d] timeout\n", fence, i);
> -		else if (r < 0)
> -			pr_err("wait fence %p[%d] interrupted\n", fence, i);
> -
> -	}
> +	if (fence)
> +		tmo = dma_fence_wait_timeout(fence, false, tmo);
>  	dma_fence_put(fence);
>  
> -	if (r > 0)
> -		DRM_INFO("recover vram bo from shadow done\n");
> -	else
> +	if (r <= 0 || tmo <= 0) {
>  		DRM_ERROR("recover vram bo from shadow failed\n");
> +		return -EIO;
> +	}
>  
> -	return (r > 0) ? 0 : 1;
> +	DRM_INFO("recover vram bo from shadow done\n");
> +	return 0;
>  }
>  
>  /**
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> index 650c45c896f0..ca8a0b7a5ac3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> @@ -547,7 +547,7 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
>  	if (!r) {
>  		bo->shadow->parent = amdgpu_bo_ref(bo);
>  		mutex_lock(&adev->shadow_list_lock);
> -		list_add_tail(&bo->shadow_list, &adev->shadow_list);
> +		list_add_tail(&bo->shadow->shadow_list, &adev->shadow_list);
>  		mutex_unlock(&adev->shadow_list_lock);
>  	}
>  
> @@ -679,13 +679,10 @@ int amdgpu_bo_validate(struct amdgpu_bo *bo)
>  }
>  
>  /**
> - * amdgpu_bo_restore_from_shadow - restore an &amdgpu_bo buffer object
> - * @adev: amdgpu device object
> - * @ring: amdgpu_ring for the engine handling the buffer operations
> - * @bo: &amdgpu_bo buffer to be restored
> - * @resv: reservation object with embedded fence
> + * amdgpu_bo_restore_shadow - restore an &amdgpu_bo shadow
> + *
> + * @shadow: &amdgpu_bo shadow to be restored
>   * @fence: dma_fence associated with the operation
> - * @direct: whether to submit the job directly
>   *
>   * Copies a buffer object's shadow content back to the object.
>   * This is used for recovering a buffer from its shadow in case of a gpu
> @@ -694,36 +691,19 @@ int amdgpu_bo_validate(struct amdgpu_bo *bo)
>   * Returns:
>   * 0 for success or a negative error code on failure.
>   */
> -int amdgpu_bo_restore_from_shadow(struct amdgpu_device *adev,
> -				  struct amdgpu_ring *ring,
> -				  struct amdgpu_bo *bo,
> -				  struct reservation_object *resv,
> -				  struct dma_fence **fence,
> -				  bool direct)
> +int amdgpu_bo_restore_shadow(struct amdgpu_bo *shadow, struct dma_fence **fence)
>  
>  {
> -	struct amdgpu_bo *shadow = bo->shadow;
> -	uint64_t bo_addr, shadow_addr;
> -	int r;
> -
> -	if (!shadow)
> -		return -EINVAL;
> -
> -	bo_addr = amdgpu_bo_gpu_offset(bo);
> -	shadow_addr = amdgpu_bo_gpu_offset(bo->shadow);
> -
> -	r = reservation_object_reserve_shared(bo->tbo.resv);
> -	if (r)
> -		goto err;
> +	struct amdgpu_device *adev = amdgpu_ttm_adev(shadow->tbo.bdev);
> +	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
> +	uint64_t shadow_addr, parent_addr;
>  

May I know why won't need the reservation_object_reserve_shared() here? Is
it because we only copy buffer from shadow parent bo instead of orignal bo?

Thanks,
Ray

> -	r = amdgpu_copy_buffer(ring, shadow_addr, bo_addr,
> -			       amdgpu_bo_size(bo), resv, fence,
> -			       direct, false);
> -	if (!r)
> -		amdgpu_bo_fence(bo, *fence, true);
> +	shadow_addr = amdgpu_bo_gpu_offset(shadow);
> +	parent_addr = amdgpu_bo_gpu_offset(shadow->parent);
>  
> -err:
> -	return r;
> +	return amdgpu_copy_buffer(ring, shadow_addr, parent_addr,
> +				  amdgpu_bo_size(shadow), NULL, fence,
> +				  true, false);
>  }
>  
>  /**
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> index 64337ff2ad63..7d3312d0da11 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> @@ -273,12 +273,8 @@ int amdgpu_bo_backup_to_shadow(struct amdgpu_device *adev,
>  			       struct reservation_object *resv,
>  			       struct dma_fence **fence, bool direct);
>  int amdgpu_bo_validate(struct amdgpu_bo *bo);
> -int amdgpu_bo_restore_from_shadow(struct amdgpu_device *adev,
> -				  struct amdgpu_ring *ring,
> -				  struct amdgpu_bo *bo,
> -				  struct reservation_object *resv,
> -				  struct dma_fence **fence,
> -				  bool direct);
> +int amdgpu_bo_restore_shadow(struct amdgpu_bo *shadow,
> +			     struct dma_fence **fence);
>  uint32_t amdgpu_bo_get_preferred_pin_domain(struct amdgpu_device *adev,
>  					    uint32_t domain);
>  
> -- 
> 2.14.1
> 
> _______________________________________________
> amd-gfx mailing list
> amd-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx


More information about the amd-gfx mailing list