[PATCH] drm/amdgpu: extend lock range for race condition when gpu reset

zhoucm1 david1.zhou at amd.com
Fri May 5 07:33:13 UTC 2017


Reviewed-by: Chunming Zhou <david1.zhou at amd.com>

On 2017年05月05日 15:22, Roger.He wrote:
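> Extend the range covered by gtt_list_lock to handle the following case:
> 1. A task binds/unbinds GART memory but has not yet updated adev->gtt_list.
> 2. A GPU reset happens at this point; GART recovery only restores the entries that are in adev->gtt_list.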
>
> Change-Id: Ifb4360e3b68624f2be67fa82100623cf4c451873
> Signed-off-by: Roger.He <Hongbo.He at amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h      |  2 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c |  6 ++++--
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c  | 22 ++++++++++++++--------
>   3 files changed, 19 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 90a69bf..5310781 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -556,7 +556,7 @@ int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev);
>   void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev);
>   int amdgpu_gart_init(struct amdgpu_device *adev);
>   void amdgpu_gart_fini(struct amdgpu_device *adev);
> -void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
> +int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
>   			int pages);
>   int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
>   		     int pages, struct page **pagelist,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
> index e7406ce..ccef3cf 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
> @@ -221,8 +221,9 @@ void amdgpu_gart_table_vram_free(struct amdgpu_device *adev)
>    *
>    * Unbinds the requested pages from the gart page table and
>    * replaces them with the dummy page (all asics).
> + * Returns 0 for success, -EINVAL for failure.
>    */
> -void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
> +int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
>   			int pages)
>   {
>   	unsigned t;
> @@ -234,7 +235,7 @@ void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
>   
>   	if (!adev->gart.ready) {
>   		WARN(1, "trying to unbind memory from uninitialized GART !\n");
> -		return;
> +		return -EINVAL;
>   	}
>   
>   	t = offset / AMDGPU_GPU_PAGE_SIZE;
> @@ -255,6 +256,7 @@ void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
>   	}
>   	mb();
>   	amdgpu_gart_flush_gpu_tlb(adev, 0);
> +	return 0;
>   }
>   
>   /**
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index c3fb2f9..278f55b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -745,6 +745,7 @@ int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *bo_mem)
>   		return r;
>   	}
>   
> +	spin_lock(&gtt->adev->gtt_list_lock);
>   	flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem);
>   	gtt->offset = (u64)bo_mem->start << PAGE_SHIFT;
>   	r = amdgpu_gart_bind(gtt->adev, gtt->offset, ttm->num_pages,
> @@ -753,12 +754,13 @@ int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *bo_mem)
>   	if (r) {
>   		DRM_ERROR("failed to bind %lu pages at 0x%08llX\n",
>   			  ttm->num_pages, gtt->offset);
> -		return r;
> +		goto error_gart_bind;
>   	}
> -	spin_lock(&gtt->adev->gtt_list_lock);
> +
>   	list_add_tail(&gtt->list, &gtt->adev->gtt_list);
> +error_gart_bind:
>   	spin_unlock(&gtt->adev->gtt_list_lock);
> -	return 0;
> +	return r;
>   }
>   
>   int amdgpu_ttm_recover_gart(struct amdgpu_device *adev)
> @@ -789,6 +791,7 @@ int amdgpu_ttm_recover_gart(struct amdgpu_device *adev)
>   static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm)
>   {
>   	struct amdgpu_ttm_tt *gtt = (void *)ttm;
> +	int r;
>   
>   	if (gtt->userptr)
>   		amdgpu_ttm_tt_unpin_userptr(ttm);
> @@ -797,14 +800,17 @@ static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm)
>   		return 0;
>   
>   	/* unbind shouldn't be done for GDS/GWS/OA in ttm_bo_clean_mm */
> -	if (gtt->adev->gart.ready)
> -		amdgpu_gart_unbind(gtt->adev, gtt->offset, ttm->num_pages);
> -
>   	spin_lock(&gtt->adev->gtt_list_lock);
> +	r = amdgpu_gart_unbind(gtt->adev, gtt->offset, ttm->num_pages);
> +	if (r) {
> +		DRM_ERROR("failed to unbind %lu pages at 0x%08llX\n",
> +			  gtt->ttm.ttm.num_pages, gtt->offset);
> +		goto error_unbind;
> +	}
>   	list_del_init(&gtt->list);
> +error_unbind:
>   	spin_unlock(&gtt->adev->gtt_list_lock);
> -
> -	return 0;
> +	return r;
>   }
>   
>   static void amdgpu_ttm_backend_destroy(struct ttm_tt *ttm)



More information about the amd-gfx mailing list