[PATCH] drm/amdgpu: extend lock range for race condition when gpu reset

Christian König deathsimple at vodafone.de
Fri May 5 12:04:04 UTC 2017


Reviewed-by: Christian König <christian.koenig at amd.com>

Am 05.05.2017 um 09:33 schrieb zhoucm1:
> Reviewed-by: Chunming Zhou <david1.zhou at amd.com>
>
> On 2017年05月05日 15:22, Roger.He wrote:
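>> Extend the gtt_list_lock range to cover the following case:
>> 1. A task performs a GART bind/unbind but has not yet added the entry
>>    to adev->gtt_list.
>> 2. A GPU reset happens at that moment; GTT recovery only restores the
>>    entries that are in adev->gtt_list.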
>>
>> Change-Id: Ifb4360e3b68624f2be67fa82100623cf4c451873
>> Signed-off-by: Roger.He <Hongbo.He at amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h      |  2 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c |  6 ++++--
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c  | 22 ++++++++++++++--------
>>   3 files changed, 19 insertions(+), 11 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> index 90a69bf..5310781 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> @@ -556,7 +556,7 @@ int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev);
>>   void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev);
>>   int amdgpu_gart_init(struct amdgpu_device *adev);
>>   void amdgpu_gart_fini(struct amdgpu_device *adev);
>> -void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
>> +int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
>>               int pages);
>>   int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
>>                int pages, struct page **pagelist,
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
>> index e7406ce..ccef3cf 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
>> @@ -221,8 +221,9 @@ void amdgpu_gart_table_vram_free(struct amdgpu_device *adev)
>>    *
>>    * Unbinds the requested pages from the gart page table and
>>    * replaces them with the dummy page (all asics).
>> + * Returns 0 for success, -EINVAL for failure.
>>    */
>> -void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
>> +int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
>>               int pages)
>>   {
>>       unsigned t;
>> @@ -234,7 +235,7 @@ void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
>>
>>       if (!adev->gart.ready) {
>>           WARN(1, "trying to unbind memory from uninitialized GART !\n");
>> -        return;
>> +        return -EINVAL;
>>       }
>>
>>       t = offset / AMDGPU_GPU_PAGE_SIZE;
>> @@ -255,6 +256,7 @@ void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
>>       }
>>       mb();
>>       amdgpu_gart_flush_gpu_tlb(adev, 0);
>> +    return 0;
>>   }
>>
>>   /**
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> index c3fb2f9..278f55b 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> @@ -745,6 +745,7 @@ int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *bo_mem)
>>           return r;
>>       }
>>
>> +    spin_lock(&gtt->adev->gtt_list_lock);
>>       flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem);
>>       gtt->offset = (u64)bo_mem->start << PAGE_SHIFT;
>>       r = amdgpu_gart_bind(gtt->adev, gtt->offset, ttm->num_pages,
>> @@ -753,12 +754,13 @@ int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *bo_mem)
>>       if (r) {
>>           DRM_ERROR("failed to bind %lu pages at 0x%08llX\n",
>>                 ttm->num_pages, gtt->offset);
>> -        return r;
>> +        goto error_gart_bind;
>>       }
>> -    spin_lock(&gtt->adev->gtt_list_lock);
>> +
>>       list_add_tail(&gtt->list, &gtt->adev->gtt_list);
>> +error_gart_bind:
>>       spin_unlock(&gtt->adev->gtt_list_lock);
>> -    return 0;
>> +    return r;
>>   }
>>
>>   int amdgpu_ttm_recover_gart(struct amdgpu_device *adev)
>> @@ -789,6 +791,7 @@ int amdgpu_ttm_recover_gart(struct amdgpu_device *adev)
>>   static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm)
>>   {
>>       struct amdgpu_ttm_tt *gtt = (void *)ttm;
>> +    int r;
>>
>>       if (gtt->userptr)
>>           amdgpu_ttm_tt_unpin_userptr(ttm);
>> @@ -797,14 +800,17 @@ static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm)
>>           return 0;
>>
>>       /* unbind shouldn't be done for GDS/GWS/OA in ttm_bo_clean_mm */
>> -    if (gtt->adev->gart.ready)
>> -        amdgpu_gart_unbind(gtt->adev, gtt->offset, ttm->num_pages);
>> -
>>       spin_lock(&gtt->adev->gtt_list_lock);
>> +    r = amdgpu_gart_unbind(gtt->adev, gtt->offset, ttm->num_pages);
>> +    if (r) {
>> +        DRM_ERROR("failed to unbind %lu pages at 0x%08llX\n",
>> +              gtt->ttm.ttm.num_pages, gtt->offset);
>> +        goto error_unbind;
>> +    }
>>       list_del_init(&gtt->list);
>> +error_unbind:
>>       spin_unlock(&gtt->adev->gtt_list_lock);
>> -
>> -    return 0;
>> +    return r;
>>   }
>>
>>   static void amdgpu_ttm_backend_destroy(struct ttm_tt *ttm)
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx




More information about the amd-gfx mailing list