[PATCH 3/4] drm/amdgpu: drm/amdgpu: Handle IOMMU enabled case

Andrey Grodzovsky andrey.grodzovsky at amd.com
Wed Aug 25 15:36:22 UTC 2021


On 2021-08-25 2:43 a.m., Christian König wrote:
>
>
> Am 24.08.21 um 23:01 schrieb Andrey Grodzovsky:
>> Handle all DMA IOMMU group related dependencies before the
>> group is removed and we try to access it after free.
>>
>> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky at amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  2 +
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c    | 50 ++++++++++++++++++++++
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h    |  1 +
>>   3 files changed, 53 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> index 0b5764aa98a4..288a465b8101 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> @@ -3860,6 +3860,8 @@ void amdgpu_device_fini_hw(struct amdgpu_device 
>> *adev)
>>         amdgpu_device_ip_fini_early(adev);
>>   +    amdgpu_ttm_clear_dma_mappings(adev);
>> +
>>       amdgpu_gart_dummy_page_fini(adev);
>>         amdgpu_device_unmap_mmio(adev);
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> index 446943e32e3e..f73d807db3b0 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> @@ -64,6 +64,7 @@
>>   static int amdgpu_ttm_backend_bind(struct ttm_device *bdev,
>>                      struct ttm_tt *ttm,
>>                      struct ttm_resource *bo_mem);
>> +
>>   static void amdgpu_ttm_backend_unbind(struct ttm_device *bdev,
>>                         struct ttm_tt *ttm);
>>   @@ -2293,6 +2294,55 @@ static ssize_t amdgpu_iomem_write(struct 
>> file *f, const char __user *buf,
>>       return result;
>>   }
>>   +void amdgpu_ttm_clear_dma_mappings(struct amdgpu_device *adev)
>
> I strongly think that this function should be part of TTM. Something 
> like ttm_device_force_unpopulate.


Yes, this something I also wanted but see bellow


>
>> +{
>> +    struct ttm_device *bdev = &adev->mman.bdev;
>> +    struct ttm_resource_manager *man;
>> +    struct ttm_buffer_object *bo;
>> +    unsigned int i, j;
>> +
>> +    spin_lock(&bdev->lru_lock);
>> +    for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) {
>> +        man = ttm_manager_type(bdev, i);
>> +        if (!man || !man->use_tt)
>> +            continue;
>> +
>> +        while (!list_empty(&man->pinned)) {
>> +            bo = list_first_entry(&man->pinned, struct 
>> ttm_buffer_object, lru);
>> +            /* Take ref against racing releases once lru_lock is 
>> unlocked */
>> +            ttm_bo_get(bo);
>> +            list_del_init(&bo->lru);
>> +            spin_unlock(&bdev->lru_lock);
>> +
>> +            if (bo->ttm) {
>> +                amdgpu_ttm_backend_unbind(bo->bdev, bo->ttm);


amdgpu_ttm_backend_unbind is needed to be called separately from 
ttm_tt_unpopulate to take care of code
flows that do dma mapping though the gart bind and not through 
ttm_tt_populate, Since it's inside amdgpu
i had to place the entire function in amdgpu. Any suggestions ?

Andrey


>> + ttm_tt_destroy_common(bo->bdev, bo->ttm);
>
> Then you can also cleanly use ttm_tt_unpopulate here, cause this will 
> result in incorrect statistics inside TTM atm.
>
> Regards,
> Christian.
>
>> +            }
>> +
>> +            ttm_bo_put(bo);
>> +            spin_lock(&bdev->lru_lock);
>> +        }
>> +
>> +        for (j = 0; j < TTM_MAX_BO_PRIORITY; ++j) {
>> +            while (!list_empty(&man->lru[j])) {
>> +                bo = list_first_entry(&man->lru[j], struct 
>> ttm_buffer_object, lru);
>> +                ttm_bo_get(bo);
>> +                list_del_init(&bo->lru);
>> +                spin_unlock(&bdev->lru_lock);
>> +
>> +                if (bo->ttm) {
>> +                    amdgpu_ttm_backend_unbind(bo->bdev, bo->ttm);
>> +                    ttm_tt_destroy_common(bo->bdev, bo->ttm);
>> +                }
>> +                ttm_bo_put(bo);
>> +                spin_lock(&bdev->lru_lock);
>> +            }
>> +        }
>> +    }
>> +    spin_unlock(&bdev->lru_lock);
>> +
>> +}
>> +
>>   static const struct file_operations amdgpu_ttm_iomem_fops = {
>>       .owner = THIS_MODULE,
>>       .read = amdgpu_iomem_read,
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>> index e69f3e8e06e5..02c8eac48a64 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>> @@ -190,6 +190,7 @@ bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm);
>>   uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct 
>> ttm_resource *mem);
>>   uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct 
>> ttm_tt *ttm,
>>                    struct ttm_resource *mem);
>> +void amdgpu_ttm_clear_dma_mappings(struct amdgpu_device *adev);
>>     void amdgpu_ttm_debugfs_init(struct amdgpu_device *adev);
>


More information about the amd-gfx mailing list