[PATCH v4 07/14] drm/amdgpu: Register IOMMU topology notifier per device.

Andrey Grodzovsky Andrey.Grodzovsky at amd.com
Wed Jan 20 05:01:11 UTC 2021


On 1/19/21 3:48 AM, Christian König wrote:
> Am 18.01.21 um 22:01 schrieb Andrey Grodzovsky:
>> Handle all DMA IOMMU group-related dependencies before the
>> group is removed.
>>
>> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky at amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  5 ++++
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 46 ++++++++++++++++++++++++++++++
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c   |  2 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h   |  1 +
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 10 +++++++
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.h |  2 ++
>>   6 files changed, 65 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> index 478a7d8..2953420 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> @@ -51,6 +51,7 @@
>>   #include <linux/dma-fence.h>
>>   #include <linux/pci.h>
>>   #include <linux/aer.h>
>> +#include <linux/notifier.h>
>>     #include <drm/ttm/ttm_bo_api.h>
>>   #include <drm/ttm/ttm_bo_driver.h>
>> @@ -1041,6 +1042,10 @@ struct amdgpu_device {
>>         bool                            in_pci_err_recovery;
>>       struct pci_saved_state          *pci_state;
>> +
>> +    struct notifier_block        nb;
>> +    struct blocking_notifier_head    notifier;
>> +    struct list_head        device_bo_list;
>>   };
>>     static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev)
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> index 45e23e3..e99f4f1 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> @@ -70,6 +70,8 @@
>>   #include <drm/task_barrier.h>
>>   #include <linux/pm_runtime.h>
>>   +#include <linux/iommu.h>
>> +
>>   MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
>>   MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
>>   MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
>> @@ -3200,6 +3202,39 @@ static const struct attribute *amdgpu_dev_attributes[] 
>> = {
>>   };
>>     +static int amdgpu_iommu_group_notifier(struct notifier_block *nb,
>> +                     unsigned long action, void *data)
>> +{
>> +    struct amdgpu_device *adev = container_of(nb, struct amdgpu_device, nb);
>> +    struct amdgpu_bo *bo = NULL;
>> +
>> +    /*
>> +     * Following is a set of IOMMU group dependencies taken care of before
>> +     * device's IOMMU group is removed
>> +     */
>> +    if (action == IOMMU_GROUP_NOTIFY_DEL_DEVICE) {
>> +
>> +        spin_lock(&ttm_bo_glob.lru_lock);
>> +        list_for_each_entry(bo, &adev->device_bo_list, bo) {
>> +            if (bo->tbo.ttm)
>> +                ttm_tt_unpopulate(bo->tbo.bdev, bo->tbo.ttm);
>> +        }
>> +        spin_unlock(&ttm_bo_glob.lru_lock);
>
> That approach won't work. ttm_tt_unpopulate() might sleep on an IOMMU lock.
>
> You need to use a mutex here or even better make sure you can access the 
> device_bo_list without a lock in this moment.
>
> Christian.


I can think of switching to an RCU list? Otherwise, elements are added
on BO create and deleted on BO destroy — how can I prevent either of those from
happening while in this section, other than with a mutex? Or should I make a copy
of the list and iterate over that instead?

Andrey


>
>> +
>> +        if (adev->irq.ih.use_bus_addr)
>> +            amdgpu_ih_ring_fini(adev, &adev->irq.ih);
>> +        if (adev->irq.ih1.use_bus_addr)
>> +            amdgpu_ih_ring_fini(adev, &adev->irq.ih1);
>> +        if (adev->irq.ih2.use_bus_addr)
>> +            amdgpu_ih_ring_fini(adev, &adev->irq.ih2);
>> +
>> +        amdgpu_gart_dummy_page_fini(adev);
>> +    }
>> +
>> +    return NOTIFY_OK;
>> +}
>> +
>> +
>>   /**
>>    * amdgpu_device_init - initialize the driver
>>    *
>> @@ -3304,6 +3339,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>>         INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
>>   +    INIT_LIST_HEAD(&adev->device_bo_list);
>> +
>>       adev->gfx.gfx_off_req_count = 1;
>>       adev->pm.ac_power = power_supply_is_system_supplied() > 0;
>>   @@ -3575,6 +3612,15 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>>       if (amdgpu_device_cache_pci_state(adev->pdev))
>>           pci_restore_state(pdev);
>>   +    BLOCKING_INIT_NOTIFIER_HEAD(&adev->notifier);
>> +    adev->nb.notifier_call = amdgpu_iommu_group_notifier;
>> +
>> +    if (adev->dev->iommu_group) {
>> +        r = iommu_group_register_notifier(adev->dev->iommu_group, &adev->nb);
>> +        if (r)
>> +            goto failed;
>> +    }
>> +
>>       return 0;
>>     failed:
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
>> index 0db9330..486ad6d 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
>> @@ -92,7 +92,7 @@ static int amdgpu_gart_dummy_page_init(struct amdgpu_device 
>> *adev)
>>    *
>>    * Frees the dummy page used by the driver (all asics).
>>    */
>> -static void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev)
>> +void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev)
>>   {
>>       if (!adev->dummy_page_addr)
>>           return;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
>> index afa2e28..5678d9c 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
>> @@ -61,6 +61,7 @@ int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev);
>>   void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev);
>>   int amdgpu_gart_init(struct amdgpu_device *adev);
>>   void amdgpu_gart_fini(struct amdgpu_device *adev);
>> +void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev);
>>   int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
>>                  int pages);
>>   int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> index 6cc9919..4a1de69 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>> @@ -94,6 +94,10 @@ static void amdgpu_bo_destroy(struct ttm_buffer_object *tbo)
>>       }
>>       amdgpu_bo_unref(&bo->parent);
>>   +    spin_lock(&ttm_bo_glob.lru_lock);
>> +    list_del(&bo->bo);
>> +    spin_unlock(&ttm_bo_glob.lru_lock);
>> +
>>       kfree(bo->metadata);
>>       kfree(bo);
>>   }
>> @@ -613,6 +617,12 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
>>       if (bp->type == ttm_bo_type_device)
>>           bo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
>>   +    INIT_LIST_HEAD(&bo->bo);
>> +
>> +    spin_lock(&ttm_bo_glob.lru_lock);
>> +    list_add_tail(&bo->bo, &adev->device_bo_list);
>> +    spin_unlock(&ttm_bo_glob.lru_lock);
>> +
>>       return 0;
>>     fail_unreserve:
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>> index 9ac3756..5ae8555 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>> @@ -110,6 +110,8 @@ struct amdgpu_bo {
>>       struct list_head        shadow_list;
>>         struct kgd_mem                  *kfd_bo;
>> +
>> +    struct list_head        bo;
>>   };
>>     static inline struct amdgpu_bo *ttm_to_amdgpu_bo(struct ttm_buffer_object 
>> *tbo)
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx at lists.freedesktop.org
> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&data=04%7C01%7Candrey.grodzovsky%40amd.com%7C0c703eb6e73744962d3b08d8bc56f303%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637466428923905672%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&sdata=2Tkz4EMOEwFLQJUOk1ixd28c2ad1HqjBVIDO%2FX0OgqM%3D&reserved=0 
>


More information about the amd-gfx mailing list