[PATCH v4 07/14] drm/amdgpu: Register IOMMU topology notifier per device.
Christian König
christian.koenig at amd.com
Thu Jan 21 10:42:34 UTC 2021
Am 20.01.21 um 20:38 schrieb Andrey Grodzovsky:
> Ping
>
> Andrey
>
> On 1/20/21 12:01 AM, Andrey Grodzovsky wrote:
>>
>> On 1/19/21 3:48 AM, Christian König wrote:
>>> Am 18.01.21 um 22:01 schrieb Andrey Grodzovsky:
>>>> Handle all DMA IOMMU group related dependencies before the
>>>> group is removed.
>>>>
>>>> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky at amd.com>
>>>> ---
>>>> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 5 ++++
>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 46
>>>> ++++++++++++++++++++++++++++++
>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c | 2 +-
>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h | 1 +
>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 10 +++++++
>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 2 ++
>>>> 6 files changed, 65 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>> index 478a7d8..2953420 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>> @@ -51,6 +51,7 @@
>>>> #include <linux/dma-fence.h>
>>>> #include <linux/pci.h>
>>>> #include <linux/aer.h>
>>>> +#include <linux/notifier.h>
>>>> #include <drm/ttm/ttm_bo_api.h>
>>>> #include <drm/ttm/ttm_bo_driver.h>
>>>> @@ -1041,6 +1042,10 @@ struct amdgpu_device {
>>>> bool in_pci_err_recovery;
>>>> struct pci_saved_state *pci_state;
>>>> +
>>>> + struct notifier_block nb;
>>>> + struct blocking_notifier_head notifier;
>>>> + struct list_head device_bo_list;
>>>> };
>>>> static inline struct amdgpu_device *drm_to_adev(struct
>>>> drm_device *ddev)
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>> index 45e23e3..e99f4f1 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>> @@ -70,6 +70,8 @@
>>>> #include <drm/task_barrier.h>
>>>> #include <linux/pm_runtime.h>
>>>> +#include <linux/iommu.h>
>>>> +
>>>> MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
>>>> MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
>>>> MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
>>>> @@ -3200,6 +3202,39 @@ static const struct attribute
>>>> *amdgpu_dev_attributes[] = {
>>>> };
>>>> +static int amdgpu_iommu_group_notifier(struct notifier_block *nb,
>>>> + unsigned long action, void *data)
>>>> +{
>>>> + struct amdgpu_device *adev = container_of(nb, struct
>>>> amdgpu_device, nb);
>>>> + struct amdgpu_bo *bo = NULL;
>>>> +
>>>> + /*
>>>> + * Following is a set of IOMMU group dependencies taken care
>>>> of before
>>>> + * device's IOMMU group is removed
>>>> + */
>>>> + if (action == IOMMU_GROUP_NOTIFY_DEL_DEVICE) {
>>>> +
>>>> + spin_lock(&ttm_bo_glob.lru_lock);
>>>> + list_for_each_entry(bo, &adev->device_bo_list, bo) {
>>>> + if (bo->tbo.ttm)
>>>> + ttm_tt_unpopulate(bo->tbo.bdev, bo->tbo.ttm);
>>>> + }
>>>> + spin_unlock(&ttm_bo_glob.lru_lock);
>>>
>>> That approach won't work. ttm_tt_unpopulate() might sleep on an
>>> IOMMU lock.
>>>
>>> You need to use a mutex here or, even better, make sure you can access
>>> the device_bo_list without a lock at this moment.
>>>
>>> Christian.
>>
>>
>> I can think of switching to an RCU list? Otherwise, elements are added
>> on BO create and deleted on BO destroy; how can I prevent any of
>> those from
>> happening while in this section besides a mutex? Make a copy of the list and
>> run over it instead?
RCU won't work since the BO is not RCU protected.
What you can try is something like this:
spin_lock(&ttm_bo_glob.lru_lock);
while (list_not_empty(&adev->device_bo_list)) {
bo = list_first_entry(&adev->device_bo_list);
list_del(bo->...);
spin_unlock(&ttm_bo_glob.lru_lock);
ttm_tt_unpopulate(bo);
spin_lock(&ttm_bo_glob.lru_lock);
}...
Regards,
Christian.
>>
>> Andrey
>>
>>
>>>
>>>> +
>>>> + if (adev->irq.ih.use_bus_addr)
>>>> + amdgpu_ih_ring_fini(adev, &adev->irq.ih);
>>>> + if (adev->irq.ih1.use_bus_addr)
>>>> + amdgpu_ih_ring_fini(adev, &adev->irq.ih1);
>>>> + if (adev->irq.ih2.use_bus_addr)
>>>> + amdgpu_ih_ring_fini(adev, &adev->irq.ih2);
>>>> +
>>>> + amdgpu_gart_dummy_page_fini(adev);
>>>> + }
>>>> +
>>>> + return NOTIFY_OK;
>>>> +}
>>>> +
>>>> +
>>>> /**
>>>> * amdgpu_device_init - initialize the driver
>>>> *
>>>> @@ -3304,6 +3339,8 @@ int amdgpu_device_init(struct amdgpu_device
>>>> *adev,
>>>> INIT_WORK(&adev->xgmi_reset_work,
>>>> amdgpu_device_xgmi_reset_func);
>>>> + INIT_LIST_HEAD(&adev->device_bo_list);
>>>> +
>>>> adev->gfx.gfx_off_req_count = 1;
>>>> adev->pm.ac_power = power_supply_is_system_supplied() > 0;
>>>> @@ -3575,6 +3612,15 @@ int amdgpu_device_init(struct
>>>> amdgpu_device *adev,
>>>> if (amdgpu_device_cache_pci_state(adev->pdev))
>>>> pci_restore_state(pdev);
>>>> + BLOCKING_INIT_NOTIFIER_HEAD(&adev->notifier);
>>>> + adev->nb.notifier_call = amdgpu_iommu_group_notifier;
>>>> +
>>>> + if (adev->dev->iommu_group) {
>>>> + r = iommu_group_register_notifier(adev->dev->iommu_group,
>>>> &adev->nb);
>>>> + if (r)
>>>> + goto failed;
>>>> + }
>>>> +
>>>> return 0;
>>>> failed:
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
>>>> index 0db9330..486ad6d 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
>>>> @@ -92,7 +92,7 @@ static int amdgpu_gart_dummy_page_init(struct
>>>> amdgpu_device *adev)
>>>> *
>>>> * Frees the dummy page used by the driver (all asics).
>>>> */
>>>> -static void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev)
>>>> +void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev)
>>>> {
>>>> if (!adev->dummy_page_addr)
>>>> return;
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
>>>> index afa2e28..5678d9c 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
>>>> @@ -61,6 +61,7 @@ int amdgpu_gart_table_vram_pin(struct
>>>> amdgpu_device *adev);
>>>> void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev);
>>>> int amdgpu_gart_init(struct amdgpu_device *adev);
>>>> void amdgpu_gart_fini(struct amdgpu_device *adev);
>>>> +void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev);
>>>> int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
>>>> int pages);
>>>> int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>>> index 6cc9919..4a1de69 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
>>>> @@ -94,6 +94,10 @@ static void amdgpu_bo_destroy(struct
>>>> ttm_buffer_object *tbo)
>>>> }
>>>> amdgpu_bo_unref(&bo->parent);
>>>> + spin_lock(&ttm_bo_glob.lru_lock);
>>>> + list_del(&bo->bo);
>>>> + spin_unlock(&ttm_bo_glob.lru_lock);
>>>> +
>>>> kfree(bo->metadata);
>>>> kfree(bo);
>>>> }
>>>> @@ -613,6 +617,12 @@ static int amdgpu_bo_do_create(struct
>>>> amdgpu_device *adev,
>>>> if (bp->type == ttm_bo_type_device)
>>>> bo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
>>>> + INIT_LIST_HEAD(&bo->bo);
>>>> +
>>>> + spin_lock(&ttm_bo_glob.lru_lock);
>>>> + list_add_tail(&bo->bo, &adev->device_bo_list);
>>>> + spin_unlock(&ttm_bo_glob.lru_lock);
>>>> +
>>>> return 0;
>>>> fail_unreserve:
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>>>> index 9ac3756..5ae8555 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
>>>> @@ -110,6 +110,8 @@ struct amdgpu_bo {
>>>> struct list_head shadow_list;
>>>> struct kgd_mem *kfd_bo;
>>>> +
>>>> + struct list_head bo;
>>>> };
>>>> static inline struct amdgpu_bo *ttm_to_amdgpu_bo(struct
>>>> ttm_buffer_object *tbo)
>>>
>>> _______________________________________________
>>> amd-gfx mailing list
>>> amd-gfx at lists.freedesktop.org
>>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&data=04%7C01%7Candrey.grodzovsky%40amd.com%7C0c703eb6e73744962d3b08d8bc56f303%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637466428923905672%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&sdata=2Tkz4EMOEwFLQJUOk1ixd28c2ad1HqjBVIDO%2FX0OgqM%3D&reserved=0
>>>
More information about the amd-gfx
mailing list