[PATCH v4 07/14] drm/amdgpu: Register IOMMU topology notifier per device.

Christian König ckoenig.leichtzumerken at gmail.com
Tue Jan 19 08:48:03 UTC 2021


Am 18.01.21 um 22:01 schrieb Andrey Grodzovsky:
> Handle all DMA IOMMU gropup related dependencies before the
> group is removed.
>
> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky at amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  5 ++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 46 ++++++++++++++++++++++++++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c   |  2 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h   |  1 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 10 +++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.h |  2 ++
>   6 files changed, 65 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 478a7d8..2953420 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -51,6 +51,7 @@
>   #include <linux/dma-fence.h>
>   #include <linux/pci.h>
>   #include <linux/aer.h>
> +#include <linux/notifier.h>
>   
>   #include <drm/ttm/ttm_bo_api.h>
>   #include <drm/ttm/ttm_bo_driver.h>
> @@ -1041,6 +1042,10 @@ struct amdgpu_device {
>   
>   	bool                            in_pci_err_recovery;
>   	struct pci_saved_state          *pci_state;
> +
> +	struct notifier_block		nb;
> +	struct blocking_notifier_head	notifier;
> +	struct list_head		device_bo_list;
>   };
>   
>   static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 45e23e3..e99f4f1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -70,6 +70,8 @@
>   #include <drm/task_barrier.h>
>   #include <linux/pm_runtime.h>
>   
> +#include <linux/iommu.h>
> +
>   MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
>   MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
>   MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
> @@ -3200,6 +3202,39 @@ static const struct attribute *amdgpu_dev_attributes[] = {
>   };
>   
>   
> +static int amdgpu_iommu_group_notifier(struct notifier_block *nb,
> +				     unsigned long action, void *data)
> +{
> +	struct amdgpu_device *adev = container_of(nb, struct amdgpu_device, nb);
> +	struct amdgpu_bo *bo = NULL;
> +
> +	/*
> +	 * Following is a set of IOMMU group dependencies taken care of before
> +	 * device's IOMMU group is removed
> +	 */
> +	if (action == IOMMU_GROUP_NOTIFY_DEL_DEVICE) {
> +
> +		spin_lock(&ttm_bo_glob.lru_lock);
> +		list_for_each_entry(bo, &adev->device_bo_list, bo) {
> +			if (bo->tbo.ttm)
> +				ttm_tt_unpopulate(bo->tbo.bdev, bo->tbo.ttm);
> +		}
> +		spin_unlock(&ttm_bo_glob.lru_lock);

That approach won't work. ttm_tt_unpopulate() might sleep on an IOMMU lock.

You need to use a mutex here or even better make sure you can access the 
device_bo_list without a lock in this moment.

Christian.

> +
> +		if (adev->irq.ih.use_bus_addr)
> +			amdgpu_ih_ring_fini(adev, &adev->irq.ih);
> +		if (adev->irq.ih1.use_bus_addr)
> +			amdgpu_ih_ring_fini(adev, &adev->irq.ih1);
> +		if (adev->irq.ih2.use_bus_addr)
> +			amdgpu_ih_ring_fini(adev, &adev->irq.ih2);
> +
> +		amdgpu_gart_dummy_page_fini(adev);
> +	}
> +
> +	return NOTIFY_OK;
> +}
> +
> +
>   /**
>    * amdgpu_device_init - initialize the driver
>    *
> @@ -3304,6 +3339,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>   
>   	INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
>   
> +	INIT_LIST_HEAD(&adev->device_bo_list);
> +
>   	adev->gfx.gfx_off_req_count = 1;
>   	adev->pm.ac_power = power_supply_is_system_supplied() > 0;
>   
> @@ -3575,6 +3612,15 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>   	if (amdgpu_device_cache_pci_state(adev->pdev))
>   		pci_restore_state(pdev);
>   
> +	BLOCKING_INIT_NOTIFIER_HEAD(&adev->notifier);
> +	adev->nb.notifier_call = amdgpu_iommu_group_notifier;
> +
> +	if (adev->dev->iommu_group) {
> +		r = iommu_group_register_notifier(adev->dev->iommu_group, &adev->nb);
> +		if (r)
> +			goto failed;
> +	}
> +
>   	return 0;
>   
>   failed:
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
> index 0db9330..486ad6d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
> @@ -92,7 +92,7 @@ static int amdgpu_gart_dummy_page_init(struct amdgpu_device *adev)
>    *
>    * Frees the dummy page used by the driver (all asics).
>    */
> -static void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev)
> +void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev)
>   {
>   	if (!adev->dummy_page_addr)
>   		return;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
> index afa2e28..5678d9c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
> @@ -61,6 +61,7 @@ int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev);
>   void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev);
>   int amdgpu_gart_init(struct amdgpu_device *adev);
>   void amdgpu_gart_fini(struct amdgpu_device *adev);
> +void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev);
>   int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
>   		       int pages);
>   int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> index 6cc9919..4a1de69 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> @@ -94,6 +94,10 @@ static void amdgpu_bo_destroy(struct ttm_buffer_object *tbo)
>   	}
>   	amdgpu_bo_unref(&bo->parent);
>   
> +	spin_lock(&ttm_bo_glob.lru_lock);
> +	list_del(&bo->bo);
> +	spin_unlock(&ttm_bo_glob.lru_lock);
> +
>   	kfree(bo->metadata);
>   	kfree(bo);
>   }
> @@ -613,6 +617,12 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
>   	if (bp->type == ttm_bo_type_device)
>   		bo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
>   
> +	INIT_LIST_HEAD(&bo->bo);
> +
> +	spin_lock(&ttm_bo_glob.lru_lock);
> +	list_add_tail(&bo->bo, &adev->device_bo_list);
> +	spin_unlock(&ttm_bo_glob.lru_lock);
> +
>   	return 0;
>   
>   fail_unreserve:
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> index 9ac3756..5ae8555 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> @@ -110,6 +110,8 @@ struct amdgpu_bo {
>   	struct list_head		shadow_list;
>   
>   	struct kgd_mem                  *kfd_bo;
> +
> +	struct list_head		bo;
>   };
>   
>   static inline struct amdgpu_bo *ttm_to_amdgpu_bo(struct ttm_buffer_object *tbo)



More information about the dri-devel mailing list