[PATCH] drm/amdgpu: Enable P2P dmabuf over XGMI

Thu Aug 6 09:09:43 UTC 2020

Am 06.08.20 um 11:04 schrieb Arunpravin:
> Access the exported P2P dmabuf over XGMI, if available.
> Otherwise, fall back to the existing PCIe method.
>
> Signed-off-by: Arunpravin <apaneers at amd.com>

Reviewed-by: Christian König <christian.koenig at amd.com>

> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 34 +++++++++++++++++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h |  2 ++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c      | 19 ++++++++++--
>   3 files changed, 52 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
> index ffeb20f11c07..589d008f91df 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
> @@ -35,6 +35,7 @@
>   #include "amdgpu_display.h"
>   #include "amdgpu_gem.h"
>   #include "amdgpu_dma_buf.h"
> +#include "amdgpu_xgmi.h"
>   #include <drm/amdgpu_drm.h>
>   #include <linux/dma-buf.h>
>   #include <linux/dma-fence-array.h>
> @@ -560,3 +561,36 @@ struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
>   	obj->import_attach = attach;
>   	return obj;
>   }
> +
> +/**
> + * amdgpu_dmabuf_is_xgmi_accessible - Check if XGMI is usable for P2P access
> + *
> + * @adev: amdgpu_device pointer of the importer
> + * @bo: amdgpu buffer object, the exporter's BO is used for amdgpu imports
> + *
> + * Returns:
> + * True if the BO prefers VRAM and passes amdgpu_xgmi_same_hive(), else false.
> + */
> +bool amdgpu_dmabuf_is_xgmi_accessible(struct amdgpu_device *adev,
> +				      struct amdgpu_bo *bo)
> +{
> +	struct drm_gem_object *obj = &bo->tbo.base;
> +	struct drm_gem_object *gobj;
> +
> +	if (obj->import_attach) {
> +		struct dma_buf *dma_buf = obj->import_attach->dmabuf;
> +
> +		if (dma_buf->ops != &amdgpu_dmabuf_ops)
> +			/* Not an amdgpu export: no XGMI with non-AMD GPUs */
> +			return false;
> +
> +		gobj = dma_buf->priv;
> +		bo = gem_to_amdgpu_bo(gobj);
> +	}
> +
> +	if (amdgpu_xgmi_same_hive(adev, amdgpu_ttm_adev(bo->tbo.bdev)) &&
> +			(bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM))
> +		return true;
> +
> +	return false;
> +}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h
> index ec447a7b6b28..2c5c84a06bb9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h
> @@ -29,6 +29,8 @@ struct dma_buf *amdgpu_gem_prime_export(struct drm_gem_object *gobj,
>   					int flags);
>   struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
>   					    struct dma_buf *dma_buf);
> +bool amdgpu_dmabuf_is_xgmi_accessible(struct amdgpu_device *adev,
> +				      struct amdgpu_bo *bo);
>   void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj);
>   void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr);
>   int amdgpu_gem_prime_mmap(struct drm_gem_object *obj,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 71e005cf2952..771c27478bb1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -28,6 +28,7 @@
>   #include <linux/dma-fence-array.h>
>   #include <linux/interval_tree_generic.h>
>   #include <linux/idr.h>
> +#include <linux/dma-buf.h>
>   
>   #include <drm/amdgpu_drm.h>
>   #include "amdgpu.h"
> @@ -35,6 +36,7 @@
>   #include "amdgpu_amdkfd.h"
>   #include "amdgpu_gmc.h"
>   #include "amdgpu_xgmi.h"
> +#include "amdgpu_dma_buf.h"
>   
>   /**
>    * DOC: GPUVM
> @@ -1778,15 +1780,24 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
>   		nodes = NULL;
>   		resv = vm->root.base.bo->tbo.base.resv;
>   	} else {
> +		struct drm_gem_object *obj = &bo->tbo.base;
>   		struct ttm_dma_tt *ttm;
>   
> +		resv = bo->tbo.base.resv;
> +		if (obj->import_attach && bo_va->is_xgmi) {
> +			struct dma_buf *dma_buf = obj->import_attach->dmabuf;
> +			struct drm_gem_object *gobj = dma_buf->priv;
> +			struct amdgpu_bo *abo = gem_to_amdgpu_bo(gobj);
> +
> +			if (abo->tbo.mem.mem_type == TTM_PL_VRAM)
> +				bo = gem_to_amdgpu_bo(gobj);
> +		}
>   		mem = &bo->tbo.mem;
>   		nodes = mem->mm_node;
>   		if (mem->mem_type == TTM_PL_TT) {
>   			ttm = container_of(bo->tbo.ttm, struct ttm_dma_tt, ttm);
>   			pages_addr = ttm->dma_address;
>   		}
> -		resv = bo->tbo.base.resv;
>   	}
>   
>   	if (bo) {
> @@ -2132,8 +2143,10 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
>   	INIT_LIST_HEAD(&bo_va->valids);
>   	INIT_LIST_HEAD(&bo_va->invalids);
>   
> -	if (bo && amdgpu_xgmi_same_hive(adev, amdgpu_ttm_adev(bo->tbo.bdev)) &&
> -	    (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM)) {
> +	if (!bo)
> +		return bo_va;
> +
> +	if (amdgpu_dmabuf_is_xgmi_accessible(adev, bo)) {
>   		bo_va->is_xgmi = true;
>   		/* Power up XGMI if it can be potentially used */
>   		amdgpu_xgmi_set_pstate(adev, AMDGPU_XGMI_PSTATE_MAX_VEGA20);