[PATCH] drm/amdgpu: Add EXT_COHERENCE memory allocation flags

David Francis David.Francis at amd.com
Mon Jul 24 14:17:02 UTC 2023


The cover letter got lost, so here it is:

This is in support of a RCCL change that requires specific
coherence behaviour.

Corresponding Thunk patch is at
https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface/pull/88

> These flags (for GEM and SVM allocations) allocate
> memory that allows for system-scope atomic semantics.
>
> On GFX943 these flags cause caches to be avoided on
> non-local memory.
>
> On all other ASICs they are identical in functionality to the
> equivalent COHERENT flags.
>
> Signed-off-by: David Francis <David.Francis at amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c |  2 ++
>   drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c           |  1 +
>   drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c           |  1 +
>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c            |  5 ++++-
>   drivers/gpu/drm/amd/amdkfd/kfd_svm.c             | 10 +++++++++-
>   include/uapi/drm/amdgpu_drm.h                    |  7 +++++++
>   include/uapi/linux/kfd_ioctl.h                   |  3 +++
>   7 files changed, 27 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index 9e18fe5eb190..67634e9f6466 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -1790,6 +1790,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
>   
>   	if (flags & KFD_IOC_ALLOC_MEM_FLAGS_COHERENT)
>   		alloc_flags |= AMDGPU_GEM_CREATE_COHERENT;
> +	if (flags & KFD_IOC_ALLOC_MEM_FLAGS_EXT_COHERENCE)
> +		alloc_flags |= AMDGPU_GEM_CREATE_EXT_COHERENCE;
>   	if (flags & KFD_IOC_ALLOC_MEM_FLAGS_UNCACHED)
>   		alloc_flags |= AMDGPU_GEM_CREATE_UNCACHED;
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> index 1c07459e2bd2..6a6f6068bea0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> @@ -631,6 +631,7 @@ static void gmc_v10_0_get_vm_pte(struct amdgpu_device *adev,
>   	}
>   
>   	if (bo && bo->flags & (AMDGPU_GEM_CREATE_COHERENT |
> +			       AMDGPU_GEM_CREATE_EXT_COHERENCE |
>   			       AMDGPU_GEM_CREATE_UNCACHED))
>   		*flags = (*flags & ~AMDGPU_PTE_MTYPE_NV10_MASK) |
>   			 AMDGPU_PTE_MTYPE_NV10(MTYPE_UC);
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
> index a6ee0220db56..ff330c7c0232 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
> @@ -540,6 +540,7 @@ static void gmc_v11_0_get_vm_pte(struct amdgpu_device *adev,
>   	}
>   
>   	if (bo && bo->flags & (AMDGPU_GEM_CREATE_COHERENT |
> +			       AMDGPU_GEM_CREATE_EXT_COHERENCE |
>   			       AMDGPU_GEM_CREATE_UNCACHED))
>   		*flags = (*flags & ~AMDGPU_PTE_MTYPE_NV10_MASK) |
>   			 AMDGPU_PTE_MTYPE_NV10(MTYPE_UC);
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index 8447fcada8bb..17cf19c868e1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@ -1180,7 +1180,8 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev,
>   {
>   	struct amdgpu_device *bo_adev = amdgpu_ttm_adev(bo->tbo.bdev);
>   	bool is_vram = bo->tbo.resource->mem_type == TTM_PL_VRAM;
> -	bool coherent = bo->flags & AMDGPU_GEM_CREATE_COHERENT;
> +	bool coherent = bo->flags & (AMDGPU_GEM_CREATE_COHERENT | AMDGPU_GEM_CREATE_EXT_COHERENCE);
> +	bool ext_coherence = bo->flags & AMDGPU_GEM_CREATE_EXT_COHERENCE;
>   	bool uncached = bo->flags & AMDGPU_GEM_CREATE_UNCACHED;
>   	struct amdgpu_vm *vm = mapping->bo_va->base.vm;
>   	unsigned int mtype_local, mtype;
> @@ -1248,6 +1249,8 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev,
>   		snoop = true;
>   		if (uncached) {
>   			mtype = MTYPE_UC;
> +		} else if (ext_coherence) {
> +			mtype = is_local ? MTYPE_CC : MTYPE_UC;
>   		} else if (adev->flags & AMD_IS_APU) {
>   			mtype = is_local ? mtype_local : MTYPE_NC;
>   		} else {
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> index 909f1ef8927d..acb87b2fe8df 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
> @@ -1157,7 +1157,8 @@ svm_range_get_pte_flags(struct kfd_node *node,
>   	uint32_t mapping_flags = 0;
>   	uint64_t pte_flags;
>   	bool snoop = (domain != SVM_RANGE_VRAM_DOMAIN);
> -	bool coherent = flags & KFD_IOCTL_SVM_FLAG_COHERENT;
> +	bool coherent = flags & (KFD_IOCTL_SVM_FLAG_COHERENT | KFD_IOCTL_SVM_FLAG_EXT_COHERENCE);
> +	bool ext_coherence = flags & KFD_IOCTL_SVM_FLAG_EXT_COHERENCE;
>   	bool uncached = false; /*flags & KFD_IOCTL_SVM_FLAG_UNCACHED;*/
>   	unsigned int mtype_local;
>   
> @@ -1205,6 +1206,13 @@ svm_range_get_pte_flags(struct kfd_node *node,
>   		snoop = true;
>   		if (uncached) {
>   			mapping_flags |= AMDGPU_VM_MTYPE_UC;
> +		} else if (ext_coherence) {
> +			/* local HBM region close to partition */
> +			if (bo_node->adev == node->adev &&
> +			    (!bo_node->xcp || !node->xcp || bo_node->xcp->mem_id == node->xcp->mem_id))
> +				mapping_flags |= AMDGPU_VM_MTYPE_CC;
> +			else
> +				mapping_flags |= AMDGPU_VM_MTYPE_UC;
>   		} else if (domain == SVM_RANGE_VRAM_DOMAIN) {
>   			/* local HBM region close to partition */
>   			if (bo_node->adev == node->adev &&
> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> index 3b98635bdfa8..7dbee3e3d9d1 100644
> --- a/include/uapi/drm/amdgpu_drm.h
> +++ b/include/uapi/drm/amdgpu_drm.h
> @@ -173,6 +173,13 @@ extern "C" {
>    * may override the MTYPE selected in AMDGPU_VA_OP_MAP.
>    */
>   #define AMDGPU_GEM_CREATE_UNCACHED		(1 << 14)
> +/* Flag that BO should be coherent across devices when using device-level
> + * atomics. May depend on GPU instructions to flush caches explicitly.
> + *
> + * This influences the choice of MTYPE in the PTEs on GFXv9 and later GPUs and
> + * may override the MTYPE selected in AMDGPU_VA_OP_MAP.
> + */
> +#define AMDGPU_GEM_CREATE_EXT_COHERENCE		(1 << 15)
>   
>   /* hybrid specific */
>   /* Flag that the memory should be in SPARSE resource */
> diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
> index e467ac678ba9..7a4041ee5951 100644
> --- a/include/uapi/linux/kfd_ioctl.h
> +++ b/include/uapi/linux/kfd_ioctl.h
> @@ -416,6 +416,7 @@ struct kfd_ioctl_acquire_vm_args {
>   #define KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM	(1 << 27)
>   #define KFD_IOC_ALLOC_MEM_FLAGS_COHERENT	(1 << 26)
>   #define KFD_IOC_ALLOC_MEM_FLAGS_UNCACHED	(1 << 25)
> +#define KFD_IOC_ALLOC_MEM_FLAGS_EXT_COHERENCE	(1 << 24)
>   
>   /* Allocate memory for later SVM (shared virtual memory) mapping.
>    *
> @@ -777,6 +778,8 @@ struct kfd_ioctl_cross_memory_copy_deprecated_args {
>   #define KFD_IOCTL_SVM_FLAG_GPU_READ_MOSTLY     0x00000020
>   /* Keep GPU memory mapping always valid as if XNACK is disable */
>   #define KFD_IOCTL_SVM_FLAG_GPU_ALWAYS_MAPPED   0x00000040
> +/* Fine grained coherency between all devices using device-scope atomics */
> +#define KFD_IOCTL_SVM_FLAG_EXT_COHERENCE       0x00000080
>   
>   /**
>    * kfd_ioctl_svm_op - SVM ioctl operations
> -- 2.34.1


More information about the amd-gfx mailing list