[PATCH 048/159] drm/amdgpu: new cache coherence change for Aldebaran

Sun Feb 28 08:19:22 UTC 2021

On 2021/2/25 上午6:17, Alex Deucher wrote:
> From: Eric Huang <jinhuieric.huang at amd.com>
>
> To support new cache coherence HW on A+A platform mainly in KFD.
>
> Signed-off-by: Eric Huang <jinhuieric.huang at amd.com>
> Reviewed-by: Oak Zeng <Oak.Zeng at amd.com>
> Signed-off-by: Alex Deucher <alexander.deucher at amd.com>
> ---
>   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 30 +++++++++++++++++--
>   drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c         |  3 ++
>   2 files changed, 31 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index 8781051afb69..30e41d1b3256 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -30,6 +30,7 @@
>   #include "amdgpu_amdkfd.h"
>   #include "amdgpu_dma_buf.h"
>   #include <uapi/linux/kfd_ioctl.h>
> +#include "amdgpu_xgmi.h"
>   
>   /* BO flag to indicate a KFD userptr BO */
>   #define AMDGPU_AMDKFD_USERPTR_BO (1ULL << 63)
> @@ -404,6 +405,8 @@ static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem)
>   	struct amdgpu_device *bo_adev = amdgpu_ttm_adev(mem->bo->tbo.bdev);
>   	bool coherent = mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_COHERENT;
>   	uint32_t mapping_flags;
> +	uint64_t pte_flags;
> +	bool snoop = false;
>   
>   	mapping_flags = AMDGPU_VM_PAGE_READABLE;
>   	if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE)
> @@ -413,7 +416,6 @@ static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem)
>   
>   	switch (adev->asic_type) {
>   	case CHIP_ARCTURUS:
> -	case CHIP_ALDEBARAN:
>   		if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
>   			if (bo_adev == adev)
>   				mapping_flags |= coherent ?
> @@ -425,12 +427,36 @@ static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem)
>   				AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
>   		}
>   		break;
> +	case CHIP_ALDEBARAN:
> +		if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
> +			if (bo_adev == adev) {
> +				mapping_flags |= AMDGPU_VM_MTYPE_RW;
For local VRAM, if it's shared by peer GPU, RW could keep cache coherence?

Do we need UC mtype in this case?

> +				if (adev->gmc.xgmi.connected_to_cpu)
> +					snoop = true;
> +			} else {
> +				mapping_flags |= AMDGPU_VM_MTYPE_NC;
> +				if (amdgpu_xgmi_same_hive(adev, bo_adev))
> +					snoop = true;
Do we need to check same hive id here?

if so , CHIP_ARCTURUS may add similar check.

Why not use UC mtype here for gpu-gpu memory share? NC could keep cache 
coherence among gpu sharing? by PTE_SNOOPED type?

> +			}
> +		} else {
> +			snoop = true;
> +			if (adev->gmc.xgmi.connected_to_cpu)
> +				/* system memory uses NC on A+A */
> +				mapping_flags |= AMDGPU_VM_MTYPE_NC;
> +			else
> +				mapping_flags |= coherent ?
> +					AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
> +		}
> +		break;
>   	default:
>   		mapping_flags |= coherent ?
>   			AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
>   	}
>   
> -	return amdgpu_gem_va_map_flags(adev, mapping_flags);
> +	pte_flags = amdgpu_gem_va_map_flags(adev, mapping_flags);
> +	pte_flags |= snoop ? AMDGPU_PTE_SNOOPED : 0;
> +
> +	return pte_flags;
>   }
>   
>   /* add_bo_to_vm - Add a BO to a VM
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index 94552048aada..32b552e54e77 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@ -1040,6 +1040,9 @@ static void gmc_v9_0_get_vm_pte(struct amdgpu_device *adev,
>   	    !(*flags & AMDGPU_PTE_SYSTEM) &&
>   	    mapping->bo_va->is_xgmi)
>   		*flags |= AMDGPU_PTE_SNOOPED;
> +
> +	if (adev->asic_type == CHIP_ALDEBARAN)
> +		*flags |= mapping->flags & AMDGPU_PTE_SNOOPED;
It breaks the change of "drm/amdgpu: Fix an omission when adding 
Aldebaran support", which sets PTE_SNOOPED for xgmi VRAM bo only.

And it seems no need to set PTE_SNOOPED here, since get_pte_flags() has 
handled that.

>   }
>   
>   static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev)