[PATCH 3/6] drm/amdgpu: Fix per-BO MTYPE selection for GFXv9.4.3
Alex Deucher
alexander.deucher at amd.com
Wed May 10 17:56:27 UTC 2023
From: Felix Kuehling <Felix.Kuehling at amd.com>
Treat system memory on NUMA systems as remote by default. Overriding this
default with a more efficient per-page MTYPE will be implemented in the next
patch.
There is no need for a special case for APP APUs: system memory is handled
the same way in carve-out and native mode, and VRAM doesn't exist in native
mode.
Signed-off-by: Felix Kuehling <Felix.Kuehling at amd.com>
Reviewed-by: Philip Yang <Philip.Yang at amd.com>
Reviewed-and-tested-by: Rajneesh Bhardwaj <rajneesh.bhardwaj at amd.com>
Signed-off-by: Alex Deucher <alexander.deucher at amd.com>
---
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 40 +++++++++++----------------
drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 24 +++++++++-------
2 files changed, 30 insertions(+), 34 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 59ce741dfa73..52f5bab5fcb7 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -1191,9 +1191,10 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev,
bool is_vram = bo->tbo.resource->mem_type == TTM_PL_VRAM;
bool coherent = bo->flags & AMDGPU_GEM_CREATE_COHERENT;
bool uncached = bo->flags & AMDGPU_GEM_CREATE_UNCACHED;
- unsigned int mtype;
- unsigned int mtype_default;
+ /* TODO: memory partitions struct amdgpu_vm *vm = mapping->bo_va->base.vm;*/
+ unsigned int mtype_local, mtype;
bool snoop = false;
+ bool is_local;
switch (adev->ip_versions[GC_HWIP][0]) {
case IP_VERSION(9, 4, 1):
@@ -1233,35 +1234,26 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev,
}
break;
case IP_VERSION(9, 4, 3):
- /* FIXME: Needs more work for handling multiple memory
- * partitions (> NPS1 mode) e.g. NPS4 for both APU and dGPU
- * modes.
- * FIXME: Temporarily using MTYPE_CC instead of MTYPE_RW where applicable.
- * To force use of MTYPE_RW, set use_mtype_cc_wa=0
+ /* Only local VRAM BOs or system memory on non-NUMA APUs
+ * can be assumed to be local in their entirety. Choose
+ * MTYPE_NC as safe fallback for all system memory BOs on
+ * NUMA systems. Their MTYPE can be overridden per-page in
+ * gmc_v9_0_override_vm_pte_flags.
*/
- mtype_default = amdgpu_use_mtype_cc_wa ? MTYPE_CC : MTYPE_RW;
+ mtype_local = amdgpu_use_mtype_cc_wa ? MTYPE_CC : MTYPE_RW;
+ is_local = (!is_vram && (adev->flags & AMD_IS_APU) &&
+ num_possible_nodes() <= 1) ||
+ (is_vram && adev == bo_adev /* TODO: memory partitions &&
+ bo->mem_id == vm->mem_id*/);
snoop = true;
if (uncached) {
mtype = MTYPE_UC;
- } else if (adev->gmc.is_app_apu) {
- /* FIXME: APU in native mode, NPS1 single socket only
- *
- * For suporting NUMA partitioned APU e.g. in NPS4 mode,
- * this need to look at the NUMA node on which the
- * system memory allocation was done.
- *
- * Memory access by a different partition within same
- * socket should be treated as remote access so MTYPE_RW
- * cannot be used always.
- */
- mtype = mtype_default;
} else if (adev->flags & AMD_IS_APU) {
- /* APU on carve out mode */
- mtype = mtype_default;
+ mtype = is_local ? mtype_local : MTYPE_NC;
} else {
/* dGPU */
- if (is_vram && bo_adev == adev)
- mtype = mtype_default;
+ if (is_local)
+ mtype = mtype_local;
else if (is_vram)
mtype = MTYPE_NC;
else
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index c55b9754c506..ab1acf97d049 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1150,6 +1150,7 @@ svm_range_get_pte_flags(struct kfd_node *node,
bool snoop = (domain != SVM_RANGE_VRAM_DOMAIN);
bool coherent = flags & KFD_IOCTL_SVM_FLAG_COHERENT;
bool uncached = flags & KFD_IOCTL_SVM_FLAG_UNCACHED;
+ unsigned int mtype_local;
if (domain == SVM_RANGE_VRAM_DOMAIN)
bo_node = prange->svm_bo->node;
@@ -1190,19 +1191,16 @@ svm_range_get_pte_flags(struct kfd_node *node,
}
break;
case IP_VERSION(9, 4, 3):
- //TODO: Need more work for handling multiple memory partitions
- //e.g. NPS4. Current approch is only applicable without memory
- //partitions.
+ mtype_local = amdgpu_use_mtype_cc_wa ? AMDGPU_VM_MTYPE_CC :
+ AMDGPU_VM_MTYPE_RW;
snoop = true;
if (uncached) {
mapping_flags |= AMDGPU_VM_MTYPE_UC;
} else if (domain == SVM_RANGE_VRAM_DOMAIN) {
- /* local HBM region close to partition
- * FIXME: Temporarily using MTYPE_CC instead of MTYPE_RW where applicable.
- * To force use of MTYPE_RW, set use_mtype_cc_wa=0
- */
- if (bo_node == node)
- mapping_flags |= amdgpu_use_mtype_cc_wa ? AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW;
+ /* local HBM region close to partition */
+ if (bo_node->adev == node->adev /* TODO: memory partitions &&
+ bo_node->mem_id == node->mem_id*/)
+ mapping_flags |= mtype_local;
/* local HBM region far from partition or remote XGMI GPU */
else if (svm_nodes_in_same_hive(bo_node, node))
mapping_flags |= AMDGPU_VM_MTYPE_NC;
@@ -1211,7 +1209,13 @@ svm_range_get_pte_flags(struct kfd_node *node,
mapping_flags |= AMDGPU_VM_MTYPE_UC;
/* system memory accessed by the APU */
} else if (node->adev->flags & AMD_IS_APU) {
- mapping_flags |= AMDGPU_VM_MTYPE_NC;
+ /* On NUMA systems, locality is determined per-page
+ * in amdgpu_gmc_override_vm_pte_flags
+ */
+ if (num_possible_nodes() <= 1)
+ mapping_flags |= mtype_local;
+ else
+ mapping_flags |= AMDGPU_VM_MTYPE_NC;
/* system memory accessed by the dGPU */
} else {
mapping_flags |= AMDGPU_VM_MTYPE_UC;
--
2.40.1
More information about the amd-gfx
mailing list