[PATCH 1/4] drm/amdkfd: Create node in kfd sysfs for memory only numa node

Oak Zeng Oak.Zeng at amd.com
Tue Jun 1 19:07:34 UTC 2021


Previously kfd driver assumes all CPU numa nodes are associated
with system memory and there is no memory only numa node. This
is not true anymore for ALDEBARAN A+A set up. This patch creates
memory only node in kfd sysfs.

Signed-off-by: Oak Zeng <Oak.Zeng at amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 73 ++++++++++++++++++++---------------
 1 file changed, 42 insertions(+), 31 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index 3251fe2..56e6dff 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -827,8 +827,11 @@ static int kfd_parse_subtype_mem(struct crat_subtype_memory *mem,
 	uint32_t flags = 0;
 	uint32_t width;
 
-	pr_debug("Found memory entry in CRAT table with proximity_domain=%d\n",
-			mem->proximity_domain);
+	size_in_bytes =
+		((uint64_t)mem->length_high << 32) +
+		mem->length_low;
+	pr_debug("Found memory entry in CRAT table with proximity_domain=%d, size %lld\n",
+			mem->proximity_domain, size_in_bytes);
 	list_for_each_entry(dev, device_list, list) {
 		if (mem->proximity_domain == dev->proximity_domain) {
 			/* We're on GPU node */
@@ -848,9 +851,6 @@ static int kfd_parse_subtype_mem(struct crat_subtype_memory *mem,
 			if (mem->flags & CRAT_MEM_FLAGS_NON_VOLATILE)
 				flags |= HSA_MEM_FLAGS_NON_VOLATILE;
 
-			size_in_bytes =
-				((uint64_t)mem->length_high << 32) +
-							mem->length_low;
 			width = mem->width;
 
 			/* Multiple banks of the same type are aggregated into
@@ -1718,51 +1718,62 @@ static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size)
 	sub_type_hdr = (struct crat_subtype_generic *)(crat_table+1);
 
 	for_each_online_node(numa_node_id) {
+		pr_debug("numa node id %d\n", numa_node_id);
 		if (kfd_numa_node_to_apic_id(numa_node_id) == -1)
-			continue;
-
-		/* Fill in Subtype: Compute Unit */
-		ret = kfd_fill_cu_for_cpu(numa_node_id, &avail_size,
-			crat_table->num_domains,
-			(struct crat_subtype_computeunit *)sub_type_hdr);
-		if (ret < 0)
-			return ret;
-		crat_table->length += sub_type_hdr->length;
-		crat_table->total_entries++;
+			pr_debug("Numa node %d is a memory only numa node\n", numa_node_id);
+
+		if (kfd_numa_node_to_apic_id(numa_node_id) != -1) {
+			/* Fill in Subtype: Compute Unit */
+			ret = kfd_fill_cu_for_cpu(numa_node_id, &avail_size,
+					crat_table->num_domains,
+					(struct crat_subtype_computeunit *)sub_type_hdr);
+			if (ret < 0) {
+				pr_err("fill cu for cpu failed\n");
+				return ret;
+			}
+			crat_table->length += sub_type_hdr->length;
+			crat_table->total_entries++;
 
-		sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
-			sub_type_hdr->length);
+			sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
+					sub_type_hdr->length);
+		}
 
 		/* Fill in Subtype: Memory */
 		ret = kfd_fill_mem_info_for_cpu(numa_node_id, &avail_size,
 			crat_table->num_domains,
 			(struct crat_subtype_memory *)sub_type_hdr);
-		if (ret < 0)
+		if (ret < 0) {
+			pr_err("fill mem for cpu failed\n");
 			return ret;
+		}
 		crat_table->length += sub_type_hdr->length;
 		crat_table->total_entries++;
 
 		sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
 			sub_type_hdr->length);
 
-		/* Fill in Subtype: IO Link */
+		if (kfd_numa_node_to_apic_id(numa_node_id) != -1) {
+			/* Fill in Subtype: IO Link */
 #ifdef CONFIG_X86_64
-		ret = kfd_fill_iolink_info_for_cpu(numa_node_id, &avail_size,
-				&entries,
-				(struct crat_subtype_iolink *)sub_type_hdr);
-		if (ret < 0)
-			return ret;
+			ret = kfd_fill_iolink_info_for_cpu(numa_node_id, &avail_size,
+					&entries,
+					(struct crat_subtype_iolink *)sub_type_hdr);
+			if (ret < 0) {
+				pr_err("fill iolink for cpu failed\n");
+				return ret;
+			}
 
-		if (entries) {
-			crat_table->length += (sub_type_hdr->length * entries);
-			crat_table->total_entries += entries;
+			if (entries) {
+				crat_table->length += (sub_type_hdr->length * entries);
+				crat_table->total_entries += entries;
 
-			sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
-					sub_type_hdr->length * entries);
-		}
+				sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
+						sub_type_hdr->length * entries);
+			}
 #else
-		pr_info("IO link not available for non x86 platforms\n");
+			pr_info("IO link not available for non x86 platforms\n");
 #endif
+		}
 
 		crat_table->num_domains++;
 	}
-- 
2.7.4



More information about the amd-gfx mailing list