[PATCH 1/4] drm/amdkfd: Create node in kfd sysfs for memory only numa node

Felix Kuehling felix.kuehling at amd.com
Wed Jun 2 05:48:04 UTC 2021


Am 2021-06-01 um 3:07 p.m. schrieb Oak Zeng:
> Previously kfd driver assumes all CPU numa nodes are associated
> with system memory

This is not true. Systems where there is no memory installed in one or
more CPU sockets have been supported. The only thing that was not
supported is NUMA domains with memory but no CPU (or GPU).


>  and there is no memory only numa node. This
> is not true anymore for ALDEBARAN A+A set up. This patch creates
> memory only node in kfd sysfs.
>
> Signed-off-by: Oak Zeng <Oak.Zeng at amd.com>
> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 73 ++++++++++++++++++++---------------
>  1 file changed, 42 insertions(+), 31 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> index 3251fe2..56e6dff 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> @@ -827,8 +827,11 @@ static int kfd_parse_subtype_mem(struct crat_subtype_memory *mem,
>  	uint32_t flags = 0;
>  	uint32_t width;
>  
> -	pr_debug("Found memory entry in CRAT table with proximity_domain=%d\n",
> -			mem->proximity_domain);
> +	size_in_bytes =
> +		((uint64_t)mem->length_high << 32) +
> +		mem->length_low;
> +	pr_debug("Found memory entry in CRAT table with proximity_domain=%d, size %lld\n",
> +			mem->proximity_domain, size_in_bytes);
>  	list_for_each_entry(dev, device_list, list) {
>  		if (mem->proximity_domain == dev->proximity_domain) {
>  			/* We're on GPU node */
> @@ -848,9 +851,6 @@ static int kfd_parse_subtype_mem(struct crat_subtype_memory *mem,
>  			if (mem->flags & CRAT_MEM_FLAGS_NON_VOLATILE)
>  				flags |= HSA_MEM_FLAGS_NON_VOLATILE;
>  
> -			size_in_bytes =
> -				((uint64_t)mem->length_high << 32) +
> -							mem->length_low;
>  			width = mem->width;
>  
>  			/* Multiple banks of the same type are aggregated into
> @@ -1718,51 +1718,62 @@ static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size)
>  	sub_type_hdr = (struct crat_subtype_generic *)(crat_table+1);
>  
>  	for_each_online_node(numa_node_id) {
> +		pr_debug("numa node id %d\n", numa_node_id);
>  		if (kfd_numa_node_to_apic_id(numa_node_id) == -1)
> -			continue;
> -
> -		/* Fill in Subtype: Compute Unit */
> -		ret = kfd_fill_cu_for_cpu(numa_node_id, &avail_size,
> -			crat_table->num_domains,
> -			(struct crat_subtype_computeunit *)sub_type_hdr);
> -		if (ret < 0)
> -			return ret;
> -		crat_table->length += sub_type_hdr->length;
> -		crat_table->total_entries++;
> +			pr_debug("Numa node %d is a memory only numa node\n", numa_node_id);
> +
> +		if (kfd_numa_node_to_apic_id(numa_node_id) != -1) {

This should be an else-branch of the previous "if".


> +			/* Fill in Subtype: Compute Unit */
> +			ret = kfd_fill_cu_for_cpu(numa_node_id, &avail_size,
> +					crat_table->num_domains,
> +					(struct crat_subtype_computeunit *)sub_type_hdr);
> +			if (ret < 0) {
> +				pr_err("fill cu for cpu failed\n");
> +				return ret;
> +			}
> +			crat_table->length += sub_type_hdr->length;
> +			crat_table->total_entries++;
>  
> -		sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
> -			sub_type_hdr->length);
> +			sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
> +					sub_type_hdr->length);
> +		}
>  
>  		/* Fill in Subtype: Memory */
>  		ret = kfd_fill_mem_info_for_cpu(numa_node_id, &avail_size,
>  			crat_table->num_domains,

So you're creating a dangling memory object that's not associated with
any node and therefore has no IO links? I'm not sure how that will work.
I was expecting that there would be at least a dummy CPU node with 0
cores but with IO links to express the NUMA distances.

Regards,
  Felix


>  			(struct crat_subtype_memory *)sub_type_hdr);
> -		if (ret < 0)
> +		if (ret < 0) {
> +			pr_err("fill mem for cpu failed\n");
>  			return ret;
> +		}
>  		crat_table->length += sub_type_hdr->length;
>  		crat_table->total_entries++;
>  
>  		sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
>  			sub_type_hdr->length);
>  
> -		/* Fill in Subtype: IO Link */
> +		if (kfd_numa_node_to_apic_id(numa_node_id) != -1) {
> +			/* Fill in Subtype: IO Link */
>  #ifdef CONFIG_X86_64
> -		ret = kfd_fill_iolink_info_for_cpu(numa_node_id, &avail_size,
> -				&entries,
> -				(struct crat_subtype_iolink *)sub_type_hdr);
> -		if (ret < 0)
> -			return ret;
> +			ret = kfd_fill_iolink_info_for_cpu(numa_node_id, &avail_size,
> +					&entries,
> +					(struct crat_subtype_iolink *)sub_type_hdr);
> +			if (ret < 0) {
> +				pr_err("fill iolink for cpu failed\n");
> +				return ret;
> +			}
>  
> -		if (entries) {
> -			crat_table->length += (sub_type_hdr->length * entries);
> -			crat_table->total_entries += entries;
> +			if (entries) {
> +				crat_table->length += (sub_type_hdr->length * entries);
> +				crat_table->total_entries += entries;
>  
> -			sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
> -					sub_type_hdr->length * entries);
> -		}
> +				sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
> +						sub_type_hdr->length * entries);
> +			}
>  #else
> -		pr_info("IO link not available for non x86 platforms\n");
> +			pr_info("IO link not available for non x86 platforms\n");
>  #endif
> +		}
>  
>  		crat_table->num_domains++;
>  	}


More information about the amd-gfx mailing list