[PATCH 3/3] drm/amdkfd: Extend KFD device topology to surface peer-to-peer links

Felix Kuehling felix.kuehling at amd.com
Mon Jun 6 21:07:52 UTC 2022


Am 2022-06-06 um 14:08 schrieb Ramesh Errabolu:
> Extend KFD device topology to surface peer-to-peer links among
> GPU devices connected over PCIe or xGMI. Enabling HSA_AMD_P2P is
> REQUIRED to surface peer-to-peer links.
>
> Prior to this KFD did not expose to user mode any P2P links or
> indirect links that go over two or more direct hops. Old versions
> of the Thunk used to make up their own P2P and indirect links without
> the information about peer-accessibility and chipset support available
> to the kernel mode driver. In this patch we expose P2P links in a new
> sysfs directory to provide more reliable P2P link information to user
> mode.
>
> Old versions of the Thunk will continue to work as before and ignore
> the new directory. This avoids conflicts between P2P links exposed by
> KFD and P2P links created by the Thunk itself. New versions of the Thunk
> will use only the P2P links provided in the new p2p_links directory, if
> it exists, or fall back to the old code path on older KFDs that don't
> expose p2p_links.
>
> Signed-off-by: Ramesh Errabolu <Ramesh.Errabolu at amd.com>

Reviewed-by: Felix Kuehling <Felix.Kuehling at amd.com>


> ---
>   drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 319 +++++++++++++++++++++-
>   drivers/gpu/drm/amd/amdkfd/kfd_topology.h |   3 +
>   2 files changed, 320 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> index 8d50d207cf66..3e240b22ec91 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> @@ -40,6 +40,7 @@
>   #include "kfd_svm.h"
>   #include "amdgpu_amdkfd.h"
>   #include "amdgpu_ras.h"
> +#include "amdgpu.h"
>   
>   /* topology_device_list - Master list of all topology devices */
>   static struct list_head topology_device_list;
> @@ -148,6 +149,7 @@ static void kfd_release_topology_device(struct kfd_topology_device *dev)
>   	struct kfd_mem_properties *mem;
>   	struct kfd_cache_properties *cache;
>   	struct kfd_iolink_properties *iolink;
> +	struct kfd_iolink_properties *p2plink;
>   	struct kfd_perf_properties *perf;
>   
>   	list_del(&dev->list);
> @@ -173,6 +175,13 @@ static void kfd_release_topology_device(struct kfd_topology_device *dev)
>   		kfree(iolink);
>   	}
>   
> +	while (dev->p2p_link_props.next != &dev->p2p_link_props) {
> +		p2plink = container_of(dev->p2p_link_props.next,
> +				struct kfd_iolink_properties, list);
> +		list_del(&p2plink->list);
> +		kfree(p2plink);
> +	}
> +
>   	while (dev->perf_props.next != &dev->perf_props) {
>   		perf = container_of(dev->perf_props.next,
>   				struct kfd_perf_properties, list);
> @@ -214,6 +223,7 @@ struct kfd_topology_device *kfd_create_topology_device(
>   	INIT_LIST_HEAD(&dev->mem_props);
>   	INIT_LIST_HEAD(&dev->cache_props);
>   	INIT_LIST_HEAD(&dev->io_link_props);
> +	INIT_LIST_HEAD(&dev->p2p_link_props);
>   	INIT_LIST_HEAD(&dev->perf_props);
>   
>   	list_add_tail(&dev->list, device_list);
> @@ -465,6 +475,8 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
>   			      dev->node_props.caches_count);
>   	sysfs_show_32bit_prop(buffer, offs, "io_links_count",
>   			      dev->node_props.io_links_count);
> +	sysfs_show_32bit_prop(buffer, offs, "p2p_links_count",
> +			      dev->node_props.p2p_links_count);
>   	sysfs_show_32bit_prop(buffer, offs, "cpu_core_id_base",
>   			      dev->node_props.cpu_core_id_base);
>   	sysfs_show_32bit_prop(buffer, offs, "simd_id_base",
> @@ -568,6 +580,7 @@ static void kfd_remove_sysfs_file(struct kobject *kobj, struct attribute *attr)
>   
>   static void kfd_remove_sysfs_node_entry(struct kfd_topology_device *dev)
>   {
> +	struct kfd_iolink_properties *p2plink;
>   	struct kfd_iolink_properties *iolink;
>   	struct kfd_cache_properties *cache;
>   	struct kfd_mem_properties *mem;
> @@ -585,6 +598,18 @@ static void kfd_remove_sysfs_node_entry(struct kfd_topology_device *dev)
>   		dev->kobj_iolink = NULL;
>   	}
>   
> +	if (dev->kobj_p2plink) {
> +		list_for_each_entry(p2plink, &dev->p2p_link_props, list)
> +			if (p2plink->kobj) {
> +				kfd_remove_sysfs_file(p2plink->kobj,
> +							&p2plink->attr);
> +				p2plink->kobj = NULL;
> +			}
> +		kobject_del(dev->kobj_p2plink);
> +		kobject_put(dev->kobj_p2plink);
> +		dev->kobj_p2plink = NULL;
> +	}
> +
>   	if (dev->kobj_cache) {
>   		list_for_each_entry(cache, &dev->cache_props, list)
>   			if (cache->kobj) {
> @@ -631,6 +656,7 @@ static void kfd_remove_sysfs_node_entry(struct kfd_topology_device *dev)
>   static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev,
>   		uint32_t id)
>   {
> +	struct kfd_iolink_properties *p2plink;
>   	struct kfd_iolink_properties *iolink;
>   	struct kfd_cache_properties *cache;
>   	struct kfd_mem_properties *mem;
> @@ -668,6 +694,10 @@ static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev,
>   	if (!dev->kobj_iolink)
>   		return -ENOMEM;
>   
> +	dev->kobj_p2plink = kobject_create_and_add("p2p_links", dev->kobj_node);
> +	if (!dev->kobj_p2plink)
> +		return -ENOMEM;
> +
>   	dev->kobj_perf = kobject_create_and_add("perf", dev->kobj_node);
>   	if (!dev->kobj_perf)
>   		return -ENOMEM;
> @@ -757,6 +787,27 @@ static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev,
>   		i++;
>   	}
>   
> +	i = 0;
> +	list_for_each_entry(p2plink, &dev->p2p_link_props, list) {
> +		p2plink->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL);
> +		if (!p2plink->kobj)
> +			return -ENOMEM;
> +		ret = kobject_init_and_add(p2plink->kobj, &iolink_type,
> +				dev->kobj_p2plink, "%d", i);
> +		if (ret < 0) {
> +			kobject_put(p2plink->kobj);
> +			return ret;
> +		}
> +
> +		p2plink->attr.name = "properties";
> +		p2plink->attr.mode = KFD_SYSFS_FILE_MODE;
> +		sysfs_attr_init(&iolink->attr);
> +		ret = sysfs_create_file(p2plink->kobj, &p2plink->attr);
> +		if (ret < 0)
> +			return ret;
> +		i++;
> +	}
> +
>   	/* All hardware blocks have the same number of attributes. */
>   	num_attrs = ARRAY_SIZE(perf_attr_iommu);
>   	list_for_each_entry(perf, &dev->perf_props, list) {
> @@ -1145,6 +1196,7 @@ static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu)
>   	struct kfd_mem_properties *mem;
>   	struct kfd_cache_properties *cache;
>   	struct kfd_iolink_properties *iolink;
> +	struct kfd_iolink_properties *p2plink;
>   
>   	down_write(&topology_lock);
>   	list_for_each_entry(dev, &topology_device_list, list) {
> @@ -1165,6 +1217,8 @@ static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu)
>   				cache->gpu = dev->gpu;
>   			list_for_each_entry(iolink, &dev->io_link_props, list)
>   				iolink->gpu = dev->gpu;
> +			list_for_each_entry(p2plink, &dev->p2p_link_props, list)
> +				p2plink->gpu = dev->gpu;
>   			break;
>   		}
>   	}
> @@ -1287,6 +1341,250 @@ static void kfd_fill_iolink_non_crat_info(struct kfd_topology_device *dev)
>   			kfd_set_iolink_non_coherent(peer_dev, link, inbound_link);
>   		}
>   	}
> +
> +	/* Create indirect links so apply flags setting to all */
> +	list_for_each_entry(link, &dev->p2p_link_props, list) {
> +		link->flags = CRAT_IOLINK_FLAGS_ENABLED;
> +		kfd_set_iolink_no_atomics(dev, NULL, link);
> +		peer_dev = kfd_topology_device_by_proximity_domain(
> +				link->node_to);
> +
> +		if (!peer_dev)
> +			continue;
> +
> +		list_for_each_entry(inbound_link, &peer_dev->p2p_link_props,
> +									list) {
> +			if (inbound_link->node_to != link->node_from)
> +				continue;
> +
> +			inbound_link->flags = CRAT_IOLINK_FLAGS_ENABLED;
> +			kfd_set_iolink_no_atomics(peer_dev, dev, inbound_link);
> +			kfd_set_iolink_non_coherent(peer_dev, link, inbound_link);
> +		}
> +	}
> +}
> +
> +static int kfd_build_p2p_node_entry(struct kfd_topology_device *dev,
> +				struct kfd_iolink_properties *p2plink)
> +{
> +	int ret;
> +
> +	p2plink->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL);
> +	if (!p2plink->kobj)
> +		return -ENOMEM;
> +
> +	ret = kobject_init_and_add(p2plink->kobj, &iolink_type,
> +			dev->kobj_p2plink, "%d", dev->node_props.p2p_links_count - 1);
> +	if (ret < 0) {
> +		kobject_put(p2plink->kobj);
> +		return ret;
> +	}
> +
> +	p2plink->attr.name = "properties";
> +	p2plink->attr.mode = KFD_SYSFS_FILE_MODE;
> +	sysfs_attr_init(&p2plink->attr);
> +	ret = sysfs_create_file(p2plink->kobj, &p2plink->attr);
> +	if (ret < 0)
> +		return ret;
> +
> +	return 0;
> +}
> +
> +static int kfd_create_indirect_link_prop(struct kfd_topology_device *kdev, int gpu_node)
> +{
> +	struct kfd_iolink_properties *props = NULL, *props2 = NULL;
> +	struct kfd_iolink_properties *gpu_link, *cpu_link;
> +	struct kfd_topology_device *cpu_dev;
> +	int ret = 0;
> +	int i, num_cpu;
> +
> +	num_cpu = 0;
> +	list_for_each_entry(cpu_dev, &topology_device_list, list) {
> +		if (cpu_dev->gpu)
> +			break;
> +		num_cpu++;
> +	}
> +
> +	gpu_link = list_first_entry(&kdev->io_link_props,
> +					struct kfd_iolink_properties, list);
> +	if (!gpu_link)
> +		return -ENOMEM;
> +
> +	for (i = 0; i < num_cpu; i++) {
> +		/* CPU <--> GPU */
> +		if (gpu_link->node_to == i)
> +			continue;
> +
> +		/* find CPU <-->  CPU links */
> +		cpu_dev = kfd_topology_device_by_proximity_domain(i);
> +		if (cpu_dev) {
> +			list_for_each_entry(cpu_link,
> +					&cpu_dev->io_link_props, list) {
> +				if (cpu_link->node_to == gpu_link->node_to)
> +					break;
> +			}
> +		}
> +
> +		if (cpu_link->node_to != gpu_link->node_to)
> +			return -ENOMEM;
> +
> +		/* CPU <--> CPU <--> GPU, GPU node*/
> +		props = kfd_alloc_struct(props);
> +		if (!props)
> +			return -ENOMEM;
> +
> +		memcpy(props, gpu_link, sizeof(struct kfd_iolink_properties));
> +		props->weight = gpu_link->weight + cpu_link->weight;
> +		props->min_latency = gpu_link->min_latency + cpu_link->min_latency;
> +		props->max_latency = gpu_link->max_latency + cpu_link->max_latency;
> +		props->min_bandwidth = min(gpu_link->min_bandwidth, cpu_link->min_bandwidth);
> +		props->max_bandwidth = min(gpu_link->max_bandwidth, cpu_link->max_bandwidth);
> +
> +		props->node_from = gpu_node;
> +		props->node_to = i;
> +		kdev->node_props.p2p_links_count++;
> +		list_add_tail(&props->list, &kdev->p2p_link_props);
> +		ret = kfd_build_p2p_node_entry(kdev, props);
> +		if (ret < 0)
> +			return ret;
> +
> +		/* for small Bar, no CPU --> GPU in-direct links */
> +		if (kfd_dev_is_large_bar(kdev->gpu)) {
> +			/* CPU <--> CPU <--> GPU, CPU node*/
> +			props2 = kfd_alloc_struct(props2);
> +			if (!props2)
> +				return -ENOMEM;
> +
> +			memcpy(props2, props, sizeof(struct kfd_iolink_properties));
> +			props2->node_from = i;
> +			props2->node_to = gpu_node;
> +			props2->kobj = NULL;
> +			cpu_dev->node_props.p2p_links_count++;
> +			list_add_tail(&props2->list, &cpu_dev->p2p_link_props);
> +			ret = kfd_build_p2p_node_entry(cpu_dev, props2);
> +			if (ret < 0)
> +				return ret;
> +		}
> +	}
> +	return ret;
> +}
> +
> +#if defined(CONFIG_HSA_AMD_P2P)
> +static int kfd_add_peer_prop(struct kfd_topology_device *kdev,
> +		struct kfd_topology_device *peer, int from, int to)
> +{
> +	struct kfd_iolink_properties *props = NULL;
> +	struct kfd_iolink_properties *iolink1, *iolink2, *iolink3;
> +	struct kfd_topology_device *cpu_dev;
> +	int ret = 0;
> +
> +	if (!amdgpu_device_is_peer_accessible(
> +				kdev->gpu->adev,
> +				peer->gpu->adev))
> +		return ret;
> +
> +	iolink1 = list_first_entry(&kdev->io_link_props,
> +							struct kfd_iolink_properties, list);
> +	if (!iolink1)
> +		return -ENOMEM;
> +
> +	iolink2 = list_first_entry(&peer->io_link_props,
> +							struct kfd_iolink_properties, list);
> +	if (!iolink2)
> +		return -ENOMEM;
> +
> +	props = kfd_alloc_struct(props);
> +	if (!props)
> +		return -ENOMEM;
> +
> +	memcpy(props, iolink1, sizeof(struct kfd_iolink_properties));
> +
> +	props->weight = iolink1->weight + iolink2->weight;
> +	props->min_latency = iolink1->min_latency + iolink2->min_latency;
> +	props->max_latency = iolink1->max_latency + iolink2->max_latency;
> +	props->min_bandwidth = min(iolink1->min_bandwidth, iolink2->min_bandwidth);
> +	props->max_bandwidth = min(iolink2->max_bandwidth, iolink2->max_bandwidth);
> +
> +	if (iolink1->node_to != iolink2->node_to) {
> +		/* CPU->CPU  link*/
> +		cpu_dev = kfd_topology_device_by_proximity_domain(iolink1->node_to);
> +		if (cpu_dev) {
> +			list_for_each_entry(iolink3, &cpu_dev->io_link_props, list)
> +				if (iolink3->node_to == iolink2->node_to)
> +					break;
> +
> +			props->weight += iolink3->weight;
> +			props->min_latency += iolink3->min_latency;
> +			props->max_latency += iolink3->max_latency;
> +			props->min_bandwidth = min(props->min_bandwidth,
> +							iolink3->min_bandwidth);
> +			props->max_bandwidth = min(props->max_bandwidth,
> +							iolink3->max_bandwidth);
> +		} else {
> +			WARN(1, "CPU node not found");
> +		}
> +	}
> +
> +	props->node_from = from;
> +	props->node_to = to;
> +	peer->node_props.p2p_links_count++;
> +	list_add_tail(&props->list, &peer->p2p_link_props);
> +	ret = kfd_build_p2p_node_entry(peer, props);
> +
> +	return ret;
> +}
> +#endif
> +
> +static int kfd_dev_create_p2p_links(void)
> +{
> +	struct kfd_topology_device *dev;
> +	struct kfd_topology_device *new_dev;
> +	uint32_t i, k;
> +	int ret = 0;
> +
> +	k = 0;
> +	list_for_each_entry(dev, &topology_device_list, list)
> +		k++;
> +	if (k < 2)
> +		return 0;
> +
> +	new_dev = list_last_entry(&topology_device_list, struct kfd_topology_device, list);
> +	if (WARN_ON(!new_dev->gpu))
> +		return 0;
> +
> +	k--;
> +	i = 0;
> +
> +	/* create in-direct links */
> +	ret = kfd_create_indirect_link_prop(new_dev, k);
> +	if (ret < 0)
> +		goto out;
> +
> +	/* create p2p links */
> +#if defined(CONFIG_HSA_AMD_P2P)
> +	list_for_each_entry(dev, &topology_device_list, list) {
> +		if (dev == new_dev)
> +			break;
> +		if (!dev->gpu || !dev->gpu->adev ||
> +		    (dev->gpu->hive_id &&
> +		     dev->gpu->hive_id == new_dev->gpu->hive_id))
> +			goto next;
> +
> +		/* check if node(s) is/are peer accessible in one direction or bi-direction */
> +		ret = kfd_add_peer_prop(new_dev, dev, i, k);
> +		if (ret < 0)
> +			goto out;
> +
> +		ret = kfd_add_peer_prop(dev, new_dev, k, i);
> +		if (ret < 0)
> +			goto out;
> +next:
> +		i++;
> +	}
> +#endif
> +
> +out:
> +	return ret;
>   }
>   
>   int kfd_topology_add_device(struct kfd_dev *gpu)
> @@ -1305,7 +1603,6 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
>   	INIT_LIST_HEAD(&temp_topology_device_list);
>   
>   	gpu_id = kfd_generate_gpu_id(gpu);
> -
>   	pr_debug("Adding new GPU (ID: 0x%x) to topology\n", gpu_id);
>   
>   	/* Check to see if this gpu device exists in the topology_device_list.
> @@ -1362,6 +1659,8 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
>   	dev->gpu_id = gpu_id;
>   	gpu->id = gpu_id;
>   
> +	kfd_dev_create_p2p_links();
> +
>   	/* TODO: Move the following lines to function
>   	 *	kfd_add_non_crat_information
>   	 */
> @@ -1507,7 +1806,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
>   static void kfd_topology_update_io_links(int proximity_domain)
>   {
>   	struct kfd_topology_device *dev;
> -	struct kfd_iolink_properties *iolink, *tmp;
> +	struct kfd_iolink_properties *iolink, *p2plink, *tmp;
>   
>   	list_for_each_entry(dev, &topology_device_list, list) {
>   		if (dev->proximity_domain > proximity_domain)
> @@ -1529,6 +1828,22 @@ static void kfd_topology_update_io_links(int proximity_domain)
>   					iolink->node_to--;
>   			}
>   		}
> +
> +		list_for_each_entry_safe(p2plink, tmp, &dev->p2p_link_props, list) {
> +			/*
> +			 * If there is a p2p link to the dev being deleted
> +			 * then remove that p2p link also.
> +			 */
> +			if (p2plink->node_to == proximity_domain) {
> +				list_del(&p2plink->list);
> +				dev->node_props.p2p_links_count--;
> +			} else {
> +				if (p2plink->node_from > proximity_domain)
> +					p2plink->node_from--;
> +				if (p2plink->node_to > proximity_domain)
> +					p2plink->node_to--;
> +			}
> +		}
>   	}
>   }
>   
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
> index 4f80d2ea1000..2fb5664e1041 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
> @@ -38,6 +38,7 @@ struct kfd_node_properties {
>   	uint32_t mem_banks_count;
>   	uint32_t caches_count;
>   	uint32_t io_links_count;
> +	uint32_t p2p_links_count;
>   	uint32_t cpu_core_id_base;
>   	uint32_t simd_id_base;
>   	uint32_t capability;
> @@ -131,12 +132,14 @@ struct kfd_topology_device {
>   	struct list_head		cache_props;
>   	uint32_t			io_link_count;
>   	struct list_head		io_link_props;
> +	struct list_head		p2p_link_props;
>   	struct list_head		perf_props;
>   	struct kfd_dev			*gpu;
>   	struct kobject			*kobj_node;
>   	struct kobject			*kobj_mem;
>   	struct kobject			*kobj_cache;
>   	struct kobject			*kobj_iolink;
> +	struct kobject			*kobj_p2plink;
>   	struct kobject			*kobj_perf;
>   	struct attribute		attr_gpuid;
>   	struct attribute		attr_name;


More information about the amd-gfx mailing list