[PATCHv2] drm/amdkfd: Cleanup IO links during KFD device removal

Felix Kuehling felix.kuehling at amd.com
Tue Apr 12 02:30:15 UTC 2022


Am 2022-04-11 um 22:15 schrieb Mukul Joshi:
> Currently, the IO-links to the device being removed from topology,
> are not cleared. As a result, there would be dangling links left in
> the KFD topology. This patch aims to fix the following:
> 1. Cleanup all IO links to the device being removed.
> 2. Ensure that node numbering in sysfs and nodes proximity domain
>     values are consistent after the device is removed:
>     a. Adding a device and removing a GPU device are made mutually
>        exclusive.
>     b. The global proximity domain counter is no longer required to be
>        an atomic counter. A normal 32-bit counter can be used instead.
> 3. Update generation_count to let user-mode know that topology has
>     changed due to device removal.
>
> CC: Shuotao Xu <shuotaoxu at microsoft.com>
> Signed-off-by: Mukul Joshi <mukul.joshi at amd.com>
> Reviewed-by: Shuotao Xu <shuotaoxu at microsoft.com>

Reviewed-by: Felix Kuehling <Felix.Kuehling at amd.com>


> ---
> v1->v2:
> - Remove comments from inside kfd_topology_update_io_links()
>    and add them as kernel-doc comments.
>
>   drivers/gpu/drm/amd/amdkfd/kfd_crat.c     |  4 +-
>   drivers/gpu/drm/amd/amdkfd/kfd_priv.h     |  2 +
>   drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 83 ++++++++++++++++++++---
>   3 files changed, 78 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> index 1eaabd2cb41b..afc8a7fcdad8 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> @@ -1056,7 +1056,7 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink,
>   	 * table, add corresponded reversed direction link now.
>   	 */
>   	if (props && (iolink->flags & CRAT_IOLINK_FLAGS_BI_DIRECTIONAL)) {
> -		to_dev = kfd_topology_device_by_proximity_domain(id_to);
> +		to_dev = kfd_topology_device_by_proximity_domain_no_lock(id_to);
>   		if (!to_dev)
>   			return -ENODEV;
>   		/* same everything but the other direction */
> @@ -2225,7 +2225,7 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
>   	 */
>   	if (kdev->hive_id) {
>   		for (nid = 0; nid < proximity_domain; ++nid) {
> -			peer_dev = kfd_topology_device_by_proximity_domain(nid);
> +			peer_dev = kfd_topology_device_by_proximity_domain_no_lock(nid);
>   			if (!peer_dev->gpu)
>   				continue;
>   			if (peer_dev->gpu->hive_id != kdev->hive_id)
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index e1b7e6afa920..8a43def1f638 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -1016,6 +1016,8 @@ int kfd_topology_add_device(struct kfd_dev *gpu);
>   int kfd_topology_remove_device(struct kfd_dev *gpu);
>   struct kfd_topology_device *kfd_topology_device_by_proximity_domain(
>   						uint32_t proximity_domain);
> +struct kfd_topology_device *kfd_topology_device_by_proximity_domain_no_lock(
> +						uint32_t proximity_domain);
>   struct kfd_topology_device *kfd_topology_device_by_id(uint32_t gpu_id);
>   struct kfd_dev *kfd_device_by_id(uint32_t gpu_id);
>   struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev);
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> index 3bdcae239bc0..98a51847cd8c 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> @@ -46,22 +46,32 @@ static struct list_head topology_device_list;
>   static struct kfd_system_properties sys_props;
>   
>   static DECLARE_RWSEM(topology_lock);
> -static atomic_t topology_crat_proximity_domain;
> +static uint32_t topology_crat_proximity_domain;
>   
> -struct kfd_topology_device *kfd_topology_device_by_proximity_domain(
> +struct kfd_topology_device *kfd_topology_device_by_proximity_domain_no_lock(
>   						uint32_t proximity_domain)
>   {
>   	struct kfd_topology_device *top_dev;
>   	struct kfd_topology_device *device = NULL;
>   
> -	down_read(&topology_lock);
> -
>   	list_for_each_entry(top_dev, &topology_device_list, list)
>   		if (top_dev->proximity_domain == proximity_domain) {
>   			device = top_dev;
>   			break;
>   		}
>   
> +	return device;
> +}
> +
> +struct kfd_topology_device *kfd_topology_device_by_proximity_domain(
> +						uint32_t proximity_domain)
> +{
> +	struct kfd_topology_device *device = NULL;
> +
> +	down_read(&topology_lock);
> +
> +	device = kfd_topology_device_by_proximity_domain_no_lock(
> +							proximity_domain);
>   	up_read(&topology_lock);
>   
>   	return device;
> @@ -1060,7 +1070,7 @@ int kfd_topology_init(void)
>   	down_write(&topology_lock);
>   	kfd_topology_update_device_list(&temp_topology_device_list,
>   					&topology_device_list);
> -	atomic_set(&topology_crat_proximity_domain, sys_props.num_devices-1);
> +	topology_crat_proximity_domain = sys_props.num_devices-1;
>   	ret = kfd_topology_update_sysfs();
>   	up_write(&topology_lock);
>   
> @@ -1295,8 +1305,6 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
>   
>   	pr_debug("Adding new GPU (ID: 0x%x) to topology\n", gpu_id);
>   
> -	proximity_domain = atomic_inc_return(&topology_crat_proximity_domain);
> -
>   	/* Include the CPU in xGMI hive if xGMI connected by assigning it the hive ID. */
>   	if (gpu->hive_id && gpu->adev->gmc.xgmi.connected_to_cpu) {
>   		struct kfd_topology_device *top_dev;
> @@ -1321,12 +1329,16 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
>   	 */
>   	dev = kfd_assign_gpu(gpu);
>   	if (!dev) {
> +		down_write(&topology_lock);
> +		proximity_domain = ++topology_crat_proximity_domain;
> +
>   		res = kfd_create_crat_image_virtual(&crat_image, &image_size,
>   						    COMPUTE_UNIT_GPU, gpu,
>   						    proximity_domain);
>   		if (res) {
>   			pr_err("Error creating VCRAT for GPU (ID: 0x%x)\n",
>   			       gpu_id);
> +			topology_crat_proximity_domain--;
>   			return res;
>   		}
>   		res = kfd_parse_crat_table(crat_image,
> @@ -1335,10 +1347,10 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
>   		if (res) {
>   			pr_err("Error parsing VCRAT for GPU (ID: 0x%x)\n",
>   			       gpu_id);
> +			topology_crat_proximity_domain--;
>   			goto err;
>   		}
>   
> -		down_write(&topology_lock);
>   		kfd_topology_update_device_list(&temp_topology_device_list,
>   			&topology_device_list);
>   
> @@ -1485,25 +1497,78 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
>   	return res;
>   }
>   
> +/**
> + * kfd_topology_update_io_links() - Update IO links after device removal.
> + * @proximity_domain: Proximity domain value of the dev being removed.
> + *
> + * The topology list currently is arranged in increasing order of
> + * proximity domain.
> + *
> + * Two things need to be done when a device is removed:
> + * 1. All the IO links to this device need to be removed.
> + * 2. All nodes after the current device node need to move
> + *    up once this device node is removed from the topology
> + *    list. As a result, the proximity domain values for
> + *    all nodes after the node being deleted reduce by 1.
> + *    This would also cause the proximity domain values for
> + *    io links to be updated based on new proximity domain
> + *    values.
> + *
> + * Context: The called must hold write topology_lock.
> + */
> +static void kfd_topology_update_io_links(int proximity_domain)
> +{
> +	struct kfd_topology_device *dev;
> +	struct kfd_iolink_properties *iolink, *p2plink, *tmp;
> +
> +	list_for_each_entry(dev, &topology_device_list, list) {
> +		if (dev->proximity_domain > proximity_domain)
> +			dev->proximity_domain--;
> +
> +		list_for_each_entry_safe(iolink, tmp, &dev->io_link_props, list) {
> +			/*
> +			 * If there is an io link to the dev being deleted
> +			 * then remove that IO link also.
> +			 */
> +			if (iolink->node_to == proximity_domain) {
> +				list_del(&iolink->list);
> +				dev->io_link_count--;
> +				dev->node_props.io_links_count--;
> +			} else if (iolink->node_from > proximity_domain) {
> +				iolink->node_from--;
> +			} else if (iolink->node_to > proximity_domain) {
> +				iolink->node_to--;
> +			}
> +		}
> +
> +	}
> +}
> +
>   int kfd_topology_remove_device(struct kfd_dev *gpu)
>   {
>   	struct kfd_topology_device *dev, *tmp;
>   	uint32_t gpu_id;
>   	int res = -ENODEV;
> +	int i = 0;
>   
>   	down_write(&topology_lock);
>   
> -	list_for_each_entry_safe(dev, tmp, &topology_device_list, list)
> +	list_for_each_entry_safe(dev, tmp, &topology_device_list, list) {
>   		if (dev->gpu == gpu) {
>   			gpu_id = dev->gpu_id;
>   			kfd_remove_sysfs_node_entry(dev);
>   			kfd_release_topology_device(dev);
>   			sys_props.num_devices--;
> +			kfd_topology_update_io_links(i);
> +			topology_crat_proximity_domain = sys_props.num_devices-1;
> +			sys_props.generation_count++;
>   			res = 0;
>   			if (kfd_topology_update_sysfs() < 0)
>   				kfd_topology_release_sysfs();
>   			break;
>   		}
> +		i++;
> +	}
>   
>   	up_write(&topology_lock);
>   


More information about the amd-gfx mailing list