[PATCH] drm/amdkfd: Release the topology_lock in error case
Felix Kuehling
felix.kuehling at amd.com
Mon Nov 21 14:06:48 UTC 2022
Am 2022-11-21 um 00:13 schrieb Ma Jun:
> From: Felix Kuehling <felix.kuehling at gmail.com>
>
> Move the topology-locked part of kfd_topology_add_device into a separate
> function to simlpify error handling and release the topology lock
> consistently.
>
> Reported-by: Dan Carpenter <error27 at gmail.com>
> Signed-off-by: Felix Kuehling <felix.kuehling at gmail.com>
> Signed-off-by: Ma Jun <Jun.Ma2 at amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling at amd.com>
> ---
> drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 120 ++++++++++++----------
> 1 file changed, 65 insertions(+), 55 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> index 8c555c32ea70..7d6fbfbfeb79 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
> @@ -1938,21 +1938,75 @@ static void kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct
> pr_debug("Added [%d] GPU cache entries\n", num_of_entries);
> }
>
> +static int kfd_topology_add_device_locked(struct kfd_dev *gpu, uint32_t gpu_id,
> + struct kfd_topology_device **dev)
> +{
> + int proximity_domain = ++topology_crat_proximity_domain;
> + struct list_head temp_topology_device_list;
> + void *crat_image = NULL;
> + size_t image_size = 0;
> + int res;
> +
> + res = kfd_create_crat_image_virtual(&crat_image, &image_size,
> + COMPUTE_UNIT_GPU, gpu,
> + proximity_domain);
> + if (res) {
> + pr_err("Error creating VCRAT for GPU (ID: 0x%x)\n",
> + gpu_id);
> + topology_crat_proximity_domain--;
> + goto err;
> + }
> +
> + INIT_LIST_HEAD(&temp_topology_device_list);
> +
> + res = kfd_parse_crat_table(crat_image,
> + &temp_topology_device_list,
> + proximity_domain);
> + if (res) {
> + pr_err("Error parsing VCRAT for GPU (ID: 0x%x)\n",
> + gpu_id);
> + topology_crat_proximity_domain--;
> + goto err;
> + }
> +
> + kfd_topology_update_device_list(&temp_topology_device_list,
> + &topology_device_list);
> +
> + *dev = kfd_assign_gpu(gpu);
> + if (WARN_ON(!*dev)) {
> + res = -ENODEV;
> + goto err;
> + }
> +
> + /* Fill the cache affinity information here for the GPUs
> + * using VCRAT
> + */
> + kfd_fill_cache_non_crat_info(*dev, gpu);
> +
> + /* Update the SYSFS tree, since we added another topology
> + * device
> + */
> + res = kfd_topology_update_sysfs();
> + if (!res)
> + sys_props.generation_count++;
> + else
> + pr_err("Failed to update GPU (ID: 0x%x) to sysfs topology. res=%d\n",
> + gpu_id, res);
> +
> +err:
> + kfd_destroy_crat_image(crat_image);
> + return res;
> +}
> +
> int kfd_topology_add_device(struct kfd_dev *gpu)
> {
> uint32_t gpu_id;
> struct kfd_topology_device *dev;
> struct kfd_cu_info cu_info;
> int res = 0;
> - struct list_head temp_topology_device_list;
> - void *crat_image = NULL;
> - size_t image_size = 0;
> - int proximity_domain;
> int i;
> const char *asic_name = amdgpu_asic_name[gpu->adev->asic_type];
>
> - INIT_LIST_HEAD(&temp_topology_device_list);
> -
> gpu_id = kfd_generate_gpu_id(gpu);
> pr_debug("Adding new GPU (ID: 0x%x) to topology\n", gpu_id);
>
> @@ -1964,54 +2018,11 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
> */
> down_write(&topology_lock);
> dev = kfd_assign_gpu(gpu);
> - if (!dev) {
> - proximity_domain = ++topology_crat_proximity_domain;
> -
> - res = kfd_create_crat_image_virtual(&crat_image, &image_size,
> - COMPUTE_UNIT_GPU, gpu,
> - proximity_domain);
> - if (res) {
> - pr_err("Error creating VCRAT for GPU (ID: 0x%x)\n",
> - gpu_id);
> - topology_crat_proximity_domain--;
> - return res;
> - }
> -
> - res = kfd_parse_crat_table(crat_image,
> - &temp_topology_device_list,
> - proximity_domain);
> - if (res) {
> - pr_err("Error parsing VCRAT for GPU (ID: 0x%x)\n",
> - gpu_id);
> - topology_crat_proximity_domain--;
> - goto err;
> - }
> -
> - kfd_topology_update_device_list(&temp_topology_device_list,
> - &topology_device_list);
> -
> - dev = kfd_assign_gpu(gpu);
> - if (WARN_ON(!dev)) {
> - res = -ENODEV;
> - goto err;
> - }
> -
> - /* Fill the cache affinity information here for the GPUs
> - * using VCRAT
> - */
> - kfd_fill_cache_non_crat_info(dev, gpu);
> -
> - /* Update the SYSFS tree, since we added another topology
> - * device
> - */
> - res = kfd_topology_update_sysfs();
> - if (!res)
> - sys_props.generation_count++;
> - else
> - pr_err("Failed to update GPU (ID: 0x%x) to sysfs topology. res=%d\n",
> - gpu_id, res);
> - }
> + if (!dev)
> + res = kfd_topology_add_device_locked(gpu, gpu_id, &dev);
> up_write(&topology_lock);
> + if (res)
> + return res;
>
> dev->gpu_id = gpu_id;
> gpu->id = gpu_id;
> @@ -2134,8 +2145,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
>
> if (!res)
> kfd_notify_gpu_change(gpu_id, 1);
> -err:
> - kfd_destroy_crat_image(crat_image);
> +
> return res;
> }
>
More information about the amd-gfx
mailing list