[PATCH V2 08/10] amdkfd: identify a secondary kfd process by its id

Felix Kuehling felix.kuehling at amd.com
Fri Aug 1 16:05:24 UTC 2025


On 2025-08-01 4:55, Zhu Lingshan wrote:
> This commit introduces a new id field for
> struct kfd process, which helps identify
> a kfd process among multiple contexts that
> all belong to a single user space program.
>
> The sysfs entry of a secondary kfd process
> is placed under the sysfs entry folder of
> its primary kfd process.
>
> The naming format of the sysfs entry of a secondary
> kfd process is "context_%u" where %u is the process id.
>
> Signed-off-by: Zhu Lingshan <lingshan.zhu at amd.com>
> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_priv.h    |  6 ++
>  drivers/gpu/drm/amd/amdkfd/kfd_process.c | 83 +++++++++++++++++++++++-
>  2 files changed, 86 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index de701d72aa5c..a6e12c705734 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -995,6 +995,9 @@ struct kfd_process {
>  	/* Tracks debug per-vmid request for debug flags */
>  	u32 dbg_flags;
>  
> +	/* kfd process id */
> +	u16 id;
> +
>  	atomic_t poison;
>  	/* Queues are in paused stated because we are in the process of doing a CRIU checkpoint */
>  	bool queues_paused;
> @@ -1009,6 +1012,9 @@ struct kfd_process {
>  
>  	/* indicating whether this is a primary kfd_process */
>  	bool primary;
> +
> +	/* The primary kfd_process allocating IDs for its secondary kfd_process, 0 for primary kfd_process */
> +	struct ida id_table;
>  };
>  
>  #define KFD_PROCESS_TABLE_SIZE 8 /* bits: 256 entries */
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> index 440fde75d1e4..117e524f4fb3 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> @@ -54,6 +54,9 @@ DEFINE_MUTEX(kfd_processes_mutex);
>  
>  DEFINE_SRCU(kfd_processes_srcu);
>  
> +#define KFD_PROCESS_ID_MIN 1
> +#define KFD_PROCESS_ID_WIDTH 16
> +
>  /* For process termination handling */
>  static struct workqueue_struct *kfd_process_wq;
>  
> @@ -827,6 +830,7 @@ static void kfd_process_device_destroy_ib_mem(struct kfd_process_device *pdd)
>  
>  int kfd_create_process_sysfs(struct kfd_process *process)
>  {
> +	struct kfd_process *primary_process;
>  	int ret;
>  
>  	if (process->kobj) {
> @@ -839,9 +843,22 @@ int kfd_create_process_sysfs(struct kfd_process *process)
>  		pr_warn("Creating procfs kobject failed");
>  		return -ENOMEM;
>  	}
> -	ret = kobject_init_and_add(process->kobj, &procfs_type,
> -				   procfs.kobj, "%d",
> -				   (int)process->lead_thread->pid);
> +
> +	if (process->primary)
> +		ret = kobject_init_and_add(process->kobj, &procfs_type,
> +					   procfs.kobj, "%d",
> +					   (int)process->lead_thread->pid);
> +	else {
> +		primary_process = kfd_lookup_process_by_mm(process->lead_thread->mm);
> +		if (!primary_process)
> +			return -ESRCH;
> +
> +		ret = kobject_init_and_add(process->kobj, &procfs_type,
> +					   primary_process->kobj, "context_%u",
> +					   process->id);
> +		kfd_unref_process(primary_process);
> +	}
> +
>  	if (ret) {
>  		pr_warn("Creating procfs pid directory failed");
>  		kobject_put(process->kobj);
> @@ -863,6 +880,51 @@ int kfd_create_process_sysfs(struct kfd_process *process)
>  	return 0;
>  }
>  
> +static int kfd_process_alloc_id(struct kfd_process *process)
> +{
> +	u16 ret;
> +	struct kfd_process *primary_process;
> +
> +	if (process->primary) {
> +		process->id = 0;
> +
> +		return 0;
> +	}
> +
> +	primary_process = kfd_lookup_process_by_mm(process->lead_thread->mm);
> +	if (!primary_process)
> +		return -ESRCH;
> +
> +	ret = ida_alloc_range(&primary_process->id_table, KFD_PROCESS_ID_MIN,
> +	     (1 << KFD_PROCESS_ID_WIDTH) - 1, GFP_KERNEL);
> +	if (ret < 0)

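ret is declared as u16, an unsigned type, so the check "ret < 0" can never be true and a failed allocation would go undetected. ida_alloc_range() returns an int (a negative error code on failure), so I'd recommend making ret an int as well.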


> +		goto out;
> +
> +	process->id = ret;
> +	ret = 0;
> +
> +out:
> +	kfd_unref_process(primary_process);
> +
> +	return ret;
> +}
> +
> +static void kfd_process_free_id(struct kfd_process *process)
> +{
> +	struct kfd_process *primary_process;
> +
> +	if (process->primary)
> +		return;
> +
> +	primary_process = kfd_lookup_process_by_mm(process->lead_thread->mm);
> +	if (!primary_process)
> +		return;
> +
> +	ida_free(&primary_process->id_table, process->id);
> +
> +	kfd_unref_process(primary_process);
> +}
> +
>  struct kfd_process *kfd_create_process(struct task_struct *thread)
>  {
>  	struct kfd_process *process;
> @@ -1193,6 +1255,11 @@ static void kfd_process_wq_release(struct work_struct *work)
>  	if (ef)
>  		dma_fence_signal(ef);
>  
> +	if (!p->primary)
> +		kfd_process_free_id(p);
> +	else
> +		ida_destroy(&p->id_table);
> +
>  	kfd_process_remove_sysfs(p);
>  	kfd_debugfs_remove_process(p);
>  
> @@ -1549,6 +1616,12 @@ static struct kfd_process *create_process(const struct task_struct *thread, bool
>  	process->queues_paused = false;
>  	process->primary = primary;
>  
> +	err = kfd_process_alloc_id(process);
> +	if (err) {
> +		pr_err("Creating kfd process: failed to alloc an id\n");
> +		goto err_alloc_id;
> +	}
> +
>  	INIT_DELAYED_WORK(&process->eviction_work, evict_process_worker);
>  	INIT_DELAYED_WORK(&process->restore_work, restore_process_worker);
>  	process->last_restore_timestamp = get_jiffies_64();
> @@ -1599,6 +1672,8 @@ static struct kfd_process *create_process(const struct task_struct *thread, bool
>  			goto err_register_notifier;
>  		}
>  		BUG_ON(mn != &process->mmu_notifier);
> +
> +		ida_init(&process->id_table);

You should only call ida_init() on id_table for the primary process, since ida_destroy() is only called for the primary process (see kfd_process_wq_release).

Regards,
  Felix


>  	}
>  
>  	kfd_unref_process(process);
> @@ -1619,6 +1694,8 @@ static struct kfd_process *create_process(const struct task_struct *thread, bool
>  err_process_pqm_init:
>  	kfd_event_free_process(process);
>  err_event_init:
> +	kfd_process_free_id(process);
> +err_alloc_id:
>  	mutex_destroy(&process->mutex);
>  	kfree(process);
>  err_alloc_process:


More information about the amd-gfx mailing list