[PATCH V2 08/10] amdkfd: identify a secondary kfd process by its id
Felix Kuehling
felix.kuehling at amd.com
Fri Aug 1 16:05:24 UTC 2025
On 2025-08-01 4:55, Zhu Lingshan wrote:
> This commit introduces a new id field for
> struct kfd_process, which helps identify
> a kfd process among multiple contexts that
> all belong to a single user space program.
>
> The sysfs entry of a secondary kfd process
> is placed under the sysfs entry folder of
> its primary kfd process.
>
> The naming format of the sysfs entry of a secondary
> kfd process is "context_%u" where %u is the process id.
>
> Signed-off-by: Zhu Lingshan <lingshan.zhu at amd.com>
> ---
> drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 6 ++
> drivers/gpu/drm/amd/amdkfd/kfd_process.c | 83 +++++++++++++++++++++++-
> 2 files changed, 86 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index de701d72aa5c..a6e12c705734 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -995,6 +995,9 @@ struct kfd_process {
> /* Tracks debug per-vmid request for debug flags */
> u32 dbg_flags;
>
> + /* kfd process id */
> + u16 id;
> +
> atomic_t poison;
> /* Queues are in paused stated because we are in the process of doing a CRIU checkpoint */
> bool queues_paused;
> @@ -1009,6 +1012,9 @@ struct kfd_process {
>
> /* indicating whether this is a primary kfd_process */
> bool primary;
> +
> + /* The primary kfd_process allocating IDs for its secondary kfd_process, 0 for primary kfd_process */
> + struct ida id_table;
> };
>
> #define KFD_PROCESS_TABLE_SIZE 8 /* bits: 256 entries */
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> index 440fde75d1e4..117e524f4fb3 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> @@ -54,6 +54,9 @@ DEFINE_MUTEX(kfd_processes_mutex);
>
> DEFINE_SRCU(kfd_processes_srcu);
>
> +#define KFD_PROCESS_ID_MIN 1
> +#define KFD_PROCESS_ID_WIDTH 16
> +
> /* For process termination handling */
> static struct workqueue_struct *kfd_process_wq;
>
> @@ -827,6 +830,7 @@ static void kfd_process_device_destroy_ib_mem(struct kfd_process_device *pdd)
>
> int kfd_create_process_sysfs(struct kfd_process *process)
> {
> + struct kfd_process *primary_process;
> int ret;
>
> if (process->kobj) {
> @@ -839,9 +843,22 @@ int kfd_create_process_sysfs(struct kfd_process *process)
> pr_warn("Creating procfs kobject failed");
> return -ENOMEM;
> }
> - ret = kobject_init_and_add(process->kobj, &procfs_type,
> - procfs.kobj, "%d",
> - (int)process->lead_thread->pid);
> +
> + if (process->primary)
> + ret = kobject_init_and_add(process->kobj, &procfs_type,
> + procfs.kobj, "%d",
> + (int)process->lead_thread->pid);
> + else {
> + primary_process = kfd_lookup_process_by_mm(process->lead_thread->mm);
> + if (!primary_process)
> + return -ESRCH;
> +
> + ret = kobject_init_and_add(process->kobj, &procfs_type,
> + primary_process->kobj, "context_%u",
> + process->id);
> + kfd_unref_process(primary_process);
> + }
> +
> if (ret) {
> pr_warn("Creating procfs pid directory failed");
> kobject_put(process->kobj);
> @@ -863,6 +880,51 @@ int kfd_create_process_sysfs(struct kfd_process *process)
> return 0;
> }
>
> +static int kfd_process_alloc_id(struct kfd_process *process)
> +{
> + u16 ret;
> + struct kfd_process *primary_process;
> +
> + if (process->primary) {
> + process->id = 0;
> +
> + return 0;
> + }
> +
> + primary_process = kfd_lookup_process_by_mm(process->lead_thread->mm);
> + if (!primary_process)
> + return -ESRCH;
> +
> + ret = ida_alloc_range(&primary_process->id_table, KFD_PROCESS_ID_MIN,
> + (1 << KFD_PROCESS_ID_WIDTH) - 1, GFP_KERNEL);
> + if (ret < 0)
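ret is declared as u16, an unsigned type, so the check "ret < 0" can never be true: a negative error code from ida_alloc_range() would be truncated, stored in process->id as if it were a valid ID, and the function would then return 0. ida_alloc_range() returns int, so I'd recommend making ret an int as well.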
> + goto out;
> +
> + process->id = ret;
> + ret = 0;
> +
> +out:
> + kfd_unref_process(primary_process);
> +
> + return ret;
> +}
> +
> +static void kfd_process_free_id(struct kfd_process *process)
> +{
> + struct kfd_process *primary_process;
> +
> + if (process->primary)
> + return;
> +
> + primary_process = kfd_lookup_process_by_mm(process->lead_thread->mm);
> + if (!primary_process)
> + return;
> +
> + ida_free(&primary_process->id_table, process->id);
> +
> + kfd_unref_process(primary_process);
> +}
> +
> struct kfd_process *kfd_create_process(struct task_struct *thread)
> {
> struct kfd_process *process;
> @@ -1193,6 +1255,11 @@ static void kfd_process_wq_release(struct work_struct *work)
> if (ef)
> dma_fence_signal(ef);
>
> + if (!p->primary)
> + kfd_process_free_id(p);
> + else
> + ida_destroy(&p->id_table);
> +
> kfd_process_remove_sysfs(p);
> kfd_debugfs_remove_process(p);
>
> @@ -1549,6 +1616,12 @@ static struct kfd_process *create_process(const struct task_struct *thread, bool
> process->queues_paused = false;
> process->primary = primary;
>
> + err = kfd_process_alloc_id(process);
> + if (err) {
> + pr_err("Creating kfd process: failed to alloc an id\n");
> + goto err_alloc_id;
> + }
> +
> INIT_DELAYED_WORK(&process->eviction_work, evict_process_worker);
> INIT_DELAYED_WORK(&process->restore_work, restore_process_worker);
> process->last_restore_timestamp = get_jiffies_64();
> @@ -1599,6 +1672,8 @@ static struct kfd_process *create_process(const struct task_struct *thread, bool
> goto err_register_notifier;
> }
> BUG_ON(mn != &process->mmu_notifier);
> +
> + ida_init(&process->id_table);
You should only init this for the primary process since you're only calling ida_destroy on the primary process.
Regards,
Felix
> }
>
> kfd_unref_process(process);
> @@ -1619,6 +1694,8 @@ static struct kfd_process *create_process(const struct task_struct *thread, bool
> err_process_pqm_init:
> kfd_event_free_process(process);
> err_event_init:
> + kfd_process_free_id(process);
> +err_alloc_id:
> mutex_destroy(&process->mutex);
> kfree(process);
> err_alloc_process:
More information about the amd-gfx
mailing list