[PATCH V2 08/10] amdkfd: identify a secondary kfd process by its id

Zhu, Lingshan lingshan.zhu at amd.com
Mon Aug 4 06:13:13 UTC 2025


On 8/2/2025 12:05 AM, Felix Kuehling wrote:

> On 2025-08-01 4:55, Zhu Lingshan wrote:
>> This commit introduces a new id field for
>> struct kfd_process, which helps identify
>> a kfd process among multiple contexts that
>> all belong to a single user space program.
>>
>> The sysfs entry of a secondary kfd process
>> is placed under the sysfs entry folder of
>> its primary kfd process.
>>
>> The sysfs entry of a secondary kfd process is named
>> "context_%u", where %u is the kfd process id (the new id field, not the PID).
>>
>> Signed-off-by: Zhu Lingshan <lingshan.zhu at amd.com>
>> ---
>>  drivers/gpu/drm/amd/amdkfd/kfd_priv.h    |  6 ++
>>  drivers/gpu/drm/amd/amdkfd/kfd_process.c | 83 +++++++++++++++++++++++-
>>  2 files changed, 86 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> index de701d72aa5c..a6e12c705734 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> @@ -995,6 +995,9 @@ struct kfd_process {
>>  	/* Tracks debug per-vmid request for debug flags */
>>  	u32 dbg_flags;
>>  
>> +	/* kfd process id */
>> +	u16 id;
>> +
>>  	atomic_t poison;
>>  	/* Queues are in paused stated because we are in the process of doing a CRIU checkpoint */
>>  	bool queues_paused;
>> @@ -1009,6 +1012,9 @@ struct kfd_process {
>>  
>>  	/* indicating whether this is a primary kfd_process */
>>  	bool primary;
>> +
>> +	/* IDA the primary kfd_process uses to allocate IDs for its secondary kfd_processes; the primary's own id is 0 */
>> +	struct ida id_table;
>>  };
>>  
>>  #define KFD_PROCESS_TABLE_SIZE 8 /* bits: 256 entries */
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>> index 440fde75d1e4..117e524f4fb3 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>> @@ -54,6 +54,9 @@ DEFINE_MUTEX(kfd_processes_mutex);
>>  
>>  DEFINE_SRCU(kfd_processes_srcu);
>>  
>> +#define KFD_PROCESS_ID_MIN 1
>> +#define KFD_PROCESS_ID_WIDTH 16
>> +
>>  /* For process termination handling */
>>  static struct workqueue_struct *kfd_process_wq;
>>  
>> @@ -827,6 +830,7 @@ static void kfd_process_device_destroy_ib_mem(struct kfd_process_device *pdd)
>>  
>>  int kfd_create_process_sysfs(struct kfd_process *process)
>>  {
>> +	struct kfd_process *primary_process;
>>  	int ret;
>>  
>>  	if (process->kobj) {
>> @@ -839,9 +843,22 @@ int kfd_create_process_sysfs(struct kfd_process *process)
>>  		pr_warn("Creating procfs kobject failed");
>>  		return -ENOMEM;
>>  	}
>> -	ret = kobject_init_and_add(process->kobj, &procfs_type,
>> -				   procfs.kobj, "%d",
>> -				   (int)process->lead_thread->pid);
>> +
>> +	if (process->primary)
>> +		ret = kobject_init_and_add(process->kobj, &procfs_type,
>> +					   procfs.kobj, "%d",
>> +					   (int)process->lead_thread->pid);
>> +	else {
>> +		primary_process = kfd_lookup_process_by_mm(process->lead_thread->mm);
>> +		if (!primary_process)
>> +			return -ESRCH;
>> +
>> +		ret = kobject_init_and_add(process->kobj, &procfs_type,
>> +					   primary_process->kobj, "context_%u",
>> +					   process->id);
>> +		kfd_unref_process(primary_process);
>> +	}
>> +
>>  	if (ret) {
>>  		pr_warn("Creating procfs pid directory failed");
>>  		kobject_put(process->kobj);
>> @@ -863,6 +880,51 @@ int kfd_create_process_sysfs(struct kfd_process *process)
>>  	return 0;
>>  }
>>  
>> +static int kfd_process_alloc_id(struct kfd_process *process)
>> +{
>> +	u16 ret;
>> +	struct kfd_process *primary_process;
>> +
>> +	if (process->primary) {
>> +		process->id = 0;
>> +
>> +		return 0;
>> +	}
>> +
>> +	primary_process = kfd_lookup_process_by_mm(process->lead_thread->mm);
>> +	if (!primary_process)
>> +		return -ESRCH;
>> +
>> +	ret = ida_alloc_range(&primary_process->id_table, KFD_PROCESS_ID_MIN,
>> +	     (1 << KFD_PROCESS_ID_WIDTH) - 1, GFP_KERNEL);
>> +	if (ret < 0)
> ret is declared as u16, an unsigned type, so "ret < 0" can never be true and a failed allocation would go undetected. ida_alloc_range() returns an int; I'd recommend making ret an int as well.

Yes, will fix

>
>
>> +		goto out;
>> +
>> +	process->id = ret;
>> +	ret = 0;
>> +
>> +out:
>> +	kfd_unref_process(primary_process);
>> +
>> +	return ret;
>> +}
>> +
>> +static void kfd_process_free_id(struct kfd_process *process)
>> +{
>> +	struct kfd_process *primary_process;
>> +
>> +	if (process->primary)
>> +		return;
>> +
>> +	primary_process = kfd_lookup_process_by_mm(process->lead_thread->mm);
>> +	if (!primary_process)
>> +		return;
>> +
>> +	ida_free(&primary_process->id_table, process->id);
>> +
>> +	kfd_unref_process(primary_process);
>> +}
>> +
>>  struct kfd_process *kfd_create_process(struct task_struct *thread)
>>  {
>>  	struct kfd_process *process;
>> @@ -1193,6 +1255,11 @@ static void kfd_process_wq_release(struct work_struct *work)
>>  	if (ef)
>>  		dma_fence_signal(ef);
>>  
>> +	if (!p->primary)
>> +		kfd_process_free_id(p);
>> +	else
>> +		ida_destroy(&p->id_table);
>> +
>>  	kfd_process_remove_sysfs(p);
>>  	kfd_debugfs_remove_process(p);
>>  
>> @@ -1549,6 +1616,12 @@ static struct kfd_process *create_process(const struct task_struct *thread, bool
>>  	process->queues_paused = false;
>>  	process->primary = primary;
>>  
>> +	err = kfd_process_alloc_id(process);
>> +	if (err) {
>> +		pr_err("Creating kfd process: failed to alloc an id\n");
>> +		goto err_alloc_id;
>> +	}
>> +
>>  	INIT_DELAYED_WORK(&process->eviction_work, evict_process_worker);
>>  	INIT_DELAYED_WORK(&process->restore_work, restore_process_worker);
>>  	process->last_restore_timestamp = get_jiffies_64();
>> @@ -1599,6 +1672,8 @@ static struct kfd_process *create_process(const struct task_struct *thread, bool
>>  			goto err_register_notifier;
>>  		}
>>  		BUG_ON(mn != &process->mmu_notifier);
>> +
>> +		ida_init(&process->id_table);
> You should only init this for the primary process since you're only calling ida_destroy on the primary process.

The diff context was truncated when the patch was generated.
The ida_init() call sits inside the "if (primary)" block, so id_table
is only initialized for the primary kfd_process. The complete piece of code is:

        if (primary) {
                mn = mmu_notifier_get(&kfd_process_mmu_notifier_ops, process->mm);
                if (IS_ERR(mn)) {
                        err = PTR_ERR(mn);
                        goto err_register_notifier;
                }
                BUG_ON(mn != &process->mmu_notifier);

                ida_init(&process->id_table);
        }
 

Thanks!
Lingshan

>
> Regards,
>   Felix
>
>
>>  	}
>>  
>>  	kfd_unref_process(process);
>> @@ -1619,6 +1694,8 @@ static struct kfd_process *create_process(const struct task_struct *thread, bool
>>  err_process_pqm_init:
>>  	kfd_event_free_process(process);
>>  err_event_init:
>> +	kfd_process_free_id(process);
>> +err_alloc_id:
>>  	mutex_destroy(&process->mutex);
>>  	kfree(process);
>>  err_alloc_process:
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/amd-gfx/attachments/20250804/78271b66/attachment.htm>


More information about the amd-gfx mailing list