<!DOCTYPE html><html><head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
</head>
<body>
<pre>On 8/2/2025 12:05 AM, Felix Kuehling wrote:</pre>
<blockquote type="cite" cite="mid:5324e3ed-514c-443e-835e-3741d102fae6@amd.com">
<pre wrap="" class="moz-quote-pre">
On 2025-08-01 4:55, Zhu Lingshan wrote:
</pre>
<blockquote type="cite">
<pre wrap="" class="moz-quote-pre">This commit introduces a new id field for
struct kfd process, which helps identify
a kfd process among multiple contexts that
all belong to a single user space program.
The sysfs entry of a secondary kfd process
is placed under the sysfs entry folder of
its primary kfd process.
The naming format of the sysfs entry of a secondary
kfd process is "context_%u" where %u is the process id.
Signed-off-by: Zhu Lingshan <a class="moz-txt-link-rfc2396E" href="mailto:lingshan.zhu@amd.com">&lt;lingshan.zhu@amd.com&gt;</a>
---
drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 6 ++
drivers/gpu/drm/amd/amdkfd/kfd_process.c | 83 +++++++++++++++++++++++-
2 files changed, 86 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index de701d72aa5c..a6e12c705734 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -995,6 +995,9 @@ struct kfd_process {
/* Tracks debug per-vmid request for debug flags */
u32 dbg_flags;
+ /* kfd process id */
+ u16 id;
+
atomic_t poison;
/* Queues are in paused stated because we are in the process of doing a CRIU checkpoint */
bool queues_paused;
@@ -1009,6 +1012,9 @@ struct kfd_process {
/* indicating whether this is a primary kfd_process */
bool primary;
+
+ /* The primary kfd_process allocating IDs for its secondary kfd_process, 0 for primary kfd_process */
+ struct ida id_table;
};
#define KFD_PROCESS_TABLE_SIZE 8 /* bits: 256 entries */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 440fde75d1e4..117e524f4fb3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -54,6 +54,9 @@ DEFINE_MUTEX(kfd_processes_mutex);
DEFINE_SRCU(kfd_processes_srcu);
+#define KFD_PROCESS_ID_MIN 1
+#define KFD_PROCESS_ID_WIDTH 16
+
/* For process termination handling */
static struct workqueue_struct *kfd_process_wq;
@@ -827,6 +830,7 @@ static void kfd_process_device_destroy_ib_mem(struct kfd_process_device *pdd)
int kfd_create_process_sysfs(struct kfd_process *process)
{
+ struct kfd_process *primary_process;
int ret;
if (process->kobj) {
@@ -839,9 +843,22 @@ int kfd_create_process_sysfs(struct kfd_process *process)
pr_warn("Creating procfs kobject failed");
return -ENOMEM;
}
- ret = kobject_init_and_add(process->kobj, &procfs_type,
- procfs.kobj, "%d",
- (int)process->lead_thread->pid);
+
+ if (process->primary)
+ ret = kobject_init_and_add(process->kobj, &procfs_type,
+ procfs.kobj, "%d",
+ (int)process->lead_thread->pid);
+ else {
+ primary_process = kfd_lookup_process_by_mm(process->lead_thread->mm);
+ if (!primary_process)
+ return -ESRCH;
+
+ ret = kobject_init_and_add(process->kobj, &procfs_type,
+ primary_process->kobj, "context_%u",
+ process->id);
+ kfd_unref_process(primary_process);
+ }
+
if (ret) {
pr_warn("Creating procfs pid directory failed");
kobject_put(process->kobj);
@@ -863,6 +880,51 @@ int kfd_create_process_sysfs(struct kfd_process *process)
return 0;
}
+static int kfd_process_alloc_id(struct kfd_process *process)
+{
+ u16 ret;
+ struct kfd_process *primary_process;
+
+ if (process->primary) {
+ process->id = 0;
+
+ return 0;
+ }
+
+ primary_process = kfd_lookup_process_by_mm(process->lead_thread->mm);
+ if (!primary_process)
+ return -ESRCH;
+
+ ret = ida_alloc_range(&primary_process->id_table, KFD_PROCESS_ID_MIN,
+ (1 << KFD_PROCESS_ID_WIDTH) - 1, GFP_KERNEL);
+ if (ret < 0)
</pre>
</blockquote>
<pre wrap="" class="moz-quote-pre">
ret is declared as u16, an unsigned type, so it can never be &lt; 0 and this error check never triggers. ida_alloc_range() returns int; I'd recommend making ret an int as well.</pre>
</blockquote>
<pre>Yes, will fix</pre>
<blockquote type="cite" cite="mid:5324e3ed-514c-443e-835e-3741d102fae6@amd.com">
<pre wrap="" class="moz-quote-pre">
</pre>
<blockquote type="cite">
<pre wrap="" class="moz-quote-pre">+ goto out;
+
+ process->id = ret;
+ ret = 0;
+
+out:
+ kfd_unref_process(primary_process);
+
+ return ret;
+}
+
+static void kfd_process_free_id(struct kfd_process *process)
+{
+ struct kfd_process *primary_process;
+
+ if (process->primary)
+ return;
+
+ primary_process = kfd_lookup_process_by_mm(process->lead_thread->mm);
+ if (!primary_process)
+ return;
+
+ ida_free(&primary_process->id_table, process->id);
+
+ kfd_unref_process(primary_process);
+}
+
struct kfd_process *kfd_create_process(struct task_struct *thread)
{
struct kfd_process *process;
@@ -1193,6 +1255,11 @@ static void kfd_process_wq_release(struct work_struct *work)
if (ef)
dma_fence_signal(ef);
+ if (!p->primary)
+ kfd_process_free_id(p);
+ else
+ ida_destroy(&p->id_table);
+
kfd_process_remove_sysfs(p);
kfd_debugfs_remove_process(p);
@@ -1549,6 +1616,12 @@ static struct kfd_process *create_process(const struct task_struct *thread, bool
process->queues_paused = false;
process->primary = primary;
+ err = kfd_process_alloc_id(process);
+ if (err) {
+ pr_err("Creating kfd process: failed to alloc an id\n");
+ goto err_alloc_id;
+ }
+
INIT_DELAYED_WORK(&process->eviction_work, evict_process_worker);
INIT_DELAYED_WORK(&process->restore_work, restore_process_worker);
process->last_restore_timestamp = get_jiffies_64();
@@ -1599,6 +1672,8 @@ static struct kfd_process *create_process(const struct task_struct *thread, bool
goto err_register_notifier;
}
BUG_ON(mn != &process->mmu_notifier);
+
+ ida_init(&process->id_table);
</pre>
</blockquote>
<pre wrap="" class="moz-quote-pre">
You should only init this for the primary process since you're only calling ida_destroy on the primary process.</pre>
</blockquote>
<pre>The code was truncated when the patch was generated.
We only init the id_table for the primary kfd_process;
the complete piece of code is:
if (primary) {
mn = mmu_notifier_get(&kfd_process_mmu_notifier_ops, process->mm);
if (IS_ERR(mn)) {
err = PTR_ERR(mn);
goto err_register_notifier;
}
BUG_ON(mn != &process->mmu_notifier);
ida_init(&process->id_table);
}
Thanks!
Lingshan</pre>
<blockquote type="cite" cite="mid:5324e3ed-514c-443e-835e-3741d102fae6@amd.com">
<pre wrap="" class="moz-quote-pre">
Regards,
Felix
</pre>
<blockquote type="cite">
<pre wrap="" class="moz-quote-pre"> }
kfd_unref_process(process);
@@ -1619,6 +1694,8 @@ static struct kfd_process *create_process(const struct task_struct *thread, bool
err_process_pqm_init:
kfd_event_free_process(process);
err_event_init:
+ kfd_process_free_id(process);
+err_alloc_id:
mutex_destroy(&process->mutex);
kfree(process);
err_alloc_process:
</pre>
</blockquote>
</blockquote>
</body>
</html>