<!DOCTYPE html><html><head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
  </head>
  <body>
    <pre>Thanks, Felix. All issues have been fixed, and I will send out V2 soon.

Thanks
Lingshan</pre>
    <div class="moz-cite-prefix">On 7/29/2025 3:48 AM, Felix Kuehling
      wrote:<br>
    </div>
    <blockquote type="cite" cite="mid:51264616-aca9-4b82-822e-7c8856b47629@amd.com">On
      2025-07-24 22:43, Zhu Lingshan wrote:
      <br>
      <blockquote type="cite">This commit introduces a new id field for
        <br>
        struct kfd process, which helps identify
        <br>
        a kfd process among multiple contexts that
        <br>
        all belong to a single user space program.
        <br>
        <br>
        The sysfs entry of a secondary kfd process
        <br>
        is placed under the sysfs entry folder of
        <br>
        its primary kfd process.
        <br>
        <br>
        The naming format of the sysfs entry of a secondary
        <br>
        kfd process is "context_%u" where %u is the process id.
        <br>
        <br>
        Signed-off-by: Zhu Lingshan <a class="moz-txt-link-rfc2396E" href="mailto:lingshan.zhu@amd.com"><lingshan.zhu@amd.com></a>
        <br>
        ---
        <br>
          drivers/gpu/drm/amd/amdkfd/kfd_priv.h    |  6 ++
        <br>
          drivers/gpu/drm/amd/amdkfd/kfd_process.c | 82
        +++++++++++++++++++++++-
        <br>
          2 files changed, 85 insertions(+), 3 deletions(-)
        <br>
        <br>
        diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
        b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
        <br>
        index de701d72aa5c..a6e12c705734 100644
        <br>
        --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
        <br>
        +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
        <br>
        @@ -995,6 +995,9 @@ struct kfd_process {
        <br>
              /* Tracks debug per-vmid request for debug flags */
        <br>
              u32 dbg_flags;
        <br>
          +    /* kfd process id */
        <br>
        +    u16 id;
        <br>
        +
        <br>
              atomic_t poison;
        <br>
              /* Queues are in paused stated because we are in the
        process of doing a CRIU checkpoint */
        <br>
              bool queues_paused;
        <br>
        @@ -1009,6 +1012,9 @@ struct kfd_process {
        <br>
                /* indicating whether this is a primary kfd_process */
        <br>
              bool primary;
        <br>
        +
        <br>
        +    /* The primary kfd_process allocating IDs for its secondary
        kfd_process, 0 for primary kfd_process */
        <br>
        +    struct ida id_table;
        <br>
          };
        <br>
            #define KFD_PROCESS_TABLE_SIZE 8 /* bits: 256 entries */
        <br>
        diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
        b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
        <br>
        index 440fde75d1e4..e1ba9015bb83 100644
        <br>
        --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
        <br>
        +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
        <br>
        @@ -54,6 +54,9 @@ DEFINE_MUTEX(kfd_processes_mutex);
        <br>
            DEFINE_SRCU(kfd_processes_srcu);
        <br>
          +#define KFD_PROCESS_ID_MIN 1
        <br>
        +#define KFD_PROCESS_ID_WIDTH 16
        <br>
        +
        <br>
          /* For process termination handling */
        <br>
          static struct workqueue_struct *kfd_process_wq;
        <br>
          @@ -827,6 +830,7 @@ static void
        kfd_process_device_destroy_ib_mem(struct kfd_process_device
        *pdd)
        <br>
            int kfd_create_process_sysfs(struct kfd_process *process)
        <br>
          {
        <br>
        +    struct kfd_process *primary_process;
        <br>
              int ret;
        <br>
                if (process->kobj) {
        <br>
        @@ -839,9 +843,22 @@ int kfd_create_process_sysfs(struct
        kfd_process *process)
        <br>
                  pr_warn("Creating procfs kobject failed");
        <br>
                  return -ENOMEM;
        <br>
              }
        <br>
        -    ret = kobject_init_and_add(process->kobj,
        &procfs_type,
        <br>
        -                   procfs.kobj, "%d",
        <br>
        -                   (int)process->lead_thread->pid);
        <br>
        +
        <br>
        +    if (process->primary)
        <br>
        +        ret = kobject_init_and_add(process->kobj,
        &procfs_type,
        <br>
        +                       procfs.kobj, "%d",
        <br>
        +                       (int)process->lead_thread->pid);
        <br>
        +    else {
        <br>
        +        primary_process =
        kfd_lookup_process_by_mm(process->lead_thread->mm);
        <br>
        +        if (!primary_process)
        <br>
        +            return -EFAULT;
        <br>
      </blockquote>
      <br>
      EFAULT means "Bad address". A better error code would be ESRCH "No
      such process".
      <br>
      <br>
      <br>
      <blockquote type="cite">+
        <br>
        +        ret = kobject_init_and_add(process->kobj,
        &procfs_type,
        <br>
        +                       primary_process->kobj, "context_%u",
        <br>
        +                       process->id);
        <br>
        +        kfd_unref_process(primary_process);
        <br>
        +    }
        <br>
        +
        <br>
              if (ret) {
        <br>
                  pr_warn("Creating procfs pid directory failed");
        <br>
                  kobject_put(process->kobj);
        <br>
        @@ -863,6 +880,51 @@ int kfd_create_process_sysfs(struct
        kfd_process *process)
        <br>
              return 0;
        <br>
          }
        <br>
          +static int kfd_process_alloc_id(struct kfd_process *process)
        <br>
        +{
        <br>
        +    u16 ret;
        <br>
        +    struct kfd_process *primary_process;
        <br>
        +
        <br>
        +    if (process->primary) {
        <br>
        +        process->id = 0;
        <br>
        +
        <br>
        +        return 0;
        <br>
        +    }
        <br>
        +
        <br>
        +    primary_process =
        kfd_lookup_process_by_mm(process->lead_thread->mm);
        <br>
        +    if (!primary_process)
        <br>
        +        return -EFAULT;
        <br>
      </blockquote>
      <br>
      ESRCH
      <br>
      <br>
      <br>
      <blockquote type="cite">+
        <br>
        +    ret = ida_alloc_range(&primary_process->id_table,
        KFD_PROCESS_ID_MIN,
        <br>
        +         1 << (KFD_PROCESS_ID_WIDTH - 1), GFP_KERNEL);
        <br>
      </blockquote>
      <br>
      Did you mean (1 << KFD_PROCESS_ID_WIDTH) - 1? That would
      give you the range from 1 to 0xffff, which is what I'd expect with
      16-bit wide ID.
      <br>
      <br>
      <br>
      <blockquote type="cite">+    if (ret < 0)
        <br>
        +        return ret;
        <br>
      </blockquote>
      <br>
      You're leaking a process reference here.
      <br>
      <br>
      <br>
      <blockquote type="cite">+
        <br>
        +    process->id = ret;
        <br>
        +
        <br>
        +    kfd_unref_process(primary_process);
        <br>
        +
        <br>
        +    return 0;
        <br>
        +}
        <br>
        +
        <br>
        +static void kfd_process_free_id(struct kfd_process *process)
        <br>
        +{
        <br>
        +    struct kfd_process *primary_process;
        <br>
        +
        <br>
        +    if (process->primary)
        <br>
        +        return;
        <br>
        +
        <br>
        +    primary_process =
        kfd_lookup_process_by_mm(process->lead_thread->mm);
        <br>
        +    if (!primary_process)
        <br>
        +        return;
        <br>
        +
        <br>
        +    ida_free(&primary_process->id_table,
        process->id);
        <br>
        +
        <br>
        +    kfd_unref_process(primary_process);
        <br>
        +
        <br>
        +    return;
        <br>
      </blockquote>
      <br>
      This return statement is unnecessary.
      <br>
      <br>
      <br>
      <blockquote type="cite">+}
        <br>
        +
        <br>
          struct kfd_process *kfd_create_process(struct task_struct
        *thread)
        <br>
          {
        <br>
              struct kfd_process *process;
        <br>
        @@ -1193,6 +1255,11 @@ static void kfd_process_wq_release(struct
        work_struct *work)
        <br>
              if (ef)
        <br>
                  dma_fence_signal(ef);
        <br>
          +    if (!p->primary)
        <br>
        +        kfd_process_free_id(p);
        <br>
        +    else
        <br>
        +        ida_destroy(&p->id_table);
        <br>
        +
        <br>
              kfd_process_remove_sysfs(p);
        <br>
              kfd_debugfs_remove_process(p);
        <br>
          @@ -1549,6 +1616,12 @@ static struct kfd_process
        *create_process(const struct task_struct *thread, bool
        <br>
              process->queues_paused = false;
        <br>
              process->primary = primary;
        <br>
          +    err = kfd_process_alloc_id(process);
        <br>
        +    if (err) {
        <br>
        +        pr_err("Creating kfd process: failed to alloc an
        id\n");
        <br>
        +        goto err_alloc_process;
        <br>
      </blockquote>
      <br>
      That's the wrong label for cleanup. You'd end up leaking the
      process structure. You need to create a new label. See below.
      <br>
      <br>
      <br>
      <blockquote type="cite">+    }
        <br>
        +
        <br>
              INIT_DELAYED_WORK(&process->eviction_work,
        evict_process_worker);
        <br>
              INIT_DELAYED_WORK(&process->restore_work,
        restore_process_worker);
        <br>
              process->last_restore_timestamp = get_jiffies_64();
        <br>
        @@ -1599,6 +1672,8 @@ static struct kfd_process
        *create_process(const struct task_struct *thread, bool
        <br>
                      goto err_register_notifier;
        <br>
                  }
        <br>
                  BUG_ON(mn != &process->mmu_notifier);
        <br>
        +
        <br>
        +        ida_init(&process->id_table);
        <br>
              }
        <br>
                kfd_unref_process(process);
        <br>
        @@ -1619,6 +1694,7 @@ static struct kfd_process
        *create_process(const struct task_struct *thread, bool
        <br>
          err_process_pqm_init:
        <br>
              kfd_event_free_process(process);
        <br>
          err_event_init:
        <br>
        +    kfd_process_free_id(process);
        <br>
      </blockquote>
      <br>
      You should add a new label here
      <br>
      <br>
      err_alloc_id:
      <br>
      <br>
      Regards,
      <br>
        Felix
      <br>
      <br>
      <br>
      <blockquote type="cite">     
        mutex_destroy(&process->mutex);
        <br>
              kfree(process);
        <br>
          err_alloc_process:
        <br>
      </blockquote>
    </blockquote>
  </body>
</html>