[PATCH V2 09/10] amdkfd: decommission kfd_get_process and remove DIQ support
Felix Kuehling
felix.kuehling at amd.com
Fri Aug 1 16:10:23 UTC 2025
On 2025-08-01 4:55, Zhu Lingshan wrote:
> This commit decommissions the function kfd_get_process()
> because it cannot locate a specific kfd process among
> multiple contexts.
>
> This commit refactors the relevant code path accordingly:
> - kfd_mmap: retrieve the kfd_process from filep->private_data
> - kq_initialize: queue->process for HIQ should be set to NULL
> because it does not belong to any kfd_process. DIQ has been
> decommissioned in this commit because it has been marked as
> DEPRECATED since 2022 in commit 5bdd3eb2
>
> This commit removes test_kq() function because it has been
> marked as unused since 2014 and no other function calls it.
Please split this into 3 commits:
1. Change how kfd_mmap looks up the process
2. Remove DIQ support
3. Remove test_kq
See one more comment inline.
>
> Signed-off-by: Zhu Lingshan <lingshan.zhu at amd.com>
> ---
> drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 11 ++--
> .../drm/amd/amdkfd/kfd_device_queue_manager.c | 6 +-
> drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c | 60 ++-----------------
> .../drm/amd/amdkfd/kfd_packet_manager_v9.c | 4 --
> .../drm/amd/amdkfd/kfd_packet_manager_vi.c | 4 --
> drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 1 -
> drivers/gpu/drm/amd/amdkfd/kfd_process.c | 18 ------
> .../amd/amdkfd/kfd_process_queue_manager.c | 35 +----------
> 8 files changed, 16 insertions(+), 123 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> index 5b22e1c47b2e..9e95acd23889 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> @@ -3408,16 +3408,19 @@ static int kfd_mmio_mmap(struct kfd_node *dev, struct kfd_process *process,
> }
>
>
> -static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)
> +static int kfd_mmap(struct file *filep, struct vm_area_struct *vma)
> {
> struct kfd_process *process;
> struct kfd_node *dev = NULL;
> unsigned long mmap_offset;
> unsigned int gpu_id;
>
> - process = kfd_get_process(current);
> - if (IS_ERR(process))
> - return PTR_ERR(process);
> + process = filep->private_data;
> + if (!process)
> + return -ESRCH;
> +
> + if (process->lead_thread != current->group_leader)
> + return -EBADF;
>
> mmap_offset = vma->vm_pgoff << PAGE_SHIFT;
> gpu_id = KFD_MMAP_GET_GPU_ID(mmap_offset);
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> index 76359c6a3f3a..3f78e0bb2dae 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> @@ -399,8 +399,7 @@ static void increment_queue_count(struct device_queue_manager *dqm,
> struct queue *q)
> {
> dqm->active_queue_count++;
> - if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
> - q->properties.type == KFD_QUEUE_TYPE_DIQ)
> + if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
> dqm->active_cp_queue_count++;
>
> if (q->properties.is_gws) {
> @@ -414,8 +413,7 @@ static void decrement_queue_count(struct device_queue_manager *dqm,
> struct queue *q)
> {
> dqm->active_queue_count--;
> - if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
> - q->properties.type == KFD_QUEUE_TYPE_DIQ)
> + if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
> dqm->active_cp_queue_count--;
>
> if (q->properties.is_gws) {
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
> index 2b0a830f5b29..6aa8b0348bad 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
> @@ -46,7 +46,7 @@ static bool kq_initialize(struct kernel_queue *kq, struct kfd_node *dev,
> int retval;
> union PM4_MES_TYPE_3_HEADER nop;
>
> - if (WARN_ON(type != KFD_QUEUE_TYPE_DIQ && type != KFD_QUEUE_TYPE_HIQ))
> + if (WARN_ON(type != KFD_QUEUE_TYPE_HIQ))
> return false;
>
> pr_debug("Initializing queue type %d size %d\n", KFD_QUEUE_TYPE_HIQ,
> @@ -61,14 +61,9 @@ static bool kq_initialize(struct kernel_queue *kq, struct kfd_node *dev,
>
> kq->dev = dev;
> kq->nop_packet = nop.u32all;
> - switch (type) {
> - case KFD_QUEUE_TYPE_DIQ:
> - kq->mqd_mgr = dev->dqm->mqd_mgrs[KFD_MQD_TYPE_DIQ];
> - break;
> - case KFD_QUEUE_TYPE_HIQ:
> + if (type == KFD_QUEUE_TYPE_HIQ)
> kq->mqd_mgr = dev->dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ];
> - break;
> - default:
> + else {
> dev_err(dev->adev->dev, "Invalid queue type %d\n", type);
> return false;
> }
> @@ -144,7 +139,8 @@ static bool kq_initialize(struct kernel_queue *kq, struct kfd_node *dev,
> goto err_init_queue;
>
> kq->queue->device = dev;
> - kq->queue->process = kfd_get_process(current);
> + if (type == KFD_QUEUE_TYPE_HIQ)
> + kq->queue->process = NULL;
>
> kq->queue->mqd_mem_obj = kq->mqd_mgr->allocate_mqd(kq->mqd_mgr->dev,
> &kq->queue->properties);
> @@ -162,24 +158,11 @@ static bool kq_initialize(struct kernel_queue *kq, struct kfd_node *dev,
> kq->mqd_mgr->load_mqd(kq->mqd_mgr, kq->queue->mqd,
> kq->queue->pipe, kq->queue->queue,
> &kq->queue->properties, NULL);
> - } else {
> - /* allocate fence for DIQ */
> -
> - retval = kfd_gtt_sa_allocate(dev, sizeof(uint32_t),
> - &kq->fence_mem_obj);
> -
> - if (retval != 0)
> - goto err_alloc_fence;
> -
> - kq->fence_kernel_address = kq->fence_mem_obj->cpu_ptr;
> - kq->fence_gpu_addr = kq->fence_mem_obj->gpu_addr;
> }
>
> print_queue(kq->queue);
>
> return true;
> -err_alloc_fence:
> - kq->mqd_mgr->free_mqd(kq->mqd_mgr, kq->queue->mqd, kq->queue->mqd_mem_obj);
> err_allocate_mqd:
> uninit_queue(kq->queue);
> err_init_queue:
> @@ -209,8 +192,6 @@ static void kq_uninitialize(struct kernel_queue *kq)
> kq->queue->queue);
> up_read(&kq->dev->adev->reset_domain->sem);
> }
> - else if (kq->queue->properties.type == KFD_QUEUE_TYPE_DIQ)
> - kfd_gtt_sa_free(kq->dev, kq->fence_mem_obj);
>
> kq->mqd_mgr->free_mqd(kq->mqd_mgr, kq->queue->mqd,
> kq->queue->mqd_mem_obj);
> @@ -358,34 +339,3 @@ void kernel_queue_uninit(struct kernel_queue *kq)
> kq_uninitialize(kq);
> kfree(kq);
> }
> -
> -/* FIXME: Can this test be removed? */
> -static __attribute__((unused)) void test_kq(struct kfd_node *dev)
> -{
> - struct kernel_queue *kq;
> - uint32_t *buffer, i;
> - int retval;
> -
> - dev_err(dev->adev->dev, "Starting kernel queue test\n");
> -
> - kq = kernel_queue_init(dev, KFD_QUEUE_TYPE_HIQ);
> - if (unlikely(!kq)) {
> - dev_err(dev->adev->dev, " Failed to initialize HIQ\n");
> - dev_err(dev->adev->dev, "Kernel queue test failed\n");
> - return;
> - }
> -
> - retval = kq_acquire_packet_buffer(kq, 5, &buffer);
> - if (unlikely(retval != 0)) {
> - dev_err(dev->adev->dev, " Failed to acquire packet buffer\n");
> - dev_err(dev->adev->dev, "Kernel queue test failed\n");
> - return;
> - }
> - for (i = 0; i < 5; i++)
> - buffer[i] = kq->nop_packet;
> - kq_submit_packet(kq);
> -
> - dev_err(dev->adev->dev, "Ending kernel queue test\n");
> -}
> -
> -
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
> index 505036968a77..3d2375817c3e 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
> @@ -252,10 +252,6 @@ static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer,
> packet->bitfields2.queue_type =
> queue_type__mes_map_queues__normal_latency_static_queue_vi;
> break;
> - case KFD_QUEUE_TYPE_DIQ:
> - packet->bitfields2.queue_type =
> - queue_type__mes_map_queues__debug_interface_queue_vi;
> - break;
> case KFD_QUEUE_TYPE_SDMA:
> case KFD_QUEUE_TYPE_SDMA_XGMI:
> if (q->properties.sdma_engine_id < 2 &&
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c
> index a1de5d7e173a..60086e7cc258 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c
> @@ -166,10 +166,6 @@ static int pm_map_queues_vi(struct packet_manager *pm, uint32_t *buffer,
> packet->bitfields2.queue_type =
> queue_type__mes_map_queues__normal_latency_static_queue_vi;
> break;
> - case KFD_QUEUE_TYPE_DIQ:
> - packet->bitfields2.queue_type =
> - queue_type__mes_map_queues__debug_interface_queue_vi;
> - break;
> case KFD_QUEUE_TYPE_SDMA:
> case KFD_QUEUE_TYPE_SDMA_XGMI:
> packet->bitfields2.engine_sel = q->properties.sdma_engine_id +
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index a6e12c705734..67ebdaa9995f 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -1047,7 +1047,6 @@ void kfd_process_destroy_wq(void);
> void kfd_cleanup_processes(void);
> struct kfd_process *kfd_create_process(struct task_struct *thread);
> int kfd_create_process_sysfs(struct kfd_process *process);
> -struct kfd_process *kfd_get_process(const struct task_struct *task);
> struct kfd_process *kfd_lookup_process_by_pasid(u32 pasid,
> struct kfd_process_device **pdd);
> struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm);
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> index 117e524f4fb3..2d01356627ef 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> @@ -989,24 +989,6 @@ struct kfd_process *kfd_create_process(struct task_struct *thread)
> return process;
> }
>
> -struct kfd_process *kfd_get_process(const struct task_struct *thread)
> -{
> - struct kfd_process *process;
> -
> - if (!thread->mm)
> - return ERR_PTR(-EINVAL);
> -
> - /* Only the pthreads threading model is supported. */
> - if (thread->group_leader->mm != thread->mm)
> - return ERR_PTR(-EINVAL);
> -
> - process = find_process(thread, false);
> - if (!process)
> - return ERR_PTR(-EINVAL);
> -
> - return process;
> -}
> -
> static struct kfd_process *find_process_by_mm(const struct mm_struct *mm)
> {
> struct kfd_process *process;
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
> index c643e0ccec52..287ac5de838a 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
> @@ -345,7 +345,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
> * If we are just about to create DIQ, the is_debug flag is not set yet
> * Hence we also check the type as well
> */
> - if ((pdd->qpd.is_debug) || (type == KFD_QUEUE_TYPE_DIQ))
> + if ((pdd->qpd.is_debug))
> max_queues = dev->kfd->device_info.max_no_of_hqd/2;
>
> if (pdd->qpd.queue_count >= max_queues)
> @@ -426,22 +426,6 @@ int pqm_create_queue(struct process_queue_manager *pqm,
> restore_mqd, restore_ctl_stack);
> print_queue(q);
> break;
> - case KFD_QUEUE_TYPE_DIQ:
> - kq = kernel_queue_init(dev, KFD_QUEUE_TYPE_DIQ);
> - if (!kq) {
> - retval = -ENOMEM;
> - goto err_create_queue;
> - }
> - kq->queue->properties.queue_id = *qid;
> - pqn->kq = kq;
> - pqn->q = NULL;
> - retval = kfd_process_drain_interrupts(pdd);
> - if (retval)
> - break;
> -
> - retval = dev->dqm->ops.create_kernel_queue(dev->dqm,
> - kq, &pdd->qpd);
> - break;
> default:
> WARN(1, "Invalid queue type %d", type);
> retval = -EINVAL;
> @@ -1128,24 +1112,9 @@ int pqm_debugfs_mqds(struct seq_file *m, void *data)
> mqd_mgr = q->device->dqm->mqd_mgrs[mqd_type];
> size = mqd_mgr->mqd_stride(mqd_mgr,
> &q->properties);
> - } else if (pqn->kq) {
> - q = pqn->kq->queue;
> - mqd_mgr = pqn->kq->mqd_mgr;
> - switch (q->properties.type) {
> - case KFD_QUEUE_TYPE_DIQ:
> - seq_printf(m, " DIQ on device %x\n",
> - pqn->kq->dev->id);
> - break;
> - default:
> - seq_printf(m,
> - " Bad kernel queue type %d on device %x\n",
> - q->properties.type,
> - pqn->kq->dev->id);
> - continue;
> - }
> } else {
> seq_printf(m,
> - " Weird: Queue node with neither kernel nor user queue\n");
> + " Weird: Queue node with neither kernel nor user queue\n");
This message is no longer accurate, since this function no longer handles kernel queues at all.
Regards,
Felix
> continue;
> }
>
More information about the amd-gfx
mailing list