[PATCH V2 09/10] amdkfd: decommission kfd_get_process and remove DIQ support
Zhu, Lingshan
lingshan.zhu at amd.com
Mon Aug 4 06:50:05 UTC 2025
On 8/2/2025 12:10 AM, Felix Kuehling wrote:
> On 2025-08-01 4:55, Zhu Lingshan wrote:
>> This commit decommissions the function kfd_get_process()
>> because it cannot locate a specific kfd process among
>> multiple contexts.
>>
>> This commit refactors the relevant code path accordingly:
>> - kmmap: retrieve the kfd_process from filep->private_data
>> - kq_initialize: queue->process for HIQ should be set to NULL
>> because it does not belong to any kfd_process. DIQ has been
>> decommissioned in this commit because it has been marked as
>> DEPRECATED since 2022 in commit 5bdd3eb2
>>
>> This commit removes the test_kq() function because it has been
>> marked as unused since 2014 and no other function calls it.
> Please split this into 3 commits:
>
> 1. Change how kfd_mmap looks up the process
> 2. Remove DIQ support
> 3. Remove test_kq
Will do!
> See one more comment inline.
>
>
>> Signed-off-by: Zhu Lingshan <lingshan.zhu at amd.com>
>> ---
>> drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 11 ++--
>> .../drm/amd/amdkfd/kfd_device_queue_manager.c | 6 +-
>> drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c | 60 ++-----------------
>> .../drm/amd/amdkfd/kfd_packet_manager_v9.c | 4 --
>> .../drm/amd/amdkfd/kfd_packet_manager_vi.c | 4 --
>> drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 1 -
>> drivers/gpu/drm/amd/amdkfd/kfd_process.c | 18 ------
>> .../amd/amdkfd/kfd_process_queue_manager.c | 35 +----------
>> 8 files changed, 16 insertions(+), 123 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>> index 5b22e1c47b2e..9e95acd23889 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>> @@ -3408,16 +3408,19 @@ static int kfd_mmio_mmap(struct kfd_node *dev, struct kfd_process *process,
>> }
>>
>>
>> -static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)
>> +static int kfd_mmap(struct file *filep, struct vm_area_struct *vma)
>> {
>> struct kfd_process *process;
>> struct kfd_node *dev = NULL;
>> unsigned long mmap_offset;
>> unsigned int gpu_id;
>>
>> - process = kfd_get_process(current);
>> - if (IS_ERR(process))
>> - return PTR_ERR(process);
>> + process = filep->private_data;
>> + if (!process)
>> + return -ESRCH;
>> +
>> + if (process->lead_thread != current->group_leader)
>> + return -EBADF;
>>
>> mmap_offset = vma->vm_pgoff << PAGE_SHIFT;
>> gpu_id = KFD_MMAP_GET_GPU_ID(mmap_offset);
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
>> index 76359c6a3f3a..3f78e0bb2dae 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
>> @@ -399,8 +399,7 @@ static void increment_queue_count(struct device_queue_manager *dqm,
>> struct queue *q)
>> {
>> dqm->active_queue_count++;
>> - if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
>> - q->properties.type == KFD_QUEUE_TYPE_DIQ)
>> + if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
>> dqm->active_cp_queue_count++;
>>
>> if (q->properties.is_gws) {
>> @@ -414,8 +413,7 @@ static void decrement_queue_count(struct device_queue_manager *dqm,
>> struct queue *q)
>> {
>> dqm->active_queue_count--;
>> - if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
>> - q->properties.type == KFD_QUEUE_TYPE_DIQ)
>> + if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
>> dqm->active_cp_queue_count--;
>>
>> if (q->properties.is_gws) {
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
>> index 2b0a830f5b29..6aa8b0348bad 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
>> @@ -46,7 +46,7 @@ static bool kq_initialize(struct kernel_queue *kq, struct kfd_node *dev,
>> int retval;
>> union PM4_MES_TYPE_3_HEADER nop;
>>
>> - if (WARN_ON(type != KFD_QUEUE_TYPE_DIQ && type != KFD_QUEUE_TYPE_HIQ))
>> + if (WARN_ON(type != KFD_QUEUE_TYPE_HIQ))
>> return false;
>>
>> pr_debug("Initializing queue type %d size %d\n", KFD_QUEUE_TYPE_HIQ,
>> @@ -61,14 +61,9 @@ static bool kq_initialize(struct kernel_queue *kq, struct kfd_node *dev,
>>
>> kq->dev = dev;
>> kq->nop_packet = nop.u32all;
>> - switch (type) {
>> - case KFD_QUEUE_TYPE_DIQ:
>> - kq->mqd_mgr = dev->dqm->mqd_mgrs[KFD_MQD_TYPE_DIQ];
>> - break;
>> - case KFD_QUEUE_TYPE_HIQ:
>> + if (type == KFD_QUEUE_TYPE_HIQ)
>> kq->mqd_mgr = dev->dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ];
>> - break;
>> - default:
>> + else {
>> dev_err(dev->adev->dev, "Invalid queue type %d\n", type);
>> return false;
>> }
>> @@ -144,7 +139,8 @@ static bool kq_initialize(struct kernel_queue *kq, struct kfd_node *dev,
>> goto err_init_queue;
>>
>> kq->queue->device = dev;
>> - kq->queue->process = kfd_get_process(current);
>> + if (type == KFD_QUEUE_TYPE_HIQ)
>> + kq->queue->process = NULL;
>>
>> kq->queue->mqd_mem_obj = kq->mqd_mgr->allocate_mqd(kq->mqd_mgr->dev,
>> &kq->queue->properties);
>> @@ -162,24 +158,11 @@ static bool kq_initialize(struct kernel_queue *kq, struct kfd_node *dev,
>> kq->mqd_mgr->load_mqd(kq->mqd_mgr, kq->queue->mqd,
>> kq->queue->pipe, kq->queue->queue,
>> &kq->queue->properties, NULL);
>> - } else {
>> - /* allocate fence for DIQ */
>> -
>> - retval = kfd_gtt_sa_allocate(dev, sizeof(uint32_t),
>> - &kq->fence_mem_obj);
>> -
>> - if (retval != 0)
>> - goto err_alloc_fence;
>> -
>> - kq->fence_kernel_address = kq->fence_mem_obj->cpu_ptr;
>> - kq->fence_gpu_addr = kq->fence_mem_obj->gpu_addr;
>> }
>>
>> print_queue(kq->queue);
>>
>> return true;
>> -err_alloc_fence:
>> - kq->mqd_mgr->free_mqd(kq->mqd_mgr, kq->queue->mqd, kq->queue->mqd_mem_obj);
>> err_allocate_mqd:
>> uninit_queue(kq->queue);
>> err_init_queue:
>> @@ -209,8 +192,6 @@ static void kq_uninitialize(struct kernel_queue *kq)
>> kq->queue->queue);
>> up_read(&kq->dev->adev->reset_domain->sem);
>> }
>> - else if (kq->queue->properties.type == KFD_QUEUE_TYPE_DIQ)
>> - kfd_gtt_sa_free(kq->dev, kq->fence_mem_obj);
>>
>> kq->mqd_mgr->free_mqd(kq->mqd_mgr, kq->queue->mqd,
>> kq->queue->mqd_mem_obj);
>> @@ -358,34 +339,3 @@ void kernel_queue_uninit(struct kernel_queue *kq)
>> kq_uninitialize(kq);
>> kfree(kq);
>> }
>> -
>> -/* FIXME: Can this test be removed? */
>> -static __attribute__((unused)) void test_kq(struct kfd_node *dev)
>> -{
>> - struct kernel_queue *kq;
>> - uint32_t *buffer, i;
>> - int retval;
>> -
>> - dev_err(dev->adev->dev, "Starting kernel queue test\n");
>> -
>> - kq = kernel_queue_init(dev, KFD_QUEUE_TYPE_HIQ);
>> - if (unlikely(!kq)) {
>> - dev_err(dev->adev->dev, " Failed to initialize HIQ\n");
>> - dev_err(dev->adev->dev, "Kernel queue test failed\n");
>> - return;
>> - }
>> -
>> - retval = kq_acquire_packet_buffer(kq, 5, &buffer);
>> - if (unlikely(retval != 0)) {
>> - dev_err(dev->adev->dev, " Failed to acquire packet buffer\n");
>> - dev_err(dev->adev->dev, "Kernel queue test failed\n");
>> - return;
>> - }
>> - for (i = 0; i < 5; i++)
>> - buffer[i] = kq->nop_packet;
>> - kq_submit_packet(kq);
>> -
>> - dev_err(dev->adev->dev, "Ending kernel queue test\n");
>> -}
>> -
>> -
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
>> index 505036968a77..3d2375817c3e 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
>> @@ -252,10 +252,6 @@ static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer,
>> packet->bitfields2.queue_type =
>> queue_type__mes_map_queues__normal_latency_static_queue_vi;
>> break;
>> - case KFD_QUEUE_TYPE_DIQ:
>> - packet->bitfields2.queue_type =
>> - queue_type__mes_map_queues__debug_interface_queue_vi;
>> - break;
>> case KFD_QUEUE_TYPE_SDMA:
>> case KFD_QUEUE_TYPE_SDMA_XGMI:
>> if (q->properties.sdma_engine_id < 2 &&
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c
>> index a1de5d7e173a..60086e7cc258 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c
>> @@ -166,10 +166,6 @@ static int pm_map_queues_vi(struct packet_manager *pm, uint32_t *buffer,
>> packet->bitfields2.queue_type =
>> queue_type__mes_map_queues__normal_latency_static_queue_vi;
>> break;
>> - case KFD_QUEUE_TYPE_DIQ:
>> - packet->bitfields2.queue_type =
>> - queue_type__mes_map_queues__debug_interface_queue_vi;
>> - break;
>> case KFD_QUEUE_TYPE_SDMA:
>> case KFD_QUEUE_TYPE_SDMA_XGMI:
>> packet->bitfields2.engine_sel = q->properties.sdma_engine_id +
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> index a6e12c705734..67ebdaa9995f 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> @@ -1047,7 +1047,6 @@ void kfd_process_destroy_wq(void);
>> void kfd_cleanup_processes(void);
>> struct kfd_process *kfd_create_process(struct task_struct *thread);
>> int kfd_create_process_sysfs(struct kfd_process *process);
>> -struct kfd_process *kfd_get_process(const struct task_struct *task);
>> struct kfd_process *kfd_lookup_process_by_pasid(u32 pasid,
>> struct kfd_process_device **pdd);
>> struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm);
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>> index 117e524f4fb3..2d01356627ef 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>> @@ -989,24 +989,6 @@ struct kfd_process *kfd_create_process(struct task_struct *thread)
>> return process;
>> }
>>
>> -struct kfd_process *kfd_get_process(const struct task_struct *thread)
>> -{
>> - struct kfd_process *process;
>> -
>> - if (!thread->mm)
>> - return ERR_PTR(-EINVAL);
>> -
>> - /* Only the pthreads threading model is supported. */
>> - if (thread->group_leader->mm != thread->mm)
>> - return ERR_PTR(-EINVAL);
>> -
>> - process = find_process(thread, false);
>> - if (!process)
>> - return ERR_PTR(-EINVAL);
>> -
>> - return process;
>> -}
>> -
>> static struct kfd_process *find_process_by_mm(const struct mm_struct *mm)
>> {
>> struct kfd_process *process;
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
>> index c643e0ccec52..287ac5de838a 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
>> @@ -345,7 +345,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
>> * If we are just about to create DIQ, the is_debug flag is not set yet
>> * Hence we also check the type as well
>> */
>> - if ((pdd->qpd.is_debug) || (type == KFD_QUEUE_TYPE_DIQ))
>> + if ((pdd->qpd.is_debug))
>> max_queues = dev->kfd->device_info.max_no_of_hqd/2;
>>
>> if (pdd->qpd.queue_count >= max_queues)
>> @@ -426,22 +426,6 @@ int pqm_create_queue(struct process_queue_manager *pqm,
>> restore_mqd, restore_ctl_stack);
>> print_queue(q);
>> break;
>> - case KFD_QUEUE_TYPE_DIQ:
>> - kq = kernel_queue_init(dev, KFD_QUEUE_TYPE_DIQ);
>> - if (!kq) {
>> - retval = -ENOMEM;
>> - goto err_create_queue;
>> - }
>> - kq->queue->properties.queue_id = *qid;
>> - pqn->kq = kq;
>> - pqn->q = NULL;
>> - retval = kfd_process_drain_interrupts(pdd);
>> - if (retval)
>> - break;
>> -
>> - retval = dev->dqm->ops.create_kernel_queue(dev->dqm,
>> - kq, &pdd->qpd);
>> - break;
>> default:
>> WARN(1, "Invalid queue type %d", type);
>> retval = -EINVAL;
>> @@ -1128,24 +1112,9 @@ int pqm_debugfs_mqds(struct seq_file *m, void *data)
>> mqd_mgr = q->device->dqm->mqd_mgrs[mqd_type];
>> size = mqd_mgr->mqd_stride(mqd_mgr,
>> &q->properties);
>> - } else if (pqn->kq) {
>> - q = pqn->kq->queue;
>> - mqd_mgr = pqn->kq->mqd_mgr;
>> - switch (q->properties.type) {
>> - case KFD_QUEUE_TYPE_DIQ:
>> - seq_printf(m, " DIQ on device %x\n",
>> - pqn->kq->dev->id);
>> - break;
>> - default:
>> - seq_printf(m,
>> - " Bad kernel queue type %d on device %x\n",
>> - q->properties.type,
>> - pqn->kq->dev->id);
>> - continue;
>> - }
>> } else {
>> seq_printf(m,
>> - " Weird: Queue node with neither kernel nor user queue\n");
>> + " Weird: Queue node with neither kernel nor user queue\n");
> This message is no longer accurate, since this function no longer handles kernel queues at all.
I will improve this message and drop the mention of kernel queues.
Thanks
Lingshan
>
> Regards,
> Felix
>
>
>> continue;
>> }
>>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/amd-gfx/attachments/20250804/e703d3f0/attachment-0001.htm>
More information about the amd-gfx
mailing list