[PATCH V2 09/10] amdkfd: decommission kfd_get_process and remove DIQ support

Zhu, Lingshan lingshan.zhu at amd.com
Mon Aug 4 06:50:05 UTC 2025


On 8/2/2025 12:10 AM, Felix Kuehling wrote:

> On 2025-08-01 4:55, Zhu Lingshan wrote:
>> This commit decommissions the function kfd_get_process()
>> because it cannot locate a specific kfd process among
>> multiple contexts.
>>
>> This commit refactors the relevant code path accordingly:
>> - kfd_mmap: retrieve the kfd_process from filep->private_data
>> - kq_initialize: queue->process for HIQ should be set to NULL
>> because it does not belong to any kfd_process. DIQ support is
>> removed in this commit because it has been marked as
>> DEPRECATED since 2022 in commit 5bdd3eb2.
>>
>> This commit removes the test_kq() function because it has been
>> marked as unused since 2014 and no other function calls it.
> Please split this into 3 commits:
>
>  1. Change how kfd_mmap looks up the process
>  2. Remove DIQ support
>  3. Remove test_kq

Will do!

> See one more comment inline.
>
>
>> Signed-off-by: Zhu Lingshan <lingshan.zhu at amd.com>
>> ---
>>  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c      | 11 ++--
>>  .../drm/amd/amdkfd/kfd_device_queue_manager.c |  6 +-
>>  drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c | 60 ++-----------------
>>  .../drm/amd/amdkfd/kfd_packet_manager_v9.c    |  4 --
>>  .../drm/amd/amdkfd/kfd_packet_manager_vi.c    |  4 --
>>  drivers/gpu/drm/amd/amdkfd/kfd_priv.h         |  1 -
>>  drivers/gpu/drm/amd/amdkfd/kfd_process.c      | 18 ------
>>  .../amd/amdkfd/kfd_process_queue_manager.c    | 35 +----------
>>  8 files changed, 16 insertions(+), 123 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>> index 5b22e1c47b2e..9e95acd23889 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>> @@ -3408,16 +3408,19 @@ static int kfd_mmio_mmap(struct kfd_node *dev, struct kfd_process *process,
>>  }
>>  
>>  
>> -static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)
>> +static int kfd_mmap(struct file *filep, struct vm_area_struct *vma)
>>  {
>>  	struct kfd_process *process;
>>  	struct kfd_node *dev = NULL;
>>  	unsigned long mmap_offset;
>>  	unsigned int gpu_id;
>>  
>> -	process = kfd_get_process(current);
>> -	if (IS_ERR(process))
>> -		return PTR_ERR(process);
>> +	process = filep->private_data;
>> +	if (!process)
>> +		return -ESRCH;
>> +
>> +	if (process->lead_thread != current->group_leader)
>> +		return -EBADF;
>>  
>>  	mmap_offset = vma->vm_pgoff << PAGE_SHIFT;
>>  	gpu_id = KFD_MMAP_GET_GPU_ID(mmap_offset);
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
>> index 76359c6a3f3a..3f78e0bb2dae 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
>> @@ -399,8 +399,7 @@ static void increment_queue_count(struct device_queue_manager *dqm,
>>  				  struct queue *q)
>>  {
>>  	dqm->active_queue_count++;
>> -	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
>> -	    q->properties.type == KFD_QUEUE_TYPE_DIQ)
>> +	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
>>  		dqm->active_cp_queue_count++;
>>  
>>  	if (q->properties.is_gws) {
>> @@ -414,8 +413,7 @@ static void decrement_queue_count(struct device_queue_manager *dqm,
>>  				  struct queue *q)
>>  {
>>  	dqm->active_queue_count--;
>> -	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
>> -	    q->properties.type == KFD_QUEUE_TYPE_DIQ)
>> +	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
>>  		dqm->active_cp_queue_count--;
>>  
>>  	if (q->properties.is_gws) {
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
>> index 2b0a830f5b29..6aa8b0348bad 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
>> @@ -46,7 +46,7 @@ static bool kq_initialize(struct kernel_queue *kq, struct kfd_node *dev,
>>  	int retval;
>>  	union PM4_MES_TYPE_3_HEADER nop;
>>  
>> -	if (WARN_ON(type != KFD_QUEUE_TYPE_DIQ && type != KFD_QUEUE_TYPE_HIQ))
>> +	if (WARN_ON(type != KFD_QUEUE_TYPE_HIQ))
>>  		return false;
>>  
>>  	pr_debug("Initializing queue type %d size %d\n", KFD_QUEUE_TYPE_HIQ,
>> @@ -61,14 +61,9 @@ static bool kq_initialize(struct kernel_queue *kq, struct kfd_node *dev,
>>  
>>  	kq->dev = dev;
>>  	kq->nop_packet = nop.u32all;
>> -	switch (type) {
>> -	case KFD_QUEUE_TYPE_DIQ:
>> -		kq->mqd_mgr = dev->dqm->mqd_mgrs[KFD_MQD_TYPE_DIQ];
>> -		break;
>> -	case KFD_QUEUE_TYPE_HIQ:
>> +	if (type == KFD_QUEUE_TYPE_HIQ)
>>  		kq->mqd_mgr = dev->dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ];
>> -		break;
>> -	default:
>> +	else {
>>  		dev_err(dev->adev->dev, "Invalid queue type %d\n", type);
>>  		return false;
>>  	}
>> @@ -144,7 +139,8 @@ static bool kq_initialize(struct kernel_queue *kq, struct kfd_node *dev,
>>  		goto err_init_queue;
>>  
>>  	kq->queue->device = dev;
>> -	kq->queue->process = kfd_get_process(current);
>> +	if (type == KFD_QUEUE_TYPE_HIQ)
>> +		kq->queue->process = NULL;
>>  
>>  	kq->queue->mqd_mem_obj = kq->mqd_mgr->allocate_mqd(kq->mqd_mgr->dev,
>>  					&kq->queue->properties);
>> @@ -162,24 +158,11 @@ static bool kq_initialize(struct kernel_queue *kq, struct kfd_node *dev,
>>  		kq->mqd_mgr->load_mqd(kq->mqd_mgr, kq->queue->mqd,
>>  				kq->queue->pipe, kq->queue->queue,
>>  				&kq->queue->properties, NULL);
>> -	} else {
>> -		/* allocate fence for DIQ */
>> -
>> -		retval = kfd_gtt_sa_allocate(dev, sizeof(uint32_t),
>> -						&kq->fence_mem_obj);
>> -
>> -		if (retval != 0)
>> -			goto err_alloc_fence;
>> -
>> -		kq->fence_kernel_address = kq->fence_mem_obj->cpu_ptr;
>> -		kq->fence_gpu_addr = kq->fence_mem_obj->gpu_addr;
>>  	}
>>  
>>  	print_queue(kq->queue);
>>  
>>  	return true;
>> -err_alloc_fence:
>> -	kq->mqd_mgr->free_mqd(kq->mqd_mgr, kq->queue->mqd, kq->queue->mqd_mem_obj);
>>  err_allocate_mqd:
>>  	uninit_queue(kq->queue);
>>  err_init_queue:
>> @@ -209,8 +192,6 @@ static void kq_uninitialize(struct kernel_queue *kq)
>>  					kq->queue->queue);
>>  		up_read(&kq->dev->adev->reset_domain->sem);
>>  	}
>> -	else if (kq->queue->properties.type == KFD_QUEUE_TYPE_DIQ)
>> -		kfd_gtt_sa_free(kq->dev, kq->fence_mem_obj);
>>  
>>  	kq->mqd_mgr->free_mqd(kq->mqd_mgr, kq->queue->mqd,
>>  				kq->queue->mqd_mem_obj);
>> @@ -358,34 +339,3 @@ void kernel_queue_uninit(struct kernel_queue *kq)
>>  	kq_uninitialize(kq);
>>  	kfree(kq);
>>  }
>> -
>> -/* FIXME: Can this test be removed? */
>> -static __attribute__((unused)) void test_kq(struct kfd_node *dev)
>> -{
>> -	struct kernel_queue *kq;
>> -	uint32_t *buffer, i;
>> -	int retval;
>> -
>> -	dev_err(dev->adev->dev, "Starting kernel queue test\n");
>> -
>> -	kq = kernel_queue_init(dev, KFD_QUEUE_TYPE_HIQ);
>> -	if (unlikely(!kq)) {
>> -		dev_err(dev->adev->dev, "  Failed to initialize HIQ\n");
>> -		dev_err(dev->adev->dev, "Kernel queue test failed\n");
>> -		return;
>> -	}
>> -
>> -	retval = kq_acquire_packet_buffer(kq, 5, &buffer);
>> -	if (unlikely(retval != 0)) {
>> -		dev_err(dev->adev->dev, "  Failed to acquire packet buffer\n");
>> -		dev_err(dev->adev->dev, "Kernel queue test failed\n");
>> -		return;
>> -	}
>> -	for (i = 0; i < 5; i++)
>> -		buffer[i] = kq->nop_packet;
>> -	kq_submit_packet(kq);
>> -
>> -	dev_err(dev->adev->dev, "Ending kernel queue test\n");
>> -}
>> -
>> -
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
>> index 505036968a77..3d2375817c3e 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
>> @@ -252,10 +252,6 @@ static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer,
>>  			packet->bitfields2.queue_type =
>>  		queue_type__mes_map_queues__normal_latency_static_queue_vi;
>>  		break;
>> -	case KFD_QUEUE_TYPE_DIQ:
>> -		packet->bitfields2.queue_type =
>> -			queue_type__mes_map_queues__debug_interface_queue_vi;
>> -		break;
>>  	case KFD_QUEUE_TYPE_SDMA:
>>  	case KFD_QUEUE_TYPE_SDMA_XGMI:
>>  		if (q->properties.sdma_engine_id < 2 &&
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c
>> index a1de5d7e173a..60086e7cc258 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c
>> @@ -166,10 +166,6 @@ static int pm_map_queues_vi(struct packet_manager *pm, uint32_t *buffer,
>>  			packet->bitfields2.queue_type =
>>  		queue_type__mes_map_queues__normal_latency_static_queue_vi;
>>  		break;
>> -	case KFD_QUEUE_TYPE_DIQ:
>> -		packet->bitfields2.queue_type =
>> -			queue_type__mes_map_queues__debug_interface_queue_vi;
>> -		break;
>>  	case KFD_QUEUE_TYPE_SDMA:
>>  	case KFD_QUEUE_TYPE_SDMA_XGMI:
>>  		packet->bitfields2.engine_sel = q->properties.sdma_engine_id +
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> index a6e12c705734..67ebdaa9995f 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> @@ -1047,7 +1047,6 @@ void kfd_process_destroy_wq(void);
>>  void kfd_cleanup_processes(void);
>>  struct kfd_process *kfd_create_process(struct task_struct *thread);
>>  int kfd_create_process_sysfs(struct kfd_process *process);
>> -struct kfd_process *kfd_get_process(const struct task_struct *task);
>>  struct kfd_process *kfd_lookup_process_by_pasid(u32 pasid,
>>  						 struct kfd_process_device **pdd);
>>  struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm);
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>> index 117e524f4fb3..2d01356627ef 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>> @@ -989,24 +989,6 @@ struct kfd_process *kfd_create_process(struct task_struct *thread)
>>  	return process;
>>  }
>>  
>> -struct kfd_process *kfd_get_process(const struct task_struct *thread)
>> -{
>> -	struct kfd_process *process;
>> -
>> -	if (!thread->mm)
>> -		return ERR_PTR(-EINVAL);
>> -
>> -	/* Only the pthreads threading model is supported. */
>> -	if (thread->group_leader->mm != thread->mm)
>> -		return ERR_PTR(-EINVAL);
>> -
>> -	process = find_process(thread, false);
>> -	if (!process)
>> -		return ERR_PTR(-EINVAL);
>> -
>> -	return process;
>> -}
>> -
>>  static struct kfd_process *find_process_by_mm(const struct mm_struct *mm)
>>  {
>>  	struct kfd_process *process;
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
>> index c643e0ccec52..287ac5de838a 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
>> @@ -345,7 +345,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
>>  	 * If we are just about to create DIQ, the is_debug flag is not set yet
>>  	 * Hence we also check the type as well
>>  	 */
>> -	if ((pdd->qpd.is_debug) || (type == KFD_QUEUE_TYPE_DIQ))
>> +	if ((pdd->qpd.is_debug))
>>  		max_queues = dev->kfd->device_info.max_no_of_hqd/2;
>>  
>>  	if (pdd->qpd.queue_count >= max_queues)
>> @@ -426,22 +426,6 @@ int pqm_create_queue(struct process_queue_manager *pqm,
>>  						    restore_mqd, restore_ctl_stack);
>>  		print_queue(q);
>>  		break;
>> -	case KFD_QUEUE_TYPE_DIQ:
>> -		kq = kernel_queue_init(dev, KFD_QUEUE_TYPE_DIQ);
>> -		if (!kq) {
>> -			retval = -ENOMEM;
>> -			goto err_create_queue;
>> -		}
>> -		kq->queue->properties.queue_id = *qid;
>> -		pqn->kq = kq;
>> -		pqn->q = NULL;
>> -		retval = kfd_process_drain_interrupts(pdd);
>> -		if (retval)
>> -			break;
>> -
>> -		retval = dev->dqm->ops.create_kernel_queue(dev->dqm,
>> -							kq, &pdd->qpd);
>> -		break;
>>  	default:
>>  		WARN(1, "Invalid queue type %d", type);
>>  		retval = -EINVAL;
>> @@ -1128,24 +1112,9 @@ int pqm_debugfs_mqds(struct seq_file *m, void *data)
>>  			mqd_mgr = q->device->dqm->mqd_mgrs[mqd_type];
>>  			size = mqd_mgr->mqd_stride(mqd_mgr,
>>  							&q->properties);
>> -		} else if (pqn->kq) {
>> -			q = pqn->kq->queue;
>> -			mqd_mgr = pqn->kq->mqd_mgr;
>> -			switch (q->properties.type) {
>> -			case KFD_QUEUE_TYPE_DIQ:
>> -				seq_printf(m, "  DIQ on device %x\n",
>> -					   pqn->kq->dev->id);
>> -				break;
>> -			default:
>> -				seq_printf(m,
>> -				"  Bad kernel queue type %d on device %x\n",
>> -					   q->properties.type,
>> -					   pqn->kq->dev->id);
>> -				continue;
>> -			}
>>  		} else {
>>  			seq_printf(m,
>> -		"  Weird: Queue node with neither kernel nor user queue\n");
>> +			"  Weird: Queue node with neither kernel nor user queue\n");
> This message is no longer accurate, since this function no longer handles kernel queues at all.

I will improve this message and drop the mention of kernel queues.

Thanks
Lingshan

>
> Regards,
>   Felix
>
>
>>  			continue;
>>  		}
>>  
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/amd-gfx/attachments/20250804/e703d3f0/attachment-0001.htm>


More information about the amd-gfx mailing list