[PATCH v5 4/5] drm/amdgpu: add debugfs support for VM pagetable per client

Thu Jun 26 14:11:47 UTC 2025

On 6/26/2025 5:48 PM, Christian König wrote:
> On 24.06.25 13:34, Sunil Khatri wrote:
>> Each drm node is associated with a unique client-id.
>> Create a directory for each drm-file in the dri root
>> directory. This directory is unique to hold information
>> related to a client id which is unique in the system
>> irrespective of how many drm devices are on the system.
>>
>> Adding root page table base address of the VM under
>> the client-id node along with the process information
>> in debugfs.
>>
>> Signed-off-by: Sunil Khatri <sunil.khatri at amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c |  2 +-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 58 ++++++++++++++++++++++++-
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |  4 +-
>>   3 files changed, 61 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>> index d2ce7d86dbc8..aa912168fd68 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>> @@ -1395,7 +1395,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
>>   	if (r)
>>   		goto error_pasid;
>>   
>> -	r = amdgpu_vm_init(adev, &fpriv->vm, fpriv->xcp_id);
>> +	r = amdgpu_vm_init(adev, &fpriv->vm, fpriv->xcp_id, file_priv);
>>   	if (r)
>>   		goto error_pasid;
>>   
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index 3911c78f8282..9e3dd187b597 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -2520,12 +2520,67 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm)
>>   	get_task_comm(vm->task_info->process_name, current->group_leader);
>>   }
>>   
>> +#if defined(CONFIG_DEBUG_FS)
>> +static int amdgpu_pt_info_read(struct seq_file *m, void *unused)
>> +{
>> +	struct drm_file *file;
>> +	struct amdgpu_fpriv *fpriv;
>> +	struct pid *pid;
>> +	struct task_struct *task;
>> +	struct amdgpu_bo *root_bo;
>> +	int r;
>> +
>> +	file = (struct drm_file *)m->private;
>> +	if (!file || !file->driver_priv)
>> +		return -EINVAL;
> This here is racy. It can be that the debugfs file is opened and read in just the exact moment the client it belongs to is closed.'
I dont see drm_file ref_get being taken anywhere in our driver. It's 
directly being used.
>
> You need to do something like this here:
>
> if (!file_ref_get(&file->file->f_ref))
> 	return -EINVAL;
>
> And then an fput(file->file) at the end of the function.
>
> It would probably best to have a helper for that in drm_debugfs.c since that needs to be done for all per client debugfs files.
In case you think we still need it then why not directly user 
file_ref_get and fput itself. why need a new function all together for 
one line.
>
>> +
>> +	fpriv = file->driver_priv;
>> +	if (!fpriv || !fpriv->vm.root.bo)
>> +		return -ENODEV;
>> +
>> +	root_bo = amdgpu_bo_ref(fpriv->vm.root.bo);
>> +	r = amdgpu_bo_reserve(root_bo, true);
>> +	if (r) {
>> +		amdgpu_bo_unref(&root_bo);
>> +		return 0;
>> +	}
>> +
>> +	rcu_read_lock();
>> +	pid = rcu_dereference(file->pid);
>> +	task = pid_task(pid, PIDTYPE_TGID);
>> +
>> +	seq_printf(m, "pid: %d\n", task ? task->pid : 0);
>> +	seq_printf(m, "comm: %s\n", task ? task->comm : "Unset");
> Thinking more about it, the pid and task name should probably be a different driver independent file, e.g. implemented in drm_debugfs.c
>
> Because that is something all drivers should be able to provide.
>
> We could potentially print the same line of information we print in the clints debugfs file.

We do exactly same thing in drm_debugfs in drm_file_err, and i am doing 
exactly same thing here. If not, there in drm_debugfs i will have to 
create one for debugfs file there for each client but doing it here for 
both information in one point here. But i am open any if you think that 
would be a better approach.

Regards
Sunil Khatri

>
> Regards,
> Christian.
>
>> +	seq_printf(m, "pt_base: 0x%llx\n", amdgpu_bo_gpu_offset(fpriv->vm.root.bo));
>> +
>> +	rcu_read_unlock();
>> +	amdgpu_bo_unreserve(root_bo);
>> +	amdgpu_bo_unref(&root_bo);
>> +
>> +	return 0;
>> +}
>> +
>> +static int amdgpu_pt_info_open(struct inode *inode, struct file *file)
>> +{
>> +	return single_open(file, amdgpu_pt_info_read, inode->i_private);
>> +}
>> +
>> +static const struct file_operations amdgpu_pt_info_fops = {
>> +	.owner = THIS_MODULE,
>> +	.open = amdgpu_pt_info_open,
>> +	.read = seq_read,
>> +	.llseek = seq_lseek,
>> +	.release = single_release,
>> +};
>> +#endif
>> +
>>   /**
>>    * amdgpu_vm_init - initialize a vm instance
>>    *
>>    * @adev: amdgpu_device pointer
>>    * @vm: requested vm
>>    * @xcp_id: GPU partition selection id
>> + * @file: drm_file
>>    *
>>    * Init @vm fields.
>>    *
>> @@ -2533,7 +2588,7 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm)
>>    * 0 for success, error for failure.
>>    */
>>   int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>> -		   int32_t xcp_id)
>> +		   int32_t xcp_id, struct drm_file *file)
>>   {
>>   	struct amdgpu_bo *root_bo;
>>   	struct amdgpu_bo_vm *root;
>> @@ -2609,6 +2664,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>>   	if (r)
>>   		DRM_DEBUG("Failed to create task info for VM\n");
>>   
>> +	debugfs_create_file("pt_info", 0444, file->debugfs_client, file, &amdgpu_pt_info_fops);
>>   	amdgpu_bo_unreserve(vm->root.bo);
>>   	amdgpu_bo_unref(&root_bo);
>>   
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> index f3ad687125ad..555afaf867c4 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> @@ -487,7 +487,9 @@ int amdgpu_vm_set_pasid(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>>   			u32 pasid);
>>   
>>   long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout);
>> -int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp_id);
>> +int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp_id,
>> +		   struct drm_file *file);
>> +
>>   int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm);
>>   void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
>>   int amdgpu_vm_lock_pd(struct amdgpu_vm *vm, struct drm_exec *exec,