[PATCH v5 4/5] drm/amdgpu: add debugfs support for VM pagetable per client

Thu Jun 26 12:18:44 UTC 2025

On 24.06.25 13:34, Sunil Khatri wrote:
> Each drm node is associated with a unique client-id.
> Create a directory for each drm-file in the dri root
> directory. This directory is unique to hold information
> related to a client id which is unique in the system
> irrespective of how many drm devices are on the system.
> 
> Adding root page table base address of the VM under
> the client-id node along with the process information
> in debugfs.
> 
> Signed-off-by: Sunil Khatri <sunil.khatri at amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c |  2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 58 ++++++++++++++++++++++++-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |  4 +-
>  3 files changed, 61 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> index d2ce7d86dbc8..aa912168fd68 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> @@ -1395,7 +1395,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
>  	if (r)
>  		goto error_pasid;
>  
> -	r = amdgpu_vm_init(adev, &fpriv->vm, fpriv->xcp_id);
> +	r = amdgpu_vm_init(adev, &fpriv->vm, fpriv->xcp_id, file_priv);
>  	if (r)
>  		goto error_pasid;
>  
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 3911c78f8282..9e3dd187b597 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -2520,12 +2520,67 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm)
>  	get_task_comm(vm->task_info->process_name, current->group_leader);
>  }
>  
> +#if defined(CONFIG_DEBUG_FS)
> +static int amdgpu_pt_info_read(struct seq_file *m, void *unused)
> +{
> +	struct drm_file *file;
> +	struct amdgpu_fpriv *fpriv;
> +	struct pid *pid;
> +	struct task_struct *task;
> +	struct amdgpu_bo *root_bo;
> +	int r;
> +

> +	file = (struct drm_file *)m->private;
> +	if (!file || !file->driver_priv)
> +		return -EINVAL;

This is racy: the debugfs file can be opened and read at the exact moment the client it belongs to is being closed.

You need to do something like this here:

if (!file_ref_get(&file->file->f_ref))
	return -EINVAL;

And then an fput(file->file) at the end of the function.

It would probably be best to have a helper for that in drm_debugfs.c, since that needs to be done for all per-client debugfs files.

> +
> +	fpriv = file->driver_priv;
> +	if (!fpriv || !fpriv->vm.root.bo)
> +		return -ENODEV;
> +
> +	root_bo = amdgpu_bo_ref(fpriv->vm.root.bo);
> +	r = amdgpu_bo_reserve(root_bo, true);
> +	if (r) {
> +		amdgpu_bo_unref(&root_bo);
> +		return 0;
> +	}
> +
> +	rcu_read_lock();
> +	pid = rcu_dereference(file->pid);
> +	task = pid_task(pid, PIDTYPE_TGID);
> +
> +	seq_printf(m, "pid: %d\n", task ? task->pid : 0);
> +	seq_printf(m, "comm: %s\n", task ? task->comm : "Unset");

Thinking more about it, the pid and task name should probably go into a different, driver-independent file, e.g. implemented in drm_debugfs.c.

That is something all drivers should be able to provide.

We could potentially print the same line of information we print in the clients debugfs file.

Regards,
Christian.

> +	seq_printf(m, "pt_base: 0x%llx\n", amdgpu_bo_gpu_offset(fpriv->vm.root.bo));
> +
> +	rcu_read_unlock();
> +	amdgpu_bo_unreserve(root_bo);
> +	amdgpu_bo_unref(&root_bo);
> +
> +	return 0;
> +}
> +
> +static int amdgpu_pt_info_open(struct inode *inode, struct file *file)
> +{
> +	return single_open(file, amdgpu_pt_info_read, inode->i_private);
> +}
> +
> +static const struct file_operations amdgpu_pt_info_fops = {
> +	.owner = THIS_MODULE,
> +	.open = amdgpu_pt_info_open,
> +	.read = seq_read,
> +	.llseek = seq_lseek,
> +	.release = single_release,
> +};
> +#endif
> +
>  /**
>   * amdgpu_vm_init - initialize a vm instance
>   *
>   * @adev: amdgpu_device pointer
>   * @vm: requested vm
>   * @xcp_id: GPU partition selection id
> + * @file: drm_file
>   *
>   * Init @vm fields.
>   *
> @@ -2533,7 +2588,7 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm)
>   * 0 for success, error for failure.
>   */
>  int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> -		   int32_t xcp_id)
> +		   int32_t xcp_id, struct drm_file *file)
>  {
>  	struct amdgpu_bo *root_bo;
>  	struct amdgpu_bo_vm *root;
> @@ -2609,6 +2664,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>  	if (r)
>  		DRM_DEBUG("Failed to create task info for VM\n");
>  
> +	debugfs_create_file("pt_info", 0444, file->debugfs_client, file, &amdgpu_pt_info_fops);
>  	amdgpu_bo_unreserve(vm->root.bo);
>  	amdgpu_bo_unref(&root_bo);
>  
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index f3ad687125ad..555afaf867c4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -487,7 +487,9 @@ int amdgpu_vm_set_pasid(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>  			u32 pasid);
>  
>  long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout);
> -int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp_id);
> +int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp_id,
> +		   struct drm_file *file);
> +
>  int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm);
>  void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
>  int amdgpu_vm_lock_pd(struct amdgpu_vm *vm, struct drm_exec *exec,