[PATCH v5 4/5] drm/amdgpu: add debugfs support for VM pagetable per client
Khatri, Sunil
sukhatri at amd.com
Thu Jun 26 14:11:47 UTC 2025
On 6/26/2025 5:48 PM, Christian König wrote:
> On 24.06.25 13:34, Sunil Khatri wrote:
>> Each drm node is associated with a unique client-id.
>> Create a directory for each drm-file in the dri root
>> directory. This directory is unique to hold information
>> related to a client id which is unique in the system
>> irrespective of how many drm devices are on the system.
>>
>> Adding root page table base address of the VM under
>> the client-id node along with the process information
>> in debugfs.
>>
>> Signed-off-by: Sunil Khatri <sunil.khatri at amd.com>
>> ---
>> drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 2 +-
>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 58 ++++++++++++++++++++++++-
>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 4 +-
>> 3 files changed, 61 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>> index d2ce7d86dbc8..aa912168fd68 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>> @@ -1395,7 +1395,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
>> if (r)
>> goto error_pasid;
>>
>> - r = amdgpu_vm_init(adev, &fpriv->vm, fpriv->xcp_id);
>> + r = amdgpu_vm_init(adev, &fpriv->vm, fpriv->xcp_id, file_priv);
>> if (r)
>> goto error_pasid;
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index 3911c78f8282..9e3dd187b597 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -2520,12 +2520,67 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm)
>> get_task_comm(vm->task_info->process_name, current->group_leader);
>> }
>>
>> +#if defined(CONFIG_DEBUG_FS)
>> +static int amdgpu_pt_info_read(struct seq_file *m, void *unused)
>> +{
>> + struct drm_file *file;
>> + struct amdgpu_fpriv *fpriv;
>> + struct pid *pid;
>> + struct task_struct *task;
>> + struct amdgpu_bo *root_bo;
>> + int r;
>> +
>> + file = (struct drm_file *)m->private;
>> + if (!file || !file->driver_priv)
>> + return -EINVAL;
> This here is racy. It can be that the debugfs file is opened and read in just the exact moment the client it belongs to is closed.'
I dont see drm_file ref_get being taken anywhere in our driver. It's
directly being used.
>
> You need to do something like this here:
>
> if (!file_ref_get(&file->file->f_ref))
> return -EINVAL;
>
> And then an fput(file->file) at the end of the function.
>
> It would probably best to have a helper for that in drm_debugfs.c since that needs to be done for all per client debugfs files.
In case you think we still need it then why not directly user
file_ref_get and fput itself. why need a new function all together for
one line.
>
>> +
>> + fpriv = file->driver_priv;
>> + if (!fpriv || !fpriv->vm.root.bo)
>> + return -ENODEV;
>> +
>> + root_bo = amdgpu_bo_ref(fpriv->vm.root.bo);
>> + r = amdgpu_bo_reserve(root_bo, true);
>> + if (r) {
>> + amdgpu_bo_unref(&root_bo);
>> + return 0;
>> + }
>> +
>> + rcu_read_lock();
>> + pid = rcu_dereference(file->pid);
>> + task = pid_task(pid, PIDTYPE_TGID);
>> +
>> + seq_printf(m, "pid: %d\n", task ? task->pid : 0);
>> + seq_printf(m, "comm: %s\n", task ? task->comm : "Unset");
> Thinking more about it, the pid and task name should probably be a different driver independent file, e.g. implemented in drm_debugfs.c
>
> Because that is something all drivers should be able to provide.
>
> We could potentially print the same line of information we print in the clints debugfs file.
We do exactly same thing in drm_debugfs in drm_file_err, and i am doing
exactly same thing here. If not, there in drm_debugfs i will have to
create one for debugfs file there for each client but doing it here for
both information in one point here. But i am open any if you think that
would be a better approach.
Regards
Sunil Khatri
>
> Regards,
> Christian.
>
>> + seq_printf(m, "pt_base: 0x%llx\n", amdgpu_bo_gpu_offset(fpriv->vm.root.bo));
>> +
>> + rcu_read_unlock();
>> + amdgpu_bo_unreserve(root_bo);
>> + amdgpu_bo_unref(&root_bo);
>> +
>> + return 0;
>> +}
>> +
>> +static int amdgpu_pt_info_open(struct inode *inode, struct file *file)
>> +{
>> + return single_open(file, amdgpu_pt_info_read, inode->i_private);
>> +}
>> +
>> +static const struct file_operations amdgpu_pt_info_fops = {
>> + .owner = THIS_MODULE,
>> + .open = amdgpu_pt_info_open,
>> + .read = seq_read,
>> + .llseek = seq_lseek,
>> + .release = single_release,
>> +};
>> +#endif
>> +
>> /**
>> * amdgpu_vm_init - initialize a vm instance
>> *
>> * @adev: amdgpu_device pointer
>> * @vm: requested vm
>> * @xcp_id: GPU partition selection id
>> + * @file: drm_file
>> *
>> * Init @vm fields.
>> *
>> @@ -2533,7 +2588,7 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm)
>> * 0 for success, error for failure.
>> */
>> int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>> - int32_t xcp_id)
>> + int32_t xcp_id, struct drm_file *file)
>> {
>> struct amdgpu_bo *root_bo;
>> struct amdgpu_bo_vm *root;
>> @@ -2609,6 +2664,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>> if (r)
>> DRM_DEBUG("Failed to create task info for VM\n");
>>
>> + debugfs_create_file("pt_info", 0444, file->debugfs_client, file, &amdgpu_pt_info_fops);
>> amdgpu_bo_unreserve(vm->root.bo);
>> amdgpu_bo_unref(&root_bo);
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> index f3ad687125ad..555afaf867c4 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>> @@ -487,7 +487,9 @@ int amdgpu_vm_set_pasid(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>> u32 pasid);
>>
>> long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout);
>> -int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp_id);
>> +int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp_id,
>> + struct drm_file *file);
>> +
>> int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm);
>> void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
>> int amdgpu_vm_lock_pd(struct amdgpu_vm *vm, struct drm_exec *exec,
More information about the dri-devel
mailing list