[PATCH 5/6] drm/amdgpu: Implement userqueue signal/wait IOCTL functions
Arunpravin Paneer Selvam
arunpravin.paneerselvam at amd.com
Mon Feb 27 13:20:21 UTC 2023
Hi Christian,
On 2/27/2023 6:29 PM, Christian König wrote:
> Am 26.02.23 um 17:54 schrieb Arunpravin Paneer Selvam:
>> This patch introduces new IOCTLs for the userqueue secure semaphore.
>>
>> For the signal IOCTL, the userspace application creates a drm
>> syncobj and an array of BO GEM handles and passes them in as
>> parameters; the driver installs the queue fence into them.
>>
>> The wait IOCTL takes an array of drm syncobjs, finds the fences
>> attached to them and builds an array of memory_address/fence_value
>> combinations which is returned to userspace.
>>
>> v2: Addressed review comments from Christian with the following
>> modifications:
>>
>> - Install the fence into the GEM BO object.
>> - Lock all BOs using the dma resv subsystem.
>> - Reorder the sequence in the signal IOCTL function.
>> - Get the write pointer from the shadow wptr.
>> - Use userq_fence to fetch the va/value in the wait IOCTL.
>>
>> Signed-off-by: Arunpravin Paneer Selvam
>> <Arunpravin.PaneerSelvam at amd.com>
>> ---
>> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 +
>> drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 +
>> .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.c | 258 ++++++++++++++++++
>> .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.h | 6 +
>> drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 1 +
>> 5 files changed, 270 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> index 1c3eba2d0390..255d73795493 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> @@ -964,6 +964,8 @@ struct amdgpu_device {
>> struct amdgpu_mes mes;
>> struct amdgpu_mqd mqds[AMDGPU_HW_IP_NUM];
>> + struct amdgpu_userq_mgr *userq_mgr;
>> +
>> /* df */
>> struct amdgpu_df df;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> index 6b7ac1ebd04c..66a7304fabe3 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> @@ -2752,6 +2752,9 @@ const struct drm_ioctl_desc amdgpu_ioctls_kms[]
>> = {
>> DRM_IOCTL_DEF_DRV(AMDGPU_GEM_USERPTR, amdgpu_gem_userptr_ioctl,
>> DRM_AUTH|DRM_RENDER_ALLOW),
>> DRM_IOCTL_DEF_DRV(AMDGPU_USERQ, amdgpu_userq_ioctl,
>> DRM_AUTH|DRM_RENDER_ALLOW),
>> DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_DOORBELL_RING,
>> amdgpu_userq_doorbell_ring_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
>> + DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_SIGNAL, amdgpu_userq_signal_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
>> + DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_WAIT, amdgpu_userq_wait_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
>> +
>> };
>> static const struct drm_driver amdgpu_kms_driver = {
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
>> index 609a7328e9a6..26fd1d4f758a 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
>> @@ -249,3 +249,261 @@ static const struct dma_fence_ops
>> amdgpu_userq_fence_ops = {
>> .signaled = amdgpu_userq_fence_signaled,
>> .release = amdgpu_userq_fence_release,
>> };
>> +
>> +static int amdgpu_userq_fence_read_wptr(struct drm_file *filp,
>> + struct amdgpu_usermode_queue *queue,
>> + u64 *wptr)
>> +{
>> + struct amdgpu_fpriv *fpriv = filp->driver_priv;
>> + struct amdgpu_bo_va_mapping *mapping;
>> + struct amdgpu_vm *vm = &fpriv->vm;
>> + struct amdgpu_bo *bo;
>> + u64 *ptr;
>> + int r;
>> +
>> + mapping = amdgpu_vm_bo_lookup_mapping(vm, queue->wptr_gpu_addr >> PAGE_SHIFT);
>> + if (!mapping)
>> + return -EINVAL;
>> +
>> + bo = mapping->bo_va->base.bo;
>> + r = amdgpu_bo_kmap(bo, (void **)&ptr);
>
> Oh, that's not something you can do that easily.
>
> The BO must be reserved (locked) first if you want to call
> amdgpu_bo_kmap() on it.
Sure, I will take care of it.
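Roughly what I have in mind for v3 (a minimal sketch, assuming
amdgpu_bo_reserve()/amdgpu_bo_kunmap()/amdgpu_bo_unreserve() are the
right helpers here):

    /* take the BO reservation before mapping it */
    r = amdgpu_bo_reserve(bo, true);
    if (r) {
        DRM_ERROR("Failed to reserve the userqueue wptr bo");
        return r;
    }

    r = amdgpu_bo_kmap(bo, (void **)&ptr);
    if (r) {
        amdgpu_bo_unreserve(bo);
        DRM_ERROR("Failed mapping the userqueue wptr bo");
        return r;
    }

    *wptr = le64_to_cpu(*ptr);

    /* drop the mapping and the reservation again */
    amdgpu_bo_kunmap(bo);
    amdgpu_bo_unreserve(bo);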
>
>> + if (r) {
>> + DRM_ERROR("Failed mapping the userqueue wptr bo");
>> + return r;
>> + }
>> +
>> + *wptr = le64_to_cpu(*ptr);
>> +
>> + return 0;
>> +}
>> +
>> +int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
>> + struct drm_file *filp)
>> +{
>> + struct drm_amdgpu_userq_signal *args = data;
>> + struct amdgpu_device *adev = drm_to_adev(dev);
>> + struct amdgpu_userq_mgr *userq_mgr = adev->userq_mgr;
>> + struct amdgpu_usermode_queue *queue;
>> + struct drm_syncobj *syncobj = NULL;
>> + struct drm_gem_object **gobj;
>> + u64 num_bo_handles, wptr;
>> + struct dma_fence *fence;
>> + u32 *bo_handles;
>> + bool shared;
>> + int r, i;
>> +
>> + /* Retrieve the user queue */
>> + queue = idr_find(&userq_mgr->userq_idr, args->queue_id);
>> + if (!queue)
>> + return -ENOENT;
>> +
>> + r = amdgpu_userq_fence_read_wptr(filp, queue, &wptr);
>> + if (r)
>> + return -EINVAL;
>> +
>> + /* Find Syncobj if any */
>> + syncobj = drm_syncobj_find(filp, args->handle);
>> +
>> + /* Array of bo handles */
>> + num_bo_handles = args->num_bo_handles;
>> + bo_handles = kmalloc_array(num_bo_handles, sizeof(*bo_handles),
>> GFP_KERNEL);
>> + if (bo_handles == NULL)
>> + return -ENOMEM;
>> +
>> + if (copy_from_user(bo_handles,
>> u64_to_user_ptr(args->bo_handles_array),
>> + sizeof(u32) * num_bo_handles)) {
>> + r = -EFAULT;
>> + goto err_free_handles;
>> + }
>> +
>> + /* Array of GEM object handles */
>> + gobj = kmalloc_array(num_bo_handles, sizeof(*gobj), GFP_KERNEL);
>> + if (gobj == NULL) {
>> + r = -ENOMEM;
>> + goto err_free_handles;
>> + }
>> +
>> + for (i = 0; i < num_bo_handles; i++) {
>> + /* Retrieve GEM object */
>> + gobj[i] = drm_gem_object_lookup(filp, bo_handles[i]);
>> + if (!gobj[i]) {
>> + r = -ENOENT;
>> + goto err_put_gobj;
>> + }
>> +
>> + dma_resv_lock(gobj[i]->resv, NULL);
>> + r = dma_resv_reserve_fences(gobj[i]->resv, 1);
I am reserving a fence slot for each BO here; should we move this down to where the fence is actually added?
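i.e. roughly like this (just a sketch of the reordered flow, ignoring the
error unwinding; using drm_gem_lock_reservations() here is only my
suggestion for taking all the BO locks at once instead of the per-BO
lock/unlock above):

    struct ww_acquire_ctx ticket;

    /* lock all BO reservations together */
    r = drm_gem_lock_reservations(gobj, num_bo_handles, &ticket);
    if (r)
        goto err_put_gobj;

    /* reserve a fence slot on every BO while they are all locked */
    for (i = 0; i < num_bo_handles; i++) {
        r = dma_resv_reserve_fences(gobj[i]->resv, 1);
        if (r)
            goto err_unlock_all;
    }

    r = amdgpu_userq_fence_create(queue, wptr, &fence);
    if (r)
        goto err_unlock_all;

    if (syncobj)
        drm_syncobj_replace_fence(syncobj, fence);

    for (i = 0; i < num_bo_handles; i++)
        dma_resv_add_fence(gobj[i]->resv, fence, shared ?
                           DMA_RESV_USAGE_READ : DMA_RESV_USAGE_WRITE);

err_unlock_all:
    drm_gem_unlock_reservations(gobj, num_bo_handles, &ticket);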
Thanks,
Arun
>> + if (r) {
>> + dma_resv_unlock(gobj[i]->resv);
>> + goto err_put_gobj;
>> + }
>> + dma_resv_unlock(gobj[i]->resv);
>> + }
>> +
>> + /* Create a new fence */
>> + r = amdgpu_userq_fence_create(queue, wptr, &fence);
>> + if (!fence)
>> + goto err_put_gobj;
>> +
>> + /* Add the created fence to syncobj/BO's */
>> + if (syncobj)
>> + drm_syncobj_replace_fence(syncobj, fence);
>> +
>> + shared = args->bo_flags & AMDGPU_USERQ_BO_READ;
>> + for (i = 0; i < num_bo_handles; i++) {
>> + dma_resv_lock(gobj[i]->resv, NULL);
>> + dma_resv_add_fence(gobj[i]->resv, fence, shared ?
>> + DMA_RESV_USAGE_READ :
>> + DMA_RESV_USAGE_WRITE);
>> + dma_resv_unlock(gobj[i]->resv);
>> + }
>
> That will still not work correctly. You've forgotten to call
> dma_resv_reserve_fences().
>
> The tricky part is that you need to do this for all BOs at the same time.
>
> I will work on my drm_exec() patch set once more. That one should make
> this job much easier.
>
> Similar applies to the _wait function, but let's get _signal working
> first.
>
> Regards,
> Christian.
>
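Got it, I will rework this once drm_exec is available. For reference, I
would expect the locking part to roughly collapse into something like the
sketch below (function names taken from the in-flight drm_exec series,
so they may still change):

    struct drm_exec exec;

    drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
    drm_exec_until_all_locked(&exec) {
        for (i = 0; i < num_bo_handles; i++) {
            /* locks the BO and reserves one fence slot on it */
            r = drm_exec_prepare_obj(&exec, gobj[i], 1);
            drm_exec_retry_on_contention(&exec);
            if (r)
                goto err_fini;
        }
    }

    /* create the fence and dma_resv_add_fence() on every BO while
     * all of them are still locked ... */

err_fini:
    drm_exec_fini(&exec);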
>> +
>> + for (i = 0; i < num_bo_handles; i++)
>> + drm_gem_object_put(gobj[i]);
>> +
>> + dma_fence_put(fence);
>> +
>> + /* Free all handles */
>> + kfree(bo_handles);
>> + kfree(gobj);
>> +
>> + return 0;
>> +
>> +err_put_gobj:
>> + while (i-- > 0)
>> + drm_gem_object_put(gobj[i]);
>> + kfree(gobj);
>> +err_free_handles:
>> + kfree(bo_handles);
>> +
>> + return r;
>> +}
>> +
>> +int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
>> + struct drm_file *filp)
>> +{
>> + struct drm_amdgpu_userq_fence_info *fence_info = NULL;
>> + struct drm_amdgpu_userq_wait *args = data;
>> + struct amdgpu_userq_fence *userq_fence;
>> + u32 *syncobj_handles, *bo_handles;
>> + u64 num_syncobj, num_bo_handles;
>> + struct drm_gem_object **gobj;
>> + struct dma_fence *fence;
>> + bool wait_flag;
>> + int r, i, tmp;
>> +
>> + /* Array of Syncobj handles */
>> + num_syncobj = args->count_handles;
>> + syncobj_handles = kmalloc_array(num_syncobj,
>> sizeof(*syncobj_handles), GFP_KERNEL);
>> + if (!syncobj_handles)
>> + return -ENOMEM;
>> +
>> + if (copy_from_user(syncobj_handles, u64_to_user_ptr(args->handles),
>> + sizeof(u32) * num_syncobj)) {
>> + r = -EFAULT;
>> + goto err_free_syncobj_handles;
>> + }
>> +
>> + /* Array of bo handles */
>> + num_bo_handles = args->num_bo_handles;
>> + bo_handles = kmalloc_array(num_bo_handles, sizeof(*bo_handles),
>> GFP_KERNEL);
>> + if (!bo_handles)
>> + goto err_free_syncobj_handles;
>> +
>> + if (copy_from_user(bo_handles,
>> u64_to_user_ptr(args->bo_handles_array),
>> + sizeof(u32) * num_bo_handles)) {
>> + r = -EFAULT;
>> + goto err_free_bo_handles;
>> + }
>> +
>> + /* Array of fence gpu address */
>> + fence_info = kmalloc_array(num_syncobj + num_bo_handles,
>> sizeof(*fence_info), GFP_KERNEL);
>> + if (!fence_info) {
>> + r = -ENOMEM;
>> + goto err_free_bo_handles;
>> + }
>> +
>> + /* Retrieve syncobj's fence */
>> + for (i = 0; i < num_syncobj; i++) {
>> + r = drm_syncobj_find_fence(filp, syncobj_handles[i], 0, 0,
>> &fence);
>> + if (r) {
>> + DRM_ERROR("syncobj %u failed to find the fence!\n",
>> syncobj_handles[i]);
>> + r = -ENOENT;
>> + goto err_free_fence_info;
>> + }
>> +
>> + /* Store drm syncobj's gpu va address and value */
>> + userq_fence = to_amdgpu_userq_fence(fence);
>> + fence_info[i].va = userq_fence->fence_drv->gpu_addr;
>> + fence_info[i].value = fence->seqno;
>> + dma_fence_put(fence);
>> + }
>> +
>> + tmp = i;
>> +
>> + /* Array of GEM object handles */
>> + gobj = kmalloc_array(num_bo_handles, sizeof(*gobj), GFP_KERNEL);
>> + if (gobj == NULL) {
>> + r = -ENOMEM;
>> + goto err_free_fence_info;
>> + }
>> +
>> + /* Retrieve GEM objects's fence */
>> + wait_flag = args->bo_wait_flags & AMDGPU_USERQ_BO_READ;
>> + for (i = 0; i < num_bo_handles; i++, tmp++) {
>> + struct dma_fence *bo_fence;
>> +
>> + gobj[i] = drm_gem_object_lookup(filp, bo_handles[i]);
>> + if (!gobj[i]) {
>> + r = -ENOENT;
>> + goto err_put_gobj;
>> + }
>> +
>> + dma_resv_lock(gobj[i]->resv, NULL);
>> + r = dma_resv_get_singleton(gobj[i]->resv,
>> + wait_flag ?
>> + DMA_RESV_USAGE_READ :
>> + DMA_RESV_USAGE_WRITE,
>> + &bo_fence);
>> + if (r) {
>> + dma_resv_unlock(gobj[i]->resv);
>> + goto err_put_gobj;
>> + }
>> + dma_resv_unlock(gobj[i]->resv);
>> + drm_gem_object_put(gobj[i]);
>> +
>> + /* Store GEM objects's gpu va address and value */
>> + userq_fence = to_amdgpu_userq_fence(bo_fence);
>> + fence_info[tmp].va = userq_fence->fence_drv->gpu_addr;
>> + fence_info[tmp].value = bo_fence->seqno;
>> + dma_fence_put(bo_fence);
>> + }
>> +
>> + if (copy_to_user(u64_to_user_ptr(args->userq_fence_info),
>> + fence_info, sizeof(fence_info))) {
>> + r = -EFAULT;
>> + goto err_free_gobj;
>> + }
>> +
>> + /* Free all handles */
>> + kfree(syncobj_handles);
>> + kfree(bo_handles);
>> + kfree(fence_info);
>> + kfree(gobj);
>> +
>> + return 0;
>> +
>> +err_put_gobj:
>> + while (i-- > 0)
>> + drm_gem_object_put(gobj[i]);
>> +err_free_gobj:
>> + kfree(gobj);
>> +err_free_fence_info:
>> + kfree(fence_info);
>> +err_free_bo_handles:
>> + kfree(bo_handles);
>> +err_free_syncobj_handles:
>> + kfree(syncobj_handles);
>> +
>> + return r;
>> +}
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
>> index 230dd54b4cd3..999d1e2baff5 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
>> @@ -27,6 +27,8 @@
>> #include <linux/types.h>
>> +#define AMDGPU_USERQ_BO_READ 0x1
>> +
>> struct amdgpu_userq_fence {
>> struct dma_fence base;
>> /* userq fence lock */
>> @@ -57,5 +59,9 @@ int amdgpu_userq_fence_create(struct
>> amdgpu_usermode_queue *userq,
>> u64 seq, struct dma_fence **f);
>> bool amdgpu_userq_fence_process(struct amdgpu_userq_fence_driver
>> *fence_drv);
>> void amdgpu_userq_fence_driver_destroy(struct kref *ref);
>> +int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
>> + struct drm_file *filp);
>> +int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
>> + struct drm_file *filp);
>> #endif
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
>> index d4317b0f6487..4d3d6777a178 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
>> @@ -457,6 +457,7 @@ int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr
>> *userq_mgr, struct amdgpu_devi
>> idr_init_base(&userq_mgr->userq_idr, 1);
>> INIT_LIST_HEAD(&userq_mgr->userq_list);
>> userq_mgr->adev = adev;
>> + adev->userq_mgr = userq_mgr;
>> r = amdgpu_userqueue_setup_mqd_funcs(userq_mgr);
>> if (r) {
>