[PATCH 5/6] drm/amdgpu: Implement userqueue signal/wait IOCTL functions
Arunpravin Paneer Selvam
arunpravin.paneerselvam at amd.com
Mon Feb 27 13:35:33 UTC 2023
On 2/27/2023 6:53 PM, Christian König wrote:
> Hi Arun,
>
> Am 27.02.23 um 14:20 schrieb Arunpravin Paneer Selvam:
>> Hi Christian,
>>
>>
>> On 2/27/2023 6:29 PM, Christian König wrote:
>>> Am 26.02.23 um 17:54 schrieb Arunpravin Paneer Selvam:
>>>> This patch introduces new IOCTLs for the userqueue secure semaphore.
>>>>
>>>> The signal IOCTL is called from a userspace application, which
>>>> creates a drm syncobj and an array of BO GEM handles and passes them
>>>> in as parameters for the driver to install the fence into them.
>>>>
>>>> The wait IOCTL gets an array of drm syncobjs, finds the fences
>>>> attached to them and obtains the array of memory_address/fence_value
>>>> combinations which are returned to userspace.
>>>>
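For context, userspace is expected to drive the signal path roughly as
in the untested snippet below. The struct and field names match the
uapi additions in this series; the DRM_IOCTL_AMDGPU_USERQ_SIGNAL
wrapper name is my assumption here:

    struct drm_amdgpu_userq_signal args = {};
    uint32_t bo_handles[1] = { bo_handle };

    args.queue_id = queue_id;              /* queue created via AMDGPU_USERQ */
    args.handle = syncobj_handle;          /* syncobj that receives the fence */
    args.bo_handles_array = (uintptr_t)bo_handles;
    args.num_bo_handles = 1;
    args.bo_flags = AMDGPU_USERQ_BO_READ;  /* add the fence as a shared/read fence */

    drmIoctl(fd, DRM_IOCTL_AMDGPU_USERQ_SIGNAL, &args);
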
>>>> v2: Worked on review comments from Christian for the following
>>>> modifications
>>>>
>>>> - Install fence into GEM BO object.
>>>> - Lock all BOs using the dma_resv subsystem
>>>> - Reorder the sequence in signal IOCTL function.
>>>> - Get write pointer from the shadow wptr
>>>> - Use userq_fence to fetch the va/value in the wait IOCTL.
>>>>
>>>> Signed-off-by: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam at amd.com>
>>>> ---
>>>> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 +
>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 +
>>>> .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.c | 258 ++++++++++++++++++
>>>> .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.h | 6 +
>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 1 +
>>>> 5 files changed, 270 insertions(+)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>> index 1c3eba2d0390..255d73795493 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>>> @@ -964,6 +964,8 @@ struct amdgpu_device {
>>>> struct amdgpu_mes mes;
>>>> struct amdgpu_mqd mqds[AMDGPU_HW_IP_NUM];
>>>> + struct amdgpu_userq_mgr *userq_mgr;
>>>> +
>>>> /* df */
>>>> struct amdgpu_df df;
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>>> index 6b7ac1ebd04c..66a7304fabe3 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>>> @@ -2752,6 +2752,9 @@ const struct drm_ioctl_desc amdgpu_ioctls_kms[] = {
>>>> DRM_IOCTL_DEF_DRV(AMDGPU_GEM_USERPTR, amdgpu_gem_userptr_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
>>>> DRM_IOCTL_DEF_DRV(AMDGPU_USERQ, amdgpu_userq_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
>>>> DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_DOORBELL_RING, amdgpu_userq_doorbell_ring_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
>>>> + DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_SIGNAL, amdgpu_userq_signal_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
>>>> + DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_WAIT, amdgpu_userq_wait_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
>>>> +
>>>> };
>>>> static const struct drm_driver amdgpu_kms_driver = {
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
>>>> index 609a7328e9a6..26fd1d4f758a 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
>>>> @@ -249,3 +249,261 @@ static const struct dma_fence_ops amdgpu_userq_fence_ops = {
>>>> .signaled = amdgpu_userq_fence_signaled,
>>>> .release = amdgpu_userq_fence_release,
>>>> };
>>>> +
>>>> +static int amdgpu_userq_fence_read_wptr(struct drm_file *filp,
>>>> + struct amdgpu_usermode_queue *queue,
>>>> + u64 *wptr)
>>>> +{
>>>> + struct amdgpu_fpriv *fpriv = filp->driver_priv;
>>>> + struct amdgpu_bo_va_mapping *mapping;
>>>> + struct amdgpu_vm *vm = &fpriv->vm;
>>>> + struct amdgpu_bo *bo;
>>>> + u64 *ptr;
>>>> + int r;
>>>> +
>>>> + mapping = amdgpu_vm_bo_lookup_mapping(vm, queue->wptr_gpu_addr >> PAGE_SHIFT);
>>>> + if (!mapping)
>>>> + return -EINVAL;
>>>> +
>>>> + bo = mapping->bo_va->base.bo;
>>>> + r = amdgpu_bo_kmap(bo, (void **)&ptr);
>>>
>>> Oh, that's not something you can do that easily.
>>>
>>> The BO must be reserved (locked) first if you want to call
>>> amdgpu_bo_kmap() on it.
>> Sure, I will take care of it.
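To be concrete, I plan to rework the read along these lines (untested
sketch, reserving the BO around the kmap):

    bo = mapping->bo_va->base.bo;

    r = amdgpu_bo_reserve(bo, true);
    if (r) {
        DRM_ERROR("Failed to reserve userqueue wptr bo");
        return r;
    }

    r = amdgpu_bo_kmap(bo, (void **)&ptr);
    if (r) {
        amdgpu_bo_unreserve(bo);
        DRM_ERROR("Failed mapping the userqueue wptr bo");
        return r;
    }

    *wptr = le64_to_cpu(*ptr);

    amdgpu_bo_kunmap(bo);
    amdgpu_bo_unreserve(bo);
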
>>>
>>>> + if (r) {
>>>> + DRM_ERROR("Failed mapping the userqueue wptr bo");
>>>> + return r;
>>>> + }
>>>> +
>>>> + *wptr = le64_to_cpu(*ptr);
>>>> +
>>>> + return 0;
>>>> +}
>>>> +
>>>> +int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
>>>> + struct drm_file *filp)
>>>> +{
>>>> + struct drm_amdgpu_userq_signal *args = data;
>>>> + struct amdgpu_device *adev = drm_to_adev(dev);
>>>> + struct amdgpu_userq_mgr *userq_mgr = adev->userq_mgr;
>>>> + struct amdgpu_usermode_queue *queue;
>>>> + struct drm_syncobj *syncobj = NULL;
>>>> + struct drm_gem_object **gobj;
>>>> + u64 num_bo_handles, wptr;
>>>> + struct dma_fence *fence;
>>>> + u32 *bo_handles;
>>>> + bool shared;
>>>> + int r, i;
>>>> +
>>>> + /* Retrieve the user queue */
>>>> + queue = idr_find(&userq_mgr->userq_idr, args->queue_id);
>>>> + if (!queue)
>>>> + return -ENOENT;
>>>> +
>>>> + r = amdgpu_userq_fence_read_wptr(filp, queue, &wptr);
>>>> + if (r)
>>>> + return -EINVAL;
>>>> +
>>>> + /* Find Syncobj if any */
>>>> + syncobj = drm_syncobj_find(filp, args->handle);
>>>> +
>>>> + /* Array of bo handles */
>>>> + num_bo_handles = args->num_bo_handles;
>>>> + bo_handles = kmalloc_array(num_bo_handles, sizeof(*bo_handles), GFP_KERNEL);
>>>> + if (bo_handles == NULL)
>>>> + return -ENOMEM;
>>>> +
>>>> + if (copy_from_user(bo_handles, u64_to_user_ptr(args->bo_handles_array),
>>>> + sizeof(u32) * num_bo_handles)) {
>>>> + r = -EFAULT;
>>>> + goto err_free_handles;
>>>> + }
>>>> +
>>>> + /* Array of GEM object handles */
>>>> + gobj = kmalloc_array(num_bo_handles, sizeof(*gobj), GFP_KERNEL);
>>>> + if (gobj == NULL) {
>>>> + r = -ENOMEM;
>>>> + goto err_free_handles;
>>>> + }
>>>> +
>>>> + for (i = 0; i < num_bo_handles; i++) {
>>>> + /* Retrieve GEM object */
>>>> + gobj[i] = drm_gem_object_lookup(filp, bo_handles[i]);
>>>> + if (!gobj[i]) {
>>>> + r = -ENOENT;
>>>> + goto err_put_gobj;
>>>> + }
>>>> +
>>>> + dma_resv_lock(gobj[i]->resv, NULL);
>>>> + r = dma_resv_reserve_fences(gobj[i]->resv, 1);
>> I am reserving a fence slot for each BO here; should we move this down?
>
> See below.
>
>>
>> Thanks,
>> Arun
>>>> + if (r) {
>>>> + dma_resv_unlock(gobj[i]->resv);
>>>> + goto err_put_gobj;
>>>> + }
>>>> + dma_resv_unlock(gobj[i]->resv);
>
> The problem is here. You can't unlock the BO after you reserved the
> fence slot or the reservation is dropped again. What you need to do is
> to lock all BOs first and then reserve the fence slot.
>
> I coded together a drm_exec helper a while ago to make that simple,
> but never found the time to upstream it. Give me a day or so to rebase
> the code.
Sure, I will work on the other comments.
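
Just to confirm I understand the ordering, the signal path would then
look roughly like the untested sketch below; I am using the helper
names from your earlier drm_exec RFC, which may of course change after
the rebase:

    struct drm_exec exec;

    drm_exec_init(&exec, true /* interruptible */);
    drm_exec_until_all_locked(&exec) {
        for (i = 0; i < num_bo_handles; i++) {
            /* lock the BO and reserve one fence slot under a single ww ctx */
            r = drm_exec_prepare_obj(&exec, gobj[i], 1);
            drm_exec_retry_on_contention(&exec);
            if (r)
                goto err_exec_fini;
        }
    }

    /* all BOs stay locked from here on */
    r = amdgpu_userq_fence_create(queue, wptr, &fence);
    if (r)
        goto err_exec_fini;

    for (i = 0; i < num_bo_handles; i++)
        dma_resv_add_fence(gobj[i]->resv, fence, shared ?
                           DMA_RESV_USAGE_READ : DMA_RESV_USAGE_WRITE);

    drm_exec_fini(&exec); /* drops all the locks */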
>
> Regards,
> Christian.
>
>>>> + }
>>>> +
>>>> + /* Create a new fence */
>>>> + r = amdgpu_userq_fence_create(queue, wptr, &fence);
>>>> + if (!fence)
>>>> + goto err_put_gobj;
>>>> +
>>>> + /* Add the created fence to syncobj/BO's */
>>>> + if (syncobj)
>>>> + drm_syncobj_replace_fence(syncobj, fence);
>>>> +
>>>> + shared = args->bo_flags & AMDGPU_USERQ_BO_READ;
>>>> + for (i = 0; i < num_bo_handles; i++) {
>>>> + dma_resv_lock(gobj[i]->resv, NULL);
>>>> + dma_resv_add_fence(gobj[i]->resv, fence, shared ?
>>>> + DMA_RESV_USAGE_READ :
>>>> + DMA_RESV_USAGE_WRITE);
>>>> + dma_resv_unlock(gobj[i]->resv);
>>>> + }
>>>
>>> That will still not work correctly. You've forgotten to call
>>> dma_resv_reserve_fences().
>>>
>>> The tricky part is that you need to do this for all BOs at the same
>>> time.
>>>
>>> I will work on my drm_exec() patch set once more. That one should
>>> make this job much easier.
>>>
>>> Similar applies to the _wait function, but let's get _signal working
>>> first.
>>>
>>> Regards,
>>> Christian.
>>>
>>>> +
>>>> + for (i = 0; i < num_bo_handles; i++)
>>>> + drm_gem_object_put(gobj[i]);
>>>> +
>>>> + dma_fence_put(fence);
>>>> +
>>>> + /* Free all handles */
>>>> + kfree(bo_handles);
>>>> + kfree(gobj);
>>>> +
>>>> + return 0;
>>>> +
>>>> +err_put_gobj:
>>>> + while (i-- > 0)
>>>> + drm_gem_object_put(gobj[i]);
>>>> + kfree(gobj);
>>>> +err_free_handles:
>>>> + kfree(bo_handles);
>>>> +
>>>> + return r;
>>>> +}
>>>> +
>>>> +int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
>>>> + struct drm_file *filp)
>>>> +{
>>>> + struct drm_amdgpu_userq_fence_info *fence_info = NULL;
>>>> + struct drm_amdgpu_userq_wait *args = data;
>>>> + struct amdgpu_userq_fence *userq_fence;
>>>> + u32 *syncobj_handles, *bo_handles;
>>>> + u64 num_syncobj, num_bo_handles;
>>>> + struct drm_gem_object **gobj;
>>>> + struct dma_fence *fence;
>>>> + bool wait_flag;
>>>> + int r, i, tmp;
>>>> +
>>>> + /* Array of Syncobj handles */
>>>> + num_syncobj = args->count_handles;
>>>> + syncobj_handles = kmalloc_array(num_syncobj, sizeof(*syncobj_handles), GFP_KERNEL);
>>>> + if (!syncobj_handles)
>>>> + return -ENOMEM;
>>>> +
>>>> + if (copy_from_user(syncobj_handles, u64_to_user_ptr(args->handles),
>>>> + sizeof(u32) * num_syncobj)) {
>>>> + r = -EFAULT;
>>>> + goto err_free_syncobj_handles;
>>>> + }
>>>> +
>>>> + /* Array of bo handles */
>>>> + num_bo_handles = args->num_bo_handles;
>>>> + bo_handles = kmalloc_array(num_bo_handles, sizeof(*bo_handles), GFP_KERNEL);
>>>> + if (!bo_handles)
>>>> + goto err_free_syncobj_handles;
>>>> +
>>>> + if (copy_from_user(bo_handles, u64_to_user_ptr(args->bo_handles_array),
>>>> + sizeof(u32) * num_bo_handles)) {
>>>> + r = -EFAULT;
>>>> + goto err_free_bo_handles;
>>>> + }
>>>> +
>>>> + /* Array of fence gpu address */
>>>> + fence_info = kmalloc_array(num_syncobj + num_bo_handles, sizeof(*fence_info), GFP_KERNEL);
>>>> + if (!fence_info) {
>>>> + r = -ENOMEM;
>>>> + goto err_free_bo_handles;
>>>> + }
>>>> +
>>>> + /* Retrieve syncobj's fence */
>>>> + for (i = 0; i < num_syncobj; i++) {
>>>> + r = drm_syncobj_find_fence(filp, syncobj_handles[i], 0, 0, &fence);
>>>> + if (r) {
>>>> + DRM_ERROR("syncobj %u failed to find the fence!\n", syncobj_handles[i]);
>>>> + r = -ENOENT;
>>>> + goto err_free_fence_info;
>>>> + }
>>>> +
>>>> + /* Store drm syncobj's gpu va address and value */
>>>> + userq_fence = to_amdgpu_userq_fence(fence);
>>>> + fence_info[i].va = userq_fence->fence_drv->gpu_addr;
>>>> + fence_info[i].value = fence->seqno;
>>>> + dma_fence_put(fence);
>>>> + }
>>>> +
>>>> + tmp = i;
>>>> +
>>>> + /* Array of GEM object handles */
>>>> + gobj = kmalloc_array(num_bo_handles, sizeof(*gobj), GFP_KERNEL);
>>>> + if (gobj == NULL) {
>>>> + r = -ENOMEM;
>>>> + goto err_free_fence_info;
>>>> + }
>>>> +
>>>> + /* Retrieve GEM objects's fence */
>>>> + wait_flag = args->bo_wait_flags & AMDGPU_USERQ_BO_READ;
>>>> + for (i = 0; i < num_bo_handles; i++, tmp++) {
>>>> + struct dma_fence *bo_fence;
>>>> +
>>>> + gobj[i] = drm_gem_object_lookup(filp, bo_handles[i]);
>>>> + if (!gobj[i]) {
>>>> + r = -ENOENT;
>>>> + goto err_put_gobj;
>>>> + }
>>>> +
>>>> + dma_resv_lock(gobj[i]->resv, NULL);
>>>> + r = dma_resv_get_singleton(gobj[i]->resv,
>>>> + wait_flag ?
>>>> + DMA_RESV_USAGE_READ :
>>>> + DMA_RESV_USAGE_WRITE,
>>>> + &bo_fence);
>>>> + if (r) {
>>>> + dma_resv_unlock(gobj[i]->resv);
>>>> + goto err_put_gobj;
>>>> + }
>>>> + dma_resv_unlock(gobj[i]->resv);
>>>> + drm_gem_object_put(gobj[i]);
>>>> +
>>>> + /* Store GEM objects's gpu va address and value */
>>>> + userq_fence = to_amdgpu_userq_fence(bo_fence);
>>>> + fence_info[tmp].va = userq_fence->fence_drv->gpu_addr;
>>>> + fence_info[tmp].value = bo_fence->seqno;
>>>> + dma_fence_put(bo_fence);
>>>> + }
>>>> +
>>>> + if (copy_to_user(u64_to_user_ptr(args->userq_fence_info),
>>>> + fence_info, sizeof(fence_info))) {
>>>> + r = -EFAULT;
>>>> + goto err_free_gobj;
>>>> + }
>>>> +
>>>> + /* Free all handles */
>>>> + kfree(syncobj_handles);
>>>> + kfree(bo_handles);
>>>> + kfree(fence_info);
>>>> + kfree(gobj);
>>>> +
>>>> + return 0;
>>>> +
>>>> +err_put_gobj:
>>>> + while (i-- > 0)
>>>> + drm_gem_object_put(gobj[i]);
>>>> +err_free_gobj:
>>>> + kfree(gobj);
>>>> +err_free_fence_info:
>>>> + kfree(fence_info);
>>>> +err_free_bo_handles:
>>>> + kfree(bo_handles);
>>>> +err_free_syncobj_handles:
>>>> + kfree(syncobj_handles);
>>>> +
>>>> + return r;
>>>> +}
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
>>>> index 230dd54b4cd3..999d1e2baff5 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
>>>> @@ -27,6 +27,8 @@
>>>> #include <linux/types.h>
>>>> +#define AMDGPU_USERQ_BO_READ 0x1
>>>> +
>>>> struct amdgpu_userq_fence {
>>>> struct dma_fence base;
>>>> /* userq fence lock */
>>>> @@ -57,5 +59,9 @@ int amdgpu_userq_fence_create(struct amdgpu_usermode_queue *userq,
>>>> u64 seq, struct dma_fence **f);
>>>> bool amdgpu_userq_fence_process(struct amdgpu_userq_fence_driver *fence_drv);
>>>> void amdgpu_userq_fence_driver_destroy(struct kref *ref);
>>>> +int amdgpu_userq_signal_ioctl(struct drm_device *dev, void *data,
>>>> + struct drm_file *filp);
>>>> +int amdgpu_userq_wait_ioctl(struct drm_device *dev, void *data,
>>>> + struct drm_file *filp);
>>>> #endif
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
>>>> index d4317b0f6487..4d3d6777a178 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
>>>> @@ -457,6 +457,7 @@ int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct amdgpu_devi
>>>> idr_init_base(&userq_mgr->userq_idr, 1);
>>>> INIT_LIST_HEAD(&userq_mgr->userq_list);
>>>> userq_mgr->adev = adev;
>>>> + adev->userq_mgr = userq_mgr;
>>>> r = amdgpu_userqueue_setup_mqd_funcs(userq_mgr);
>>>> if (r) {
>>>
>>
>