[PATCH 2/6] drm/amdgpu: Implement a new userqueue fence driver
Arunpravin Paneer Selvam
arunpravin.paneerselvam at amd.com
Tue Feb 28 08:45:19 UTC 2023
Hi Christian,
On 2/27/2023 6:12 PM, Christian König wrote:
> Am 26.02.23 um 17:54 schrieb Arunpravin Paneer Selvam:
>> Developed a userqueue fence driver for the userqueue process shared
>> BO synchronization.
>>
>> Create a dma fence having write pointer as the seqno and allocate a
>> seq64 memory for each user queue process and feed this memory address
>> into the firmware/hardware, thus the firmware writes the read pointer
>> into the given address when the process completes it execution.
>> Compare wptr and rptr, if rptr >= wptr, signal the fences for the
>> waiting
>> process to consume the buffers.
>>
>> v2: Worked on review comments from Christian for the following
>> modifications
>>
>> - Add wptr as sequence number into the fence
>> - Add a reference count for the fence driver
>> - Add dma_fence_put below the list_del as it might frees the
>> userq fence.
>> - Trim unnecessary code in interrupt handler.
>> - Check dma fence signaled state in dma fence creation function
>> for a
>> potential problem of hardware completing the job processing
>> beforehand.
>> - Add necessary locks.
>> - Create a list and process all the unsignaled fences.
>> - clean up fences in destroy function.
>> - implement .enabled callback function
>
> A few more nit picks below, but from the technical side that looks
> mostly clean.
>
>>
>> Signed-off-by: Arunpravin Paneer Selvam
>> <Arunpravin.PaneerSelvam at amd.com>
>> ---
>> drivers/gpu/drm/amd/amdgpu/Makefile | 2 +-
>> drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 6 +
>> .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.c | 251 ++++++++++++++++++
>> .../gpu/drm/amd/amdgpu/amdgpu_userq_fence.h | 61 +++++
>> drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 20 ++
>> .../gpu/drm/amd/include/amdgpu_userqueue.h | 2 +
>> 6 files changed, 341 insertions(+), 1 deletion(-)
>> create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
>> create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile
>> b/drivers/gpu/drm/amd/amdgpu/Makefile
>> index a239533a895f..ea09273b585f 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
>> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
>> @@ -59,7 +59,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
>> amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
>> amdgpu_fw_attestation.o amdgpu_securedisplay.o \
>> amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
>> - amdgpu_ring_mux.o amdgpu_seq64.o
>> + amdgpu_ring_mux.o amdgpu_seq64.o amdgpu_userq_fence.o
>> amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> index bd3462d0da5f..6b7ac1ebd04c 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> @@ -53,6 +53,7 @@
>> #include "amdgpu_xgmi.h"
>> #include "amdgpu_reset.h"
>> #include "amdgpu_userqueue.h"
>> +#include "amdgpu_userq_fence.h"
>> /*
>> * KMS wrapper.
>> @@ -2827,6 +2828,10 @@ static int __init amdgpu_init(void)
>> if (r)
>> goto error_fence;
>> + r = amdgpu_userq_fence_slab_init();
>> + if (r)
>> + goto error_fence;
>> +
>> DRM_INFO("amdgpu kernel modesetting enabled.\n");
>> amdgpu_register_atpx_handler();
>> amdgpu_acpi_detect();
>> @@ -2851,6 +2856,7 @@ static void __exit amdgpu_exit(void)
>> amdgpu_unregister_atpx_handler();
>> amdgpu_sync_fini();
>> amdgpu_fence_slab_fini();
>> + amdgpu_userq_fence_slab_fini();
>> mmu_notifier_synchronize();
>> }
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
>> new file mode 100644
>> index 000000000000..609a7328e9a6
>> --- /dev/null
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.c
>> @@ -0,0 +1,251 @@
>> +// SPDX-License-Identifier: MIT
>> +/*
>> + * Copyright 2023 Advanced Micro Devices, Inc.
>> + *
>> + * Permission is hereby granted, free of charge, to any person
>> obtaining a
>> + * copy of this software and associated documentation files (the
>> "Software"),
>> + * to deal in the Software without restriction, including without
>> limitation
>> + * the rights to use, copy, modify, merge, publish, distribute,
>> sublicense,
>> + * and/or sell copies of the Software, and to permit persons to whom
>> the
>> + * Software is furnished to do so, subject to the following conditions:
>> + *
>> + * The above copyright notice and this permission notice shall be
>> included in
>> + * all copies or substantial portions of the Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
>> EXPRESS OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
>> MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO
>> EVENT SHALL
>> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM,
>> DAMAGES OR
>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
>> OTHERWISE,
>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
>> USE OR
>> + * OTHER DEALINGS IN THE SOFTWARE.
>> + *
>> + */
>> +
>> +#include <linux/kref.h>
>> +#include <linux/slab.h>
>> +
>> +#include <drm/drm_syncobj.h>
>> +
>> +#include "amdgpu.h"
>> +#include "amdgpu_userq_fence.h"
>> +#include "amdgpu_userqueue.h"
>> +
>> +static struct kmem_cache *amdgpu_userq_fence_slab;
>> +
>> +int amdgpu_userq_fence_slab_init(void)
>> +{
>> + amdgpu_userq_fence_slab = kmem_cache_create("amdgpu_userq_fence",
>> + sizeof(struct amdgpu_userq_fence),
>> + 0,
>> + SLAB_HWCACHE_ALIGN,
>> + NULL);
>> + if (!amdgpu_userq_fence_slab)
>> + return -ENOMEM;
>> +
>> + return 0;
>> +}
>> +
>> +void amdgpu_userq_fence_slab_fini(void)
>> +{
>> + rcu_barrier();
>> + kmem_cache_destroy(amdgpu_userq_fence_slab);
>> +}
>> +
>> +static inline struct amdgpu_userq_fence
>> *to_amdgpu_userq_fence(struct dma_fence *f)
>> +{
>> + struct amdgpu_userq_fence *__f = container_of(f, struct
>> amdgpu_userq_fence, base);
>> +
>> + if (!__f)
>> + return NULL;
>> +
>> + if (__f->base.ops == &amdgpu_userq_fence_ops)
>> + return __f;
>> +
>> + return NULL;
>> +}
>> +
>> +static u64 amdgpu_userq_fence_read(struct amdgpu_userq_fence_driver
>> *fence_drv)
>> +{
>> + return le64_to_cpu(*fence_drv->cpu_addr);
>> +}
>> +
>> +int amdgpu_userq_fence_driver_get(struct amdgpu_device *adev,
>> + struct amdgpu_usermode_queue *userq)
>
> Looks like you misunderstood me a bit.
>
> The usual naming convention in Linux for reference counted objects is
> like that:
>
> _alloc() or similar for a function creating the object (the one which
> has the kref_init).
> _get() for the function grabbing a reference.
> _put() for the function releasing a reference.
> _free() or _destroy() or similar for the function which is called by
> _put() to cleanup.
>
>> +{
>> + struct amdgpu_userq_fence_driver *fence_drv;
>> + int r;
>> +
>> + fence_drv = userq->fence_drv;
>> + if (!fence_drv)
>> + return -EINVAL;
>> +
>> + /* Acquire seq64 memory */
>> + r = amdgpu_seq64_get(adev, &fence_drv->gpu_addr,
>> + &fence_drv->cpu_addr);
>> + if (r)
>> + return -ENOMEM;
>> +
>> + kref_init(&fence_drv->refcount);
>> + INIT_LIST_HEAD(&fence_drv->fences);
>> + spin_lock_init(&fence_drv->fence_list_lock);
>> +
>> + fence_drv->adev = adev;
>> + fence_drv->context = dma_fence_context_alloc(1);
>> +
>> + get_task_comm(fence_drv->timeline_name, current);
>> +
>> + return 0;
>> +}
>> +
>> +void amdgpu_userq_fence_driver_destroy(struct kref *ref)
>> +{
>> + struct amdgpu_userq_fence_driver *fence_drv = container_of(ref,
>> + struct amdgpu_userq_fence_driver,
>> + refcount);
>> + struct amdgpu_device *adev = fence_drv->adev;
>> + struct amdgpu_userq_fence *fence, *tmp;
>> + struct dma_fence *f;
>> +
>> + spin_lock(&fence_drv->fence_list_lock);
>> + list_for_each_entry_safe(fence, tmp, &fence_drv->fences, link) {
>> + f = &fence->base;
>> +
>> + if (!dma_fence_is_signaled(f)) {
>> + dma_fence_set_error(f, -ECANCELED);
>> + dma_fence_signal(f);
>> + }
>> +
>> + list_del(&fence->link);
>> + dma_fence_put(f);
>> + }
>> +
>> + WARN_ON_ONCE(!list_empty(&fence_drv->fences));
>> + spin_unlock(&fence_drv->fence_list_lock);
>> +
>> + /* Free seq64 memory */
>> + amdgpu_seq64_free(adev, fence_drv->gpu_addr);
>> + kfree(fence_drv);
>> +}
>> +
>> +void amdgpu_userq_fence_driver_put(struct amdgpu_userq_fence_driver
>> *fence_drv)
>> +{
>> + kref_put(&fence_drv->refcount, amdgpu_userq_fence_driver_destroy);
>> +}
>> +
>> +int amdgpu_userq_fence_create(struct amdgpu_usermode_queue *userq,
>> + u64 seq, struct dma_fence **f)
>> +{
>> + struct amdgpu_userq_fence_driver *fence_drv;
>> + struct amdgpu_userq_fence *userq_fence;
>> + struct dma_fence *fence;
>> +
>> + fence_drv = userq->fence_drv;
>> + if (!fence_drv)
>> + return -EINVAL;
>> +
>> + userq_fence = kmem_cache_alloc(amdgpu_userq_fence_slab,
>> GFP_ATOMIC);
>> + if (!userq_fence)
>> + return -ENOMEM;
>> +
>> + spin_lock_init(&userq_fence->lock);
>> + INIT_LIST_HEAD(&userq_fence->link);
>> + fence = &userq_fence->base;
>> + userq_fence->fence_drv = fence_drv;
>> +
>> + dma_fence_init(fence, &amdgpu_userq_fence_ops, &userq_fence->lock,
>> + fence_drv->context, seq);
>> +
>> + kref_get(&fence_drv->refcount);
>> +
>> + spin_lock(&fence_drv->fence_list_lock);
>> + /* Check if hardware has already processed the job */
>> + if (!dma_fence_is_signaled(fence)) {
>> + dma_fence_get(fence);
>> + list_add_tail(&userq_fence->link, &fence_drv->fences);
>> + dma_fence_put(fence);
>> + }
>> + spin_unlock(&fence_drv->fence_list_lock);
>
>> + /* Release the fence driver reference */
>> + amdgpu_userq_fence_driver_put(fence_drv);
>
> Hui? That doesn't make much sense. We should keep the reference as
> long as the fence exists or at least as long as it isn't signaled (the
> later is probably better, but tricky to get right).
>
>> +
>> + *f = fence;
>> +
>> + return 0;
>> +}
>> +
>> +bool amdgpu_userq_fence_process(struct amdgpu_userq_fence_driver
>> *fence_drv)
>
> Maybe name that function amdgpu_userq_fence_driver_process() and move
> that up a bit to group together the functions dealing with the
> fence_driver and the functions dealing with the fence.
>
>> +{
>> + struct amdgpu_userq_fence *userq_fence, *tmp;
>> + struct dma_fence *fence;
>> + u64 rptr;
>> +
>> + if (!fence_drv)
>> + return false;
>> +
>> + rptr = amdgpu_userq_fence_read(fence_drv);
>> +
>> + spin_lock(&fence_drv->fence_list_lock);
>> + list_for_each_entry_safe(userq_fence, tmp, &fence_drv->fences,
>> link) {
>> + fence = &userq_fence->base;
>> +
>> + if (rptr >= fence->seqno) {
>> + dma_fence_signal(fence);
>> + list_del(&userq_fence->link);
>> +
>> + dma_fence_put(fence);
>> + } else {
>> + break;
>> + }
>> + }
>> + spin_unlock(&fence_drv->fence_list_lock);
>> +
>> + return true;
>
> BTW: What's the return value good for? Could we drop that?
Working on v3, will include all the review comments.
Thanks,
Arun
>
> Regards,
> Christian.
>
>> +}
>> +
>> +static const char *amdgpu_userq_fence_get_driver_name(struct
>> dma_fence *f)
>> +{
>> + return "amdgpu_userqueue_fence";
>> +}
>> +
>> +static const char *amdgpu_userq_fence_get_timeline_name(struct
>> dma_fence *f)
>> +{
>> + struct amdgpu_userq_fence *fence = to_amdgpu_userq_fence(f);
>> +
>> + return fence->fence_drv->timeline_name;
>> +}
>> +
>> +static bool amdgpu_userq_fence_signaled(struct dma_fence *f)
>> +{
>> + struct amdgpu_userq_fence *fence = to_amdgpu_userq_fence(f);
>> + struct amdgpu_userq_fence_driver *fence_drv = fence->fence_drv;
>> + u64 rptr, wptr;
>> +
>> + rptr = amdgpu_userq_fence_read(fence_drv);
>> + wptr = fence->base.seqno;
>> +
>> + if (rptr >= wptr)
>> + return true;
>> +
>> + return false;
>> +}
>> +
>> +static void amdgpu_userq_fence_free(struct rcu_head *rcu)
>> +{
>> + struct dma_fence *f = container_of(rcu, struct dma_fence, rcu);
>> +
>> + kmem_cache_free(amdgpu_userq_fence_slab, to_amdgpu_userq_fence(f));
>> +}
>> +
>> +static void amdgpu_userq_fence_release(struct dma_fence *f)
>> +{
>> + call_rcu(&f->rcu, amdgpu_userq_fence_free);
>> +}
>> +
>> +static const struct dma_fence_ops amdgpu_userq_fence_ops = {
>> + .use_64bit_seqno = true,
>> + .get_driver_name = amdgpu_userq_fence_get_driver_name,
>> + .get_timeline_name = amdgpu_userq_fence_get_timeline_name,
>> + .signaled = amdgpu_userq_fence_signaled,
>> + .release = amdgpu_userq_fence_release,
>> +};
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
>> new file mode 100644
>> index 000000000000..230dd54b4cd3
>> --- /dev/null
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq_fence.h
>> @@ -0,0 +1,61 @@
>> +/* SPDX-License-Identifier: MIT */
>> +/*
>> + * Copyright 2023 Advanced Micro Devices, Inc.
>> + *
>> + * Permission is hereby granted, free of charge, to any person
>> obtaining a
>> + * copy of this software and associated documentation files (the
>> "Software"),
>> + * to deal in the Software without restriction, including without
>> limitation
>> + * the rights to use, copy, modify, merge, publish, distribute,
>> sublicense,
>> + * and/or sell copies of the Software, and to permit persons to whom
>> the
>> + * Software is furnished to do so, subject to the following conditions:
>> + *
>> + * The above copyright notice and this permission notice shall be
>> included in
>> + * all copies or substantial portions of the Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
>> EXPRESS OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
>> MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO
>> EVENT SHALL
>> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM,
>> DAMAGES OR
>> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
>> OTHERWISE,
>> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
>> USE OR
>> + * OTHER DEALINGS IN THE SOFTWARE.
>> + *
>> + */
>> +
>> +#ifndef __AMDGPU_USERQ_FENCE_H__
>> +#define __AMDGPU_USERQ_FENCE_H__
>> +
>> +#include <linux/types.h>
>> +
>> +struct amdgpu_userq_fence {
>> + struct dma_fence base;
>> + /* userq fence lock */
>> + spinlock_t lock;
>> + struct list_head link;
>> + struct amdgpu_userq_fence_driver *fence_drv;
>> +};
>> +
>> +struct amdgpu_userq_fence_driver {
>> + struct kref refcount;
>> + u64 gpu_addr;
>> + u64 *cpu_addr;
>> + u64 context;
>> + /* fence list lock */
>> + spinlock_t fence_list_lock;
>> + struct list_head fences;
>> + struct amdgpu_device *adev;
>> + char timeline_name[TASK_COMM_LEN];
>> +};
>> +
>> +static const struct dma_fence_ops amdgpu_userq_fence_ops;
>> +
>> +int amdgpu_userq_fence_slab_init(void);
>> +void amdgpu_userq_fence_slab_fini(void);
>> +int amdgpu_userq_fence_driver_get(struct amdgpu_device *adev, struct
>> amdgpu_usermode_queue *userq);
>> +void amdgpu_userq_fence_driver_put(struct amdgpu_userq_fence_driver
>> *fence_drv);
>> +int amdgpu_userq_fence_create(struct amdgpu_usermode_queue *userq,
>> + u64 seq, struct dma_fence **f);
>> +bool amdgpu_userq_fence_process(struct amdgpu_userq_fence_driver
>> *fence_drv);
>> +void amdgpu_userq_fence_driver_destroy(struct kref *ref);
>> +
>> +#endif
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
>> index 2f1aba1e9792..d4317b0f6487 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
>> @@ -23,6 +23,7 @@
>> #include "amdgpu.h"
>> #include "amdgpu_vm.h"
>> +#include "amdgpu_userq_fence.h"
>> #define AMDGPU_USERQ_DOORBELL_INDEX
>> (AMDGPU_NAVI10_DOORBELL_GFX_USERQUEUE_START + 4)
>> @@ -264,6 +265,8 @@ static int amdgpu_userqueue_create(struct
>> drm_file *filp, union drm_amdgpu_userq
>> struct amdgpu_vm *vm = &fpriv->vm;
>> struct amdgpu_userq_mgr *uq_mgr = &fpriv->userq_mgr;
>> struct drm_amdgpu_userq_mqd *mqd_in = &args->in.mqd;
>> + struct amdgpu_userq_fence_driver *fence_drv;
>> + struct amdgpu_device *adev = uq_mgr->adev;
>> pasid = vm->pasid;
>> if (vm->pasid < 0) {
>> @@ -280,6 +283,19 @@ static int amdgpu_userqueue_create(struct
>> drm_file *filp, union drm_amdgpu_userq
>> return -ENOMEM;
>> }
>> + fence_drv = kzalloc(sizeof(struct amdgpu_userq_fence_driver),
>> GFP_KERNEL);
>> + if (!fence_drv) {
>> + DRM_ERROR("Failed to allocate memory for fence driver\n");
>> + return -ENOMEM;
>> + }
>> +
>> + queue->fence_drv = fence_drv;
>> + r = amdgpu_userq_fence_driver_get(adev, queue);
>> + if (r) {
>> + DRM_ERROR("Failed to get fence driver\n");
>> + goto free_fence_drv;
>> + }
>> +
>> queue->vm = vm;
>> queue->pasid = pasid;
>> queue->wptr_gpu_addr = mqd_in->wptr_va;
>> @@ -339,6 +355,9 @@ static int amdgpu_userqueue_create(struct
>> drm_file *filp, union drm_amdgpu_userq
>> free_qid:
>> amdgpu_userqueue_free_index(uq_mgr, queue->queue_id);
>> +free_fence_drv:
>> + amdgpu_userq_fence_driver_put(queue->fence_drv);
>> +
>> free_queue:
>> mutex_unlock(&uq_mgr->userq_mutex);
>> kfree(queue);
>> @@ -363,6 +382,7 @@ static void amdgpu_userqueue_destroy(struct
>> drm_file *filp, int queue_id)
>> amdgpu_userqueue_destroy_mqd(uq_mgr, queue);
>> amdgpu_userqueue_free_index(uq_mgr, queue->queue_id);
>> list_del(&queue->userq_node);
>> + amdgpu_userq_fence_driver_put(queue->fence_drv);
>> mutex_unlock(&uq_mgr->userq_mutex);
>> kfree(queue);
>> }
>> diff --git a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h
>> b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h
>> index bda27583b58c..cf7264df8c46 100644
>> --- a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h
>> +++ b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h
>> @@ -73,6 +73,8 @@ struct amdgpu_usermode_queue {
>> struct amdgpu_vm *vm;
>> struct amdgpu_userq_mgr *userq_mgr;
>> struct list_head userq_node;
>> +
>> + struct amdgpu_userq_fence_driver *fence_drv;
>> };
>> int amdgpu_userq_ioctl(struct drm_device *dev, void *data, struct
>> drm_file *filp);
>
More information about the amd-gfx
mailing list