[PATCH 5/8] drm/amdgpu: Create context for usermode queue
Shashank Sharma
shashank.sharma at amd.com
Tue Feb 7 08:13:34 UTC 2023
On 07/02/2023 08:55, Christian König wrote:
> Am 07.02.23 um 08:51 schrieb Shashank Sharma:
>>
>> On 07/02/2023 08:14, Christian König wrote:
>>> Am 03.02.23 um 22:54 schrieb Shashank Sharma:
>>>> The FW expects us to allocate atleast one page as context space to
>>>> process gang, process, shadow, GDS and FW_space related work. This
>>>> patch creates some object for the same, and adds an IP specific
>>>> functions to do this.
>>>>
>>>> Cc: Alex Deucher <alexander.deucher at amd.com>
>>>> Cc: Christian Koenig <christian.koenig at amd.com>
>>>> Signed-off-by: Shashank Sharma <shashank.sharma at amd.com>
>>>> ---
>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 32 +++++
>>>> .../amd/amdgpu/amdgpu_userqueue_mqd_gfx_v11.c | 121
>>>> ++++++++++++++++++
>>>> .../gpu/drm/amd/include/amdgpu_userqueue.h | 18 +++
>>>> 3 files changed, 171 insertions(+)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
>>>> index 9f3490a91776..18281b3a51f1 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
>>>> @@ -42,6 +42,28 @@ static struct amdgpu_usermode_queue
>>>> return idr_find(&uq_mgr->userq_idr, qid);
>>>> }
>>>> +static void
>>>> +amdgpu_userqueue_destroy_ctx_space(struct amdgpu_userq_mgr *uq_mgr,
>>>> + struct amdgpu_usermode_queue
>>>> *queue)
>>>> +{
>>>> + uq_mgr->userq_mqd_funcs->ctx_destroy(uq_mgr, queue);
>>>> +}
>>>> +
>>>> +static int
>>>> +amdgpu_userqueue_create_ctx_space(struct amdgpu_userq_mgr *uq_mgr,
>>>> + struct amdgpu_usermode_queue
>>>> *queue)
>>>> +{
>>>> + int r;
>>>> +
>>>> + r = uq_mgr->userq_mqd_funcs->ctx_create(uq_mgr, queue);
>>>> + if (r) {
>>>> + DRM_ERROR("Failed to create context space for queue\n");
>>>> + return r;
>>>> + }
>>>> +
>>>> + return 0;
>>>> +}
>>>> +
>>>> static int
>>>> amdgpu_userqueue_create_mqd(struct amdgpu_userq_mgr *uq_mgr,
>>>> struct amdgpu_usermode_queue *queue)
>>>> {
>>>> @@ -142,12 +164,21 @@ static int amdgpu_userqueue_create(struct
>>>> drm_file *filp, union drm_amdgpu_userq
>>>> goto free_qid;
>>>> }
>>>> + r = amdgpu_userqueue_create_ctx_space(uq_mgr, queue);
>>>> + if (r) {
>>>> + DRM_ERROR("Failed to create context space\n");
>>>> + goto free_mqd;
>>>> + }
>>>> +
>>>> list_add_tail(&queue->userq_node, &uq_mgr->userq_list);
>>>> args->out.q_id = queue->queue_id;
>>>> args->out.flags = 0;
>>>> mutex_unlock(&uq_mgr->userq_mutex);
>>>> return 0;
>>>> +free_mqd:
>>>> + amdgpu_userqueue_destroy_mqd(uq_mgr, queue);
>>>> +
>>>> free_qid:
>>>> amdgpu_userqueue_free_index(uq_mgr, queue->queue_id);
>>>> @@ -170,6 +201,7 @@ static void amdgpu_userqueue_destroy(struct
>>>> drm_file *filp, int queue_id)
>>>> }
>>>> mutex_lock(&uq_mgr->userq_mutex);
>>>> + amdgpu_userqueue_destroy_ctx_space(uq_mgr, queue);
>>>> amdgpu_userqueue_destroy_mqd(uq_mgr, queue);
>>>> amdgpu_userqueue_free_index(uq_mgr, queue->queue_id);
>>>> list_del(&queue->userq_node);
>>>> diff --git
>>>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue_mqd_gfx_v11.c
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue_mqd_gfx_v11.c
>>>> index 57889729d635..687f90a587e3 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue_mqd_gfx_v11.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue_mqd_gfx_v11.c
>>>> @@ -120,6 +120,125 @@ amdgpu_userq_gfx_v11_mqd_destroy(struct
>>>> amdgpu_userq_mgr *uq_mgr, struct amdgpu_
>>>> }
>>>> +static int amdgpu_userq_gfx_v11_ctx_create(struct
>>>> amdgpu_userq_mgr *uq_mgr,
>>>> + struct
>>>> amdgpu_usermode_queue *queue)
>>>> +{
>>>> + int r;
>>>> + struct amdgpu_device *adev = uq_mgr->adev;
>>>> + struct amdgpu_userq_ctx *pctx = &queue->proc_ctx;
>>>> + struct amdgpu_userq_ctx *gctx = &queue->gang_ctx;
>>>> + struct amdgpu_userq_ctx *gdsctx = &queue->gds_ctx;
>>>> + struct amdgpu_userq_ctx *fwctx = &queue->fw_ctx;
>>>> + struct amdgpu_userq_ctx *sctx = &queue->shadow_ctx;
>>>> +
>>>> + /*
>>>> + * The FW expects atleast one page space allocated for
>>>> + * process context related work, and one for gang context.
>>>> + */
>>>> + r = amdgpu_bo_create_kernel(adev, AMDGPU_USERQ_PROC_CTX_SZ,
>>>> PAGE_SIZE,
>>>> + AMDGPU_GEM_DOMAIN_VRAM,
>>>> + &pctx->obj,
>>>> + &pctx->gpu_addr,
>>>> + &pctx->cpu_ptr);
>>>
>>> Again, don't use amdgpu_bo_create_kernel() for any of this.
>> Noted,
>>>
>>>> + if (r) {
>>>> + DRM_ERROR("Failed to allocate proc bo for userqueue (%d)",
>>>> r);
>>>> + return r;
>>>> + }
>>>> +
>>>> + r = amdgpu_bo_create_kernel(adev, AMDGPU_USERQ_GANG_CTX_SZ,
>>>> PAGE_SIZE,
>>>> + AMDGPU_GEM_DOMAIN_VRAM,
>>>> + &gctx->obj,
>>>> + &gctx->gpu_addr,
>>>> + &gctx->cpu_ptr);
>>>> + if (r) {
>>>> + DRM_ERROR("Failed to allocate gang bo for userqueue (%d)",
>>>> r);
>>>> + goto err_gangctx;
>>>> + }
>>>> +
>>>> + r = amdgpu_bo_create_kernel(adev, AMDGPU_USERQ_GDS_CTX_SZ,
>>>> PAGE_SIZE,
>>>> + AMDGPU_GEM_DOMAIN_VRAM,
>>>> + &gdsctx->obj,
>>>> + &gdsctx->gpu_addr,
>>>> + &gdsctx->cpu_ptr);
>>>> + if (r) {
>>>> + DRM_ERROR("Failed to allocate GDS bo for userqueue (%d)", r);
>>>> + goto err_gdsctx;
>>>> + }
>>>> +
>>>> + r = amdgpu_bo_create_kernel(adev, AMDGPU_USERQ_FW_CTX_SZ,
>>>> PAGE_SIZE,
>>>> + AMDGPU_GEM_DOMAIN_VRAM,
>>>> + &fwctx->obj,
>>>> + &fwctx->gpu_addr,
>>>> + &fwctx->cpu_ptr);
>>>> + if (r) {
>>>> + DRM_ERROR("Failed to allocate FW bo for userqueue (%d)", r);
>>>> + goto err_fwctx;
>>>> + }
>>>> +
>>>> + r = amdgpu_bo_create_kernel(adev, AMDGPU_USERQ_FW_CTX_SZ,
>>>> PAGE_SIZE,
>>>> + AMDGPU_GEM_DOMAIN_VRAM,
>>>> + &sctx->obj,
>>>> + &sctx->gpu_addr,
>>>> + &sctx->cpu_ptr);
>>>
>>> Why the heck should we allocate so many different BOs for that?
>>> Can't we put all of this into one?
>> If you mean why don't we create one object of 5 * PAGE_SIZE and give
>> 1-page spaced offsets for all of this, yes, that would further
>> simplify things.
>>
>> The reason why we kept it separate is that these objects could be of
>> different sizes on different IPs/platforms, so we thought defining a
>> separate
>>
>> size macro and object for each of these will make it easier to
>> understand how many FW page objects we are creating for this GEN IP.
>> It can be
>>
>> either ways.
>
> But this is completely uninteresting for common code, isn't it?
>
> I strongly think we should just create a single BO for each queue and
> put all the data (MQD, gang, GDS, FW, shadow) in it at different offsets.
>
> This handling here is just overkill and rather error prone (BTW you
> used AMDGPU_USERQ_FW_CTX_SZ) twice.
>
Agree, we will fix this.
- Shashank
> Christian.
>
>>
>> - Shashank
>>
>>>
>>> Christian.
>>>
>>>> + if (r) {
>>>> + DRM_ERROR("Failed to allocate shadow bo for userqueue
>>>> (%d)", r);
>>>> + goto err_sctx;
>>>> + }
>>>> +
>>>> + return 0;
>>>> +
>>>> +err_sctx:
>>>> + amdgpu_bo_free_kernel(&fwctx->obj,
>>>> + &fwctx->gpu_addr,
>>>> + &fwctx->cpu_ptr);
>>>> +
>>>> +err_fwctx:
>>>> + amdgpu_bo_free_kernel(&gdsctx->obj,
>>>> + &gdsctx->gpu_addr,
>>>> + &gdsctx->cpu_ptr);
>>>> +
>>>> +err_gdsctx:
>>>> + amdgpu_bo_free_kernel(&gctx->obj,
>>>> + &gctx->gpu_addr,
>>>> + &gctx->cpu_ptr);
>>>> +
>>>> +err_gangctx:
>>>> + amdgpu_bo_free_kernel(&pctx->obj,
>>>> + &pctx->gpu_addr,
>>>> + &pctx->cpu_ptr);
>>>> + return r;
>>>> +}
>>>> +
>>>> +static void amdgpu_userq_gfx_v11_ctx_destroy(struct
>>>> amdgpu_userq_mgr *uq_mgr,
>>>> + struct
>>>> amdgpu_usermode_queue *queue)
>>>> +{
>>>> + struct amdgpu_userq_ctx *pctx = &queue->proc_ctx;
>>>> + struct amdgpu_userq_ctx *gctx = &queue->gang_ctx;
>>>> + struct amdgpu_userq_ctx *gdsctx = &queue->gds_ctx;
>>>> + struct amdgpu_userq_ctx *fwctx = &queue->fw_ctx;
>>>> + struct amdgpu_userq_ctx *sctx = &queue->shadow_ctx;
>>>> +
>>>> + amdgpu_bo_free_kernel(&sctx->obj,
>>>> + &sctx->gpu_addr,
>>>> + &sctx->cpu_ptr);
>>>> +
>>>> + amdgpu_bo_free_kernel(&fwctx->obj,
>>>> + &fwctx->gpu_addr,
>>>> + &fwctx->cpu_ptr);
>>>> +
>>>> + amdgpu_bo_free_kernel(&gdsctx->obj,
>>>> + &gdsctx->gpu_addr,
>>>> + &gdsctx->cpu_ptr);
>>>> +
>>>> + amdgpu_bo_free_kernel(&gctx->obj,
>>>> + &gctx->gpu_addr,
>>>> + &gctx->cpu_ptr);
>>>> +
>>>> + amdgpu_bo_free_kernel(&pctx->obj,
>>>> + &pctx->gpu_addr,
>>>> + &pctx->cpu_ptr);
>>>> +}
>>>> +
>>>> static int amdgpu_userq_gfx_v11_mqd_size(struct amdgpu_userq_mgr
>>>> *uq_mgr)
>>>> {
>>>> return sizeof(struct v11_gfx_mqd);
>>>> @@ -129,4 +248,6 @@ const struct amdgpu_userq_mqd_funcs
>>>> userq_gfx_v11_mqd_funcs = {
>>>> .mqd_size = amdgpu_userq_gfx_v11_mqd_size,
>>>> .mqd_create = amdgpu_userq_gfx_v11_mqd_create,
>>>> .mqd_destroy = amdgpu_userq_gfx_v11_mqd_destroy,
>>>> + .ctx_create = amdgpu_userq_gfx_v11_ctx_create,
>>>> + .ctx_destroy = amdgpu_userq_gfx_v11_ctx_destroy,
>>>> };
>>>> diff --git a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h
>>>> b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h
>>>> index a6abdfd5cb74..3adcd31618f7 100644
>>>> --- a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h
>>>> +++ b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h
>>>> @@ -25,9 +25,19 @@
>>>> #define AMDGPU_USERQUEUE_H_
>>>> #define AMDGPU_MAX_USERQ 512
>>>> +#define AMDGPU_USERQ_PROC_CTX_SZ PAGE_SIZE
>>>> +#define AMDGPU_USERQ_GANG_CTX_SZ PAGE_SIZE
>>>> +#define AMDGPU_USERQ_GDS_CTX_SZ PAGE_SIZE
>>>> +#define AMDGPU_USERQ_FW_CTX_SZ PAGE_SIZE
>>>> struct amdgpu_userq_mqd_funcs;
>>>> +struct amdgpu_userq_ctx {
>>>> + struct amdgpu_bo *obj;
>>>> + uint64_t gpu_addr;
>>>> + void *cpu_ptr;
>>>> +};
>>>> +
>>>> struct amdgpu_userq_mgr {
>>>> struct idr userq_idr;
>>>> struct mutex userq_mutex;
>>>> @@ -52,6 +62,12 @@ struct amdgpu_usermode_queue {
>>>> uint64_t mqd_gpu_addr;
>>>> void *mqd_cpu_ptr;
>>>> + struct amdgpu_userq_ctx proc_ctx;
>>>> + struct amdgpu_userq_ctx gang_ctx;
>>>> + struct amdgpu_userq_ctx gds_ctx;
>>>> + struct amdgpu_userq_ctx fw_ctx;
>>>> + struct amdgpu_userq_ctx shadow_ctx;
>>>> +
>>>> struct amdgpu_bo *mqd_obj;
>>>> struct amdgpu_vm *vm;
>>>> struct amdgpu_userq_mgr *userq_mgr;
>>>> @@ -64,6 +80,8 @@ struct amdgpu_userq_mqd_funcs {
>>>> int (*mqd_size)(struct amdgpu_userq_mgr *);
>>>> int (*mqd_create)(struct amdgpu_userq_mgr *, struct
>>>> amdgpu_usermode_queue *);
>>>> void (*mqd_destroy)(struct amdgpu_userq_mgr *, struct
>>>> amdgpu_usermode_queue *);
>>>> + int (*ctx_create)(struct amdgpu_userq_mgr *, struct
>>>> amdgpu_usermode_queue *);
>>>> + void (*ctx_destroy)(struct amdgpu_userq_mgr *, struct
>>>> amdgpu_usermode_queue *);
>>>> };
>>>> int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr,
>>>> struct amdgpu_device *adev);
>>>
>
More information about the amd-gfx
mailing list