[PATCH 5/8] drm/amdgpu: Create context for usermode queue

Tue Feb 7 08:13:34 UTC 2023

On 07/02/2023 08:55, Christian König wrote:
> Am 07.02.23 um 08:51 schrieb Shashank Sharma:
>>
>> On 07/02/2023 08:14, Christian König wrote:
>>> Am 03.02.23 um 22:54 schrieb Shashank Sharma:
>>>> The FW expects us to allocate atleast one page as context space to
>>>> process gang, process, shadow, GDS and FW_space related work. This
>>>> patch creates some object for the same, and adds an IP specific
>>>> functions to do this.
>>>>
>>>> Cc: Alex Deucher <alexander.deucher at amd.com>
>>>> Cc: Christian Koenig <christian.koenig at amd.com>
>>>> Signed-off-by: Shashank Sharma <shashank.sharma at amd.com>
>>>> ---
>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c |  32 +++++
>>>>   .../amd/amdgpu/amdgpu_userqueue_mqd_gfx_v11.c | 121 
>>>> ++++++++++++++++++
>>>>   .../gpu/drm/amd/include/amdgpu_userqueue.h    |  18 +++
>>>>   3 files changed, 171 insertions(+)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c 
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
>>>> index 9f3490a91776..18281b3a51f1 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
>>>> @@ -42,6 +42,28 @@ static struct amdgpu_usermode_queue
>>>>       return idr_find(&uq_mgr->userq_idr, qid);
>>>>   }
>>>>   +static void
>>>> +amdgpu_userqueue_destroy_ctx_space(struct amdgpu_userq_mgr *uq_mgr,
>>>> +                                   struct amdgpu_usermode_queue 
>>>> *queue)
>>>> +{
>>>> +    uq_mgr->userq_mqd_funcs->ctx_destroy(uq_mgr, queue);
>>>> +}
>>>> +
>>>> +static int
>>>> +amdgpu_userqueue_create_ctx_space(struct amdgpu_userq_mgr *uq_mgr,
>>>> +                                  struct amdgpu_usermode_queue 
>>>> *queue)
>>>> +{
>>>> +    int r;
>>>> +
>>>> +    r = uq_mgr->userq_mqd_funcs->ctx_create(uq_mgr, queue);
>>>> +    if (r) {
>>>> +        DRM_ERROR("Failed to create context space for queue\n");
>>>> +        return r;
>>>> +    }
>>>> +
>>>> +    return 0;
>>>> +}
>>>> +
>>>>   static int
>>>>   amdgpu_userqueue_create_mqd(struct amdgpu_userq_mgr *uq_mgr, 
>>>> struct amdgpu_usermode_queue *queue)
>>>>   {
>>>> @@ -142,12 +164,21 @@ static int amdgpu_userqueue_create(struct 
>>>> drm_file *filp, union drm_amdgpu_userq
>>>>           goto free_qid;
>>>>       }
>>>>   +    r = amdgpu_userqueue_create_ctx_space(uq_mgr, queue);
>>>> +    if (r) {
>>>> +        DRM_ERROR("Failed to create context space\n");
>>>> +        goto free_mqd;
>>>> +    }
>>>> +
>>>>       list_add_tail(&queue->userq_node, &uq_mgr->userq_list);
>>>>       args->out.q_id = queue->queue_id;
>>>>       args->out.flags = 0;
>>>>       mutex_unlock(&uq_mgr->userq_mutex);
>>>>       return 0;
>>>>   +free_mqd:
>>>> +    amdgpu_userqueue_destroy_mqd(uq_mgr, queue);
>>>> +
>>>>   free_qid:
>>>>       amdgpu_userqueue_free_index(uq_mgr, queue->queue_id);
>>>>   @@ -170,6 +201,7 @@ static void amdgpu_userqueue_destroy(struct 
>>>> drm_file *filp, int queue_id)
>>>>       }
>>>>         mutex_lock(&uq_mgr->userq_mutex);
>>>> +    amdgpu_userqueue_destroy_ctx_space(uq_mgr, queue);
>>>>       amdgpu_userqueue_destroy_mqd(uq_mgr, queue);
>>>>       amdgpu_userqueue_free_index(uq_mgr, queue->queue_id);
>>>>       list_del(&queue->userq_node);
>>>> diff --git 
>>>> a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue_mqd_gfx_v11.c 
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue_mqd_gfx_v11.c
>>>> index 57889729d635..687f90a587e3 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue_mqd_gfx_v11.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue_mqd_gfx_v11.c
>>>> @@ -120,6 +120,125 @@ amdgpu_userq_gfx_v11_mqd_destroy(struct 
>>>> amdgpu_userq_mgr *uq_mgr, struct amdgpu_
>>>>     }
>>>>   +static int amdgpu_userq_gfx_v11_ctx_create(struct 
>>>> amdgpu_userq_mgr *uq_mgr,
>>>> +                                           struct 
>>>> amdgpu_usermode_queue *queue)
>>>> +{
>>>> +    int r;
>>>> +    struct amdgpu_device *adev = uq_mgr->adev;
>>>> +    struct amdgpu_userq_ctx *pctx = &queue->proc_ctx;
>>>> +    struct amdgpu_userq_ctx *gctx = &queue->gang_ctx;
>>>> +    struct amdgpu_userq_ctx *gdsctx = &queue->gds_ctx;
>>>> +    struct amdgpu_userq_ctx *fwctx = &queue->fw_ctx;
>>>> +    struct amdgpu_userq_ctx *sctx = &queue->shadow_ctx;
>>>> +
>>>> +    /*
>>>> +     * The FW expects atleast one page space allocated for
>>>> +     * process context related work, and one for gang context.
>>>> +     */
>>>> +    r = amdgpu_bo_create_kernel(adev, AMDGPU_USERQ_PROC_CTX_SZ, 
>>>> PAGE_SIZE,
>>>> +                                AMDGPU_GEM_DOMAIN_VRAM,
>>>> +                                &pctx->obj,
>>>> +                                &pctx->gpu_addr,
>>>> +                                &pctx->cpu_ptr);
>>>
>>> Again, don't use amdgpu_bo_create_kernel() for any of this.
>> Noted,
>>>
>>>> +    if (r) {
>>>> +        DRM_ERROR("Failed to allocate proc bo for userqueue (%d)", 
>>>> r);
>>>> +        return r;
>>>> +    }
>>>> +
>>>> +    r = amdgpu_bo_create_kernel(adev, AMDGPU_USERQ_GANG_CTX_SZ, 
>>>> PAGE_SIZE,
>>>> +                                AMDGPU_GEM_DOMAIN_VRAM,
>>>> +                                &gctx->obj,
>>>> +                                &gctx->gpu_addr,
>>>> +                                &gctx->cpu_ptr);
>>>> +    if (r) {
>>>> +        DRM_ERROR("Failed to allocate gang bo for userqueue (%d)", 
>>>> r);
>>>> +        goto err_gangctx;
>>>> +    }
>>>> +
>>>> +    r = amdgpu_bo_create_kernel(adev, AMDGPU_USERQ_GDS_CTX_SZ, 
>>>> PAGE_SIZE,
>>>> +                                AMDGPU_GEM_DOMAIN_VRAM,
>>>> +                                &gdsctx->obj,
>>>> +                                &gdsctx->gpu_addr,
>>>> +                                &gdsctx->cpu_ptr);
>>>> +    if (r) {
>>>> +        DRM_ERROR("Failed to allocate GDS bo for userqueue (%d)", r);
>>>> +        goto err_gdsctx;
>>>> +    }
>>>> +
>>>> +    r = amdgpu_bo_create_kernel(adev, AMDGPU_USERQ_FW_CTX_SZ, 
>>>> PAGE_SIZE,
>>>> +                                AMDGPU_GEM_DOMAIN_VRAM,
>>>> +                                &fwctx->obj,
>>>> +                                &fwctx->gpu_addr,
>>>> +                                &fwctx->cpu_ptr);
>>>> +    if (r) {
>>>> +        DRM_ERROR("Failed to allocate FW bo for userqueue (%d)", r);
>>>> +        goto err_fwctx;
>>>> +    }
>>>> +
>>>> +    r = amdgpu_bo_create_kernel(adev, AMDGPU_USERQ_FW_CTX_SZ, 
>>>> PAGE_SIZE,
>>>> +                                AMDGPU_GEM_DOMAIN_VRAM,
>>>> +                                &sctx->obj,
>>>> +                                &sctx->gpu_addr,
>>>> +                                &sctx->cpu_ptr);
>>>
>>> Why the heck should we allocate so many different BOs for that? 
>>> Can't we put all of this into one?
>> If you mean why don't we create one object of 5 * PAGE_SIZE and give 
>> 1-page spaced offsets for all of this, yes, that would further 
>> simplify things.
>>
>> The reason why we kept it separate is that these objects could be of 
>> different sizes on different IPs/platforms, so we thought defining a 
>> separate
>>
>> size macro and object for each of these will make it easier to 
>> understand how many FW page objects we are creating for this GEN IP. 
>> It can be
>>
>> either ways.
>
> But this is completely uninteresting for common code, isn't it?
>
> I strongly think we should just create a single BO for each queue and 
> put all the data (MQD, gang, GDS, FW, shadow) in it at different offsets.
>
> This handling here is just overkill and rather error prone (BTW you 
> used AMDGPU_USERQ_FW_CTX_SZ) twice.
>

Agree, we will fix this.

- Shashank

> Christian.
>
>>
>> - Shashank
>>
>>>
>>> Christian.
>>>
>>>> +    if (r) {
>>>> +        DRM_ERROR("Failed to allocate shadow bo for userqueue 
>>>> (%d)", r);
>>>> +        goto err_sctx;
>>>> +    }
>>>> +
>>>> +    return 0;
>>>> +
>>>> +err_sctx:
>>>> +    amdgpu_bo_free_kernel(&fwctx->obj,
>>>> +                          &fwctx->gpu_addr,
>>>> +                          &fwctx->cpu_ptr);
>>>> +
>>>> +err_fwctx:
>>>> +    amdgpu_bo_free_kernel(&gdsctx->obj,
>>>> +                          &gdsctx->gpu_addr,
>>>> +                          &gdsctx->cpu_ptr);
>>>> +
>>>> +err_gdsctx:
>>>> +    amdgpu_bo_free_kernel(&gctx->obj,
>>>> +                          &gctx->gpu_addr,
>>>> +                          &gctx->cpu_ptr);
>>>> +
>>>> +err_gangctx:
>>>> +    amdgpu_bo_free_kernel(&pctx->obj,
>>>> +                          &pctx->gpu_addr,
>>>> +                          &pctx->cpu_ptr);
>>>> +    return r;
>>>> +}
>>>> +
>>>> +static void amdgpu_userq_gfx_v11_ctx_destroy(struct 
>>>> amdgpu_userq_mgr *uq_mgr,
>>>> +                                            struct 
>>>> amdgpu_usermode_queue *queue)
>>>> +{
>>>> +    struct amdgpu_userq_ctx *pctx = &queue->proc_ctx;
>>>> +    struct amdgpu_userq_ctx *gctx = &queue->gang_ctx;
>>>> +    struct amdgpu_userq_ctx *gdsctx = &queue->gds_ctx;
>>>> +    struct amdgpu_userq_ctx *fwctx = &queue->fw_ctx;
>>>> +    struct amdgpu_userq_ctx *sctx = &queue->shadow_ctx;
>>>> +
>>>> +    amdgpu_bo_free_kernel(&sctx->obj,
>>>> +                          &sctx->gpu_addr,
>>>> +                          &sctx->cpu_ptr);
>>>> +
>>>> +    amdgpu_bo_free_kernel(&fwctx->obj,
>>>> +                          &fwctx->gpu_addr,
>>>> +                          &fwctx->cpu_ptr);
>>>> +
>>>> +    amdgpu_bo_free_kernel(&gdsctx->obj,
>>>> +                          &gdsctx->gpu_addr,
>>>> +                          &gdsctx->cpu_ptr);
>>>> +
>>>> +    amdgpu_bo_free_kernel(&gctx->obj,
>>>> +                          &gctx->gpu_addr,
>>>> +                          &gctx->cpu_ptr);
>>>> +
>>>> +    amdgpu_bo_free_kernel(&pctx->obj,
>>>> +                          &pctx->gpu_addr,
>>>> +                          &pctx->cpu_ptr);
>>>> +}
>>>> +
>>>>   static int amdgpu_userq_gfx_v11_mqd_size(struct amdgpu_userq_mgr 
>>>> *uq_mgr)
>>>>   {
>>>>       return sizeof(struct v11_gfx_mqd);
>>>> @@ -129,4 +248,6 @@ const struct amdgpu_userq_mqd_funcs 
>>>> userq_gfx_v11_mqd_funcs = {
>>>>       .mqd_size = amdgpu_userq_gfx_v11_mqd_size,
>>>>       .mqd_create = amdgpu_userq_gfx_v11_mqd_create,
>>>>       .mqd_destroy = amdgpu_userq_gfx_v11_mqd_destroy,
>>>> +    .ctx_create = amdgpu_userq_gfx_v11_ctx_create,
>>>> +    .ctx_destroy = amdgpu_userq_gfx_v11_ctx_destroy,
>>>>   };
>>>> diff --git a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h 
>>>> b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h
>>>> index a6abdfd5cb74..3adcd31618f7 100644
>>>> --- a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h
>>>> +++ b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h
>>>> @@ -25,9 +25,19 @@
>>>>   #define AMDGPU_USERQUEUE_H_
>>>>     #define AMDGPU_MAX_USERQ 512
>>>> +#define AMDGPU_USERQ_PROC_CTX_SZ PAGE_SIZE
>>>> +#define AMDGPU_USERQ_GANG_CTX_SZ PAGE_SIZE
>>>> +#define AMDGPU_USERQ_GDS_CTX_SZ PAGE_SIZE
>>>> +#define AMDGPU_USERQ_FW_CTX_SZ PAGE_SIZE
>>>>     struct amdgpu_userq_mqd_funcs;
>>>>   +struct amdgpu_userq_ctx {
>>>> +    struct amdgpu_bo *obj;
>>>> +    uint64_t gpu_addr;
>>>> +    void    *cpu_ptr;
>>>> +};
>>>> +
>>>>   struct amdgpu_userq_mgr {
>>>>       struct idr userq_idr;
>>>>       struct mutex userq_mutex;
>>>> @@ -52,6 +62,12 @@ struct amdgpu_usermode_queue {
>>>>       uint64_t    mqd_gpu_addr;
>>>>       void         *mqd_cpu_ptr;
>>>>   +    struct amdgpu_userq_ctx    proc_ctx;
>>>> +    struct amdgpu_userq_ctx    gang_ctx;
>>>> +    struct amdgpu_userq_ctx    gds_ctx;
>>>> +    struct amdgpu_userq_ctx    fw_ctx;
>>>> +    struct amdgpu_userq_ctx    shadow_ctx;
>>>> +
>>>>       struct amdgpu_bo    *mqd_obj;
>>>>       struct amdgpu_vm        *vm;
>>>>       struct amdgpu_userq_mgr *userq_mgr;
>>>> @@ -64,6 +80,8 @@ struct amdgpu_userq_mqd_funcs {
>>>>       int (*mqd_size)(struct amdgpu_userq_mgr *);
>>>>       int (*mqd_create)(struct amdgpu_userq_mgr *, struct 
>>>> amdgpu_usermode_queue *);
>>>>       void (*mqd_destroy)(struct amdgpu_userq_mgr *, struct 
>>>> amdgpu_usermode_queue *);
>>>> +    int (*ctx_create)(struct amdgpu_userq_mgr *, struct 
>>>> amdgpu_usermode_queue *);
>>>> +    void (*ctx_destroy)(struct amdgpu_userq_mgr *, struct 
>>>> amdgpu_usermode_queue *);
>>>>   };
>>>>     int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, 
>>>> struct amdgpu_device *adev);
>>>
>