[PATCH 5/8] drm/amdgpu: Create context for usermode queue
Christian König
christian.koenig at amd.com
Tue Feb 7 07:55:52 UTC 2023
Am 07.02.23 um 08:51 schrieb Shashank Sharma:
>
> On 07/02/2023 08:14, Christian König wrote:
>> Am 03.02.23 um 22:54 schrieb Shashank Sharma:
>>> The FW expects us to allocate at least one page as context space to
>>> handle gang, process, shadow, GDS and FW_space related work. This
>>> patch creates objects for the same, and adds IP-specific
>>> functions to do this.
>>>
>>> Cc: Alex Deucher <alexander.deucher at amd.com>
>>> Cc: Christian Koenig <christian.koenig at amd.com>
>>> Signed-off-by: Shashank Sharma <shashank.sharma at amd.com>
>>> ---
>>> drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c | 32 +++++
>>> .../amd/amdgpu/amdgpu_userqueue_mqd_gfx_v11.c | 121 ++++++++++++++++++
>>> .../gpu/drm/amd/include/amdgpu_userqueue.h | 18 +++
>>> 3 files changed, 171 insertions(+)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
>>> index 9f3490a91776..18281b3a51f1 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue.c
>>> @@ -42,6 +42,28 @@ static struct amdgpu_usermode_queue
>>> return idr_find(&uq_mgr->userq_idr, qid);
>>> }
>>> +static void
>>> +amdgpu_userqueue_destroy_ctx_space(struct amdgpu_userq_mgr *uq_mgr,
>>> +                                   struct amdgpu_usermode_queue *queue)
>>> +{
>>> + uq_mgr->userq_mqd_funcs->ctx_destroy(uq_mgr, queue);
>>> +}
>>> +
>>> +static int
>>> +amdgpu_userqueue_create_ctx_space(struct amdgpu_userq_mgr *uq_mgr,
>>> + struct amdgpu_usermode_queue *queue)
>>> +{
>>> + int r;
>>> +
>>> + r = uq_mgr->userq_mqd_funcs->ctx_create(uq_mgr, queue);
>>> + if (r) {
>>> + DRM_ERROR("Failed to create context space for queue\n");
>>> + return r;
>>> + }
>>> +
>>> + return 0;
>>> +}
>>> +
>>> static int
>>> amdgpu_userqueue_create_mqd(struct amdgpu_userq_mgr *uq_mgr,
>>> struct amdgpu_usermode_queue *queue)
>>> {
>>> @@ -142,12 +164,21 @@ static int amdgpu_userqueue_create(struct drm_file *filp, union drm_amdgpu_userq
>>> goto free_qid;
>>> }
>>> + r = amdgpu_userqueue_create_ctx_space(uq_mgr, queue);
>>> + if (r) {
>>> + DRM_ERROR("Failed to create context space\n");
>>> + goto free_mqd;
>>> + }
>>> +
>>> list_add_tail(&queue->userq_node, &uq_mgr->userq_list);
>>> args->out.q_id = queue->queue_id;
>>> args->out.flags = 0;
>>> mutex_unlock(&uq_mgr->userq_mutex);
>>> return 0;
>>> +free_mqd:
>>> + amdgpu_userqueue_destroy_mqd(uq_mgr, queue);
>>> +
>>> free_qid:
>>> amdgpu_userqueue_free_index(uq_mgr, queue->queue_id);
>>> @@ -170,6 +201,7 @@ static void amdgpu_userqueue_destroy(struct drm_file *filp, int queue_id)
>>> }
>>> mutex_lock(&uq_mgr->userq_mutex);
>>> + amdgpu_userqueue_destroy_ctx_space(uq_mgr, queue);
>>> amdgpu_userqueue_destroy_mqd(uq_mgr, queue);
>>> amdgpu_userqueue_free_index(uq_mgr, queue->queue_id);
>>> list_del(&queue->userq_node);
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue_mqd_gfx_v11.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue_mqd_gfx_v11.c
>>> index 57889729d635..687f90a587e3 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue_mqd_gfx_v11.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userqueue_mqd_gfx_v11.c
>>> @@ -120,6 +120,125 @@ amdgpu_userq_gfx_v11_mqd_destroy(struct amdgpu_userq_mgr *uq_mgr, struct amdgpu_
>>> }
>>> +static int amdgpu_userq_gfx_v11_ctx_create(struct amdgpu_userq_mgr *uq_mgr,
>>> +                                            struct amdgpu_usermode_queue *queue)
>>> +{
>>> + int r;
>>> + struct amdgpu_device *adev = uq_mgr->adev;
>>> + struct amdgpu_userq_ctx *pctx = &queue->proc_ctx;
>>> + struct amdgpu_userq_ctx *gctx = &queue->gang_ctx;
>>> + struct amdgpu_userq_ctx *gdsctx = &queue->gds_ctx;
>>> + struct amdgpu_userq_ctx *fwctx = &queue->fw_ctx;
>>> + struct amdgpu_userq_ctx *sctx = &queue->shadow_ctx;
>>> +
>>> + /*
>>> + * The FW expects at least one page of space allocated for
>>> + * process context related work, and one for gang context.
>>> + */
>>> + r = amdgpu_bo_create_kernel(adev, AMDGPU_USERQ_PROC_CTX_SZ, PAGE_SIZE,
>>> + AMDGPU_GEM_DOMAIN_VRAM,
>>> + &pctx->obj,
>>> + &pctx->gpu_addr,
>>> + &pctx->cpu_ptr);
>>
>> Again, don't use amdgpu_bo_create_kernel() for any of this.
> Noted,
>>
>>> + if (r) {
>>> + DRM_ERROR("Failed to allocate proc bo for userqueue (%d)", r);
>>> + return r;
>>> + }
>>> +
>>> + r = amdgpu_bo_create_kernel(adev, AMDGPU_USERQ_GANG_CTX_SZ, PAGE_SIZE,
>>> + AMDGPU_GEM_DOMAIN_VRAM,
>>> + &gctx->obj,
>>> + &gctx->gpu_addr,
>>> + &gctx->cpu_ptr);
>>> + if (r) {
>>> + DRM_ERROR("Failed to allocate gang bo for userqueue (%d)", r);
>>> + goto err_gangctx;
>>> + }
>>> +
>>> + r = amdgpu_bo_create_kernel(adev, AMDGPU_USERQ_GDS_CTX_SZ, PAGE_SIZE,
>>> + AMDGPU_GEM_DOMAIN_VRAM,
>>> + &gdsctx->obj,
>>> + &gdsctx->gpu_addr,
>>> + &gdsctx->cpu_ptr);
>>> + if (r) {
>>> + DRM_ERROR("Failed to allocate GDS bo for userqueue (%d)", r);
>>> + goto err_gdsctx;
>>> + }
>>> +
>>> + r = amdgpu_bo_create_kernel(adev, AMDGPU_USERQ_FW_CTX_SZ, PAGE_SIZE,
>>> + AMDGPU_GEM_DOMAIN_VRAM,
>>> + &fwctx->obj,
>>> + &fwctx->gpu_addr,
>>> + &fwctx->cpu_ptr);
>>> + if (r) {
>>> + DRM_ERROR("Failed to allocate FW bo for userqueue (%d)", r);
>>> + goto err_fwctx;
>>> + }
>>> +
>>> + r = amdgpu_bo_create_kernel(adev, AMDGPU_USERQ_FW_CTX_SZ, PAGE_SIZE,
>>> + AMDGPU_GEM_DOMAIN_VRAM,
>>> + &sctx->obj,
>>> + &sctx->gpu_addr,
>>> + &sctx->cpu_ptr);
>>
>> Why the heck should we allocate so many different BOs for that? Can't
>> we put all of this into one?
> If you mean why don't we create one object of 5 * PAGE_SIZE and give
> 1-page spaced offsets for all of this, yes, that would further
> simplify things.
>
> The reason why we kept them separate is that these objects could be of
> different sizes on different IPs/platforms, so we thought defining a
> separate size macro and object for each of these would make it easier to
> understand how many FW page objects we are creating for this IP generation.
> It can be done either way.
But this is completely uninteresting for common code, isn't it?
I strongly think we should just create a single BO for each queue and
put all the data (MQD, gang, GDS, FW, shadow) in it at different offsets.
This handling here is just overkill and rather error-prone (BTW, you used
AMDGPU_USERQ_FW_CTX_SZ twice).
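
For illustration, a minimal sketch of that single-BO layout could look like
the one below. The *_OFFSET macros, AMDGPU_USERQ_SHADOW_CTX_SZ and the reuse
of proc_ctx as the single context object are assumptions made for the sketch,
not part of the patch, and amdgpu_bo_create_kernel() is kept only to keep the
example short (the allocation helper would still need to change as noted
earlier in this thread):

/*
 * Sketch only: one BO per queue, every context block at a fixed offset.
 * The offset macros and AMDGPU_USERQ_SHADOW_CTX_SZ are illustrative.
 */
#define AMDGPU_USERQ_SHADOW_CTX_SZ     PAGE_SIZE

#define AMDGPU_USERQ_PROC_CTX_OFFSET   0
#define AMDGPU_USERQ_GANG_CTX_OFFSET   (AMDGPU_USERQ_PROC_CTX_OFFSET + AMDGPU_USERQ_PROC_CTX_SZ)
#define AMDGPU_USERQ_GDS_CTX_OFFSET    (AMDGPU_USERQ_GANG_CTX_OFFSET + AMDGPU_USERQ_GANG_CTX_SZ)
#define AMDGPU_USERQ_FW_CTX_OFFSET     (AMDGPU_USERQ_GDS_CTX_OFFSET + AMDGPU_USERQ_GDS_CTX_SZ)
#define AMDGPU_USERQ_SHADOW_CTX_OFFSET (AMDGPU_USERQ_FW_CTX_OFFSET + AMDGPU_USERQ_FW_CTX_SZ)
#define AMDGPU_USERQ_CTX_TOTAL_SZ      (AMDGPU_USERQ_SHADOW_CTX_OFFSET + AMDGPU_USERQ_SHADOW_CTX_SZ)

static int amdgpu_userq_gfx_v11_ctx_create(struct amdgpu_userq_mgr *uq_mgr,
                                           struct amdgpu_usermode_queue *queue)
{
        struct amdgpu_device *adev = uq_mgr->adev;
        struct amdgpu_userq_ctx *ctx = &queue->proc_ctx; /* single ctx object */
        int r;

        /* One allocation per queue; every block lives at a fixed offset. */
        r = amdgpu_bo_create_kernel(adev, AMDGPU_USERQ_CTX_TOTAL_SZ, PAGE_SIZE,
                                    AMDGPU_GEM_DOMAIN_VRAM,
                                    &ctx->obj, &ctx->gpu_addr, &ctx->cpu_ptr);
        if (r) {
                DRM_ERROR("Failed to allocate ctx bo for userqueue (%d)", r);
                return r;
        }

        /*
         * Consumers then address the individual blocks as, e.g.,
         * ctx->gpu_addr + AMDGPU_USERQ_GANG_CTX_OFFSET, instead of keeping
         * five separate amdgpu_userq_ctx objects around.
         */
        return 0;
}

static void amdgpu_userq_gfx_v11_ctx_destroy(struct amdgpu_userq_mgr *uq_mgr,
                                             struct amdgpu_usermode_queue *queue)
{
        struct amdgpu_userq_ctx *ctx = &queue->proc_ctx;

        /* A single free undoes the whole per-queue context allocation. */
        amdgpu_bo_free_kernel(&ctx->obj, &ctx->gpu_addr, &ctx->cpu_ptr);
}

The per-IP size differences could then live entirely in the size macros,
while common code and the queue structure only ever see one context BO.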
Christian.
>
> - Shashank
>
>>
>> Christian.
>>
>>> + if (r) {
>>> + DRM_ERROR("Failed to allocate shadow bo for userqueue
>>> (%d)", r);
>>> + goto err_sctx;
>>> + }
>>> +
>>> + return 0;
>>> +
>>> +err_sctx:
>>> + amdgpu_bo_free_kernel(&fwctx->obj,
>>> + &fwctx->gpu_addr,
>>> + &fwctx->cpu_ptr);
>>> +
>>> +err_fwctx:
>>> + amdgpu_bo_free_kernel(&gdsctx->obj,
>>> + &gdsctx->gpu_addr,
>>> + &gdsctx->cpu_ptr);
>>> +
>>> +err_gdsctx:
>>> + amdgpu_bo_free_kernel(&gctx->obj,
>>> + &gctx->gpu_addr,
>>> + &gctx->cpu_ptr);
>>> +
>>> +err_gangctx:
>>> + amdgpu_bo_free_kernel(&pctx->obj,
>>> + &pctx->gpu_addr,
>>> + &pctx->cpu_ptr);
>>> + return r;
>>> +}
>>> +
>>> +static void amdgpu_userq_gfx_v11_ctx_destroy(struct amdgpu_userq_mgr *uq_mgr,
>>> +                                              struct amdgpu_usermode_queue *queue)
>>> +{
>>> + struct amdgpu_userq_ctx *pctx = &queue->proc_ctx;
>>> + struct amdgpu_userq_ctx *gctx = &queue->gang_ctx;
>>> + struct amdgpu_userq_ctx *gdsctx = &queue->gds_ctx;
>>> + struct amdgpu_userq_ctx *fwctx = &queue->fw_ctx;
>>> + struct amdgpu_userq_ctx *sctx = &queue->shadow_ctx;
>>> +
>>> + amdgpu_bo_free_kernel(&sctx->obj,
>>> + &sctx->gpu_addr,
>>> + &sctx->cpu_ptr);
>>> +
>>> + amdgpu_bo_free_kernel(&fwctx->obj,
>>> + &fwctx->gpu_addr,
>>> + &fwctx->cpu_ptr);
>>> +
>>> + amdgpu_bo_free_kernel(&gdsctx->obj,
>>> + &gdsctx->gpu_addr,
>>> + &gdsctx->cpu_ptr);
>>> +
>>> + amdgpu_bo_free_kernel(&gctx->obj,
>>> + &gctx->gpu_addr,
>>> + &gctx->cpu_ptr);
>>> +
>>> + amdgpu_bo_free_kernel(&pctx->obj,
>>> + &pctx->gpu_addr,
>>> + &pctx->cpu_ptr);
>>> +}
>>> +
>>> static int amdgpu_userq_gfx_v11_mqd_size(struct amdgpu_userq_mgr *uq_mgr)
>>> {
>>> return sizeof(struct v11_gfx_mqd);
>>> @@ -129,4 +248,6 @@ const struct amdgpu_userq_mqd_funcs userq_gfx_v11_mqd_funcs = {
>>> .mqd_size = amdgpu_userq_gfx_v11_mqd_size,
>>> .mqd_create = amdgpu_userq_gfx_v11_mqd_create,
>>> .mqd_destroy = amdgpu_userq_gfx_v11_mqd_destroy,
>>> + .ctx_create = amdgpu_userq_gfx_v11_ctx_create,
>>> + .ctx_destroy = amdgpu_userq_gfx_v11_ctx_destroy,
>>> };
>>> diff --git a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h
>>> index a6abdfd5cb74..3adcd31618f7 100644
>>> --- a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h
>>> +++ b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h
>>> @@ -25,9 +25,19 @@
>>> #define AMDGPU_USERQUEUE_H_
>>> #define AMDGPU_MAX_USERQ 512
>>> +#define AMDGPU_USERQ_PROC_CTX_SZ PAGE_SIZE
>>> +#define AMDGPU_USERQ_GANG_CTX_SZ PAGE_SIZE
>>> +#define AMDGPU_USERQ_GDS_CTX_SZ PAGE_SIZE
>>> +#define AMDGPU_USERQ_FW_CTX_SZ PAGE_SIZE
>>> struct amdgpu_userq_mqd_funcs;
>>> +struct amdgpu_userq_ctx {
>>> + struct amdgpu_bo *obj;
>>> + uint64_t gpu_addr;
>>> + void *cpu_ptr;
>>> +};
>>> +
>>> struct amdgpu_userq_mgr {
>>> struct idr userq_idr;
>>> struct mutex userq_mutex;
>>> @@ -52,6 +62,12 @@ struct amdgpu_usermode_queue {
>>> uint64_t mqd_gpu_addr;
>>> void *mqd_cpu_ptr;
>>> + struct amdgpu_userq_ctx proc_ctx;
>>> + struct amdgpu_userq_ctx gang_ctx;
>>> + struct amdgpu_userq_ctx gds_ctx;
>>> + struct amdgpu_userq_ctx fw_ctx;
>>> + struct amdgpu_userq_ctx shadow_ctx;
>>> +
>>> struct amdgpu_bo *mqd_obj;
>>> struct amdgpu_vm *vm;
>>> struct amdgpu_userq_mgr *userq_mgr;
>>> @@ -64,6 +80,8 @@ struct amdgpu_userq_mqd_funcs {
>>> int (*mqd_size)(struct amdgpu_userq_mgr *);
>>> int (*mqd_create)(struct amdgpu_userq_mgr *, struct amdgpu_usermode_queue *);
>>> void (*mqd_destroy)(struct amdgpu_userq_mgr *, struct amdgpu_usermode_queue *);
>>> + int (*ctx_create)(struct amdgpu_userq_mgr *, struct amdgpu_usermode_queue *);
>>> + void (*ctx_destroy)(struct amdgpu_userq_mgr *, struct amdgpu_usermode_queue *);
>>> };
>>> int amdgpu_userq_mgr_init(struct amdgpu_userq_mgr *userq_mgr, struct amdgpu_device *adev);
>>