[PATCH v9 08/14] drm/amdgpu: map wptr BO into GART

Alex Deucher alexdeucher at gmail.com
Thu May 2 13:46:25 UTC 2024


On Thu, May 2, 2024 at 1:31 AM Sharma, Shashank <shashank.sharma at amd.com> wrote:
>
>
> On 01/05/2024 23:36, Alex Deucher wrote:
> > On Fri, Apr 26, 2024 at 9:57 AM Shashank Sharma <shashank.sharma at amd.com> wrote:
> >> To support oversubscription, MES FW expects WPTR BOs to
> >> be mapped into GART, before they are submitted to usermode
> >> queues. This patch adds a function for the same.
> >>
> >> V4: fix the wptr value before mapping lookup (Bas, Christian).
> >>
> >> V5: Addressed review comments from Christian:
> >>      - Either pin object or allocate from GART, but not both.
> >>      - All the handling must be done with the VM locks held.
> >>
> >> V7: Addressed review comments from Christian:
> >>      - Do not take vm->eviction_lock
> >>      - Use amdgpu_bo_gpu_offset to get the wptr_bo GPU offset
> >>
> >> V8: Rebase
> >> V9: Changed the function names from gfx_v11* to mes_v11*
> >>
> >> Cc: Alex Deucher <alexander.deucher at amd.com>
> >> Cc: Christian Koenig <christian.koenig at amd.com>
> >> Signed-off-by: Shashank Sharma <shashank.sharma at amd.com>
> >> Signed-off-by: Arvind Yadav <arvind.yadav at amd.com>
> >> ---
> >>   .../gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c  | 77 +++++++++++++++++++
> >>   .../gpu/drm/amd/include/amdgpu_userqueue.h    |  1 +
> >>   2 files changed, 78 insertions(+)
> >>
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c
> >> index 8d2cd61af26b..37b80626e792 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c
> >> +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0_userqueue.c
> >> @@ -30,6 +30,74 @@
> >>   #define AMDGPU_USERQ_PROC_CTX_SZ PAGE_SIZE
> >>   #define AMDGPU_USERQ_GANG_CTX_SZ PAGE_SIZE
> >>
> >> +static int
> >> +mes_v11_0_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct amdgpu_bo *bo)
> >> +{
> >> +       int ret;
> >> +
> >> +       ret = amdgpu_bo_reserve(bo, true);
> >> +       if (ret) {
> >> +               DRM_ERROR("Failed to reserve bo. ret %d\n", ret);
> >> +               goto err_reserve_bo_failed;
> >> +       }
> >> +
> >> +       ret = amdgpu_ttm_alloc_gart(&bo->tbo);
> >> +       if (ret) {
> >> +               DRM_ERROR("Failed to bind bo to GART. ret %d\n", ret);
> >> +               goto err_map_bo_gart_failed;
> >> +       }
> >> +
> >> +       amdgpu_bo_unreserve(bo);
> >> +       bo = amdgpu_bo_ref(bo);
> >> +
> >> +       return 0;
> >> +
> >> +err_map_bo_gart_failed:
> >> +       amdgpu_bo_unreserve(bo);
> >> +err_reserve_bo_failed:
> >> +       return ret;
> >> +}
> >> +
> >> +static int
> >> +mes_v11_0_create_wptr_mapping(struct amdgpu_userq_mgr *uq_mgr,
> >> +                             struct amdgpu_usermode_queue *queue,
> >> +                             uint64_t wptr)
> >> +{
> >> +       struct amdgpu_device *adev = uq_mgr->adev;
> >> +       struct amdgpu_bo_va_mapping *wptr_mapping;
> >> +       struct amdgpu_vm *wptr_vm;
> >> +       struct amdgpu_userq_obj *wptr_obj = &queue->wptr_obj;
> >> +       int ret;
> >> +
> >> +       wptr_vm = queue->vm;
> >> +       ret = amdgpu_bo_reserve(wptr_vm->root.bo, false);
> >> +       if (ret)
> >> +               return ret;
> >> +
> >> +       wptr &= AMDGPU_GMC_HOLE_MASK;
> >> +       wptr_mapping = amdgpu_vm_bo_lookup_mapping(wptr_vm, wptr >> PAGE_SHIFT);
> >> +       amdgpu_bo_unreserve(wptr_vm->root.bo);
> >> +       if (!wptr_mapping) {
> >> +               DRM_ERROR("Failed to lookup wptr bo\n");
> >> +               return -EINVAL;
> >> +       }
> >> +
> >> +       wptr_obj->obj = wptr_mapping->bo_va->base.bo;
> >> +       if (wptr_obj->obj->tbo.base.size > PAGE_SIZE) {
> >> +               DRM_ERROR("Requested GART mapping for wptr bo larger than one page\n");
> >> +               return -EINVAL;
> >> +       }
> >> +
> >> +       ret = mes_v11_0_map_gtt_bo_to_gart(adev, wptr_obj->obj);
> >> +       if (ret) {
> >> +               DRM_ERROR("Failed to map wptr bo to GART\n");
> >> +               return ret;
> >> +       }
> >> +
> >> +       queue->wptr_obj.gpu_addr = amdgpu_bo_gpu_offset_no_check(wptr_obj->obj);
> > The wptr virtual address from the user may not be at offset 0 from the
> > start of the object.  We should add the offset to the base vmid0 GPU
> > address.
>
> can you please elaborate a bit here ? wptr_obj->obj is already mapped to
> gart, do we still need this ?

The location that the MES will poll needs to be the same as the
location that the UMD will be writing to.  E.g., if you allocate the
BO and then map it into user space at location 0x5000 in the user's
GPU virtual address space and then the user uses 0x5008 as the wptr
address, we need to make sure that we are polling in MES at vmid0
virtual address + 0x8.  If you map the BO at 0x2000 in the vmid0
address space, you need to make sure to point the firmware to 0x2008.

Alex

>
> - Shashank
>
> >
> > Alex
> >
> >> +       return 0;
> >> +}
> >> +
> >>   static int mes_v11_0_userq_map(struct amdgpu_userq_mgr *uq_mgr,
> >>                                 struct amdgpu_usermode_queue *queue,
> >>                                 struct amdgpu_mqd_prop *userq_props)
> >> @@ -61,6 +129,7 @@ static int mes_v11_0_userq_map(struct amdgpu_userq_mgr *uq_mgr,
> >>          queue_input.queue_size = userq_props->queue_size >> 2;
> >>          queue_input.doorbell_offset = userq_props->doorbell_index;
> >>          queue_input.page_table_base_addr = amdgpu_gmc_pd_addr(queue->vm->root.bo);
> >> +       queue_input.wptr_mc_addr = queue->wptr_obj.gpu_addr;
> >>
> >>          amdgpu_mes_lock(&adev->mes);
> >>          r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input);
> >> @@ -187,6 +256,13 @@ static int mes_v11_0_userq_mqd_create(struct amdgpu_userq_mgr *uq_mgr,
> >>                  goto free_mqd;
> >>          }
> >>
> >> +       /* FW expects WPTR BOs to be mapped into GART */
> >> +       r = mes_v11_0_create_wptr_mapping(uq_mgr, queue, userq_props->wptr_gpu_addr);
> >> +       if (r) {
> >> +               DRM_ERROR("Failed to create WPTR mapping\n");
> >> +               goto free_ctx;
> >> +       }
> >> +
> >>          /* Map userqueue into FW using MES */
> >>          r = mes_v11_0_userq_map(uq_mgr, queue, userq_props);
> >>          if (r) {
> >> @@ -216,6 +292,7 @@ mes_v11_0_userq_mqd_destroy(struct amdgpu_userq_mgr *uq_mgr,
> >>                              struct amdgpu_usermode_queue *queue)
> >>   {
> >>          mes_v11_0_userq_unmap(uq_mgr, queue);
> >> +       amdgpu_bo_unref(&queue->wptr_obj.obj);
> >>          amdgpu_userqueue_destroy_object(uq_mgr, &queue->fw_obj);
> >>          kfree(queue->userq_prop);
> >>          amdgpu_userqueue_destroy_object(uq_mgr, &queue->mqd);
> >> diff --git a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h
> >> index 643f31474bd8..ffe8a3d73756 100644
> >> --- a/drivers/gpu/drm/amd/include/amdgpu_userqueue.h
> >> +++ b/drivers/gpu/drm/amd/include/amdgpu_userqueue.h
> >> @@ -45,6 +45,7 @@ struct amdgpu_usermode_queue {
> >>          struct amdgpu_vm        *vm;
> >>          struct amdgpu_userq_obj mqd;
> >>          struct amdgpu_userq_obj fw_obj;
> >> +       struct amdgpu_userq_obj wptr_obj;
> >>   };
> >>
> >>   struct amdgpu_userq_funcs {
> >> --
> >> 2.43.2
> >>


More information about the amd-gfx mailing list