[PATCH] drm/amdgpu: reserve GDS resources on the gfx ring for gfx10

Christian König ckoenig.leichtzumerken at gmail.com
Wed Jul 17 14:43:16 UTC 2019


Am 17.07.19 um 16:27 schrieb Marek Olšák:
>
>
> On Wed., Jul. 17, 2019, 03:15 Christian König, 
> <ckoenig.leichtzumerken at gmail.com 
> <mailto:ckoenig.leichtzumerken at gmail.com>> wrote:
>
>     Am 17.07.19 um 02:06 schrieb Marek Olšák:
>     > From: Marek Olšák <marek.olsak at amd.com <mailto:marek.olsak at amd.com>>
>     >
>     > Hopefully we'll only use 1 gfx ring, because otherwise we'd have
>     to have
>     > separate GDS buffers for each gfx ring.
>     >
>     > This is a workaround to ensure stability of transform feedback.
>     Shaders hang
>     > waiting for a GDS instruction (ds_sub, not ds_ordered_count).
>     >
>     > The disadvantage is that compute IBs might get a different VMID,
>     > because now gfx always has GDS and compute doesn't.
>
>     So far compute is only using GWS, but I don't think that those
>     reservations are a good idea in general.
>
>     How severe is the ENOMEM problem you see with using an explicit GWS
>     allocation?
>
>
> I guess you mean GDS or OA.

Yeah, just a typo. Compute is using GWS and we want to use GDS and OA here.

> There is no ENOMEM, it just hangs. I don't know why. The shader is 
> waiting for ds_sub and can't continue, but GDS is idle.

Well could it be because we don't correctly handle non zero offsets or 
stuff like that?

Does it work with this hack when you don't allocate GDS/OA from the 
start? (Just allocate it twice or something like this).

Christian.

>
> Marek
>
>
>     Regards,
>     Christian.
>
>     >
>     > Signed-off-by: Marek Olšák <marek.olsak at amd.com
>     <mailto:marek.olsak at amd.com>>
>     > ---
>     >   drivers/gpu/drm/amd/amdgpu/amdgpu.h     |  1 +
>     >   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  | 10 ++++++++++
>     >   drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h |  6 ++++++
>     >   drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  | 20 ++++++++++++++++++++
>     >   4 files changed, 37 insertions(+)
>     >
>     > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>     b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>     > index 4b514a44184c..cbd55d061b72 100644
>     > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>     > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>     > @@ -456,20 +456,21 @@ struct amdgpu_cs_parser {
>     >       struct drm_file         *filp;
>     >       struct amdgpu_ctx       *ctx;
>     >
>     >       /* chunks */
>     >       unsigned                nchunks;
>     >       struct amdgpu_cs_chunk  *chunks;
>     >
>     >       /* scheduler job object */
>     >       struct amdgpu_job       *job;
>     >       struct drm_sched_entity *entity;
>     > +     unsigned                hw_ip;
>     >
>     >       /* buffer objects */
>     >       struct ww_acquire_ctx           ticket;
>     >       struct amdgpu_bo_list           *bo_list;
>     >       struct amdgpu_mn                *mn;
>     >       struct amdgpu_bo_list_entry     vm_pd;
>     >       struct list_head                validated;
>     >       struct dma_fence                *fence;
>     >       uint64_t bytes_moved_threshold;
>     >       uint64_t bytes_moved_vis_threshold;
>     > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>     b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>     > index c691df6f7a57..9125cd69a124 100644
>     > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>     > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>     > @@ -678,20 +678,28 @@ static int amdgpu_cs_parser_bos(struct
>     amdgpu_cs_parser *p,
>     >       if (r)
>     >               goto error_validate;
>     >
>     >       amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
>     > p->bytes_moved_vis);
>     >
>     >       gds = p->bo_list->gds_obj;
>     >       gws = p->bo_list->gws_obj;
>     >       oa = p->bo_list->oa_obj;
>     >
>     > +     if (p->hw_ip == AMDGPU_HW_IP_GFX) {
>     > +             /* Only gfx10 allocates these. */
>     > +             if (!gds)
>     > +                     gds = p->adev->gds.gds_gfx_bo;
>     > +             if (!oa)
>     > +                     oa = p->adev->gds.oa_gfx_bo;
>     > +     }
>     > +
>     >       amdgpu_bo_list_for_each_entry(e, p->bo_list) {
>     >               struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo
>     <http://tv.bo>);
>     >
>     >               /* Make sure we use the exclusive slot for shared
>     BOs */
>     >               if (bo->prime_shared_count)
>     >                       e->tv.num_shared = 0;
>     >               e->bo_va = amdgpu_vm_bo_find(vm, bo);
>     >       }
>     >
>     >       if (gds) {
>     > @@ -954,20 +962,22 @@ static int amdgpu_cs_ib_fill(struct
>     amdgpu_device *adev,
>     >               struct drm_amdgpu_cs_chunk_ib *chunk_ib;
>     >               struct drm_sched_entity *entity;
>     >
>     >               chunk = &parser->chunks[i];
>     >               ib = &parser->job->ibs[j];
>     >               chunk_ib = (struct drm_amdgpu_cs_chunk_ib
>     *)chunk->kdata;
>     >
>     >               if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
>     >                       continue;
>     >
>     > +             parser->hw_ip = chunk_ib->ip_type;
>     > +
>     >               if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX &&
>     >                   (amdgpu_mcbp || amdgpu_sriov_vf(adev))) {
>     >                       if (chunk_ib->flags &
>     AMDGPU_IB_FLAG_PREEMPT) {
>     >                               if (chunk_ib->flags &
>     AMDGPU_IB_FLAG_CE)
>     >                                       ce_preempt++;
>     >                               else
>     >                                       de_preempt++;
>     >                       }
>     >
>     >                       /* each GFX command submit allows 0 or 1
>     IB preemptible for CE & DE */
>     > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
>     b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
>     > index df8a23554831..0943b8e1d97e 100644
>     > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
>     > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
>     > @@ -26,20 +26,26 @@
>     >
>     >   struct amdgpu_ring;
>     >   struct amdgpu_bo;
>     >
>     >   struct amdgpu_gds {
>     >       uint32_t gds_size;
>     >       uint32_t gws_size;
>     >       uint32_t oa_size;
>     >       uint32_t gds_compute_max_wave_id;
>     >       uint32_t vgt_gs_max_wave_id;
>     > +
>     > +     /* Reserved OA for the gfx ring. (gfx10) */
>     > +     uint32_t gds_gfx_reservation_size;
>     > +     uint32_t oa_gfx_reservation_size;
>     > +     struct amdgpu_bo *gds_gfx_bo;
>     > +     struct amdgpu_bo *oa_gfx_bo;
>     >   };
>     >
>     >   struct amdgpu_gds_reg_offset {
>     >       uint32_t        mem_base;
>     >       uint32_t        mem_size;
>     >       uint32_t        gws;
>     >       uint32_t        oa;
>     >   };
>     >
>     >   #endif /* __AMDGPU_GDS_H__ */
>     > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>     b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>     > index 618291df659b..3952754c04ff 100644
>     > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>     > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>     > @@ -1314,20 +1314,33 @@ static int gfx_v10_0_sw_init(void *handle)
>     >
>     >       kiq = &adev->gfx.kiq;
>     >       r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
>     >       if (r)
>     >               return r;
>     >
>     >       r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct
>     v10_compute_mqd));
>     >       if (r)
>     >               return r;
>     >
>     > +     /* allocate reserved GDS resources for transform feedback */
>     > +     r = amdgpu_bo_create_kernel(adev,
>     adev->gds.gds_gfx_reservation_size,
>     > +                                 4, AMDGPU_GEM_DOMAIN_GDS,
>     > +  &adev->gds.gds_gfx_bo, NULL, NULL);
>     > +     if (r)
>     > +             return r;
>     > +
>     > +     r = amdgpu_bo_create_kernel(adev,
>     adev->gds.oa_gfx_reservation_size,
>     > +                                 1, AMDGPU_GEM_DOMAIN_OA,
>     > +  &adev->gds.oa_gfx_bo, NULL, NULL);
>     > +     if (r)
>     > +             return r;
>     > +
>     >       /* allocate visible FB for rlc auto-loading fw */
>     >       if (adev->firmware.load_type ==
>     AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
>     >               r = gfx_v10_0_rlc_backdoor_autoload_buffer_init(adev);
>     >               if (r)
>     >                       return r;
>     >       }
>     >
>     >       adev->gfx.ce_ram_size = F32_CE_PROGRAM_RAM_SIZE;
>     >
>     >       gfx_v10_0_gpu_early_init(adev);
>     > @@ -1354,20 +1367,23 @@ static void gfx_v10_0_me_fini(struct
>     amdgpu_device *adev)
>     >  amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_obj,
>     >  &adev->gfx.me.me_fw_gpu_addr,
>     >                             (void **)&adev->gfx.me.me_fw_ptr);
>     >   }
>     >
>     >   static int gfx_v10_0_sw_fini(void *handle)
>     >   {
>     >       int i;
>     >       struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>     >
>     > +  amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
>     > +  amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
>     > +
>     >       for (i = 0; i < adev->gfx.num_gfx_rings; i++)
>     >  amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
>     >       for (i = 0; i < adev->gfx.num_compute_rings; i++)
>     >  amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
>     >
>     >       amdgpu_gfx_mqd_sw_fini(adev);
>     >  amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
>     >       amdgpu_gfx_kiq_fini(adev);
>     >
>     >       gfx_v10_0_pfp_fini(adev);
>     > @@ -5181,20 +5197,24 @@ static void
>     gfx_v10_0_set_gds_init(struct amdgpu_device *adev)
>     >       case CHIP_NAVI10:
>     >       default:
>     >               adev->gds.gds_size = 0x10000;
>     >               adev->gds.gds_compute_max_wave_id = 0x4ff;
>     >               adev->gds.vgt_gs_max_wave_id = 0x3ff;
>     >               break;
>     >       }
>     >
>     >       adev->gds.gws_size = 64;
>     >       adev->gds.oa_size = 16;
>     > +
>     > +     /* Reserved for transform feedback. */
>     > +     adev->gds.gds_gfx_reservation_size = 256;
>     > +     adev->gds.oa_gfx_reservation_size = 4;
>     >   }
>     >
>     >   static void
>     gfx_v10_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device
>     *adev,
>     >    u32 bitmap)
>     >   {
>     >       u32 data;
>     >
>     >       if (!bitmap)
>     >               return;
>     >
>
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/amd-gfx/attachments/20190717/1e90989a/attachment-0001.html>


More information about the amd-gfx mailing list