[PATCH] drm/amdgpu: reserve GDS resources on the gfx ring for gfx10

Marek Olšák maraeo at gmail.com
Wed Jul 17 14:54:19 UTC 2019


On Wed., Jul. 17, 2019, 10:43 Christian König, <
ckoenig.leichtzumerken at gmail.com> wrote:

> Am 17.07.19 um 16:27 schrieb Marek Olšák:
>
>
>
> On Wed., Jul. 17, 2019, 03:15 Christian König, <
> ckoenig.leichtzumerken at gmail.com> wrote:
>
>> Am 17.07.19 um 02:06 schrieb Marek Olšák:
>> > From: Marek Olšák <marek.olsak at amd.com>
>> >
>> > Hopefully we'll only use 1 gfx ring, because otherwise we'd have to have
>> > separate GDS buffers for each gfx ring.
>> >
>> > This is a workaround to ensure stability of transform feedback. Shaders
>> hang
>> > waiting for a GDS instruction (ds_sub, not ds_ordered_count).
>> >
>> > The disadvantage is that compute IBs might get a different VMID,
>> > because now gfx always has GDS and compute doesn't.
>>
>> So far compute is only using GWS, but I don't think that those
>> reservations are a good idea in general.
>>
>> How severe is the ENOMEM problem you see with using an explicit GWS
>> allocation?
>>
>
> I guess you mean GDS or OA.
>
>
> Yeah, just a typo. Compute is using GWS and we want to use GDS and OA here.
>
> There is no ENOMEM, it just hangs. I don't know why. The shader is waiting
> for ds_sub and can't continue, but GDS is idle.
>
>
> Well could it be because we don't correctly handle non zero offsets or
> stuff like that?
>

I don't know what you mean.


> Does it work with this hack when you don't allocate GDS/OA from the start?
> (Just allocate it twice or something like this).
>

It's only allocated once by the kernel with this hack.

Marek


> Christian.
>
>
> Marek
>
>
>> Regards,
>> Christian.
>>
>> >
>> > Signed-off-by: Marek Olšák <marek.olsak at amd.com>
>> > ---
>> >   drivers/gpu/drm/amd/amdgpu/amdgpu.h     |  1 +
>> >   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  | 10 ++++++++++
>> >   drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h |  6 ++++++
>> >   drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  | 20 ++++++++++++++++++++
>> >   4 files changed, 37 insertions(+)
>> >
>> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> > index 4b514a44184c..cbd55d061b72 100644
>> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> > @@ -456,20 +456,21 @@ struct amdgpu_cs_parser {
>> >       struct drm_file         *filp;
>> >       struct amdgpu_ctx       *ctx;
>> >
>> >       /* chunks */
>> >       unsigned                nchunks;
>> >       struct amdgpu_cs_chunk  *chunks;
>> >
>> >       /* scheduler job object */
>> >       struct amdgpu_job       *job;
>> >       struct drm_sched_entity *entity;
>> > +     unsigned                hw_ip;
>> >
>> >       /* buffer objects */
>> >       struct ww_acquire_ctx           ticket;
>> >       struct amdgpu_bo_list           *bo_list;
>> >       struct amdgpu_mn                *mn;
>> >       struct amdgpu_bo_list_entry     vm_pd;
>> >       struct list_head                validated;
>> >       struct dma_fence                *fence;
>> >       uint64_t                        bytes_moved_threshold;
>> >       uint64_t                        bytes_moved_vis_threshold;
>> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> > index c691df6f7a57..9125cd69a124 100644
>> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> > @@ -678,20 +678,28 @@ static int amdgpu_cs_parser_bos(struct
>> amdgpu_cs_parser *p,
>> >       if (r)
>> >               goto error_validate;
>> >
>> >       amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
>> >                                    p->bytes_moved_vis);
>> >
>> >       gds = p->bo_list->gds_obj;
>> >       gws = p->bo_list->gws_obj;
>> >       oa = p->bo_list->oa_obj;
>> >
>> > +     if (p->hw_ip == AMDGPU_HW_IP_GFX) {
>> > +             /* Only gfx10 allocates these. */
>> > +             if (!gds)
>> > +                     gds = p->adev->gds.gds_gfx_bo;
>> > +             if (!oa)
>> > +                     oa = p->adev->gds.oa_gfx_bo;
>> > +     }
>> > +
>> >       amdgpu_bo_list_for_each_entry(e, p->bo_list) {
>> >               struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
>> >
>> >               /* Make sure we use the exclusive slot for shared BOs */
>> >               if (bo->prime_shared_count)
>> >                       e->tv.num_shared = 0;
>> >               e->bo_va = amdgpu_vm_bo_find(vm, bo);
>> >       }
>> >
>> >       if (gds) {
>> > @@ -954,20 +962,22 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device
>> *adev,
>> >               struct drm_amdgpu_cs_chunk_ib *chunk_ib;
>> >               struct drm_sched_entity *entity;
>> >
>> >               chunk = &parser->chunks[i];
>> >               ib = &parser->job->ibs[j];
>> >               chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata;
>> >
>> >               if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
>> >                       continue;
>> >
>> > +             parser->hw_ip = chunk_ib->ip_type;
>> > +
>> >               if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX &&
>> >                   (amdgpu_mcbp || amdgpu_sriov_vf(adev))) {
>> >                       if (chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
>> >                               if (chunk_ib->flags & AMDGPU_IB_FLAG_CE)
>> >                                       ce_preempt++;
>> >                               else
>> >                                       de_preempt++;
>> >                       }
>> >
>> >                       /* each GFX command submit allows 0 or 1 IB
>> preemptible for CE & DE */
>> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
>> > index df8a23554831..0943b8e1d97e 100644
>> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
>> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
>> > @@ -26,20 +26,26 @@
>> >
>> >   struct amdgpu_ring;
>> >   struct amdgpu_bo;
>> >
>> >   struct amdgpu_gds {
>> >       uint32_t gds_size;
>> >       uint32_t gws_size;
>> >       uint32_t oa_size;
>> >       uint32_t gds_compute_max_wave_id;
>> >       uint32_t vgt_gs_max_wave_id;
>> > +
>> > +     /* Reserved OA for the gfx ring. (gfx10) */
>> > +     uint32_t gds_gfx_reservation_size;
>> > +     uint32_t oa_gfx_reservation_size;
>> > +     struct amdgpu_bo *gds_gfx_bo;
>> > +     struct amdgpu_bo *oa_gfx_bo;
>> >   };
>> >
>> >   struct amdgpu_gds_reg_offset {
>> >       uint32_t        mem_base;
>> >       uint32_t        mem_size;
>> >       uint32_t        gws;
>> >       uint32_t        oa;
>> >   };
>> >
>> >   #endif /* __AMDGPU_GDS_H__ */
>> > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>> b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>> > index 618291df659b..3952754c04ff 100644
>> > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>> > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>> > @@ -1314,20 +1314,33 @@ static int gfx_v10_0_sw_init(void *handle)
>> >
>> >       kiq = &adev->gfx.kiq;
>> >       r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
>> >       if (r)
>> >               return r;
>> >
>> >       r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v10_compute_mqd));
>> >       if (r)
>> >               return r;
>> >
>> > +     /* allocate reserved GDS resources for transform feedback */
>> > +     r = amdgpu_bo_create_kernel(adev,
>> adev->gds.gds_gfx_reservation_size,
>> > +                                 4, AMDGPU_GEM_DOMAIN_GDS,
>> > +                                 &adev->gds.gds_gfx_bo, NULL, NULL);
>> > +     if (r)
>> > +             return r;
>> > +
>> > +     r = amdgpu_bo_create_kernel(adev,
>> adev->gds.oa_gfx_reservation_size,
>> > +                                 1, AMDGPU_GEM_DOMAIN_OA,
>> > +                                 &adev->gds.oa_gfx_bo, NULL, NULL);
>> > +     if (r)
>> > +             return r;
>> > +
>> >       /* allocate visible FB for rlc auto-loading fw */
>> >       if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
>> {
>> >               r = gfx_v10_0_rlc_backdoor_autoload_buffer_init(adev);
>> >               if (r)
>> >                       return r;
>> >       }
>> >
>> >       adev->gfx.ce_ram_size = F32_CE_PROGRAM_RAM_SIZE;
>> >
>> >       gfx_v10_0_gpu_early_init(adev);
>> > @@ -1354,20 +1367,23 @@ static void gfx_v10_0_me_fini(struct
>> amdgpu_device *adev)
>> >       amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_obj,
>> >                             &adev->gfx.me.me_fw_gpu_addr,
>> >                             (void **)&adev->gfx.me.me_fw_ptr);
>> >   }
>> >
>> >   static int gfx_v10_0_sw_fini(void *handle)
>> >   {
>> >       int i;
>> >       struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>> >
>> > +     amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
>> > +     amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
>> > +
>> >       for (i = 0; i < adev->gfx.num_gfx_rings; i++)
>> >               amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
>> >       for (i = 0; i < adev->gfx.num_compute_rings; i++)
>> >               amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
>> >
>> >       amdgpu_gfx_mqd_sw_fini(adev);
>> >       amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
>> >       amdgpu_gfx_kiq_fini(adev);
>> >
>> >       gfx_v10_0_pfp_fini(adev);
>> > @@ -5181,20 +5197,24 @@ static void gfx_v10_0_set_gds_init(struct
>> amdgpu_device *adev)
>> >       case CHIP_NAVI10:
>> >       default:
>> >               adev->gds.gds_size = 0x10000;
>> >               adev->gds.gds_compute_max_wave_id = 0x4ff;
>> >               adev->gds.vgt_gs_max_wave_id = 0x3ff;
>> >               break;
>> >       }
>> >
>> >       adev->gds.gws_size = 64;
>> >       adev->gds.oa_size = 16;
>> > +
>> > +     /* Reserved for transform feedback. */
>> > +     adev->gds.gds_gfx_reservation_size = 256;
>> > +     adev->gds.oa_gfx_reservation_size = 4;
>> >   }
>> >
>> >   static void gfx_v10_0_set_user_wgp_inactive_bitmap_per_sh(struct
>> amdgpu_device *adev,
>> >                                                         u32 bitmap)
>> >   {
>> >       u32 data;
>> >
>> >       if (!bitmap)
>> >               return;
>> >
>>
>>
> _______________________________________________
> amd-gfx mailing listamd-gfx at lists.freedesktop.orghttps://lists.freedesktop.org/mailman/listinfo/amd-gfx
>
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/amd-gfx/attachments/20190717/80fbdceb/attachment-0001.html>


More information about the amd-gfx mailing list