[PATCH] drm/amdgpu: reserve GDS resources on the gfx ring for gfx10
Marek Olšák
maraeo at gmail.com
Wed Jul 17 14:27:18 UTC 2019
On Wed., Jul. 17, 2019, 03:15 Christian König, <
ckoenig.leichtzumerken at gmail.com> wrote:
> Am 17.07.19 um 02:06 schrieb Marek Olšák:
> > From: Marek Olšák <marek.olsak at amd.com>
> >
> > Hopefully we'll only use 1 gfx ring, because otherwise we'd have to have
> > separate GDS buffers for each gfx ring.
> >
> > This is a workaround to ensure stability of transform feedback. Shaders
> hang
> > waiting for a GDS instruction (ds_sub, not ds_ordered_count).
> >
> > The disadvantage is that compute IBs might get a different VMID,
> > because now gfx always has GDS and compute doesn't.
>
> So far compute is only using GWS, but I don't think that those
> reservations are a good idea in general.
>
> How severe is the ENOMEM problem you see with using an explicit GWS
> allocation?
>
I guess you mean GDS or OA.
There is no ENOMEM, it just hangs. I don't know why. The shader is waiting
for ds_sub and can't continue, but GDS is idle.
Marek
> Regards,
> Christian.
>
> >
> > Signed-off-by: Marek Olšák <marek.olsak at amd.com>
> > ---
> > drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 +
> > drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 10 ++++++++++
> > drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h | 6 ++++++
> > drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 20 ++++++++++++++++++++
> > 4 files changed, 37 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > index 4b514a44184c..cbd55d061b72 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > @@ -456,20 +456,21 @@ struct amdgpu_cs_parser {
> > struct drm_file *filp;
> > struct amdgpu_ctx *ctx;
> >
> > /* chunks */
> > unsigned nchunks;
> > struct amdgpu_cs_chunk *chunks;
> >
> > /* scheduler job object */
> > struct amdgpu_job *job;
> > struct drm_sched_entity *entity;
> > + unsigned hw_ip;
> >
> > /* buffer objects */
> > struct ww_acquire_ctx ticket;
> > struct amdgpu_bo_list *bo_list;
> > struct amdgpu_mn *mn;
> > struct amdgpu_bo_list_entry vm_pd;
> > struct list_head validated;
> > struct dma_fence *fence;
> > uint64_t bytes_moved_threshold;
> > uint64_t bytes_moved_vis_threshold;
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> > index c691df6f7a57..9125cd69a124 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> > @@ -678,20 +678,28 @@ static int amdgpu_cs_parser_bos(struct
> amdgpu_cs_parser *p,
> > if (r)
> > goto error_validate;
> >
> > amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
> > p->bytes_moved_vis);
> >
> > gds = p->bo_list->gds_obj;
> > gws = p->bo_list->gws_obj;
> > oa = p->bo_list->oa_obj;
> >
> > + if (p->hw_ip == AMDGPU_HW_IP_GFX) {
> > + /* Only gfx10 allocates these. */
> > + if (!gds)
> > + gds = p->adev->gds.gds_gfx_bo;
> > + if (!oa)
> > + oa = p->adev->gds.oa_gfx_bo;
> > + }
> > +
> > amdgpu_bo_list_for_each_entry(e, p->bo_list) {
> > struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
> >
> > /* Make sure we use the exclusive slot for shared BOs */
> > if (bo->prime_shared_count)
> > e->tv.num_shared = 0;
> > e->bo_va = amdgpu_vm_bo_find(vm, bo);
> > }
> >
> > if (gds) {
> > @@ -954,20 +962,22 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device
> *adev,
> > struct drm_amdgpu_cs_chunk_ib *chunk_ib;
> > struct drm_sched_entity *entity;
> >
> > chunk = &parser->chunks[i];
> > ib = &parser->job->ibs[j];
> > chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata;
> >
> > if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
> > continue;
> >
> > + parser->hw_ip = chunk_ib->ip_type;
> > +
> > if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX &&
> > (amdgpu_mcbp || amdgpu_sriov_vf(adev))) {
> > if (chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
> > if (chunk_ib->flags & AMDGPU_IB_FLAG_CE)
> > ce_preempt++;
> > else
> > de_preempt++;
> > }
> >
> > /* each GFX command submit allows 0 or 1 IB
> preemptible for CE & DE */
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
> > index df8a23554831..0943b8e1d97e 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
> > @@ -26,20 +26,26 @@
> >
> > struct amdgpu_ring;
> > struct amdgpu_bo;
> >
> > struct amdgpu_gds {
> > uint32_t gds_size;
> > uint32_t gws_size;
> > uint32_t oa_size;
> > uint32_t gds_compute_max_wave_id;
> > uint32_t vgt_gs_max_wave_id;
> > +
> > + /* Reserved OA for the gfx ring. (gfx10) */
> > + uint32_t gds_gfx_reservation_size;
> > + uint32_t oa_gfx_reservation_size;
> > + struct amdgpu_bo *gds_gfx_bo;
> > + struct amdgpu_bo *oa_gfx_bo;
> > };
> >
> > struct amdgpu_gds_reg_offset {
> > uint32_t mem_base;
> > uint32_t mem_size;
> > uint32_t gws;
> > uint32_t oa;
> > };
> >
> > #endif /* __AMDGPU_GDS_H__ */
> > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> > index 618291df659b..3952754c04ff 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> > @@ -1314,20 +1314,33 @@ static int gfx_v10_0_sw_init(void *handle)
> >
> > kiq = &adev->gfx.kiq;
> > r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
> > if (r)
> > return r;
> >
> > r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v10_compute_mqd));
> > if (r)
> > return r;
> >
> > + /* allocate reserved GDS resources for transform feedback */
> > + r = amdgpu_bo_create_kernel(adev,
> adev->gds.gds_gfx_reservation_size,
> > + 4, AMDGPU_GEM_DOMAIN_GDS,
> > + &adev->gds.gds_gfx_bo, NULL, NULL);
> > + if (r)
> > + return r;
> > +
> > + r = amdgpu_bo_create_kernel(adev,
> adev->gds.oa_gfx_reservation_size,
> > + 1, AMDGPU_GEM_DOMAIN_OA,
> > + &adev->gds.oa_gfx_bo, NULL, NULL);
> > + if (r)
> > + return r;
> > +
> > /* allocate visible FB for rlc auto-loading fw */
> > if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
> > r = gfx_v10_0_rlc_backdoor_autoload_buffer_init(adev);
> > if (r)
> > return r;
> > }
> >
> > adev->gfx.ce_ram_size = F32_CE_PROGRAM_RAM_SIZE;
> >
> > gfx_v10_0_gpu_early_init(adev);
> > @@ -1354,20 +1367,23 @@ static void gfx_v10_0_me_fini(struct
> amdgpu_device *adev)
> > amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_obj,
> > &adev->gfx.me.me_fw_gpu_addr,
> > (void **)&adev->gfx.me.me_fw_ptr);
> > }
> >
> > static int gfx_v10_0_sw_fini(void *handle)
> > {
> > int i;
> > struct amdgpu_device *adev = (struct amdgpu_device *)handle;
> >
> > + amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
> > + amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
> > +
> > for (i = 0; i < adev->gfx.num_gfx_rings; i++)
> > amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
> > for (i = 0; i < adev->gfx.num_compute_rings; i++)
> > amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
> >
> > amdgpu_gfx_mqd_sw_fini(adev);
> > amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
> > amdgpu_gfx_kiq_fini(adev);
> >
> > gfx_v10_0_pfp_fini(adev);
> > @@ -5181,20 +5197,24 @@ static void gfx_v10_0_set_gds_init(struct
> amdgpu_device *adev)
> > case CHIP_NAVI10:
> > default:
> > adev->gds.gds_size = 0x10000;
> > adev->gds.gds_compute_max_wave_id = 0x4ff;
> > adev->gds.vgt_gs_max_wave_id = 0x3ff;
> > break;
> > }
> >
> > adev->gds.gws_size = 64;
> > adev->gds.oa_size = 16;
> > +
> > + /* Reserved for transform feedback. */
> > + adev->gds.gds_gfx_reservation_size = 256;
> > + adev->gds.oa_gfx_reservation_size = 4;
> > }
> >
> > static void gfx_v10_0_set_user_wgp_inactive_bitmap_per_sh(struct
> amdgpu_device *adev,
> > u32 bitmap)
> > {
> > u32 data;
> >
> > if (!bitmap)
> > return;
> >
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/amd-gfx/attachments/20190717/36572ff5/attachment.html>
More information about the amd-gfx
mailing list