<div dir="auto"><div><br><br><div class="gmail_quote"><div dir="ltr" class="gmail_attr">On Wed., Jul. 17, 2019, 10:43 Christian König, <<a href="mailto:ckoenig.leichtzumerken@gmail.com">ckoenig.leichtzumerken@gmail.com</a>> wrote:<br></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
  
    
  
  <div text="#000000" bgcolor="#FFFFFF">
    <div class="m_-8743640733505943180moz-cite-prefix">Am 17.07.19 um 16:27 schrieb Marek
      Olšák:<br>
    </div>
    <blockquote type="cite">
      
      <div dir="auto">
        <div><br>
          <br>
          <div class="gmail_quote">
            <div dir="ltr" class="gmail_attr">On Wed., Jul. 17, 2019,
              03:15 Christian König, <<a href="mailto:ckoenig.leichtzumerken@gmail.com" target="_blank" rel="noreferrer">ckoenig.leichtzumerken@gmail.com</a>>
              wrote:<br>
            </div>
            <blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Am
              17.07.19 um 02:06 schrieb Marek Olšák:<br>
              > From: Marek Olšák <<a href="mailto:marek.olsak@amd.com" rel="noreferrer noreferrer" target="_blank">marek.olsak@amd.com</a>><br>
              ><br>
              > Hopefully we'll only use 1 gfx ring, because
              otherwise we'd have to have<br>
              > separate GDS buffers for each gfx ring.<br>
              ><br>
              > This is a workaround to ensure stability of transform
              feedback. Shaders hang<br>
              > waiting for a GDS instruction (ds_sub, not
              ds_ordered_count).<br>
              ><br>
              > The disadvantage is that compute IBs might get a
              different VMID,<br>
              > because now gfx always has GDS and compute doesn't.<br>
              <br>
              So far compute is only using GWS, but I don't think that
              those <br>
              reservations are a good idea in general.<br>
              <br>
              How severe is the ENOMEM problem you see with using an
              explicit GWS <br>
              allocation?<br>
            </blockquote>
          </div>
        </div>
        <div dir="auto"><br>
        </div>
        <div dir="auto">I guess you mean GDS or OA.</div>
      </div>
    </blockquote>
    <br>
    Yeah, just a typo. Compute is using GWS and we want to use GDS and
    OA here.<br>
    <br>
    <blockquote type="cite">
      <div dir="auto">
        <div dir="auto">There is no ENOMEM, it just hangs. I don't know
          why. The shader is waiting for ds_sub and can't continue, but
          GDS is idle.</div>
      </div>
    </blockquote>
    <br>
    Well could it be because we don't correctly handle non zero offsets
    or stuff like that?</div></blockquote></div></div><div dir="auto"><br></div><div dir="auto">I don't know what you mean.</div><div dir="auto"><br></div><div dir="auto"></div><div dir="auto"><div class="gmail_quote"><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div text="#000000" bgcolor="#FFFFFF">
    <br>
    Does it work with this hack when you don't allocate GDS/OA from the
    start? (Just allocate it twice or something like this).<br></div></blockquote></div></div><div dir="auto"><br></div><div dir="auto">It's only allocated once by the kernel with this hack.</div><div dir="auto"><br></div><div dir="auto">Marek</div><div dir="auto"><br></div><div dir="auto"><div class="gmail_quote"><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div text="#000000" bgcolor="#FFFFFF">
    <br>
    Christian.<br>
    <br>
    <blockquote type="cite">
      <div dir="auto">
        <div dir="auto"><br>
        </div>
        <div dir="auto">Marek</div>
        <div dir="auto"><br>
        </div>
        <div dir="auto">
          <div class="gmail_quote">
            <blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
              <br>
              Regards,<br>
              Christian.<br>
              <br>
              ><br>
              > Signed-off-by: Marek Olšák <<a href="mailto:marek.olsak@amd.com" rel="noreferrer noreferrer" target="_blank">marek.olsak@amd.com</a>><br>
              > ---<br>
              >   drivers/gpu/drm/amd/amdgpu/amdgpu.h     |  1 +<br>
              >   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  | 10
              ++++++++++<br>
              >   drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h |  6 ++++++<br>
              >   drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  | 20
              ++++++++++++++++++++<br>
              >   4 files changed, 37 insertions(+)<br>
              ><br>
              > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
              b/drivers/gpu/drm/amd/amdgpu/amdgpu.h<br>
              > index 4b514a44184c..cbd55d061b72 100644<br>
              > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h<br>
              > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h<br>
              > @@ -456,20 +456,21 @@ struct amdgpu_cs_parser {<br>
              >       struct drm_file         *filp;<br>
              >       struct amdgpu_ctx       *ctx;<br>
              >   <br>
              >       /* chunks */<br>
              >       unsigned                nchunks;<br>
              >       struct amdgpu_cs_chunk  *chunks;<br>
              >   <br>
              >       /* scheduler job object */<br>
              >       struct amdgpu_job       *job;<br>
              >       struct drm_sched_entity *entity;<br>
              > +     unsigned                hw_ip;<br>
              >   <br>
              >       /* buffer objects */<br>
              >       struct ww_acquire_ctx           ticket;<br>
              >       struct amdgpu_bo_list           *bo_list;<br>
              >       struct amdgpu_mn                *mn;<br>
              >       struct amdgpu_bo_list_entry     vm_pd;<br>
              >       struct list_head                validated;<br>
              >       struct dma_fence                *fence;<br>
              >       uint64_t                       
              bytes_moved_threshold;<br>
              >       uint64_t                       
              bytes_moved_vis_threshold;<br>
              > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
              b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c<br>
              > index c691df6f7a57..9125cd69a124 100644<br>
              > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c<br>
              > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c<br>
              > @@ -678,20 +678,28 @@ static int
              amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,<br>
              >       if (r)<br>
              >               goto error_validate;<br>
              >   <br>
              >       amdgpu_cs_report_moved_bytes(p->adev,
              p->bytes_moved,<br>
              >                                   
              p->bytes_moved_vis);<br>
              >   <br>
              >       gds = p->bo_list->gds_obj;<br>
              >       gws = p->bo_list->gws_obj;<br>
              >       oa = p->bo_list->oa_obj;<br>
              >   <br>
              > +     if (p->hw_ip == AMDGPU_HW_IP_GFX) {<br>
              > +             /* Only gfx10 allocates these. */<br>
              > +             if (!gds)<br>
              > +                     gds =
              p->adev->gds.gds_gfx_bo;<br>
              > +             if (!oa)<br>
              > +                     oa =
              p->adev->gds.oa_gfx_bo;<br>
              > +     }<br>
              > +<br>
              >       amdgpu_bo_list_for_each_entry(e, p->bo_list)
              {<br>
              >               struct amdgpu_bo *bo =
              ttm_to_amdgpu_bo(e-><a href="http://tv.bo" rel="noreferrer noreferrer noreferrer" target="_blank">tv.bo</a>);<br>
              >   <br>
              >               /* Make sure we use the exclusive slot
              for shared BOs */<br>
              >               if (bo->prime_shared_count)<br>
              >                       e->tv.num_shared = 0;<br>
              >               e->bo_va = amdgpu_vm_bo_find(vm,
              bo);<br>
              >       }<br>
              >   <br>
              >       if (gds) {<br>
              > @@ -954,20 +962,22 @@ static int
              amdgpu_cs_ib_fill(struct amdgpu_device *adev,<br>
              >               struct drm_amdgpu_cs_chunk_ib
              *chunk_ib;<br>
              >               struct drm_sched_entity *entity;<br>
              >   <br>
              >               chunk = &parser->chunks[i];<br>
              >               ib = &parser->job->ibs[j];<br>
              >               chunk_ib = (struct
              drm_amdgpu_cs_chunk_ib *)chunk->kdata;<br>
              >   <br>
              >               if (chunk->chunk_id !=
              AMDGPU_CHUNK_ID_IB)<br>
              >                       continue;<br>
              >   <br>
              > +             parser->hw_ip =
              chunk_ib->ip_type;<br>
              > +<br>
              >               if (chunk_ib->ip_type ==
              AMDGPU_HW_IP_GFX &&<br>
              >                   (amdgpu_mcbp ||
              amdgpu_sriov_vf(adev))) {<br>
              >                       if (chunk_ib->flags &
              AMDGPU_IB_FLAG_PREEMPT) {<br>
              >                               if (chunk_ib->flags
              & AMDGPU_IB_FLAG_CE)<br>
              >                                       ce_preempt++;<br>
              >                               else<br>
              >                                       de_preempt++;<br>
              >                       }<br>
              >   <br>
              >                       /* each GFX command submit
              allows 0 or 1 IB preemptible for CE & DE */<br>
              > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
              b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h<br>
              > index df8a23554831..0943b8e1d97e 100644<br>
              > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h<br>
              > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h<br>
              > @@ -26,20 +26,26 @@<br>
              >   <br>
              >   struct amdgpu_ring;<br>
              >   struct amdgpu_bo;<br>
              >   <br>
              >   struct amdgpu_gds {<br>
              >       uint32_t gds_size;<br>
              >       uint32_t gws_size;<br>
              >       uint32_t oa_size;<br>
              >       uint32_t gds_compute_max_wave_id;<br>
              >       uint32_t vgt_gs_max_wave_id;<br>
              > +<br>
              > +     /* Reserved OA for the gfx ring. (gfx10) */<br>
              > +     uint32_t gds_gfx_reservation_size;<br>
              > +     uint32_t oa_gfx_reservation_size;<br>
              > +     struct amdgpu_bo *gds_gfx_bo;<br>
              > +     struct amdgpu_bo *oa_gfx_bo;<br>
              >   };<br>
              >   <br>
              >   struct amdgpu_gds_reg_offset {<br>
              >       uint32_t        mem_base;<br>
              >       uint32_t        mem_size;<br>
              >       uint32_t        gws;<br>
              >       uint32_t        oa;<br>
              >   };<br>
              >   <br>
              >   #endif /* __AMDGPU_GDS_H__ */<br>
              > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
              b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c<br>
              > index 618291df659b..3952754c04ff 100644<br>
              > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c<br>
              > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c<br>
              > @@ -1314,20 +1314,33 @@ static int
              gfx_v10_0_sw_init(void *handle)<br>
              >   <br>
              >       kiq = &adev->gfx.kiq;<br>
              >       r = amdgpu_gfx_kiq_init_ring(adev,
              &kiq->ring, &kiq->irq);<br>
              >       if (r)<br>
              >               return r;<br>
              >   <br>
              >       r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct
              v10_compute_mqd));<br>
              >       if (r)<br>
              >               return r;<br>
              >   <br>
              > +     /* allocate reserved GDS resources for
              transform feedback */<br>
              > +     r = amdgpu_bo_create_kernel(adev,
              adev->gds.gds_gfx_reservation_size,<br>
              > +                                 4,
              AMDGPU_GEM_DOMAIN_GDS,<br>
              > +                               
               &adev->gds.gds_gfx_bo, NULL, NULL);<br>
              > +     if (r)<br>
              > +             return r;<br>
              > +<br>
              > +     r = amdgpu_bo_create_kernel(adev,
              adev->gds.oa_gfx_reservation_size,<br>
              > +                                 1,
              AMDGPU_GEM_DOMAIN_OA,<br>
              > +                               
               &adev->gds.oa_gfx_bo, NULL, NULL);<br>
              > +     if (r)<br>
              > +             return r;<br>
              > +<br>
              >       /* allocate visible FB for rlc auto-loading fw
              */<br>
              >       if (adev->firmware.load_type ==
              AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {<br>
              >               r =
              gfx_v10_0_rlc_backdoor_autoload_buffer_init(adev);<br>
              >               if (r)<br>
              >                       return r;<br>
              >       }<br>
              >   <br>
              >       adev->gfx.ce_ram_size =
              F32_CE_PROGRAM_RAM_SIZE;<br>
              >   <br>
              >       gfx_v10_0_gpu_early_init(adev);<br>
              > @@ -1354,20 +1367,23 @@ static void
              gfx_v10_0_me_fini(struct amdgpu_device *adev)<br>
              >     
               amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_obj,<br>
              >                           
               &adev->gfx.me.me_fw_gpu_addr,<br>
              >                             (void
              **)&adev->gfx.me.me_fw_ptr);<br>
              >   }<br>
              >   <br>
              >   static int gfx_v10_0_sw_fini(void *handle)<br>
              >   {<br>
              >       int i;<br>
              >       struct amdgpu_device *adev = (struct
              amdgpu_device *)handle;<br>
              >   <br>
              > +   
               amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL,
              NULL);<br>
              > +   
               amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL,
              NULL);<br>
              > +<br>
              >       for (i = 0; i < adev->gfx.num_gfx_rings;
              i++)<br>
              >             
               amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);<br>
              >       for (i = 0; i <
              adev->gfx.num_compute_rings; i++)<br>
              >             
               amdgpu_ring_fini(&adev->gfx.compute_ring[i]);<br>
              >   <br>
              >       amdgpu_gfx_mqd_sw_fini(adev);<br>
              >     
               amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring,
              &adev->gfx.kiq.irq);<br>
              >       amdgpu_gfx_kiq_fini(adev);<br>
              >   <br>
              >       gfx_v10_0_pfp_fini(adev);<br>
              > @@ -5181,20 +5197,24 @@ static void
              gfx_v10_0_set_gds_init(struct amdgpu_device *adev)<br>
              >       case CHIP_NAVI10:<br>
              >       default:<br>
              >               adev->gds.gds_size = 0x10000;<br>
              >               adev->gds.gds_compute_max_wave_id =
              0x4ff;<br>
              >               adev->gds.vgt_gs_max_wave_id =
              0x3ff;<br>
              >               break;<br>
              >       }<br>
              >   <br>
              >       adev->gds.gws_size = 64;<br>
              >       adev->gds.oa_size = 16;<br>
              > +<br>
              > +     /* Reserved for transform feedback. */<br>
              > +     adev->gds.gds_gfx_reservation_size = 256;<br>
              > +     adev->gds.oa_gfx_reservation_size = 4;<br>
              >   }<br>
              >   <br>
              >   static void
              gfx_v10_0_set_user_wgp_inactive_bitmap_per_sh(struct
              amdgpu_device *adev,<br>
              >                                                     
                 u32 bitmap)<br>
              >   {<br>
              >       u32 data;<br>
              >   <br>
              >       if (!bitmap)<br>
              >               return;<br>
              >   <br>
              <br>
            </blockquote>
          </div>
        </div>
      </div>
      <br>
      <fieldset class="m_-8743640733505943180mimeAttachmentHeader"></fieldset>
      <pre class="m_-8743640733505943180moz-quote-pre">_______________________________________________
amd-gfx mailing list
<a class="m_-8743640733505943180moz-txt-link-abbreviated" href="mailto:amd-gfx@lists.freedesktop.org" target="_blank" rel="noreferrer">amd-gfx@lists.freedesktop.org</a>
<a class="m_-8743640733505943180moz-txt-link-freetext" href="https://lists.freedesktop.org/mailman/listinfo/amd-gfx" target="_blank" rel="noreferrer">https://lists.freedesktop.org/mailman/listinfo/amd-gfx</a></pre>
    </blockquote>
    <br>
  </div>

</blockquote></div></div></div>