Mesa (main): radeonsi: gather pipe_stream_output_info from NIR intrinsics

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Fri Apr 22 22:47:27 UTC 2022


Module: Mesa
Branch: main
Commit: b57a163b7da52c92eac07147f6e0a61ee27a6da1
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=b57a163b7da52c92eac07147f6e0a61ee27a6da1

Author: Marek Olšák <marek.olsak at amd.com>
Date:   Sun Dec 19 20:10:03 2021 -0500

radeonsi: gather pipe_stream_output_info from NIR intrinsics

This stops using the pipe_stream_output_info passed to the create_*s_state
context functions because NIR contains everything, and gathering the info
from NIR allows more advanced shader linking.

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer at amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14414>

---

 src/gallium/drivers/radeonsi/gfx10_shader_ngg.c   |  4 +--
 src/gallium/drivers/radeonsi/si_pipe.h            |  2 +-
 src/gallium/drivers/radeonsi/si_shader.c          |  9 +++--
 src/gallium/drivers/radeonsi/si_shader.h          |  4 +--
 src/gallium/drivers/radeonsi/si_shader_info.c     | 11 ++++++
 src/gallium/drivers/radeonsi/si_shader_llvm_gs.c  |  3 +-
 src/gallium/drivers/radeonsi/si_state_shaders.cpp | 41 +++++++++--------------
 src/gallium/drivers/radeonsi/si_state_streamout.c |  2 +-
 8 files changed, 41 insertions(+), 35 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
index 02545073f64..6e2e5cff573 100644
--- a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
+++ b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
@@ -606,7 +606,7 @@ static unsigned ngg_nogs_vertex_size(struct si_shader *shader)
 
    /* The edgeflag is always stored in the last element that's also
     * used for padding to reduce LDS bank conflicts. */
-   if (shader->selector->so.num_outputs)
+   if (shader->selector->info.enabled_streamout_buffer_mask)
       lds_vertex_size = 4 * shader->selector->info.num_outputs + 1;
    if (gfx10_ngg_writes_user_edgeflags(shader))
       lds_vertex_size = MAX2(lds_vertex_size, 1);
@@ -2169,7 +2169,7 @@ unsigned gfx10_ngg_get_scratch_dw_size(struct si_shader *shader)
 {
    const struct si_shader_selector *sel = shader->selector;
 
-   if (sel->info.stage == MESA_SHADER_GEOMETRY && sel->so.num_outputs)
+   if (sel->info.stage == MESA_SHADER_GEOMETRY && sel->info.enabled_streamout_buffer_mask)
       return 44;
 
    return 8;
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 38cdc43c75e..50d8a4cc999 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -814,7 +814,7 @@ struct si_streamout {
 
    /* External state which comes from the vertex shader,
     * it must be set explicitly when binding a shader. */
-   uint16_t *stride_in_dw;
+   uint8_t *stride_in_dw;
    unsigned enabled_stream_buffers_mask; /* stream0 buffers0-3 in 4 LSB */
 
    /* The state of VGT_STRMOUT_BUFFER_(CONFIG|EN). */
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 4a2960bf01b..4e0784880ed 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -27,6 +27,7 @@
 #include "nir.h"
 #include "nir_builder.h"
 #include "nir_serialize.h"
+#include "nir/nir_helpers.h"
 #include "si_pipe.h"
 #include "si_shader_internal.h"
 #include "sid.h"
@@ -1587,7 +1588,9 @@ bool si_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compi
    bool free_nir;
    struct nir_shader *nir = si_get_nir_shader(sel, &shader->key, &free_nir);
 
-   struct pipe_stream_output_info so = sel->so;
+   struct pipe_stream_output_info so = {};
+   if (sel->info.enabled_streamout_buffer_mask)
+      nir_gather_stream_output_info(nir, &so);
 
    /* Dump NIR before doing NIR->LLVM conversion in case the
     * conversion fails. */
@@ -1616,7 +1619,7 @@ bool si_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compi
 
    /* The GS copy shader is compiled next. */
    if (sel->info.stage == MESA_SHADER_GEOMETRY && !shader->key.ge.as_ngg) {
-      shader->gs_copy_shader = si_generate_gs_copy_shader(sscreen, compiler, sel, debug);
+      shader->gs_copy_shader = si_generate_gs_copy_shader(sscreen, compiler, sel, &so, debug);
       if (!shader->gs_copy_shader) {
          fprintf(stderr, "radeonsi: can't create GS copy shader\n");
          return false;
@@ -2312,7 +2315,7 @@ bool si_create_shader_variant(struct si_screen *sscreen, struct ac_llvm_compiler
    shader->uses_vs_state_outprim = sscreen->use_ngg &&
                                    /* Only used by streamout in vertex shaders. */
                                    sel->info.stage == MESA_SHADER_VERTEX &&
-                                   sel->so.num_outputs;
+                                   sel->info.enabled_streamout_buffer_mask;
 
    if (sel->info.stage == MESA_SHADER_VERTEX) {
       shader->uses_base_instance = sel->info.uses_base_instance ||
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index 577e822bc0c..9e193d4b0c8 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -367,6 +367,7 @@ struct si_shader_info {
 
    int constbuf0_num_slots;
    ubyte num_stream_output_components[4];
+   uint16_t enabled_streamout_buffer_mask;
 
    uint num_memory_stores;
 
@@ -459,7 +460,6 @@ struct si_shader_selector {
    void *nir_binary;
    unsigned nir_size;
 
-   struct pipe_stream_output_info so;
    struct si_shader_info info;
 
    enum pipe_shader_type pipe_shader_type;
@@ -486,7 +486,6 @@ struct si_shader_selector {
    uint16_t gsvs_vertex_size;
    ubyte gs_input_verts_per_prim;
    unsigned max_gsvs_emit_size;
-   uint16_t enabled_streamout_buffer_mask;
    bool tess_turns_off_ngg;
 
    /* PS parameters. */
@@ -959,6 +958,7 @@ void si_nir_scan_shader(const struct nir_shader *nir, struct si_shader_info *inf
 struct si_shader *si_generate_gs_copy_shader(struct si_screen *sscreen,
                                              struct ac_llvm_compiler *compiler,
                                              struct si_shader_selector *gs_selector,
+                                             const struct pipe_stream_output_info *so,
                                              struct util_debug_callback *debug);
 
 /* si_shader_nir.c */
diff --git a/src/gallium/drivers/radeonsi/si_shader_info.c b/src/gallium/drivers/radeonsi/si_shader_info.c
index 26c309b2dca..b9a5c9a8d46 100644
--- a/src/gallium/drivers/radeonsi/si_shader_info.c
+++ b/src/gallium/drivers/radeonsi/si_shader_info.c
@@ -325,6 +325,7 @@ static void scan_io_usage(struct si_shader_info *info, nir_intrinsic_instr *intr
                                   (nir_intrinsic_component(intr) * 2);
             unsigned new_mask = mask & ~info->output_usagemask[loc];
 
+            /* Iterate over all components. */
             for (unsigned i = 0; i < 4; i++) {
                unsigned stream = (gs_streams >> (i * 2)) & 0x3;
 
@@ -332,6 +333,16 @@ static void scan_io_usage(struct si_shader_info *info, nir_intrinsic_instr *intr
                   info->output_streams[loc] |= stream << (i * 2);
                   info->num_stream_output_components[stream]++;
                }
+
+               if (nir_intrinsic_has_io_xfb(intr)) {
+                  nir_io_xfb xfb = i < 2 ? nir_intrinsic_io_xfb(intr) :
+                                           nir_intrinsic_io_xfb2(intr);
+                  if (xfb.out[i % 2].num_components) {
+                     unsigned stream = (gs_streams >> (i * 2)) & 0x3;
+                     info->enabled_streamout_buffer_mask |=
+                        BITFIELD_BIT(stream * 4 + xfb.out[i % 2].buffer);
+                  }
+               }
             }
 
             if (nir_intrinsic_has_src_type(intr))
diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c b/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c
index 9cfca4dc5dc..6a570ceff14 100644
--- a/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c
+++ b/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c
@@ -422,6 +422,7 @@ void si_preload_gs_rings(struct si_shader_context *ctx)
 struct si_shader *si_generate_gs_copy_shader(struct si_screen *sscreen,
                                              struct ac_llvm_compiler *compiler,
                                              struct si_shader_selector *gs_selector,
+                                             const struct pipe_stream_output_info *so,
                                              struct util_debug_callback *debug)
 {
    struct si_shader_context ctx;
@@ -446,7 +447,7 @@ struct si_shader *si_generate_gs_copy_shader(struct si_screen *sscreen,
    si_llvm_context_init(&ctx, sscreen, compiler, shader->wave_size);
    ctx.shader = shader;
    ctx.stage = MESA_SHADER_VERTEX;
-   ctx.so = gs_selector->so;
+   ctx.so = *so;
 
    builder = ctx.ac.builder;
 
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.cpp b/src/gallium/drivers/radeonsi/si_state_shaders.cpp
index 7575d1631eb..0ee4bf523da 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.cpp
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.cpp
@@ -203,9 +203,6 @@ void si_get_ir_cache_key(struct si_shader_selector *sel, bool ngg, bool es,
    _mesa_sha1_init(&ctx);
    _mesa_sha1_update(&ctx, &shader_variant_flags, 4);
    _mesa_sha1_update(&ctx, ir_binary, ir_size);
-   if (sel->info.stage == MESA_SHADER_VERTEX || sel->info.stage == MESA_SHADER_TESS_EVAL ||
-       sel->info.stage == MESA_SHADER_GEOMETRY)
-      _mesa_sha1_update(&ctx, &sel->so, sizeof(sel->so));
    _mesa_sha1_final(&ctx, ir_sha1_cache_key);
 
    if (ir_binary == blob.data)
@@ -1512,7 +1509,7 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
    }
 
    shader->ctx_reg.ngg.vgt_stages.u.ngg = 1;
-   shader->ctx_reg.ngg.vgt_stages.u.streamout = gs_sel->so.num_outputs;
+   shader->ctx_reg.ngg.vgt_stages.u.streamout = !!gs_sel->info.enabled_streamout_buffer_mask;
    shader->ctx_reg.ngg.vgt_stages.u.ngg_passthrough = gfx10_is_ngg_passthrough(shader);
    shader->ctx_reg.ngg.vgt_stages.u.gs_wave32 = shader->wave_size == 32;
 }
@@ -1702,11 +1699,11 @@ static void si_shader_vs(struct si_screen *sscreen, struct si_shader *shader,
       rsrc1 |= S_00B128_SGPRS((shader->config.num_sgprs - 1) / 8);
 
    if (!sscreen->use_ngg_streamout) {
-      rsrc2 |= S_00B12C_SO_BASE0_EN(!!shader->selector->so.stride[0]) |
-               S_00B12C_SO_BASE1_EN(!!shader->selector->so.stride[1]) |
-               S_00B12C_SO_BASE2_EN(!!shader->selector->so.stride[2]) |
-               S_00B12C_SO_BASE3_EN(!!shader->selector->so.stride[3]) |
-               S_00B12C_SO_EN(!!shader->selector->so.num_outputs);
+      rsrc2 |= S_00B12C_SO_BASE0_EN(!!shader->selector->info.base.xfb_stride[0]) |
+               S_00B12C_SO_BASE1_EN(!!shader->selector->info.base.xfb_stride[1]) |
+               S_00B12C_SO_BASE2_EN(!!shader->selector->info.base.xfb_stride[2]) |
+               S_00B12C_SO_BASE3_EN(!!shader->selector->info.base.xfb_stride[3]) |
+               S_00B12C_SO_EN(!!info->enabled_streamout_buffer_mask);
    }
 
    si_pm4_set_reg(pm4, R_00B128_SPI_SHADER_PGM_RSRC1_VS, rsrc1);
@@ -2783,7 +2780,7 @@ int si_shader_select(struct pipe_context *ctx, struct si_shader_ctx_state *state
    }
 }
 
-static void si_parse_next_shader_property(const struct si_shader_info *info, bool streamout,
+static void si_parse_next_shader_property(const struct si_shader_info *info,
                                           union si_shader_key *key)
 {
    gl_shader_stage next_shader = info->base.next_stage;
@@ -2804,7 +2801,7 @@ static void si_parse_next_shader_property(const struct si_shader_info *info, boo
           * assume that it's a HW LS. (the next shader is TCS)
           * This heuristic is needed for separate shader objects.
           */
-         if (!info->writes_position && !streamout)
+         if (!info->writes_position && !info->enabled_streamout_buffer_mask)
             key->ge.as_ls = 1;
       }
       break;
@@ -2874,10 +2871,11 @@ static void si_init_shader_selector_async(void *job, void *gdata, int thread_ind
 
       shader->selector = sel;
       shader->is_monolithic = false;
-      si_parse_next_shader_property(&sel->info, sel->so.num_outputs != 0, &shader->key);
+      si_parse_next_shader_property(&sel->info, &shader->key);
 
       if (sel->info.stage <= MESA_SHADER_GEOMETRY &&
-          sscreen->use_ngg && (!sel->so.num_outputs || sscreen->use_ngg_streamout) &&
+          sscreen->use_ngg && (!sel->info.enabled_streamout_buffer_mask ||
+                               sscreen->use_ngg_streamout) &&
           ((sel->info.stage == MESA_SHADER_VERTEX && !shader->key.ge.as_ls) ||
            sel->info.stage == MESA_SHADER_TESS_EVAL || sel->info.stage == MESA_SHADER_GEOMETRY))
          shader->key.ge.as_ngg = 1;
@@ -3035,8 +3033,6 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
    sel->compiler_ctx_state.debug = sctx->debug;
    sel->compiler_ctx_state.is_debug_context = sctx->is_debug;
 
-   sel->so = state->stream_output;
-
    if (state->type == PIPE_SHADER_IR_TGSI) {
       sel->nir = tgsi_to_nir(state->tokens, ctx->screen, true);
    } else {
@@ -3057,12 +3053,6 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
    si_get_active_slot_masks(&sel->info, &sel->active_const_and_shader_buffers,
                             &sel->active_samplers_and_images);
 
-   /* Record which streamout buffers are enabled. */
-   for (unsigned i = 0; i < sel->so.num_outputs; i++) {
-      sel->enabled_streamout_buffer_mask |= (1 << sel->so.output[i].output_buffer)
-                                            << (sel->so.output[i].stream * 4);
-   }
-
    sel->num_vs_inputs =
       sel->info.stage == MESA_SHADER_VERTEX && !sel->info.base.vs.blit_sgprs_amd
          ? sel->info.num_inputs
@@ -3197,7 +3187,7 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
       !sel->info.writes_viewport_index && /* cull only against viewport 0 */
       !sel->info.base.writes_memory &&
       /* NGG GS supports culling with streamout because it culls after streamout. */
-      (sel->info.stage == MESA_SHADER_GEOMETRY || !sel->so.num_outputs) &&
+      (sel->info.stage == MESA_SHADER_GEOMETRY || !sel->info.enabled_streamout_buffer_mask) &&
       (sel->info.stage != MESA_SHADER_GEOMETRY || sel->info.num_stream_output_components[0]) &&
       (sel->info.stage != MESA_SHADER_VERTEX ||
        (!sel->info.base.vs.blit_sgprs_amd &&
@@ -3312,8 +3302,8 @@ static void si_update_streamout_state(struct si_context *sctx)
    if (!shader_with_so)
       return;
 
-   sctx->streamout.enabled_stream_buffers_mask = shader_with_so->enabled_streamout_buffer_mask;
-   sctx->streamout.stride_in_dw = shader_with_so->so.stride;
+   sctx->streamout.enabled_stream_buffers_mask = shader_with_so->info.enabled_streamout_buffer_mask;
+   sctx->streamout.stride_in_dw = shader_with_so->info.base.xfb_stride;
 }
 
 static void si_update_clip_regs(struct si_context *sctx, struct si_shader_selector *old_hw_vs,
@@ -3440,7 +3430,8 @@ bool si_update_ngg(struct si_context *sctx)
    } else if (!sctx->screen->use_ngg_streamout) {
       struct si_shader_selector *last = si_get_vs(sctx)->cso;
 
-      if ((last && last->so.num_outputs) || sctx->streamout.prims_gen_query_enabled)
+      if ((last && last->info.enabled_streamout_buffer_mask) ||
+          sctx->streamout.prims_gen_query_enabled)
          new_ngg = false;
    }
 
diff --git a/src/gallium/drivers/radeonsi/si_state_streamout.c b/src/gallium/drivers/radeonsi/si_state_streamout.c
index b5557610947..0406c7d0198 100644
--- a/src/gallium/drivers/radeonsi/si_state_streamout.c
+++ b/src/gallium/drivers/radeonsi/si_state_streamout.c
@@ -308,7 +308,7 @@ static void si_emit_streamout_begin(struct si_context *sctx)
 {
    struct radeon_cmdbuf *cs = &sctx->gfx_cs;
    struct si_streamout_target **t = sctx->streamout.targets;
-   uint16_t *stride_in_dw = sctx->streamout.stride_in_dw;
+   uint8_t *stride_in_dw = sctx->streamout.stride_in_dw;
    unsigned i;
 
    si_flush_vgt_streamout(sctx);



More information about the mesa-commit mailing list