Mesa (main): radeonsi: replace llvm based fixed tcs with nir

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Mon Jun 27 03:16:28 UTC 2022


Module: Mesa
Branch: main
Commit: a1763ad4b362c9f3a1fd12b6d06009b17fac3d24
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=a1763ad4b362c9f3a1fd12b6d06009b17fac3d24

Author: Qiang Yu <yuq825 at gmail.com>
Date:   Fri May 20 17:27:27 2022 +0800

radeonsi: replace llvm based fixed tcs with nir

Create nir passthrough shader with explicit input/output and vertex
output count so that it can be handled by compiler same as user tcs.

The drawback is we create more si_shader_selector with different
input/output and vertex output count which was handled by compiler
backend before.

As fixed function tcs can be handled like user tcs, we don't need
the dedicated fixed_func_tcs_shader state either.

Reviewed-by: Marek Olšák <marek.olsak at amd.com>
Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer at amd.com>
Signed-off-by: Qiang Yu <yuq825 at gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16705>

---

 src/gallium/drivers/radeonsi/si_debug.c            |  10 +-
 src/gallium/drivers/radeonsi/si_pipe.c             |   9 +-
 src/gallium/drivers/radeonsi/si_pipe.h             |   5 +-
 src/gallium/drivers/radeonsi/si_shader.c           |   8 +-
 src/gallium/drivers/radeonsi/si_shader.h           |   1 -
 src/gallium/drivers/radeonsi/si_shader_llvm.c      |   5 +-
 src/gallium/drivers/radeonsi/si_shader_llvm_tess.c |  45 +-------
 src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c   |  27 -----
 src/gallium/drivers/radeonsi/si_state.h            |   1 +
 src/gallium/drivers/radeonsi/si_state_draw.cpp     | 113 +++++++++------------
 src/gallium/drivers/radeonsi/si_state_shaders.cpp  |  68 ++++++++++---
 11 files changed, 120 insertions(+), 172 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_debug.c b/src/gallium/drivers/radeonsi/si_debug.c
index 85f5667f2a7..d1deb09cca6 100644
--- a/src/gallium/drivers/radeonsi/si_debug.c
+++ b/src/gallium/drivers/radeonsi/si_debug.c
@@ -1036,19 +1036,13 @@ static void si_dump_debug_state(struct pipe_context *ctx, FILE *f, unsigned flag
 
 void si_log_draw_state(struct si_context *sctx, struct u_log_context *log)
 {
-   struct si_shader_ctx_state *tcs_shader;
-
    if (!log)
       return;
 
-   tcs_shader = &sctx->shader.tcs;
-   if (sctx->shader.tes.cso && !sctx->shader.tcs.cso)
-      tcs_shader = &sctx->fixed_func_tcs_shader;
-
    si_dump_framebuffer(sctx, log);
 
    si_dump_gfx_shader(sctx, &sctx->shader.vs, log);
-   si_dump_gfx_shader(sctx, tcs_shader, log);
+   si_dump_gfx_shader(sctx, &sctx->shader.tcs, log);
    si_dump_gfx_shader(sctx, &sctx->shader.tes, log);
    si_dump_gfx_shader(sctx, &sctx->shader.gs, log);
    si_dump_gfx_shader(sctx, &sctx->shader.ps, log);
@@ -1057,7 +1051,7 @@ void si_log_draw_state(struct si_context *sctx, struct u_log_context *log)
                            4, sctx->descriptors[SI_DESCS_INTERNAL].num_active_slots, si_identity,
                            log);
    si_dump_gfx_descriptors(sctx, &sctx->shader.vs, log);
-   si_dump_gfx_descriptors(sctx, tcs_shader, log);
+   si_dump_gfx_descriptors(sctx, &sctx->shader.tcs, log);
    si_dump_gfx_descriptors(sctx, &sctx->shader.tes, log);
    si_dump_gfx_descriptors(sctx, &sctx->shader.gs, log);
    si_dump_gfx_descriptors(sctx, &sctx->shader.ps, log);
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 659223fb888..c8fc1bdcc3d 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -228,8 +228,13 @@ static void si_destroy_context(struct pipe_context *context)
    for (i = 0; i < ARRAY_SIZE(sctx->vgt_shader_config); i++)
       si_pm4_free_state(sctx, sctx->vgt_shader_config[i], SI_STATE_IDX(vgt_shader_config));
 
-   if (sctx->fixed_func_tcs_shader.cso)
-      sctx->b.delete_tcs_state(&sctx->b, sctx->fixed_func_tcs_shader.cso);
+   if (sctx->fixed_func_tcs_shader_cache) {
+      hash_table_foreach(sctx->fixed_func_tcs_shader_cache, entry) {
+         sctx->b.delete_tcs_state(&sctx->b, entry->data);
+      }
+      _mesa_hash_table_destroy(sctx->fixed_func_tcs_shader_cache, NULL);
+   }
+
    if (sctx->custom_dsa_flush)
       sctx->b.delete_depth_stencil_alpha_state(&sctx->b, sctx->custom_dsa_flush);
    if (sctx->custom_blend_resolve)
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 80ec9ae4a79..80e03678366 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -999,7 +999,7 @@ struct si_context {
    struct si_screen *screen;
    struct util_debug_callback debug;
    struct ac_llvm_compiler compiler; /* only non-threaded compilation */
-   struct si_shader_ctx_state fixed_func_tcs_shader;
+   struct hash_table *fixed_func_tcs_shader_cache;
    struct si_resource *wait_mem_scratch;
    struct si_resource *wait_mem_scratch_tmz;
    unsigned wait_mem_number;
@@ -1076,6 +1076,8 @@ struct si_context {
       struct si_shader_ctx_state shaders[SI_NUM_GRAPHICS_SHADERS];
    };
    struct si_cs_shader_state cs_shader_state;
+   /* if current tcs set by user */
+   bool is_user_tcs;
 
    /* shader information */
    uint64_t ps_inputs_read_or_disabled;
@@ -1557,7 +1559,6 @@ void *si_create_passthrough_tcs(struct si_context *sctx);
 /* si_shaderlib_tgsi.c */
 void *si_get_blitter_vs(struct si_context *sctx, enum blitter_attrib_type type,
                         unsigned num_layers);
-void *si_create_fixed_func_tcs(struct si_context *sctx);
 void *si_create_dma_compute_shader(struct pipe_context *ctx, unsigned num_dwords_per_thread,
                                    bool dst_stream_cache_policy, bool is_copy);
 void *si_create_clear_buffer_rmw_cs(struct si_context *sctx);
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index a3f74e519f1..b65cec6b978 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -1239,8 +1239,6 @@ static void si_dump_shader_key(const struct si_shader *shader, FILE *f)
          si_dump_shader_key_vs(key, &key->ge.part.tcs.ls_prolog, "part.tcs.ls_prolog", f);
       }
       fprintf(f, "  part.tcs.epilog.prim_mode = %u\n", key->ge.part.tcs.epilog.prim_mode);
-      fprintf(f, "  mono.u.ff_tcs_inputs_to_copy = 0x%" PRIx64 "\n",
-              key->ge.mono.u.ff_tcs_inputs_to_copy);
       fprintf(f, "  opt.prefer_mono = %u\n", key->ge.opt.prefer_mono);
       fprintf(f, "  opt.same_patch_vertices = %u\n", key->ge.opt.same_patch_vertices);
       break;
@@ -2018,12 +2016,8 @@ void si_get_tcs_epilog_key(struct si_shader *shader, union si_shader_part_key *k
    key->tcs_epilog.wave32 = shader->wave_size == 32;
    key->tcs_epilog.states = shader->key.ge.part.tcs.epilog;
 
-   /* If output patches are wholly in one wave, we don't need a barrier.
-    * The fixed-func TCS doesn't set tcs_vertices_out, but it won't use a barrier
-    * anyway because tess levels are always defined in all invocations there.
-    */
+   /* If output patches are wholly in one wave, we don't need a barrier. */
    key->tcs_epilog.noop_s_barrier =
-      shader->selector->info.base.tess.tcs_vertices_out &&
       shader->wave_size % shader->selector->info.base.tess.tcs_vertices_out == 0;
 }
 
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index 8c38792e6de..8bed98dbd82 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -686,7 +686,6 @@ struct si_shader_key_ge {
       union si_vs_fix_fetch vs_fix_fetch[SI_MAX_ATTRIBS];
 
       union {
-         uint64_t ff_tcs_inputs_to_copy; /* fixed-func TCS only */
          /* When PS needs PrimID and GS is disabled. */
          unsigned vs_export_prim_id : 1;    /* VS and TES only */
          unsigned gs_tri_strip_adj_fix : 1; /* GS only */
diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm.c b/src/gallium/drivers/radeonsi/si_shader_llvm.c
index 781352191e5..5e9b3d441ab 100644
--- a/src/gallium/drivers/radeonsi/si_shader_llvm.c
+++ b/src/gallium/drivers/radeonsi/si_shader_llvm.c
@@ -1023,12 +1023,9 @@ bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shad
             /* If both input and output patches are wholly in one wave, we don't need a barrier.
              * That's true when both VS and TCS have the same number of patch vertices and
              * the wave size is a multiple of the number of patch vertices.
-             *
-             * The fixed-func TCS doesn't set tcs_vertices_out.
              */
             if (!shader->key.ge.opt.same_patch_vertices ||
-                (sel->info.base.tess.tcs_vertices_out &&
-                 ctx->ac.wave_size % sel->info.base.tess.tcs_vertices_out != 0))
+                ctx->ac.wave_size % sel->info.base.tess.tcs_vertices_out != 0)
                ac_build_s_barrier(&ctx->ac, ctx->stage);
          }
       } else if (ctx->stage == MESA_SHADER_GEOMETRY && !shader->key.ge.as_ngg) {
diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c b/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c
index 67e896e1291..9ef5e241d50 100644
--- a/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c
+++ b/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c
@@ -71,9 +71,6 @@ static unsigned get_tcs_out_vertex_dw_stride_constant(struct si_shader_context *
 {
    assert(ctx->stage == MESA_SHADER_TESS_CTRL);
 
-   if (ctx->shader->key.ge.mono.u.ff_tcs_inputs_to_copy)
-      return util_last_bit64(ctx->shader->key.ge.mono.u.ff_tcs_inputs_to_copy) * 4;
-
    return util_last_bit64(ctx->shader->selector->info.outputs_written) * 4;
 }
 
@@ -86,9 +83,6 @@ static LLVMValueRef get_tcs_out_vertex_dw_stride(struct si_shader_context *ctx)
 
 static LLVMValueRef get_tcs_out_patch_stride(struct si_shader_context *ctx)
 {
-   if (ctx->shader->key.ge.mono.u.ff_tcs_inputs_to_copy)
-      return si_unpack_param(ctx, ctx->tcs_out_lds_layout, 0, 13);
-
    const struct si_shader_info *info = &ctx->shader->selector->info;
    unsigned tcs_out_vertices = info->base.tess.tcs_vertices_out;
    unsigned vertex_dw_stride = get_tcs_out_vertex_dw_stride_constant(ctx);
@@ -141,7 +135,7 @@ LLVMValueRef si_get_num_tcs_out_vertices(struct si_shader_context *ctx)
       ctx->shader->selector ? ctx->shader->selector->info.base.tess.tcs_vertices_out
                             : 0;
 
-   /* If !tcs_out_vertices, it's either the fixed-func TCS or the TCS epilog. */
+   /* If !tcs_out_vertices, it's the TCS epilog. */
    if (ctx->stage == MESA_SHADER_TESS_CTRL && tcs_out_vertices)
       return LLVMConstInt(ctx->ac.i32, tcs_out_vertices, 0);
 
@@ -550,41 +544,6 @@ static void si_nir_store_output_tcs(struct ac_shader_abi *abi,
    }
 }
 
-/**
- * Forward all outputs from the vertex shader to the TES. This is only used
- * for the fixed function TCS.
- */
-static void si_copy_tcs_inputs(struct si_shader_context *ctx)
-{
-   LLVMValueRef invocation_id, buffer, buffer_offset;
-   LLVMValueRef lds_vertex_stride, lds_base;
-   uint64_t inputs;
-
-   invocation_id = si_unpack_param(ctx, ctx->args.tcs_rel_ids, 8, 5);
-   buffer = get_tess_ring_descriptor(ctx, TESS_OFFCHIP_RING_TCS);
-   buffer_offset = ac_get_arg(&ctx->ac, ctx->args.tess_offchip_offset);
-
-   lds_vertex_stride = si_get_tcs_in_vertex_dw_stride(ctx);
-   lds_base = get_tcs_in_current_patch_offset(ctx);
-   lds_base = ac_build_imad(&ctx->ac, invocation_id, lds_vertex_stride, lds_base);
-
-   inputs = ctx->shader->key.ge.mono.u.ff_tcs_inputs_to_copy;
-   while (inputs) {
-      unsigned i = u_bit_scan64(&inputs);
-
-      LLVMValueRef lds_ptr =
-         LLVMBuildAdd(ctx->ac.builder, lds_base, LLVMConstInt(ctx->ac.i32, 4 * i, 0), "");
-
-      LLVMValueRef buffer_addr = get_tcs_tes_buffer_address(
-         ctx, get_rel_patch_id(ctx), invocation_id, LLVMConstInt(ctx->ac.i32, i, 0));
-
-      LLVMValueRef value = lshs_lds_load(ctx, ctx->ac.i32, ~0, lds_ptr);
-
-      ac_build_buffer_store_dword(&ctx->ac, buffer, value, NULL, buffer_addr, buffer_offset,
-                                  ac_glc);
-   }
-}
-
 static void si_write_tess_factors(struct si_shader_context *ctx, union si_shader_part_key *key,
                                   LLVMValueRef rel_patch_id, LLVMValueRef invocation_id,
                                   LLVMValueRef tcs_out_current_patch_data_offset,
@@ -751,8 +710,6 @@ void si_llvm_tcs_build_end(struct si_shader_context *ctx)
    LLVMBuilderRef builder = ctx->ac.builder;
    LLVMValueRef rel_patch_id, invocation_id, tf_lds_offset;
 
-   si_copy_tcs_inputs(ctx);
-
    rel_patch_id = get_rel_patch_id(ctx);
    invocation_id = si_unpack_param(ctx, ctx->args.tcs_rel_ids, 8, 5);
    tf_lds_offset = get_tcs_out_current_patch_data_offset(ctx);
diff --git a/src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c b/src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c
index 045ea934cf4..0cc53483213 100644
--- a/src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c
+++ b/src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c
@@ -81,33 +81,6 @@ void *si_get_blitter_vs(struct si_context *sctx, enum blitter_attrib_type type,
    return *vs;
 }
 
-/**
- * This is used when TCS is NULL in the VS->TCS->TES chain. In this case,
- * VS passes its outputs to TES directly, so the fixed-function shader only
- * has to write TESSOUTER and TESSINNER.
- */
-void *si_create_fixed_func_tcs(struct si_context *sctx)
-{
-   struct ureg_src outer, inner;
-   struct ureg_dst tessouter, tessinner;
-   struct ureg_program *ureg = ureg_create(PIPE_SHADER_TESS_CTRL);
-
-   if (!ureg)
-      return NULL;
-
-   outer = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_TESS_DEFAULT_OUTER_LEVEL, 0);
-   inner = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_TESS_DEFAULT_INNER_LEVEL, 0);
-
-   tessouter = ureg_DECL_output(ureg, TGSI_SEMANTIC_TESSOUTER, 0);
-   tessinner = ureg_DECL_output(ureg, TGSI_SEMANTIC_TESSINNER, 0);
-
-   ureg_MOV(ureg, tessouter, outer);
-   ureg_MOV(ureg, tessinner, inner);
-   ureg_END(ureg);
-
-   return ureg_create_shader_and_destroy(ureg, &sctx->b);
-}
-
 /* Create a compute shader implementing clear_buffer or copy_buffer. */
 void *si_create_dma_compute_shader(struct pipe_context *ctx, unsigned num_dwords_per_thread,
                                    bool dst_stream_cache_policy, bool is_copy)
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index ca830703198..e48e1665054 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -597,6 +597,7 @@ void si_init_tess_factor_ring(struct si_context *sctx);
 bool si_update_gs_ring_buffers(struct si_context *sctx);
 bool si_update_spi_tmpring_size(struct si_context *sctx, unsigned bytes);
 unsigned si_calc_inst_pref_size(struct si_shader *shader);
+bool si_set_tcs_to_fixed_func_shader(struct si_context *sctx);
 
 /* si_state_draw.cpp */
 void si_cp_dma_prefetch(struct si_context *sctx, struct pipe_resource *buf,
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.cpp b/src/gallium/drivers/radeonsi/si_state_draw.cpp
index 876d1a93e15..8a0eba58303 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.cpp
+++ b/src/gallium/drivers/radeonsi/si_state_draw.cpp
@@ -126,28 +126,16 @@ static bool si_update_shaders(struct si_context *sctx)
             return false;
       }
 
-      if (sctx->shader.tcs.cso) {
-         r = si_shader_select(ctx, &sctx->shader.tcs);
-         if (r)
+      if (!sctx->is_user_tcs) {
+         if (!si_set_tcs_to_fixed_func_shader(sctx))
             return false;
-         si_pm4_bind_state(sctx, hs, sctx->shader.tcs.current);
-      } else {
-         if (!sctx->fixed_func_tcs_shader.cso) {
-            sctx->fixed_func_tcs_shader.cso =
-               (struct si_shader_selector*)si_create_fixed_func_tcs(sctx);
-            if (!sctx->fixed_func_tcs_shader.cso)
-               return false;
-
-            sctx->fixed_func_tcs_shader.key.ge.part.tcs.epilog.invoc0_tess_factors_are_def =
-               sctx->fixed_func_tcs_shader.cso->info.tessfactors_are_def_in_all_invocs;
-         }
-
-         r = si_shader_select(ctx, &sctx->fixed_func_tcs_shader);
-         if (r)
-            return false;
-         si_pm4_bind_state(sctx, hs, sctx->fixed_func_tcs_shader.current);
       }
 
+      r = si_shader_select(ctx, &sctx->shader.tcs);
+      if (r)
+         return false;
+      si_pm4_bind_state(sctx, hs, sctx->shader.tcs.current);
+
       if (!HAS_GS || GFX_VERSION <= GFX8) {
          r = si_shader_select(ctx, &sctx->shader.tes);
          if (r)
@@ -164,6 +152,12 @@ static bool si_update_shaders(struct si_context *sctx)
          }
       }
    } else {
+      /* Reset TCS to clear fixed function shader. */
+      if (!sctx->is_user_tcs && sctx->shader.tcs.cso) {
+         sctx->shader.tcs.cso = NULL;
+         sctx->shader.tcs.current = NULL;
+      }
+
       if (GFX_VERSION <= GFX8) {
          si_pm4_bind_state(sctx, ls, NULL);
          sctx->prefetch_L2_mask &= ~SI_PREFETCH_LS;
@@ -626,10 +620,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx, unsigned *num_pa
 {
    struct si_shader *ls_current;
    struct si_shader_selector *ls;
-   /* The TES pointer will only be used for sctx->last_tcs.
-    * It would be wrong to think that TCS = TES. */
-   struct si_shader_selector *tcs =
-      sctx->shader.tcs.cso ? sctx->shader.tcs.cso : sctx->shader.tes.cso;
+   struct si_shader_selector *tcs = sctx->shader.tcs.cso;
    unsigned tess_uses_primid = sctx->ia_multi_vgt_param_key.u.tess_uses_prim_id;
    bool has_primid_instancing_bug = sctx->gfx_level == GFX6 && sctx->screen->info.max_se == 1;
    unsigned tes_sh_base = sctx->shader_pointers.sh_base[PIPE_SHADER_TESS_EVAL];
@@ -637,11 +628,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx, unsigned *num_pa
 
    /* Since GFX9 has merged LS-HS in the TCS state, set LS = TCS. */
    if (sctx->gfx_level >= GFX9) {
-      if (sctx->shader.tcs.cso)
-         ls_current = sctx->shader.tcs.current;
-      else
-         ls_current = sctx->fixed_func_tcs_shader.current;
-
+      ls_current = sctx->shader.tcs.current;
       ls = ls_current->key.ge.part.tcs.ls;
    } else {
       ls_current = sctx->shader.vs.current;
@@ -663,19 +650,9 @@ static void si_emit_derived_tess_state(struct si_context *sctx, unsigned *num_pa
 
    /* This calculates how shader inputs and outputs among VS, TCS, and TES
     * are laid out in LDS. */
-   unsigned num_tcs_inputs = util_last_bit64(ls->info.outputs_written);
-   unsigned num_tcs_output_cp, num_tcs_outputs, num_tcs_patch_outputs;
-
-   if (sctx->shader.tcs.cso) {
-      num_tcs_outputs = util_last_bit64(tcs->info.outputs_written);
-      num_tcs_output_cp = tcs->info.base.tess.tcs_vertices_out;
-      num_tcs_patch_outputs = util_last_bit64(tcs->info.patch_outputs_written);
-   } else {
-      /* No TCS. Route varyings from LS to TES. */
-      num_tcs_outputs = num_tcs_inputs;
-      num_tcs_output_cp = num_tcs_input_cp;
-      num_tcs_patch_outputs = 2; /* TESSINNER + TESSOUTER */
-   }
+   unsigned num_tcs_outputs = util_last_bit64(tcs->info.outputs_written);
+   unsigned num_tcs_output_cp = tcs->info.base.tess.tcs_vertices_out;
+   unsigned num_tcs_patch_outputs = util_last_bit64(tcs->info.patch_outputs_written);
 
    unsigned input_vertex_size = ls->info.lshs_vertex_stride;
    unsigned output_vertex_size = num_tcs_outputs * 16;
@@ -2223,34 +2200,44 @@ static void si_draw(struct pipe_context *ctx,
    si_need_gfx_cs_space(sctx, num_draws);
 
    if (HAS_TESS) {
-      struct si_shader_selector *tcs = sctx->shader.tcs.cso;
+      if (sctx->is_user_tcs) {
+         struct si_shader_selector *tcs = sctx->shader.tcs.cso;
 
-      /* The rarely occuring tcs == NULL case is not optimized. */
-      bool same_patch_vertices =
-         GFX_VERSION >= GFX9 &&
-         tcs && sctx->patch_vertices == tcs->info.base.tess.tcs_vertices_out;
+         bool same_patch_vertices =
+            GFX_VERSION >= GFX9 &&
+            sctx->patch_vertices == tcs->info.base.tess.tcs_vertices_out;
 
-      if (sctx->shader.tcs.key.ge.opt.same_patch_vertices != same_patch_vertices) {
-         sctx->shader.tcs.key.ge.opt.same_patch_vertices = same_patch_vertices;
-         sctx->do_update_shaders = true;
-      }
+         if (sctx->shader.tcs.key.ge.opt.same_patch_vertices != same_patch_vertices) {
+            sctx->shader.tcs.key.ge.opt.same_patch_vertices = same_patch_vertices;
+            sctx->do_update_shaders = true;
+         }
 
-      if (GFX_VERSION == GFX9 && sctx->screen->info.has_ls_vgpr_init_bug) {
-         /* Determine whether the LS VGPR fix should be applied.
-          *
-          * It is only required when num input CPs > num output CPs,
-          * which cannot happen with the fixed function TCS. We should
-          * also update this bit when switching from TCS to fixed
-          * function TCS.
+         if (GFX_VERSION == GFX9 && sctx->screen->info.has_ls_vgpr_init_bug) {
+            /* Determine whether the LS VGPR fix should be applied.
+             *
+             * It is only required when num input CPs > num output CPs,
+             * which cannot happen with the fixed function TCS.
+             */
+            bool ls_vgpr_fix =
+               sctx->patch_vertices > tcs->info.base.tess.tcs_vertices_out;
+
+            if (ls_vgpr_fix != sctx->shader.tcs.key.ge.part.tcs.ls_prolog.ls_vgpr_fix) {
+               sctx->shader.tcs.key.ge.part.tcs.ls_prolog.ls_vgpr_fix = ls_vgpr_fix;
+               sctx->do_update_shaders = true;
+            }
+         }
+      } else {
+         /* These fields are static for fixed function TCS. So no need to set
+          * do_update_shaders between fixed-TCS draws. As fixed-TCS to user-TCS
+          * or opposite, do_update_shaders should already be set by bind state.
           */
-         bool ls_vgpr_fix =
-            tcs && sctx->patch_vertices > tcs->info.base.tess.tcs_vertices_out;
+         sctx->shader.tcs.key.ge.opt.same_patch_vertices = GFX_VERSION >= GFX9;
+         sctx->shader.tcs.key.ge.part.tcs.ls_prolog.ls_vgpr_fix = false;
 
-         if (ls_vgpr_fix != sctx->shader.tcs.key.ge.part.tcs.ls_prolog.ls_vgpr_fix) {
-            sctx->shader.tcs.key.ge.part.tcs.ls_prolog.ls_vgpr_fix = ls_vgpr_fix;
-            sctx->fixed_func_tcs_shader.key.ge.part.tcs.ls_prolog.ls_vgpr_fix = ls_vgpr_fix;
+         /* User may only change patch vertices, needs to update fixed func TCS. */
+         if (sctx->shader.tcs.cso &&
+             sctx->shader.tcs.cso->info.base.tess.tcs_vertices_out != sctx->patch_vertices)
             sctx->do_update_shaders = true;
-         }
       }
    }
 
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.cpp b/src/gallium/drivers/radeonsi/si_state_shaders.cpp
index 015bb078da2..7fd3f9cf934 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.cpp
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.cpp
@@ -3373,7 +3373,6 @@ static void si_bind_vs_shader(struct pipe_context *ctx, void *state)
    sctx->shader.vs.current = (sel && sel->variants_count) ? sel->variants[0] : NULL;
    sctx->num_vs_blit_sgprs = sel ? sel->info.base.vs.blit_sgprs_amd : 0;
    sctx->vs_uses_draw_id = sel ? sel->info.uses_drawid : false;
-   sctx->fixed_func_tcs_shader.key.ge.mono.u.ff_tcs_inputs_to_copy = sel ? sel->info.outputs_written : 0;
 
    if (si_update_ngg(sctx))
       si_shader_change_notify(sctx);
@@ -3486,6 +3485,11 @@ static void si_bind_tcs_shader(struct pipe_context *ctx, void *state)
    struct si_shader_selector *sel = (struct si_shader_selector*)state;
    bool enable_changed = !!sctx->shader.tcs.cso != !!sel;
 
+   /* Note it could happen that user shader sel is same as fixed function shader,
+    * so we should update this field even sctx->shader.tcs.cso == sel.
+    */
+   sctx->is_user_tcs = !!sel;
+
    if (sctx->shader.tcs.cso == sel)
       return;
 
@@ -3518,11 +3522,9 @@ static void si_bind_tes_shader(struct pipe_context *ctx, void *state)
    si_update_tess_uses_prim_id(sctx);
 
    sctx->shader.tcs.key.ge.part.tcs.epilog.prim_mode =
-   sctx->fixed_func_tcs_shader.key.ge.part.tcs.epilog.prim_mode =
       sel ? sel->info.base.tess._primitive_mode : 0;
 
    sctx->shader.tcs.key.ge.part.tcs.epilog.tes_reads_tess_factors =
-   sctx->fixed_func_tcs_shader.key.ge.part.tcs.epilog.tes_reads_tess_factors =
       sel ? sel->info.reads_tess_factors : 0;
 
    si_update_common_shader_state(sctx, sel, PIPE_SHADER_TESS_EVAL);
@@ -3976,17 +3978,8 @@ static int si_update_scratch_buffer(struct si_context *sctx, struct si_shader *s
    return 1;
 }
 
-static struct si_shader *si_get_tcs_current(struct si_context *sctx)
-{
-   if (!sctx->shader.tes.cso)
-      return NULL; /* tessellation disabled */
-
-   return sctx->shader.tcs.cso ? sctx->shader.tcs.current : sctx->fixed_func_tcs_shader.current;
-}
-
 static bool si_update_scratch_relocs(struct si_context *sctx)
 {
-   struct si_shader *tcs = si_get_tcs_current(sctx);
    int r;
 
    /* Update the shaders, so that they are using the latest scratch.
@@ -4006,11 +3999,11 @@ static bool si_update_scratch_relocs(struct si_context *sctx)
    if (r == 1)
       si_pm4_bind_state(sctx, gs, sctx->shader.gs.current);
 
-   r = si_update_scratch_buffer(sctx, tcs);
+   r = si_update_scratch_buffer(sctx, sctx->shader.tcs.current);
    if (r < 0)
       return false;
    if (r == 1)
-      si_pm4_bind_state(sctx, hs, tcs);
+      si_pm4_bind_state(sctx, hs, sctx->shader.tcs.current);
 
    /* VS can be bound as LS, ES, or VS. */
    r = si_update_scratch_buffer(sctx, sctx->shader.vs.current);
@@ -4251,6 +4244,53 @@ static void si_emit_scratch_state(struct si_context *sctx)
    }
 }
 
+struct si_fixed_func_tcs_shader_key {
+   uint64_t outputs_written;
+   uint8_t vertices_out;
+};
+
+static uint32_t si_fixed_func_tcs_shader_key_hash(const void *key)
+{
+   return _mesa_hash_data(key, sizeof(struct si_fixed_func_tcs_shader_key));
+}
+
+static bool si_fixed_func_tcs_shader_key_equals(const void *a, const void *b)
+{
+   return memcmp(a, b, sizeof(struct si_fixed_func_tcs_shader_key)) == 0;
+}
+
+bool si_set_tcs_to_fixed_func_shader(struct si_context *sctx)
+{
+   if (!sctx->fixed_func_tcs_shader_cache) {
+      sctx->fixed_func_tcs_shader_cache = _mesa_hash_table_create(
+         NULL, si_fixed_func_tcs_shader_key_hash,
+         si_fixed_func_tcs_shader_key_equals);
+   }
+
+   struct si_fixed_func_tcs_shader_key key;
+   key.outputs_written = sctx->shader.vs.cso->info.outputs_written;
+   key.vertices_out = sctx->patch_vertices;
+
+   struct hash_entry *entry = _mesa_hash_table_search(
+      sctx->fixed_func_tcs_shader_cache, &key);
+
+   struct si_shader_selector *tcs;
+   if (entry)
+      tcs = (struct si_shader_selector *)entry->data;
+   else {
+      tcs = (struct si_shader_selector *)si_create_passthrough_tcs(sctx);
+      if (!tcs)
+         return false;
+      _mesa_hash_table_insert(sctx->fixed_func_tcs_shader_cache, &key, (void *)tcs);
+   }
+
+   sctx->shader.tcs.cso = tcs;
+   sctx->shader.tcs.key.ge.part.tcs.epilog.invoc0_tess_factors_are_def =
+      tcs->info.tessfactors_are_def_in_all_invocs;
+
+   return true;
+}
+
 void si_init_screen_live_shader_cache(struct si_screen *sscreen)
 {
    util_live_shader_cache_init(&sscreen->live_shader_cache, si_create_shader_selector,



More information about the mesa-commit mailing list