Mesa (main): radeonsi: replace llvm based fixed tcs with nir
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Mon Jun 27 03:16:28 UTC 2022
Module: Mesa
Branch: main
Commit: a1763ad4b362c9f3a1fd12b6d06009b17fac3d24
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=a1763ad4b362c9f3a1fd12b6d06009b17fac3d24
Author: Qiang Yu <yuq825 at gmail.com>
Date: Fri May 20 17:27:27 2022 +0800
radeonsi: replace llvm based fixed tcs with nir
Create a NIR passthrough shader with explicit inputs/outputs and vertex
output count so that the compiler can handle it the same way as a user TCS.
The drawback is that we create more si_shader_selector objects, one for
each distinct combination of inputs/outputs and vertex output count;
this variation was previously handled inside the compiler backend.
As the fixed-function TCS can now be handled like a user TCS, we no longer
need the dedicated fixed_func_tcs_shader state either.
Reviewed-by: Marek Olšák <marek.olsak at amd.com>
Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer at amd.com>
Signed-off-by: Qiang Yu <yuq825 at gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16705>
---
src/gallium/drivers/radeonsi/si_debug.c | 10 +-
src/gallium/drivers/radeonsi/si_pipe.c | 9 +-
src/gallium/drivers/radeonsi/si_pipe.h | 5 +-
src/gallium/drivers/radeonsi/si_shader.c | 8 +-
src/gallium/drivers/radeonsi/si_shader.h | 1 -
src/gallium/drivers/radeonsi/si_shader_llvm.c | 5 +-
src/gallium/drivers/radeonsi/si_shader_llvm_tess.c | 45 +-------
src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c | 27 -----
src/gallium/drivers/radeonsi/si_state.h | 1 +
src/gallium/drivers/radeonsi/si_state_draw.cpp | 113 +++++++++------------
src/gallium/drivers/radeonsi/si_state_shaders.cpp | 68 ++++++++++---
11 files changed, 120 insertions(+), 172 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_debug.c b/src/gallium/drivers/radeonsi/si_debug.c
index 85f5667f2a7..d1deb09cca6 100644
--- a/src/gallium/drivers/radeonsi/si_debug.c
+++ b/src/gallium/drivers/radeonsi/si_debug.c
@@ -1036,19 +1036,13 @@ static void si_dump_debug_state(struct pipe_context *ctx, FILE *f, unsigned flag
void si_log_draw_state(struct si_context *sctx, struct u_log_context *log)
{
- struct si_shader_ctx_state *tcs_shader;
-
if (!log)
return;
- tcs_shader = &sctx->shader.tcs;
- if (sctx->shader.tes.cso && !sctx->shader.tcs.cso)
- tcs_shader = &sctx->fixed_func_tcs_shader;
-
si_dump_framebuffer(sctx, log);
si_dump_gfx_shader(sctx, &sctx->shader.vs, log);
- si_dump_gfx_shader(sctx, tcs_shader, log);
+ si_dump_gfx_shader(sctx, &sctx->shader.tcs, log);
si_dump_gfx_shader(sctx, &sctx->shader.tes, log);
si_dump_gfx_shader(sctx, &sctx->shader.gs, log);
si_dump_gfx_shader(sctx, &sctx->shader.ps, log);
@@ -1057,7 +1051,7 @@ void si_log_draw_state(struct si_context *sctx, struct u_log_context *log)
4, sctx->descriptors[SI_DESCS_INTERNAL].num_active_slots, si_identity,
log);
si_dump_gfx_descriptors(sctx, &sctx->shader.vs, log);
- si_dump_gfx_descriptors(sctx, tcs_shader, log);
+ si_dump_gfx_descriptors(sctx, &sctx->shader.tcs, log);
si_dump_gfx_descriptors(sctx, &sctx->shader.tes, log);
si_dump_gfx_descriptors(sctx, &sctx->shader.gs, log);
si_dump_gfx_descriptors(sctx, &sctx->shader.ps, log);
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 659223fb888..c8fc1bdcc3d 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -228,8 +228,13 @@ static void si_destroy_context(struct pipe_context *context)
for (i = 0; i < ARRAY_SIZE(sctx->vgt_shader_config); i++)
si_pm4_free_state(sctx, sctx->vgt_shader_config[i], SI_STATE_IDX(vgt_shader_config));
- if (sctx->fixed_func_tcs_shader.cso)
- sctx->b.delete_tcs_state(&sctx->b, sctx->fixed_func_tcs_shader.cso);
+ if (sctx->fixed_func_tcs_shader_cache) {
+ hash_table_foreach(sctx->fixed_func_tcs_shader_cache, entry) {
+ sctx->b.delete_tcs_state(&sctx->b, entry->data);
+ }
+ _mesa_hash_table_destroy(sctx->fixed_func_tcs_shader_cache, NULL);
+ }
+
if (sctx->custom_dsa_flush)
sctx->b.delete_depth_stencil_alpha_state(&sctx->b, sctx->custom_dsa_flush);
if (sctx->custom_blend_resolve)
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 80ec9ae4a79..80e03678366 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -999,7 +999,7 @@ struct si_context {
struct si_screen *screen;
struct util_debug_callback debug;
struct ac_llvm_compiler compiler; /* only non-threaded compilation */
- struct si_shader_ctx_state fixed_func_tcs_shader;
+ struct hash_table *fixed_func_tcs_shader_cache;
struct si_resource *wait_mem_scratch;
struct si_resource *wait_mem_scratch_tmz;
unsigned wait_mem_number;
@@ -1076,6 +1076,8 @@ struct si_context {
struct si_shader_ctx_state shaders[SI_NUM_GRAPHICS_SHADERS];
};
struct si_cs_shader_state cs_shader_state;
+ /* if current tcs set by user */
+ bool is_user_tcs;
/* shader information */
uint64_t ps_inputs_read_or_disabled;
@@ -1557,7 +1559,6 @@ void *si_create_passthrough_tcs(struct si_context *sctx);
/* si_shaderlib_tgsi.c */
void *si_get_blitter_vs(struct si_context *sctx, enum blitter_attrib_type type,
unsigned num_layers);
-void *si_create_fixed_func_tcs(struct si_context *sctx);
void *si_create_dma_compute_shader(struct pipe_context *ctx, unsigned num_dwords_per_thread,
bool dst_stream_cache_policy, bool is_copy);
void *si_create_clear_buffer_rmw_cs(struct si_context *sctx);
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index a3f74e519f1..b65cec6b978 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -1239,8 +1239,6 @@ static void si_dump_shader_key(const struct si_shader *shader, FILE *f)
si_dump_shader_key_vs(key, &key->ge.part.tcs.ls_prolog, "part.tcs.ls_prolog", f);
}
fprintf(f, " part.tcs.epilog.prim_mode = %u\n", key->ge.part.tcs.epilog.prim_mode);
- fprintf(f, " mono.u.ff_tcs_inputs_to_copy = 0x%" PRIx64 "\n",
- key->ge.mono.u.ff_tcs_inputs_to_copy);
fprintf(f, " opt.prefer_mono = %u\n", key->ge.opt.prefer_mono);
fprintf(f, " opt.same_patch_vertices = %u\n", key->ge.opt.same_patch_vertices);
break;
@@ -2018,12 +2016,8 @@ void si_get_tcs_epilog_key(struct si_shader *shader, union si_shader_part_key *k
key->tcs_epilog.wave32 = shader->wave_size == 32;
key->tcs_epilog.states = shader->key.ge.part.tcs.epilog;
- /* If output patches are wholly in one wave, we don't need a barrier.
- * The fixed-func TCS doesn't set tcs_vertices_out, but it won't use a barrier
- * anyway because tess levels are always defined in all invocations there.
- */
+ /* If output patches are wholly in one wave, we don't need a barrier. */
key->tcs_epilog.noop_s_barrier =
- shader->selector->info.base.tess.tcs_vertices_out &&
shader->wave_size % shader->selector->info.base.tess.tcs_vertices_out == 0;
}
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index 8c38792e6de..8bed98dbd82 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -686,7 +686,6 @@ struct si_shader_key_ge {
union si_vs_fix_fetch vs_fix_fetch[SI_MAX_ATTRIBS];
union {
- uint64_t ff_tcs_inputs_to_copy; /* fixed-func TCS only */
/* When PS needs PrimID and GS is disabled. */
unsigned vs_export_prim_id : 1; /* VS and TES only */
unsigned gs_tri_strip_adj_fix : 1; /* GS only */
diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm.c b/src/gallium/drivers/radeonsi/si_shader_llvm.c
index 781352191e5..5e9b3d441ab 100644
--- a/src/gallium/drivers/radeonsi/si_shader_llvm.c
+++ b/src/gallium/drivers/radeonsi/si_shader_llvm.c
@@ -1023,12 +1023,9 @@ bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shad
/* If both input and output patches are wholly in one wave, we don't need a barrier.
* That's true when both VS and TCS have the same number of patch vertices and
* the wave size is a multiple of the number of patch vertices.
- *
- * The fixed-func TCS doesn't set tcs_vertices_out.
*/
if (!shader->key.ge.opt.same_patch_vertices ||
- (sel->info.base.tess.tcs_vertices_out &&
- ctx->ac.wave_size % sel->info.base.tess.tcs_vertices_out != 0))
+ ctx->ac.wave_size % sel->info.base.tess.tcs_vertices_out != 0)
ac_build_s_barrier(&ctx->ac, ctx->stage);
}
} else if (ctx->stage == MESA_SHADER_GEOMETRY && !shader->key.ge.as_ngg) {
diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c b/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c
index 67e896e1291..9ef5e241d50 100644
--- a/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c
+++ b/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c
@@ -71,9 +71,6 @@ static unsigned get_tcs_out_vertex_dw_stride_constant(struct si_shader_context *
{
assert(ctx->stage == MESA_SHADER_TESS_CTRL);
- if (ctx->shader->key.ge.mono.u.ff_tcs_inputs_to_copy)
- return util_last_bit64(ctx->shader->key.ge.mono.u.ff_tcs_inputs_to_copy) * 4;
-
return util_last_bit64(ctx->shader->selector->info.outputs_written) * 4;
}
@@ -86,9 +83,6 @@ static LLVMValueRef get_tcs_out_vertex_dw_stride(struct si_shader_context *ctx)
static LLVMValueRef get_tcs_out_patch_stride(struct si_shader_context *ctx)
{
- if (ctx->shader->key.ge.mono.u.ff_tcs_inputs_to_copy)
- return si_unpack_param(ctx, ctx->tcs_out_lds_layout, 0, 13);
-
const struct si_shader_info *info = &ctx->shader->selector->info;
unsigned tcs_out_vertices = info->base.tess.tcs_vertices_out;
unsigned vertex_dw_stride = get_tcs_out_vertex_dw_stride_constant(ctx);
@@ -141,7 +135,7 @@ LLVMValueRef si_get_num_tcs_out_vertices(struct si_shader_context *ctx)
ctx->shader->selector ? ctx->shader->selector->info.base.tess.tcs_vertices_out
: 0;
- /* If !tcs_out_vertices, it's either the fixed-func TCS or the TCS epilog. */
+ /* If !tcs_out_vertices, it's the TCS epilog. */
if (ctx->stage == MESA_SHADER_TESS_CTRL && tcs_out_vertices)
return LLVMConstInt(ctx->ac.i32, tcs_out_vertices, 0);
@@ -550,41 +544,6 @@ static void si_nir_store_output_tcs(struct ac_shader_abi *abi,
}
}
-/**
- * Forward all outputs from the vertex shader to the TES. This is only used
- * for the fixed function TCS.
- */
-static void si_copy_tcs_inputs(struct si_shader_context *ctx)
-{
- LLVMValueRef invocation_id, buffer, buffer_offset;
- LLVMValueRef lds_vertex_stride, lds_base;
- uint64_t inputs;
-
- invocation_id = si_unpack_param(ctx, ctx->args.tcs_rel_ids, 8, 5);
- buffer = get_tess_ring_descriptor(ctx, TESS_OFFCHIP_RING_TCS);
- buffer_offset = ac_get_arg(&ctx->ac, ctx->args.tess_offchip_offset);
-
- lds_vertex_stride = si_get_tcs_in_vertex_dw_stride(ctx);
- lds_base = get_tcs_in_current_patch_offset(ctx);
- lds_base = ac_build_imad(&ctx->ac, invocation_id, lds_vertex_stride, lds_base);
-
- inputs = ctx->shader->key.ge.mono.u.ff_tcs_inputs_to_copy;
- while (inputs) {
- unsigned i = u_bit_scan64(&inputs);
-
- LLVMValueRef lds_ptr =
- LLVMBuildAdd(ctx->ac.builder, lds_base, LLVMConstInt(ctx->ac.i32, 4 * i, 0), "");
-
- LLVMValueRef buffer_addr = get_tcs_tes_buffer_address(
- ctx, get_rel_patch_id(ctx), invocation_id, LLVMConstInt(ctx->ac.i32, i, 0));
-
- LLVMValueRef value = lshs_lds_load(ctx, ctx->ac.i32, ~0, lds_ptr);
-
- ac_build_buffer_store_dword(&ctx->ac, buffer, value, NULL, buffer_addr, buffer_offset,
- ac_glc);
- }
-}
-
static void si_write_tess_factors(struct si_shader_context *ctx, union si_shader_part_key *key,
LLVMValueRef rel_patch_id, LLVMValueRef invocation_id,
LLVMValueRef tcs_out_current_patch_data_offset,
@@ -751,8 +710,6 @@ void si_llvm_tcs_build_end(struct si_shader_context *ctx)
LLVMBuilderRef builder = ctx->ac.builder;
LLVMValueRef rel_patch_id, invocation_id, tf_lds_offset;
- si_copy_tcs_inputs(ctx);
-
rel_patch_id = get_rel_patch_id(ctx);
invocation_id = si_unpack_param(ctx, ctx->args.tcs_rel_ids, 8, 5);
tf_lds_offset = get_tcs_out_current_patch_data_offset(ctx);
diff --git a/src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c b/src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c
index 045ea934cf4..0cc53483213 100644
--- a/src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c
+++ b/src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c
@@ -81,33 +81,6 @@ void *si_get_blitter_vs(struct si_context *sctx, enum blitter_attrib_type type,
return *vs;
}
-/**
- * This is used when TCS is NULL in the VS->TCS->TES chain. In this case,
- * VS passes its outputs to TES directly, so the fixed-function shader only
- * has to write TESSOUTER and TESSINNER.
- */
-void *si_create_fixed_func_tcs(struct si_context *sctx)
-{
- struct ureg_src outer, inner;
- struct ureg_dst tessouter, tessinner;
- struct ureg_program *ureg = ureg_create(PIPE_SHADER_TESS_CTRL);
-
- if (!ureg)
- return NULL;
-
- outer = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_TESS_DEFAULT_OUTER_LEVEL, 0);
- inner = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_TESS_DEFAULT_INNER_LEVEL, 0);
-
- tessouter = ureg_DECL_output(ureg, TGSI_SEMANTIC_TESSOUTER, 0);
- tessinner = ureg_DECL_output(ureg, TGSI_SEMANTIC_TESSINNER, 0);
-
- ureg_MOV(ureg, tessouter, outer);
- ureg_MOV(ureg, tessinner, inner);
- ureg_END(ureg);
-
- return ureg_create_shader_and_destroy(ureg, &sctx->b);
-}
-
/* Create a compute shader implementing clear_buffer or copy_buffer. */
void *si_create_dma_compute_shader(struct pipe_context *ctx, unsigned num_dwords_per_thread,
bool dst_stream_cache_policy, bool is_copy)
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index ca830703198..e48e1665054 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -597,6 +597,7 @@ void si_init_tess_factor_ring(struct si_context *sctx);
bool si_update_gs_ring_buffers(struct si_context *sctx);
bool si_update_spi_tmpring_size(struct si_context *sctx, unsigned bytes);
unsigned si_calc_inst_pref_size(struct si_shader *shader);
+bool si_set_tcs_to_fixed_func_shader(struct si_context *sctx);
/* si_state_draw.cpp */
void si_cp_dma_prefetch(struct si_context *sctx, struct pipe_resource *buf,
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.cpp b/src/gallium/drivers/radeonsi/si_state_draw.cpp
index 876d1a93e15..8a0eba58303 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.cpp
+++ b/src/gallium/drivers/radeonsi/si_state_draw.cpp
@@ -126,28 +126,16 @@ static bool si_update_shaders(struct si_context *sctx)
return false;
}
- if (sctx->shader.tcs.cso) {
- r = si_shader_select(ctx, &sctx->shader.tcs);
- if (r)
+ if (!sctx->is_user_tcs) {
+ if (!si_set_tcs_to_fixed_func_shader(sctx))
return false;
- si_pm4_bind_state(sctx, hs, sctx->shader.tcs.current);
- } else {
- if (!sctx->fixed_func_tcs_shader.cso) {
- sctx->fixed_func_tcs_shader.cso =
- (struct si_shader_selector*)si_create_fixed_func_tcs(sctx);
- if (!sctx->fixed_func_tcs_shader.cso)
- return false;
-
- sctx->fixed_func_tcs_shader.key.ge.part.tcs.epilog.invoc0_tess_factors_are_def =
- sctx->fixed_func_tcs_shader.cso->info.tessfactors_are_def_in_all_invocs;
- }
-
- r = si_shader_select(ctx, &sctx->fixed_func_tcs_shader);
- if (r)
- return false;
- si_pm4_bind_state(sctx, hs, sctx->fixed_func_tcs_shader.current);
}
+ r = si_shader_select(ctx, &sctx->shader.tcs);
+ if (r)
+ return false;
+ si_pm4_bind_state(sctx, hs, sctx->shader.tcs.current);
+
if (!HAS_GS || GFX_VERSION <= GFX8) {
r = si_shader_select(ctx, &sctx->shader.tes);
if (r)
@@ -164,6 +152,12 @@ static bool si_update_shaders(struct si_context *sctx)
}
}
} else {
+ /* Reset TCS to clear fixed function shader. */
+ if (!sctx->is_user_tcs && sctx->shader.tcs.cso) {
+ sctx->shader.tcs.cso = NULL;
+ sctx->shader.tcs.current = NULL;
+ }
+
if (GFX_VERSION <= GFX8) {
si_pm4_bind_state(sctx, ls, NULL);
sctx->prefetch_L2_mask &= ~SI_PREFETCH_LS;
@@ -626,10 +620,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx, unsigned *num_pa
{
struct si_shader *ls_current;
struct si_shader_selector *ls;
- /* The TES pointer will only be used for sctx->last_tcs.
- * It would be wrong to think that TCS = TES. */
- struct si_shader_selector *tcs =
- sctx->shader.tcs.cso ? sctx->shader.tcs.cso : sctx->shader.tes.cso;
+ struct si_shader_selector *tcs = sctx->shader.tcs.cso;
unsigned tess_uses_primid = sctx->ia_multi_vgt_param_key.u.tess_uses_prim_id;
bool has_primid_instancing_bug = sctx->gfx_level == GFX6 && sctx->screen->info.max_se == 1;
unsigned tes_sh_base = sctx->shader_pointers.sh_base[PIPE_SHADER_TESS_EVAL];
@@ -637,11 +628,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx, unsigned *num_pa
/* Since GFX9 has merged LS-HS in the TCS state, set LS = TCS. */
if (sctx->gfx_level >= GFX9) {
- if (sctx->shader.tcs.cso)
- ls_current = sctx->shader.tcs.current;
- else
- ls_current = sctx->fixed_func_tcs_shader.current;
-
+ ls_current = sctx->shader.tcs.current;
ls = ls_current->key.ge.part.tcs.ls;
} else {
ls_current = sctx->shader.vs.current;
@@ -663,19 +650,9 @@ static void si_emit_derived_tess_state(struct si_context *sctx, unsigned *num_pa
/* This calculates how shader inputs and outputs among VS, TCS, and TES
* are laid out in LDS. */
- unsigned num_tcs_inputs = util_last_bit64(ls->info.outputs_written);
- unsigned num_tcs_output_cp, num_tcs_outputs, num_tcs_patch_outputs;
-
- if (sctx->shader.tcs.cso) {
- num_tcs_outputs = util_last_bit64(tcs->info.outputs_written);
- num_tcs_output_cp = tcs->info.base.tess.tcs_vertices_out;
- num_tcs_patch_outputs = util_last_bit64(tcs->info.patch_outputs_written);
- } else {
- /* No TCS. Route varyings from LS to TES. */
- num_tcs_outputs = num_tcs_inputs;
- num_tcs_output_cp = num_tcs_input_cp;
- num_tcs_patch_outputs = 2; /* TESSINNER + TESSOUTER */
- }
+ unsigned num_tcs_outputs = util_last_bit64(tcs->info.outputs_written);
+ unsigned num_tcs_output_cp = tcs->info.base.tess.tcs_vertices_out;
+ unsigned num_tcs_patch_outputs = util_last_bit64(tcs->info.patch_outputs_written);
unsigned input_vertex_size = ls->info.lshs_vertex_stride;
unsigned output_vertex_size = num_tcs_outputs * 16;
@@ -2223,34 +2200,44 @@ static void si_draw(struct pipe_context *ctx,
si_need_gfx_cs_space(sctx, num_draws);
if (HAS_TESS) {
- struct si_shader_selector *tcs = sctx->shader.tcs.cso;
+ if (sctx->is_user_tcs) {
+ struct si_shader_selector *tcs = sctx->shader.tcs.cso;
- /* The rarely occuring tcs == NULL case is not optimized. */
- bool same_patch_vertices =
- GFX_VERSION >= GFX9 &&
- tcs && sctx->patch_vertices == tcs->info.base.tess.tcs_vertices_out;
+ bool same_patch_vertices =
+ GFX_VERSION >= GFX9 &&
+ sctx->patch_vertices == tcs->info.base.tess.tcs_vertices_out;
- if (sctx->shader.tcs.key.ge.opt.same_patch_vertices != same_patch_vertices) {
- sctx->shader.tcs.key.ge.opt.same_patch_vertices = same_patch_vertices;
- sctx->do_update_shaders = true;
- }
+ if (sctx->shader.tcs.key.ge.opt.same_patch_vertices != same_patch_vertices) {
+ sctx->shader.tcs.key.ge.opt.same_patch_vertices = same_patch_vertices;
+ sctx->do_update_shaders = true;
+ }
- if (GFX_VERSION == GFX9 && sctx->screen->info.has_ls_vgpr_init_bug) {
- /* Determine whether the LS VGPR fix should be applied.
- *
- * It is only required when num input CPs > num output CPs,
- * which cannot happen with the fixed function TCS. We should
- * also update this bit when switching from TCS to fixed
- * function TCS.
+ if (GFX_VERSION == GFX9 && sctx->screen->info.has_ls_vgpr_init_bug) {
+ /* Determine whether the LS VGPR fix should be applied.
+ *
+ * It is only required when num input CPs > num output CPs,
+ * which cannot happen with the fixed function TCS.
+ */
+ bool ls_vgpr_fix =
+ sctx->patch_vertices > tcs->info.base.tess.tcs_vertices_out;
+
+ if (ls_vgpr_fix != sctx->shader.tcs.key.ge.part.tcs.ls_prolog.ls_vgpr_fix) {
+ sctx->shader.tcs.key.ge.part.tcs.ls_prolog.ls_vgpr_fix = ls_vgpr_fix;
+ sctx->do_update_shaders = true;
+ }
+ }
+ } else {
+ /* These fields are static for fixed function TCS. So no need to set
+ * do_update_shaders between fixed-TCS draws. As fixed-TCS to user-TCS
+ * or opposite, do_update_shaders should already be set by bind state.
*/
- bool ls_vgpr_fix =
- tcs && sctx->patch_vertices > tcs->info.base.tess.tcs_vertices_out;
+ sctx->shader.tcs.key.ge.opt.same_patch_vertices = GFX_VERSION >= GFX9;
+ sctx->shader.tcs.key.ge.part.tcs.ls_prolog.ls_vgpr_fix = false;
- if (ls_vgpr_fix != sctx->shader.tcs.key.ge.part.tcs.ls_prolog.ls_vgpr_fix) {
- sctx->shader.tcs.key.ge.part.tcs.ls_prolog.ls_vgpr_fix = ls_vgpr_fix;
- sctx->fixed_func_tcs_shader.key.ge.part.tcs.ls_prolog.ls_vgpr_fix = ls_vgpr_fix;
+ /* User may only change patch vertices, needs to update fixed func TCS. */
+ if (sctx->shader.tcs.cso &&
+ sctx->shader.tcs.cso->info.base.tess.tcs_vertices_out != sctx->patch_vertices)
sctx->do_update_shaders = true;
- }
}
}
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.cpp b/src/gallium/drivers/radeonsi/si_state_shaders.cpp
index 015bb078da2..7fd3f9cf934 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.cpp
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.cpp
@@ -3373,7 +3373,6 @@ static void si_bind_vs_shader(struct pipe_context *ctx, void *state)
sctx->shader.vs.current = (sel && sel->variants_count) ? sel->variants[0] : NULL;
sctx->num_vs_blit_sgprs = sel ? sel->info.base.vs.blit_sgprs_amd : 0;
sctx->vs_uses_draw_id = sel ? sel->info.uses_drawid : false;
- sctx->fixed_func_tcs_shader.key.ge.mono.u.ff_tcs_inputs_to_copy = sel ? sel->info.outputs_written : 0;
if (si_update_ngg(sctx))
si_shader_change_notify(sctx);
@@ -3486,6 +3485,11 @@ static void si_bind_tcs_shader(struct pipe_context *ctx, void *state)
struct si_shader_selector *sel = (struct si_shader_selector*)state;
bool enable_changed = !!sctx->shader.tcs.cso != !!sel;
+ /* Note it could happen that user shader sel is same as fixed function shader,
+ * so we should update this field even sctx->shader.tcs.cso == sel.
+ */
+ sctx->is_user_tcs = !!sel;
+
if (sctx->shader.tcs.cso == sel)
return;
@@ -3518,11 +3522,9 @@ static void si_bind_tes_shader(struct pipe_context *ctx, void *state)
si_update_tess_uses_prim_id(sctx);
sctx->shader.tcs.key.ge.part.tcs.epilog.prim_mode =
- sctx->fixed_func_tcs_shader.key.ge.part.tcs.epilog.prim_mode =
sel ? sel->info.base.tess._primitive_mode : 0;
sctx->shader.tcs.key.ge.part.tcs.epilog.tes_reads_tess_factors =
- sctx->fixed_func_tcs_shader.key.ge.part.tcs.epilog.tes_reads_tess_factors =
sel ? sel->info.reads_tess_factors : 0;
si_update_common_shader_state(sctx, sel, PIPE_SHADER_TESS_EVAL);
@@ -3976,17 +3978,8 @@ static int si_update_scratch_buffer(struct si_context *sctx, struct si_shader *s
return 1;
}
-static struct si_shader *si_get_tcs_current(struct si_context *sctx)
-{
- if (!sctx->shader.tes.cso)
- return NULL; /* tessellation disabled */
-
- return sctx->shader.tcs.cso ? sctx->shader.tcs.current : sctx->fixed_func_tcs_shader.current;
-}
-
static bool si_update_scratch_relocs(struct si_context *sctx)
{
- struct si_shader *tcs = si_get_tcs_current(sctx);
int r;
/* Update the shaders, so that they are using the latest scratch.
@@ -4006,11 +3999,11 @@ static bool si_update_scratch_relocs(struct si_context *sctx)
if (r == 1)
si_pm4_bind_state(sctx, gs, sctx->shader.gs.current);
- r = si_update_scratch_buffer(sctx, tcs);
+ r = si_update_scratch_buffer(sctx, sctx->shader.tcs.current);
if (r < 0)
return false;
if (r == 1)
- si_pm4_bind_state(sctx, hs, tcs);
+ si_pm4_bind_state(sctx, hs, sctx->shader.tcs.current);
/* VS can be bound as LS, ES, or VS. */
r = si_update_scratch_buffer(sctx, sctx->shader.vs.current);
@@ -4251,6 +4244,53 @@ static void si_emit_scratch_state(struct si_context *sctx)
}
}
+/**
+ * Lookup key for the per-context cache of passthrough ("fixed function") TCS
+ * selectors.
+ *
+ * The key is hashed and compared bytewise over its full size, so every byte
+ * has to be deterministic. The padding is therefore spelled out as a real
+ * member, and users must zero the whole struct before filling it in.
+ */
+struct si_fixed_func_tcs_shader_key {
+   uint64_t outputs_written; /* outputs_written mask of the bound VS */
+   uint8_t vertices_out;     /* sctx->patch_vertices when the TCS was created */
+   uint8_t pad[7];           /* explicit padding, always zero */
+};
+
+static_assert(sizeof(struct si_fixed_func_tcs_shader_key) == 16,
+              "fixed-func TCS key must not contain implicit padding");
+
+/* Hash callback for the cache: hashes the raw bytes of the key. */
+static uint32_t si_fixed_func_tcs_shader_key_hash(const void *key)
+{
+   return _mesa_hash_data(key, sizeof(struct si_fixed_func_tcs_shader_key));
+}
+
+/* Equality callback for the cache: bytewise comparison of two keys. */
+static bool si_fixed_func_tcs_shader_key_equals(const void *a, const void *b)
+{
+   return memcmp(a, b, sizeof(struct si_fixed_func_tcs_shader_key)) == 0;
+}
+
+/**
+ * Bind a passthrough TCS matching the current VS outputs and patch vertex
+ * count as sctx->shader.tcs.cso, creating and caching it on first use.
+ *
+ * \return false if the cache, the shader, or the cache entry could not be
+ *         allocated; true otherwise.
+ */
+bool si_set_tcs_to_fixed_func_shader(struct si_context *sctx)
+{
+   if (!sctx->fixed_func_tcs_shader_cache) {
+      sctx->fixed_func_tcs_shader_cache = _mesa_hash_table_create(
+         NULL, si_fixed_func_tcs_shader_key_hash,
+         si_fixed_func_tcs_shader_key_equals);
+      if (!sctx->fixed_func_tcs_shader_cache)
+         return false;
+   }
+
+   struct hash_table *cache = sctx->fixed_func_tcs_shader_cache;
+
+   /* Zero everything first so the bytewise hash/compare is deterministic. */
+   struct si_fixed_func_tcs_shader_key key;
+   memset(&key, 0, sizeof(key));
+   key.outputs_written = sctx->shader.vs.cso->info.outputs_written;
+   key.vertices_out = sctx->patch_vertices;
+
+   struct hash_entry *entry = _mesa_hash_table_search(cache, &key);
+
+   struct si_shader_selector *tcs;
+   if (entry) {
+      tcs = (struct si_shader_selector *)entry->data;
+   } else {
+      tcs = (struct si_shader_selector *)si_create_passthrough_tcs(sctx);
+      if (!tcs)
+         return false;
+
+      /* The hash table keeps the key pointer, so the key must outlive this
+       * function: give each entry its own copy owned by the table.
+       *
+       * NOTE(review): assumes _mesa_hash_table_destroy() releases the table
+       * with ralloc_free(), which also frees these children; confirm that
+       * util/ralloc.h is in scope in this file.
+       */
+      struct si_fixed_func_tcs_shader_key *stored_key =
+         rzalloc(cache, struct si_fixed_func_tcs_shader_key);
+      if (stored_key)
+         memcpy(stored_key, &key, sizeof(key));
+
+      if (!stored_key || !_mesa_hash_table_insert(cache, stored_key, (void *)tcs)) {
+         /* Don't leak the shader we just created if it can't be cached. */
+         ralloc_free(stored_key);
+         sctx->b.delete_tcs_state(&sctx->b, tcs);
+         return false;
+      }
+   }
+
+   sctx->shader.tcs.cso = tcs;
+   sctx->shader.tcs.key.ge.part.tcs.epilog.invoc0_tess_factors_are_def =
+      tcs->info.tessfactors_are_def_in_all_invocs;
+
+   return true;
+}
+
void si_init_screen_live_shader_cache(struct si_screen *sscreen)
{
util_live_shader_cache_init(&sscreen->live_shader_cache, si_create_shader_selector,
More information about the mesa-commit
mailing list