Mesa (main): radeonsi: assign param export indices before compilation
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Fri Apr 22 22:47:28 UTC 2022
Module: Mesa
Branch: main
Commit: 3777a5d7157f679be4afecb89313ea0d9db47a8e
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=3777a5d7157f679be4afecb89313ea0d9db47a8e
Author: Marek Olšák <marek.olsak at amd.com>
Date: Sun Dec 12 20:50:58 2021 -0500
radeonsi: assign param export indices before compilation
This moves the logic out of LLVM-specific codepaths.
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer at amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14414>
---
src/gallium/drivers/radeonsi/si_shader.c | 85 +++++++++++++++++++++++-
src/gallium/drivers/radeonsi/si_shader.h | 3 +-
src/gallium/drivers/radeonsi/si_shader_llvm.c | 28 --------
src/gallium/drivers/radeonsi/si_shader_llvm_gs.c | 20 ++++++
src/gallium/drivers/radeonsi/si_shader_llvm_vs.c | 76 +++++----------------
5 files changed, 123 insertions(+), 89 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 87935110c34..2a90a07f214 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -1580,6 +1580,52 @@ void si_update_shader_binary_info(struct si_shader *shader, nir_shader *nir)
shader->info.uses_vmem_sampler_or_bvh |= info.uses_vmem_sampler_or_bvh;
}
+static void si_nir_assign_param_offsets(nir_shader *nir, const struct si_shader_info *info,
+ int8_t slot_remap[NUM_TOTAL_VARYING_SLOTS],
+ uint8_t *num_param_exports, uint64_t *output_param_mask,
+ uint8_t vs_output_param_offset[NUM_TOTAL_VARYING_SLOTS])
+{
+ nir_function_impl *impl = nir_shader_get_entrypoint(nir);
+ assert(impl);
+
+ nir_foreach_block(block, impl) {
+ nir_foreach_instr_safe(instr, block) {
+ if (instr->type != nir_instr_type_intrinsic)
+ continue;
+
+ nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+ if (intr->intrinsic != nir_intrinsic_store_output)
+ continue;
+
+ /* No indirect indexing allowed. */
+ ASSERTED nir_src offset = *nir_get_io_offset_src(intr);
+ assert(nir_src_is_const(offset) && nir_src_as_uint(offset) == 0);
+
+ assert(intr->num_components == 1); /* only scalar stores expected */
+ nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
+
+ /* Assign the param index if it's unassigned. */
+ if (nir_slot_is_varying(sem.location) && !sem.no_varying &&
+ (sem.gs_streams & 0x3) == 0 &&
+ vs_output_param_offset[sem.location] == AC_EXP_PARAM_DEFAULT_VAL_0000) {
+ /* The semantic and the base should be the same as in si_shader_info. */
+ assert(sem.location == info->output_semantic[nir_intrinsic_base(intr)]);
+ /* It must not be remapped (duplicated). */
+ assert(slot_remap[sem.location] == -1);
+
+ vs_output_param_offset[sem.location] = (*num_param_exports)++;
+ *output_param_mask |= BITFIELD64_BIT(nir_intrinsic_base(intr));
+ }
+ }
+ }
+
+ /* Duplicated outputs are redirected here. */
+ for (unsigned i = 0; i < NUM_TOTAL_VARYING_SLOTS; i++) {
+ if (slot_remap[i] >= 0)
+ vs_output_param_offset[i] = vs_output_param_offset[slot_remap[i]];
+ }
+}
+
bool si_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compiler,
struct si_shader *shader, struct util_debug_callback *debug)
{
@@ -1587,6 +1633,42 @@ bool si_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compi
bool free_nir;
struct nir_shader *nir = si_get_nir_shader(sel, &shader->key, &free_nir);
+ /* Assign param export indices. */
+ if ((sel->stage == MESA_SHADER_VERTEX ||
+ sel->stage == MESA_SHADER_TESS_EVAL ||
+ (sel->stage == MESA_SHADER_GEOMETRY && shader->key.ge.as_ngg)) &&
+ !shader->key.ge.as_ls && !shader->key.ge.as_es) {
+ /* Initialize this first. */
+ shader->info.nr_param_exports = 0;
+ shader->info.vs_output_param_mask = 0;
+
+ STATIC_ASSERT(sizeof(shader->info.vs_output_param_offset[0]) == 1);
+ memset(shader->info.vs_output_param_offset, AC_EXP_PARAM_DEFAULT_VAL_0000,
+ sizeof(shader->info.vs_output_param_offset));
+
+ /* A slot remapping table for duplicated outputs, so that 1 vertex shader output can be
+ * mapped to multiple fragment shader inputs.
+ */
+ int8_t slot_remap[NUM_TOTAL_VARYING_SLOTS];
+ memset(slot_remap, -1, NUM_TOTAL_VARYING_SLOTS);
+
+ /* This sets DEFAULT_VAL for constant outputs in vs_output_param_offset. */
+ /* TODO: This doesn't affect GS. */
+ NIR_PASS_V(nir, ac_nir_optimize_outputs, false, slot_remap,
+ shader->info.vs_output_param_offset);
+
+ /* Assign the non-constant outputs. */
+ /* TODO: Use this for the GS copy shader too. */
+ si_nir_assign_param_offsets(nir, &sel->info, slot_remap, &shader->info.nr_param_exports,
+ &shader->info.vs_output_param_mask,
+ shader->info.vs_output_param_offset);
+
+ if (shader->key.ge.mono.u.vs_export_prim_id) {
+ shader->info.vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID] = shader->info.nr_param_exports++;
+ shader->info.vs_output_param_mask |= BITFIELD64_BIT(sel->info.num_outputs);
+ }
+ }
+
struct pipe_stream_output_info so = {};
if (sel->info.enabled_streamout_buffer_mask)
nir_gather_stream_output_info(nir, &so);
@@ -1635,13 +1717,14 @@ bool si_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compi
if (sel->stage == MESA_SHADER_GEOMETRY && !shader->key.ge.as_ngg)
vs_output_param_offset = shader->gs_copy_shader->info.vs_output_param_offset;
+ /* We must use the original shader info before the removal of duplicated shader outputs. */
/* VS and TES should also set primitive ID output if it's used. */
unsigned num_outputs_with_prim_id = sel->info.num_outputs +
shader->key.ge.mono.u.vs_export_prim_id;
for (unsigned i = 0; i < num_outputs_with_prim_id; i++) {
unsigned semantic = sel->info.output_semantic[i];
- unsigned offset = vs_output_param_offset[i];
+ unsigned offset = vs_output_param_offset[semantic];
unsigned ps_input_cntl;
if (offset <= AC_EXP_PARAM_OFFSET_31) {
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index 5f9e59391b2..98408fb508a 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -739,7 +739,8 @@ union si_shader_key {
/* GCN-specific shader info. */
struct si_shader_binary_info {
- ubyte vs_output_param_offset[SI_MAX_VS_OUTPUTS];
+ ubyte vs_output_param_offset[NUM_TOTAL_VARYING_SLOTS];
+ uint64_t vs_output_param_mask; /* which params to export, indexed by "base" */
uint32_t vs_output_ps_input_cntl[NUM_TOTAL_VARYING_SLOTS];
ubyte num_input_sgprs;
ubyte num_input_vgprs;
diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm.c b/src/gallium/drivers/radeonsi/si_shader_llvm.c
index 5a4c76793d8..1c26e82842d 100644
--- a/src/gallium/drivers/radeonsi/si_shader_llvm.c
+++ b/src/gallium/drivers/radeonsi/si_shader_llvm.c
@@ -1061,31 +1061,6 @@ static bool si_should_optimize_less(struct ac_llvm_compiler *compiler,
return sel->stage == MESA_SHADER_COMPUTE && sel->info.num_memory_stores > 1000;
}
-static void si_optimize_vs_outputs(struct si_shader_context *ctx)
-{
- struct si_shader *shader = ctx->shader;
- struct si_shader_info *info = &shader->selector->info;
- unsigned skip_vs_optim_mask = 0;
-
- if ((ctx->stage != MESA_SHADER_VERTEX && ctx->stage != MESA_SHADER_TESS_EVAL) ||
- shader->key.ge.as_ls || shader->key.ge.as_es)
- return;
-
- /* Optimizing these outputs is not possible, since they might be overriden
- * at runtime with S_028644_PT_SPRITE_TEX. */
- for (int i = 0; i < info->num_outputs; i++) {
- if (info->output_semantic[i] == VARYING_SLOT_PNTC ||
- (info->output_semantic[i] >= VARYING_SLOT_TEX0 &&
- info->output_semantic[i] <= VARYING_SLOT_TEX7)) {
- skip_vs_optim_mask |= 1u << shader->info.vs_output_param_offset[i];
- }
- }
-
- ac_optimize_vs_outputs(&ctx->ac, ctx->main_fn, shader->info.vs_output_param_offset,
- info->num_outputs, skip_vs_optim_mask,
- &shader->info.nr_param_exports);
-}
-
bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compiler,
struct si_shader *shader, const struct pipe_stream_output_info *so,
struct util_debug_callback *debug, struct nir_shader *nir,
@@ -1295,9 +1270,6 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *
si_llvm_optimize_module(&ctx);
- /* Post-optimization transformations and analysis. */
- si_optimize_vs_outputs(&ctx);
-
/* Make sure the input is a pointer and not integer followed by inttoptr. */
assert(LLVMGetTypeKind(LLVMTypeOf(LLVMGetParam(ctx.main_fn, 0))) == LLVMPointerTypeKind);
diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c b/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c
index e20af7e1358..0bde0d99259 100644
--- a/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c
+++ b/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c
@@ -22,6 +22,7 @@
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
+#include "ac_nir.h"
#include "si_pipe.h"
#include "si_shader_internal.h"
#include "sid.h"
@@ -444,6 +445,25 @@ struct si_shader *si_generate_gs_copy_shader(struct si_screen *sscreen,
shader->is_gs_copy_shader = true;
shader->wave_size = si_determine_wave_size(sscreen, shader);
+ STATIC_ASSERT(sizeof(shader->info.vs_output_param_offset[0]) == 1);
+ memset(shader->info.vs_output_param_offset, AC_EXP_PARAM_DEFAULT_VAL_0000,
+ sizeof(shader->info.vs_output_param_offset));
+
+ for (unsigned i = 0; i < gsinfo->num_outputs; i++) {
+ unsigned semantic = gsinfo->output_semantic[i];
+
+ /* Skip if no channel writes to stream 0. */
+ if (!nir_slot_is_varying(semantic) ||
+ (gsinfo->output_streams[i] & 0x03 &&
+ gsinfo->output_streams[i] & 0x0c &&
+ gsinfo->output_streams[i] & 0x30 &&
+ gsinfo->output_streams[i] & 0xc0))
+ continue;
+
+ shader->info.vs_output_param_offset[semantic] = shader->info.nr_param_exports++;
+ shader->info.vs_output_param_mask |= BITFIELD64_BIT(i);
+ }
+
si_llvm_context_init(&ctx, sscreen, compiler, shader->wave_size);
ctx.shader = shader;
ctx.stage = MESA_SHADER_VERTEX;
diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c b/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c
index ab984f2f7fb..b54fc86ed2e 100644
--- a/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c
+++ b/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c
@@ -438,61 +438,6 @@ static void si_llvm_init_vs_export_args(struct si_shader_context *ctx, const LLV
memcpy(&args->out[0], values, sizeof(values[0]) * 4);
}
-static void si_prepare_param_exports(struct si_shader_context *ctx,
- const struct si_shader_output_values *outputs, unsigned noutput,
- struct ac_export_args exports[32])
-{
- struct si_shader *shader = ctx->shader;
- unsigned param_count = 0;
-
- memset(shader->info.vs_output_param_offset, AC_EXP_PARAM_DEFAULT_VAL_0000,
- sizeof(shader->info.vs_output_param_offset));
-
- for (unsigned i = 0; i < noutput; i++) {
- unsigned semantic = outputs[i].semantic;
-
- /* Skip if no channel writes to stream 0. */
- if (outputs[i].vertex_streams & 0x03 &&
- outputs[i].vertex_streams & 0x0c &&
- outputs[i].vertex_streams & 0x30 &&
- outputs[i].vertex_streams & 0xc0)
- continue;
-
- switch (semantic) {
- case VARYING_SLOT_LAYER:
- case VARYING_SLOT_VIEWPORT:
- case VARYING_SLOT_CLIP_DIST0:
- case VARYING_SLOT_CLIP_DIST1:
- case VARYING_SLOT_COL0:
- case VARYING_SLOT_COL1:
- case VARYING_SLOT_BFC0:
- case VARYING_SLOT_BFC1:
- case VARYING_SLOT_PRIMITIVE_ID:
- case VARYING_SLOT_FOGC:
- break;
- default:
- if ((semantic >= VARYING_SLOT_TEX0 && semantic <= VARYING_SLOT_TEX7) ||
- semantic >= VARYING_SLOT_VAR0)
- break;
- else
- continue;
- }
-
- if ((semantic <= VARYING_SLOT_VAR31 || semantic >= VARYING_SLOT_VAR0_16BIT) &&
- shader->key.ge.opt.kill_outputs &
- (1ull << si_shader_io_get_unique_index(semantic, true)))
- continue;
-
- si_llvm_init_vs_export_args(ctx, outputs[i].values, V_008DFC_SQ_EXP_PARAM + param_count,
- &exports[param_count]);
-
- assert(i < ARRAY_SIZE(shader->info.vs_output_param_offset));
- shader->info.vs_output_param_offset[i] = param_count++;
- }
-
- shader->info.nr_param_exports = param_count;
-}
-
/**
* Vertex color clamping.
*
@@ -576,9 +521,6 @@ void si_llvm_build_vs_exports(struct si_shader_context *ctx,
si_vertex_color_clamping(ctx, outputs, noutput);
- struct ac_export_args param_exports[32];
- si_prepare_param_exports(ctx, outputs, noutput, param_exports);
-
/* Build position exports. */
for (i = 0; i < noutput; i++) {
switch (outputs[i].semantic) {
@@ -747,7 +689,23 @@ void si_llvm_build_vs_exports(struct si_shader_context *ctx,
ac_build_export(&ctx->ac, &pos_args[i]);
}
- /* Build parameter exports. */
+ /* Build parameter exports. Use 2 loops to export params in ascending order.
+ * 32 is the maximum number of parameter exports.
+ */
+ struct ac_export_args param_exports[32] = {};
+ uint64_t vs_output_param_mask = shader->info.vs_output_param_mask;
+
+ while (vs_output_param_mask) {
+ unsigned i = u_bit_scan64(&vs_output_param_mask);
+ unsigned offset = shader->info.vs_output_param_offset[outputs[i].semantic];
+
+ assert(offset <= AC_EXP_PARAM_OFFSET_31);
+ assert(!param_exports[offset].enabled_channels);
+
+ si_llvm_init_vs_export_args(ctx, outputs[i].values, V_008DFC_SQ_EXP_PARAM + offset,
+ &param_exports[offset]);
+ }
+
for (unsigned i = 0; i < shader->info.nr_param_exports; i++)
ac_build_export(&ctx->ac, &param_exports[i]);
}
More information about the mesa-commit
mailing list