Mesa (main): radeonsi: replace llvm tcs output with nir lower pass
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Mon Jun 27 03:16:29 UTC 2022
Module: Mesa
Branch: main
Commit: 7598bfd768f02d1d77007ebc07990db9c83a6fb4
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=7598bfd768f02d1d77007ebc07990db9c83a6fb4
Author: Qiang Yu <yuq825 at gmail.com>
Date: Sat May 28 17:52:35 2022 +0800
radeonsi: replace llvm tcs output with nir lower pass
Remove the store_tcs_outputs ABI callback; the common output ABI
can be used instead to pass the tess factors as VGPRs.
Reviewed-by: Marek Olšák <marek.olsak at amd.com>
Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer at amd.com>
Signed-off-by: Qiang Yu <yuq825 at gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16705>
---
src/amd/llvm/ac_nir_to_llvm.c | 20 +--
src/amd/llvm/ac_shader_abi.h | 6 -
src/gallium/drivers/radeonsi/si_shader.c | 24 +++-
src/gallium/drivers/radeonsi/si_shader_internal.h | 1 -
src/gallium/drivers/radeonsi/si_shader_llvm.c | 5 -
src/gallium/drivers/radeonsi/si_shader_llvm_tess.c | 152 ++++-----------------
6 files changed, 49 insertions(+), 159 deletions(-)
diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c
index 7fc61f40936..00511ab487f 100644
--- a/src/amd/llvm/ac_nir_to_llvm.c
+++ b/src/amd/llvm/ac_nir_to_llvm.c
@@ -2367,12 +2367,9 @@ static void visit_store_output(struct ac_nir_context *ctx, nir_intrinsic_instr *
unsigned component = nir_intrinsic_component(instr);
LLVMValueRef src = ac_to_float(&ctx->ac, get_src(ctx, instr->src[0]));
nir_src offset = *nir_get_io_offset_src(instr);
- LLVMValueRef indir_index = NULL;
- if (nir_src_is_const(offset))
- assert(nir_src_as_uint(offset) == 0);
- else
- indir_index = get_src(ctx, offset);
+ /* No indirect indexing is allowed here. */
+ assert(nir_src_is_const(offset) && nir_src_as_uint(offset) == 0);
switch (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src))) {
case 16:
@@ -2388,19 +2385,6 @@ static void visit_store_output(struct ac_nir_context *ctx, nir_intrinsic_instr *
writemask <<= component;
- if (ctx->stage == MESA_SHADER_TESS_CTRL) {
- nir_src *vertex_index_src = nir_get_io_arrayed_index_src(instr);
- LLVMValueRef vertex_index = vertex_index_src ? get_src(ctx, *vertex_index_src) : NULL;
- unsigned location = nir_intrinsic_io_semantics(instr).location;
-
- ctx->abi->store_tcs_outputs(ctx->abi, vertex_index, indir_index, src,
- writemask, component, location, base);
- return;
- }
-
- /* No indirect indexing is allowed after this point. */
- assert(!indir_index);
-
for (unsigned chan = 0; chan < 8; chan++) {
if (!(writemask & (1 << chan)))
continue;
diff --git a/src/amd/llvm/ac_shader_abi.h b/src/amd/llvm/ac_shader_abi.h
index 4584d52d120..ee9e17a88d0 100644
--- a/src/amd/llvm/ac_shader_abi.h
+++ b/src/amd/llvm/ac_shader_abi.h
@@ -69,12 +69,6 @@ struct ac_shader_abi {
unsigned driver_location, unsigned component,
unsigned num_components, bool load_inputs);
- void (*store_tcs_outputs)(struct ac_shader_abi *abi,
- LLVMValueRef vertex_index, LLVMValueRef param_index,
- LLVMValueRef src, unsigned writemask,
- unsigned component, unsigned location, unsigned driver_location);
-
-
LLVMValueRef (*load_ubo)(struct ac_shader_abi *abi, LLVMValueRef index);
/**
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index b65cec6b978..5e102df047d 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -1491,13 +1491,20 @@ static bool si_nir_kill_outputs(nir_shader *nir, const union si_shader_key *key)
static unsigned si_map_io_driver_location(unsigned semantic)
{
+ if ((semantic >= VARYING_SLOT_PATCH0 && semantic < VARYING_SLOT_TESS_MAX) ||
+ semantic == VARYING_SLOT_TESS_LEVEL_INNER ||
+ semantic == VARYING_SLOT_TESS_LEVEL_OUTER)
+ return si_shader_io_get_unique_index_patch(semantic);
+
return si_shader_io_get_unique_index(semantic, false);
}
-static bool si_lower_io_to_mem(const union si_shader_key *key,
- nir_shader *nir,
+static bool si_lower_io_to_mem(struct si_shader *shader, nir_shader *nir,
uint64_t tcs_vgpr_only_inputs)
{
+ struct si_shader_selector *sel = shader->selector;
+ const union si_shader_key *key = &shader->key;
+
if (nir->info.stage == MESA_SHADER_VERTEX) {
if (key->ge.as_ls) {
NIR_PASS_V(nir, ac_nir_lower_ls_outputs_to_mem, si_map_io_driver_location,
@@ -1507,6 +1514,17 @@ static bool si_lower_io_to_mem(const union si_shader_key *key,
} else if (nir->info.stage == MESA_SHADER_TESS_CTRL) {
NIR_PASS_V(nir, ac_nir_lower_hs_inputs_to_mem, si_map_io_driver_location,
key->ge.opt.same_patch_vertices);
+ NIR_PASS_V(nir, ac_nir_lower_hs_outputs_to_mem, si_map_io_driver_location,
+ sel->screen->info.gfx_level,
+ false, /* does not matter as we disabled final tess factor write */
+ ~0ULL, ~0ULL, /* no TES inputs filter */
+ util_last_bit64(sel->info.outputs_written),
+ util_last_bit64(sel->info.patch_outputs_written),
+ shader->wave_size,
+ /* ALL TCS inputs are passed by register. */
+ key->ge.opt.same_patch_vertices &&
+ !(sel->info.base.inputs_read & ~sel->info.tcs_vgpr_only_inputs),
+ sel->info.tessfactors_are_def_in_all_invocs, false);
return true;
}
@@ -1633,7 +1651,7 @@ struct nir_shader *si_get_nir_shader(struct si_shader *shader, bool *free_nir,
*/
progress2 |= ac_nir_lower_indirect_derefs(nir, sel->screen->info.gfx_level);
- bool opt_offsets = si_lower_io_to_mem(key, nir, tcs_vgpr_only_inputs);
+ bool opt_offsets = si_lower_io_to_mem(shader, nir, tcs_vgpr_only_inputs);
if (progress2 || opt_offsets)
si_nir_opts(sel->screen, nir, false);
diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h
index cbb4a510912..6bda67d5b7b 100644
--- a/src/gallium/drivers/radeonsi/si_shader_internal.h
+++ b/src/gallium/drivers/radeonsi/si_shader_internal.h
@@ -144,7 +144,6 @@ struct si_shader_context {
LLVMValueRef gsvs_ring[4];
LLVMValueRef tess_offchip_ring;
- LLVMValueRef invoc0_tess_factors[6]; /* outer[4], inner[2] */
LLVMValueRef gs_next_vertex[4];
LLVMValueRef gs_curprim_verts[4];
LLVMValueRef gs_generated_prims[4];
diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm.c b/src/gallium/drivers/radeonsi/si_shader_llvm.c
index 8f3263ab4b0..a1de3d85a35 100644
--- a/src/gallium/drivers/radeonsi/si_shader_llvm.c
+++ b/src/gallium/drivers/radeonsi/si_shader_llvm.c
@@ -834,11 +834,6 @@ bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shad
case MESA_SHADER_TESS_CTRL:
si_llvm_init_tcs_callbacks(ctx);
si_llvm_preload_tess_rings(ctx);
-
- if (sel->info.tessfactors_are_def_in_all_invocs) {
- for (unsigned i = 0; i < 6; i++)
- ctx->invoc0_tess_factors[i] = ac_build_alloca_undef(&ctx->ac, ctx->ac.i32, "");
- }
break;
case MESA_SHADER_TESS_EVAL:
diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c b/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c
index 70ab6e310a7..4d75f291ccb 100644
--- a/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c
+++ b/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c
@@ -383,42 +383,18 @@ static LLVMValueRef si_nir_load_tcs_varyings(struct ac_shader_abi *abi, LLVMType
{
struct si_shader_context *ctx = si_shader_context_from_abi(abi);
struct si_shader_info *info = &ctx->shader->selector->info;
- LLVMValueRef value[4];
-
- if (load_input) {
- assert(ctx->shader->key.ge.opt.same_patch_vertices && !param_index);
-
- ubyte semantic = info->input[driver_location].semantic;
- /* Load the TCS input from a VGPR. */
- unsigned func_param = ctx->args.tcs_rel_ids.arg_index + 1 +
- si_shader_io_get_unique_index(semantic, false) * 4;
- for (unsigned i = component; i < component + num_components; i++) {
- value[i] = LLVMGetParam(ctx->main_fn, func_param + i);
- value[i] = LLVMBuildBitCast(ctx->ac.builder, value[i], type, "");
- }
- } else {
- ubyte semantic = info->output_semantic[driver_location];
-
- bool is_patch = vertex_index == NULL;
- assert((semantic >= VARYING_SLOT_PATCH0 ||
- semantic == VARYING_SLOT_TESS_LEVEL_INNER ||
- semantic == VARYING_SLOT_TESS_LEVEL_OUTER) == is_patch);
-
- LLVMValueRef dw_addr, stride;
- if (is_patch) {
- stride = NULL;
- dw_addr = get_tcs_out_current_patch_data_offset(ctx);
- } else {
- stride = get_tcs_out_vertex_dw_stride(ctx);
- dw_addr = get_tcs_out_current_patch_offset(ctx);
- }
+ assert(ctx->shader->key.ge.opt.same_patch_vertices && !param_index);
- dw_addr = get_dw_address_from_generic_indices(ctx, stride, dw_addr, vertex_index,
- param_index, semantic);
+ ubyte semantic = info->input[driver_location].semantic;
+ /* Load the TCS input from a VGPR. */
+ unsigned func_param = ctx->args.tcs_rel_ids.arg_index + 1 +
+ si_shader_io_get_unique_index(semantic, false) * 4;
- for (unsigned i = component; i < component + num_components; i++)
- value[i] = lshs_lds_load(ctx, type, i, dw_addr);
+ LLVMValueRef value[4];
+ for (unsigned i = component; i < component + num_components; i++) {
+ value[i] = LLVMGetParam(ctx->main_fn, func_param + i);
+ value[i] = LLVMBuildBitCast(ctx->ac.builder, value[i], type, "");
}
return ac_build_varying_gather_values(&ctx->ac, value, num_components, component);
@@ -455,96 +431,6 @@ static LLVMValueRef si_nir_load_input_tes(struct ac_shader_abi *abi, LLVMTypeRef
return ac_build_varying_gather_values(&ctx->ac, value, num_components, component);
}
-static void si_nir_store_output_tcs(struct ac_shader_abi *abi,
- LLVMValueRef vertex_index, LLVMValueRef param_index,
- LLVMValueRef src, unsigned writemask,
- unsigned component, unsigned location, unsigned driver_location)
-{
- struct si_shader_context *ctx = si_shader_context_from_abi(abi);
- struct si_shader_info *info = &ctx->shader->selector->info;
- LLVMValueRef dw_addr, stride;
- LLVMValueRef buffer, base, addr;
- LLVMValueRef values[8];
- bool is_tess_factor = false, is_tess_inner = false;
-
- ubyte semantic = info->output_semantic[driver_location];
-
- const bool is_const = !param_index;
- const bool is_patch = vertex_index == NULL;
-
- /* Invalid SPIR-V can cause this. */
- if ((semantic >= VARYING_SLOT_PATCH0 || semantic == VARYING_SLOT_TESS_LEVEL_INNER ||
- semantic == VARYING_SLOT_TESS_LEVEL_OUTER) != is_patch)
- return;
-
- if (!is_patch) {
- stride = get_tcs_out_vertex_dw_stride(ctx);
- dw_addr = get_tcs_out_current_patch_offset(ctx);
- dw_addr = get_dw_address_from_generic_indices(ctx, stride, dw_addr, vertex_index, param_index,
- semantic);
- } else {
- dw_addr = get_tcs_out_current_patch_data_offset(ctx);
- dw_addr = get_dw_address_from_generic_indices(ctx, NULL, dw_addr, vertex_index, param_index,
- semantic);
-
- if (is_const) {
- int semantic = info->output_semantic[driver_location];
-
- /* Always write tess factors into LDS for the TCS epilog. */
- if (semantic == VARYING_SLOT_TESS_LEVEL_INNER ||
- semantic == VARYING_SLOT_TESS_LEVEL_OUTER) {
- is_tess_factor = true;
- is_tess_inner = semantic == VARYING_SLOT_TESS_LEVEL_INNER;
- }
- }
- }
-
- buffer = ctx->tess_offchip_ring;
-
- base = ac_get_arg(&ctx->ac, ctx->args.tess_offchip_offset);
-
- addr =
- get_tcs_tes_buffer_address_from_generic_indices(ctx, vertex_index, param_index, semantic);
-
- for (unsigned chan = component; chan < 4; chan++) {
- if (!(writemask & (1 << chan)))
- continue;
- LLVMValueRef value = ac_llvm_extract_elem(&ctx->ac, src, chan - component);
-
- /* Skip LDS stores if there is no LDS read of this output. */
- if (info->output_readmask[driver_location] & (1 << chan) ||
- /* The epilog reads LDS if invocation 0 doesn't define tess factors. */
- (is_tess_factor &&
- !ctx->shader->selector->info.tessfactors_are_def_in_all_invocs))
- lshs_lds_store(ctx, chan, dw_addr, value);
-
- value = ac_to_integer(&ctx->ac, value);
- values[chan] = value;
-
- if (writemask != 0xF && !is_tess_factor) {
- LLVMValueRef voffset = LLVMBuildAdd(ctx->ac.builder, addr,
- LLVMConstInt(ctx->ac.i32, 4 * chan, 0), "");
- ac_build_buffer_store_dword(&ctx->ac, buffer, value, NULL, voffset, base, ac_glc);
- }
-
- /* Write tess factors into VGPRs for the epilog. */
- if (is_tess_factor && ctx->shader->selector->info.tessfactors_are_def_in_all_invocs) {
- if (!is_tess_inner) {
- LLVMBuildStore(ctx->ac.builder, value, /* outer */
- ctx->invoc0_tess_factors[chan]);
- } else if (chan < 2) {
- LLVMBuildStore(ctx->ac.builder, value, /* inner */
- ctx->invoc0_tess_factors[4 + chan]);
- }
- }
- }
-
- if (writemask == 0xF && !is_tess_factor) {
- LLVMValueRef value = ac_build_gather_values(&ctx->ac, values, 4);
- ac_build_buffer_store_dword(&ctx->ac, buffer, value, NULL, addr, base, ac_glc);
- }
-}
-
static void si_write_tess_factors(struct si_shader_context *ctx, union si_shader_part_key *key,
LLVMValueRef rel_patch_id, LLVMValueRef invocation_id,
LLVMValueRef tcs_out_current_patch_data_offset,
@@ -769,10 +655,25 @@ void si_llvm_tcs_build_end(struct si_shader_context *ctx)
ret = LLVMBuildInsertValue(builder, ret, rel_patch_id, vgpr++, "");
ret = LLVMBuildInsertValue(builder, ret, invocation_id, vgpr++, "");
- if (ctx->shader->selector->info.tessfactors_are_def_in_all_invocs) {
+ struct si_shader_info *info = &ctx->shader->selector->info;
+ if (info->tessfactors_are_def_in_all_invocs) {
vgpr++; /* skip the tess factor LDS offset */
+
+ /* get tess factor driver location */
+ int outer_loc = -1;
+ int inner_loc = -1;
+ for (int i = 0; i < info->num_outputs; i++) {
+ unsigned semantic = info->output_semantic[i];
+ if (semantic == VARYING_SLOT_TESS_LEVEL_OUTER)
+ outer_loc = i;
+ else if (semantic == VARYING_SLOT_TESS_LEVEL_INNER)
+ inner_loc = i;
+ }
+
for (unsigned i = 0; i < 6; i++) {
- LLVMValueRef value = LLVMBuildLoad(builder, ctx->invoc0_tess_factors[i], "");
+ int loc = i < 4 ? outer_loc : inner_loc;
+ LLVMValueRef value = loc < 0 ? LLVMGetUndef(ctx->ac.f32) :
+ LLVMBuildLoad(builder, ctx->abi.outputs[loc * 4 + i % 4], "");
value = ac_to_float(&ctx->ac, value);
ret = LLVMBuildInsertValue(builder, ret, value, vgpr++, "");
}
@@ -920,7 +821,6 @@ void si_llvm_build_tcs_epilog(struct si_shader_context *ctx, union si_shader_par
void si_llvm_init_tcs_callbacks(struct si_shader_context *ctx)
{
ctx->abi.load_tess_varyings = si_nir_load_tcs_varyings;
- ctx->abi.store_tcs_outputs = si_nir_store_output_tcs;
}
void si_llvm_init_tes_callbacks(struct si_shader_context *ctx, bool ngg_cull_shader)
More information about the mesa-commit mailing list