Mesa (main): radeonsi: replace llvm ls/hs interface lds ops with nir lowered ones
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Tue Jun 7 02:12:43 UTC 2022
Module: Mesa
Branch: main
Commit: 61c500ee9b88b0b5097b470044432ec0b5428660
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=61c500ee9b88b0b5097b470044432ec0b5428660
Author: Qiang Yu <yuq825 at gmail.com>
Date: Mon May 9 21:28:26 2022 +0800
radeonsi: replace llvm ls/hs interface lds ops with nir lowered ones
Use the ac NIR lowering passes to generate these LDS load/store ops explicitly.
Reviewed-by: Marek Olšák <marek.olsak at amd.com>
Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer at amd.com>
Signed-off-by: Qiang Yu <yuq825 at gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16418>
---
src/amd/llvm/ac_nir_to_llvm.c | 9 +-
src/amd/llvm/ac_shader_abi.h | 3 +-
src/gallium/drivers/radeonsi/si_shader.c | 40 +++++++-
src/gallium/drivers/radeonsi/si_shader_internal.h | 1 -
src/gallium/drivers/radeonsi/si_shader_llvm.c | 7 +-
src/gallium/drivers/radeonsi/si_shader_llvm_tess.c | 105 ++++++---------------
6 files changed, 69 insertions(+), 96 deletions(-)
diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c
index de572aa896d..074774e982f 100644
--- a/src/amd/llvm/ac_nir_to_llvm.c
+++ b/src/amd/llvm/ac_nir_to_llvm.c
@@ -3476,17 +3476,10 @@ static LLVMValueRef visit_load(struct ac_nir_context *ctx, nir_intrinsic_instr *
if (ctx->stage == MESA_SHADER_TESS_CTRL ||
(ctx->stage == MESA_SHADER_TESS_EVAL && !is_output)) {
- bool vertex_index_is_invoc_id =
- vertex_index_src &&
- vertex_index_src->ssa->parent_instr->type == nir_instr_type_intrinsic &&
- nir_instr_as_intrinsic(vertex_index_src->ssa->parent_instr)->intrinsic ==
- nir_intrinsic_load_invocation_id;
-
LLVMValueRef result = ctx->abi->load_tess_varyings(ctx->abi, component_type,
vertex_index, indir_index,
base, component,
- count, !is_output,
- vertex_index_is_invoc_id);
+ count, !is_output);
if (instr->dest.ssa.bit_size == 16) {
result = ac_to_integer(&ctx->ac, result);
result = LLVMBuildTrunc(ctx->ac.builder, result, dest_type, "");
diff --git a/src/amd/llvm/ac_shader_abi.h b/src/amd/llvm/ac_shader_abi.h
index 2d2697b2e71..4584d52d120 100644
--- a/src/amd/llvm/ac_shader_abi.h
+++ b/src/amd/llvm/ac_shader_abi.h
@@ -67,8 +67,7 @@ struct ac_shader_abi {
LLVMValueRef (*load_tess_varyings)(struct ac_shader_abi *abi, LLVMTypeRef type,
LLVMValueRef vertex_index, LLVMValueRef param_index,
unsigned driver_location, unsigned component,
- unsigned num_components,
- bool load_inputs, bool vertex_index_is_invoc_id);
+ unsigned num_components, bool load_inputs);
void (*store_tcs_outputs)(struct ac_shader_abi *abi,
LLVMValueRef vertex_index, LLVMValueRef param_index,
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 0ec4f8c42f8..3e91215a627 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -1490,6 +1490,30 @@ static bool si_nir_kill_outputs(nir_shader *nir, const union si_shader_key *key)
return progress;
}
+static unsigned si_map_io_driver_location(unsigned semantic)
+{
+ return si_shader_io_get_unique_index(semantic, false);
+}
+
+static bool si_lower_io_to_mem(const union si_shader_key *key,
+ nir_shader *nir,
+ uint64_t tcs_vgpr_only_inputs)
+{
+ if (nir->info.stage == MESA_SHADER_VERTEX) {
+ if (key->ge.as_ls) {
+ NIR_PASS_V(nir, ac_nir_lower_ls_outputs_to_mem, si_map_io_driver_location,
+ key->ge.opt.same_patch_vertices, tcs_vgpr_only_inputs);
+ return true;
+ }
+ } else if (nir->info.stage == MESA_SHADER_TESS_CTRL) {
+ NIR_PASS_V(nir, ac_nir_lower_hs_inputs_to_mem, si_map_io_driver_location,
+ key->ge.opt.same_patch_vertices);
+ return true;
+ }
+
+ return false;
+}
+
struct nir_shader *si_get_nir_shader(struct si_shader_selector *sel,
const union si_shader_key *key,
bool *free_nir,
@@ -1603,10 +1627,22 @@ struct nir_shader *si_get_nir_shader(struct si_shader_selector *sel,
* this should be done after that.
*/
progress2 |= ac_nir_lower_indirect_derefs(nir, sel->screen->info.gfx_level);
- if (progress2)
+
+ bool opt_offsets = si_lower_io_to_mem(key, nir, tcs_vgpr_only_inputs);
+
+ if (progress2 || opt_offsets)
si_nir_opts(sel->screen, nir, false);
- if (progress || progress2)
+ if (opt_offsets) {
+ static const nir_opt_offsets_options offset_options = {
+ .uniform_max = 0,
+ .buffer_max = ~0,
+ .shared_max = ~0,
+ };
+ NIR_PASS_V(nir, nir_opt_offsets, &offset_options);
+ }
+
+ if (progress || progress2 || opt_offsets)
si_nir_late_opts(nir);
NIR_PASS_V(nir, nir_divergence_analysis);
diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h
index 907dc04b796..ee04b3708d7 100644
--- a/src/gallium/drivers/radeonsi/si_shader_internal.h
+++ b/src/gallium/drivers/radeonsi/si_shader_internal.h
@@ -39,7 +39,6 @@ struct si_shader_output_values {
struct si_shader_context {
struct ac_llvm_context ac;
struct si_shader *shader;
- struct si_shader_selector *next_shader_sel;
struct si_screen *screen;
struct pipe_stream_output_info so;
diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm.c b/src/gallium/drivers/radeonsi/si_shader_llvm.c
index 2a15e71086e..a5b1389f24c 100644
--- a/src/gallium/drivers/radeonsi/si_shader_llvm.c
+++ b/src/gallium/drivers/radeonsi/si_shader_llvm.c
@@ -743,10 +743,10 @@ static LLVMValueRef si_llvm_load_intrinsic(struct ac_shader_abi *abi, nir_intrin
}
case nir_intrinsic_load_tess_level_outer:
- return abi->load_tess_varyings(abi, ctx->ac.f32, NULL, NULL, info->num_inputs, 0, 4, true, false);
+ return abi->load_tess_varyings(abi, ctx->ac.f32, NULL, NULL, info->num_inputs, 0, 4, true);
case nir_intrinsic_load_tess_level_inner:
- return abi->load_tess_varyings(abi, ctx->ac.f32, NULL, NULL, info->num_inputs + 1, 0, 4, true, false);
+ return abi->load_tess_varyings(abi, ctx->ac.f32, NULL, NULL, info->num_inputs + 1, 0, 4, true);
case nir_intrinsic_load_tess_level_outer_default:
case nir_intrinsic_load_tess_level_inner_default: {
@@ -1241,9 +1241,6 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *
si_llvm_build_tcs_epilog(&ctx, &tcs_epilog_key);
parts[3] = ctx.main_fn;
- /* VS as LS main part */
- ctx.next_shader_sel = ctx.shader->selector;
-
struct si_shader shader_ls = {};
shader_ls.selector = ls;
shader_ls.key.ge.part.vs.prolog = shader->key.ge.part.tcs.ls_prolog;
diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c b/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c
index 72c355378bb..24e1858e3a2 100644
--- a/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c
+++ b/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c
@@ -384,44 +384,33 @@ void si_llvm_preload_tes_rings(struct si_shader_context *ctx)
static LLVMValueRef si_nir_load_tcs_varyings(struct ac_shader_abi *abi, LLVMTypeRef type,
LLVMValueRef vertex_index, LLVMValueRef param_index,
unsigned driver_location, unsigned component,
- unsigned num_components, bool load_input,
- bool vertex_index_is_invoc_id)
+ unsigned num_components, bool load_input)
{
struct si_shader_context *ctx = si_shader_context_from_abi(abi);
struct si_shader_info *info = &ctx->shader->selector->info;
- LLVMValueRef dw_addr, stride;
- ubyte semantic;
+ LLVMValueRef value[4];
if (load_input) {
- semantic = info->input[driver_location].semantic;
- } else {
- semantic = info->output_semantic[driver_location];
- }
+ assert(ctx->shader->key.ge.opt.same_patch_vertices && !param_index);
- /* Load the TCS input from a VGPR if possible. */
- if (ctx->shader->key.ge.opt.same_patch_vertices &&
- load_input && vertex_index_is_invoc_id && !param_index) {
+ ubyte semantic = info->input[driver_location].semantic;
+ /* Load the TCS input from a VGPR. */
unsigned func_param = ctx->args.tcs_rel_ids.arg_index + 1 +
- si_shader_io_get_unique_index(semantic, false) * 4;
- LLVMValueRef value[4];
+ si_shader_io_get_unique_index(semantic, false) * 4;
for (unsigned i = component; i < component + num_components; i++) {
value[i] = LLVMGetParam(ctx->main_fn, func_param + i);
value[i] = LLVMBuildBitCast(ctx->ac.builder, value[i], type, "");
}
+ } else {
+ ubyte semantic = info->output_semantic[driver_location];
- return ac_build_varying_gather_values(&ctx->ac, value, num_components, component);
- }
-
- bool is_patch = vertex_index == NULL;
- assert((semantic >= VARYING_SLOT_PATCH0 ||
- semantic == VARYING_SLOT_TESS_LEVEL_INNER ||
- semantic == VARYING_SLOT_TESS_LEVEL_OUTER) == is_patch);
+ bool is_patch = vertex_index == NULL;
+ assert((semantic >= VARYING_SLOT_PATCH0 ||
+ semantic == VARYING_SLOT_TESS_LEVEL_INNER ||
+ semantic == VARYING_SLOT_TESS_LEVEL_OUTER) == is_patch);
- if (load_input) {
- stride = si_get_tcs_in_vertex_dw_stride(ctx);
- dw_addr = get_tcs_in_current_patch_offset(ctx);
- } else {
+ LLVMValueRef dw_addr, stride;
if (is_patch) {
stride = NULL;
dw_addr = get_tcs_out_current_patch_data_offset(ctx);
@@ -429,14 +418,13 @@ static LLVMValueRef si_nir_load_tcs_varyings(struct ac_shader_abi *abi, LLVMType
stride = get_tcs_out_vertex_dw_stride(ctx);
dw_addr = get_tcs_out_current_patch_offset(ctx);
}
- }
- dw_addr = get_dw_address_from_generic_indices(ctx, stride, dw_addr, vertex_index, param_index,
- semantic);
+ dw_addr = get_dw_address_from_generic_indices(ctx, stride, dw_addr, vertex_index,
+ param_index, semantic);
- LLVMValueRef value[4];
- for (unsigned i = component; i < component + num_components; i++)
- value[i] = lshs_lds_load(ctx, type, i, dw_addr);
+ for (unsigned i = component; i < component + num_components; i++)
+ value[i] = lshs_lds_load(ctx, type, i, dw_addr);
+ }
return ac_build_varying_gather_values(&ctx->ac, value, num_components, component);
}
@@ -444,8 +432,7 @@ static LLVMValueRef si_nir_load_tcs_varyings(struct ac_shader_abi *abi, LLVMType
static LLVMValueRef si_nir_load_input_tes(struct ac_shader_abi *abi, LLVMTypeRef type,
LLVMValueRef vertex_index, LLVMValueRef param_index,
unsigned driver_location, unsigned component,
- unsigned num_components,
- bool load_input, bool vertex_index_is_invoc_id)
+ unsigned num_components, bool load_input)
{
struct si_shader_context *ctx = si_shader_context_from_abi(abi);
struct si_shader_info *info = &ctx->shader->selector->info;
@@ -877,58 +864,20 @@ void si_llvm_ls_build_end(struct si_shader_context *ctx)
{
struct si_shader *shader = ctx->shader;
struct si_shader_info *info = &shader->selector->info;
- unsigned i, chan;
- LLVMValueRef vertex_id;
- if (ctx->screen->info.gfx_level >= GFX11) {
- vertex_id = ac_build_imad(&ctx->ac, si_unpack_param(ctx, ctx->args.tcs_wave_id, 0, 5),
- LLVMConstInt(ctx->ac.i32, ctx->ac.wave_size, 0),
- ac_get_thread_id(&ctx->ac));
- } else {
- vertex_id = ac_get_arg(&ctx->ac, ctx->args.vs_rel_patch_id);
- }
- LLVMValueRef vertex_dw_stride = si_get_tcs_in_vertex_dw_stride(ctx);
- LLVMValueRef base_dw_addr = LLVMBuildMul(ctx->ac.builder, vertex_id, vertex_dw_stride, "");
LLVMValueRef *addrs = ctx->abi.outputs;
unsigned ret_offset = 8 + GFX9_TCS_NUM_USER_SGPR + 2;
- /* Write outputs to LDS. The next shader (TCS aka HS) will read
- * its inputs from it. */
- for (i = 0; i < info->num_outputs; i++) {
- unsigned semantic = info->output_semantic[i];
-
- /* The ARB_shader_viewport_layer_array spec contains the
- * following issue:
- *
- * 2) What happens if gl_ViewportIndex or gl_Layer is
- * written in the vertex shader and a geometry shader is
- * present?
- *
- * RESOLVED: The value written by the last vertex processing
- * stage is used. If the last vertex processing stage
- * (vertex, tessellation evaluation or geometry) does not
- * statically assign to gl_ViewportIndex or gl_Layer, index
- * or layer zero is assumed.
- *
- * So writes to those outputs in VS-as-LS are simply ignored.
- */
- if (semantic == VARYING_SLOT_LAYER || semantic == VARYING_SLOT_VIEWPORT)
- continue;
-
- int param = si_shader_io_get_unique_index(semantic, false);
- LLVMValueRef dw_addr =
- LLVMBuildAdd(ctx->ac.builder, base_dw_addr, LLVMConstInt(ctx->ac.i32, param * 4, 0), "");
-
- for (chan = 0; chan < 4; chan++) {
- if (!(info->output_usagemask[i] & (1 << chan)))
- continue;
+ if (shader->key.ge.opt.same_patch_vertices) {
+ for (unsigned i = 0; i < info->num_outputs; i++) {
+ unsigned semantic = info->output_semantic[i];
+ int param = si_shader_io_get_unique_index(semantic, false);
- LLVMValueRef value = LLVMBuildLoad(ctx->ac.builder, addrs[4 * i + chan], "");
+ for (unsigned chan = 0; chan < 4; chan++) {
+ if (!(info->output_usagemask[i] & (1 << chan)))
+ continue;
- if (!shader->key.ge.opt.same_patch_vertices ||
- !(ctx->next_shader_sel->info.tcs_vgpr_only_inputs & (1ull << semantic)))
- lshs_lds_store(ctx, chan, dw_addr, value);
+ LLVMValueRef value = LLVMBuildLoad(ctx->ac.builder, addrs[4 * i + chan], "");
- if (shader->key.ge.opt.same_patch_vertices) {
ctx->return_value = LLVMBuildInsertValue(ctx->ac.builder, ctx->return_value,
value, ret_offset + param * 4 + chan, "");
}
More information about the mesa-commit mailing list