Mesa (main): ac/nir: Analyze culling shaders to remember which inputs are used when.
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Wed Jul 14 00:44:53 UTC 2021
Module: Mesa
Branch: main
Commit: fc1fabbabf3f8ac2f38ad51d8547182b523a33fb
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=fc1fabbabf3f8ac2f38ad51d8547182b523a33fb
Author: Timur Kristóf <timur.kristof at gmail.com>
Date: Mon Jul 5 15:17:10 2021 +0200
ac/nir: Analyze culling shaders to remember which inputs are used when.
These will be useful for some optimizations.
Signed-off-by: Timur Kristóf <timur.kristof at gmail.com>
Reviewed-by: Daniel Schürmann <daniel at schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10525>
---
src/amd/common/ac_nir.h | 2 +
src/amd/common/ac_nir_lower_ngg.c | 97 +++++++++++++++++++++++++++++++++++++++
2 files changed, 99 insertions(+)
diff --git a/src/amd/common/ac_nir.h b/src/amd/common/ac_nir.h
index 6a32e64e159..4f4076cdd29 100644
--- a/src/amd/common/ac_nir.h
+++ b/src/amd/common/ac_nir.h
@@ -96,6 +96,8 @@ typedef struct
unsigned lds_bytes_if_culling_off;
bool can_cull;
bool passthrough;
+ uint64_t nggc_inputs_read_by_pos;
+ uint64_t nggc_inputs_read_by_others;
} ac_nir_ngg_config;
ac_nir_ngg_config
diff --git a/src/amd/common/ac_nir_lower_ngg.c b/src/amd/common/ac_nir_lower_ngg.c
index d21e9cb099c..405298a00d5 100644
--- a/src/amd/common/ac_nir_lower_ngg.c
+++ b/src/amd/common/ac_nir_lower_ngg.c
@@ -26,6 +26,12 @@
#include "nir_builder.h"
#include "u_math.h"
+enum {
+ nggc_passflag_used_by_pos = 1,
+ nggc_passflag_used_by_other = 2,
+ nggc_passflag_used_by_both = nggc_passflag_used_by_pos | nggc_passflag_used_by_other,
+};
+
typedef struct
{
nir_variable *position_value_var;
@@ -42,6 +48,9 @@ typedef struct
unsigned provoking_vtx_idx;
unsigned max_es_num_vertices;
unsigned total_lds_bytes;
+
+ uint64_t inputs_needed_by_pos;
+ uint64_t inputs_needed_by_others;
} lower_ngg_nogs_state;
typedef struct
@@ -628,6 +637,86 @@ compact_vertices_after_culling(nir_builder *b,
nir_pop_if(b, if_gs_accepted);
}
+static void
+analyze_shader_before_culling_walk(nir_ssa_def *ssa,
+ uint8_t flag,
+ lower_ngg_nogs_state *nogs_state)
+{
+ nir_instr *instr = ssa->parent_instr;
+ uint8_t old_pass_flags = instr->pass_flags;
+ instr->pass_flags |= flag;
+
+ if (instr->pass_flags == old_pass_flags)
+ return; /* Already visited. */
+
+ switch (instr->type) {
+ case nir_instr_type_intrinsic: {
+ nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+
+ /* VS input loads and SSBO loads are actually VRAM reads on AMD HW. */
+ switch (intrin->intrinsic) {
+ case nir_intrinsic_load_input: {
+ nir_io_semantics in_io_sem = nir_intrinsic_io_semantics(intrin);
+ uint64_t in_mask = UINT64_C(1) << (uint64_t) in_io_sem.location;
+ if (instr->pass_flags & nggc_passflag_used_by_pos)
+ nogs_state->inputs_needed_by_pos |= in_mask;
+ else if (instr->pass_flags & nggc_passflag_used_by_other)
+ nogs_state->inputs_needed_by_others |= in_mask;
+ break;
+ }
+ default:
+ break;
+ }
+
+ break;
+ }
+ case nir_instr_type_alu: {
+ nir_alu_instr *alu = nir_instr_as_alu(instr);
+ unsigned num_srcs = nir_op_infos[alu->op].num_inputs;
+
+ for (unsigned i = 0; i < num_srcs; ++i) {
+ analyze_shader_before_culling_walk(alu->src[i].src.ssa, flag, nogs_state);
+ }
+
+ break;
+ }
+ case nir_instr_type_phi: {
+ nir_phi_instr *phi = nir_instr_as_phi(instr);
+ nir_foreach_phi_src_safe(phi_src, phi) {
+ analyze_shader_before_culling_walk(phi_src->src.ssa, flag, nogs_state);
+ }
+
+ break;
+ }
+ default:
+ break;
+ }
+}
+
+static void
+analyze_shader_before_culling(nir_shader *shader, lower_ngg_nogs_state *nogs_state)
+{
+ nir_foreach_function(func, shader) {
+ nir_foreach_block(block, func->impl) {
+ nir_foreach_instr(instr, block) {
+ instr->pass_flags = 0;
+
+ if (instr->type != nir_instr_type_intrinsic)
+ continue;
+
+ nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+ if (intrin->intrinsic != nir_intrinsic_store_output)
+ continue;
+
+ nir_io_semantics io_sem = nir_intrinsic_io_semantics(intrin);
+ nir_ssa_def *store_val = intrin->src[0].ssa;
+ uint8_t flag = io_sem.location == VARYING_SLOT_POS ? nggc_passflag_used_by_pos : nggc_passflag_used_by_other;
+ analyze_shader_before_culling_walk(store_val, flag, nogs_state);
+ }
+ }
+ }
+}
+
static void
add_deferred_attribute_culling(nir_builder *b, nir_cf_list *original_extracted_cf, lower_ngg_nogs_state *nogs_state)
{
@@ -932,6 +1021,12 @@ ac_nir_lower_ngg_nogs(nir_shader *shader,
nir_builder *b = &builder; /* This is to avoid the & */
nir_builder_init(b, impl);
+ if (can_cull) {
+ /* We need divergence info for culling shaders. */
+ nir_divergence_analysis(shader);
+ analyze_shader_before_culling(shader, &state);
+ }
+
nir_cf_list extracted;
nir_cf_extract(&extracted, nir_before_cf_list(&impl->body), nir_after_cf_list(&impl->body));
b->cursor = nir_before_cf_list(&impl->body);
@@ -1032,6 +1127,8 @@ ac_nir_lower_ngg_nogs(nir_shader *shader,
.lds_bytes_if_culling_off = lds_bytes_if_culling_off,
.can_cull = can_cull,
.passthrough = passthrough,
+ .nggc_inputs_read_by_pos = state.inputs_needed_by_pos,
+ .nggc_inputs_read_by_others = state.inputs_needed_by_others,
};
return ret;
More information about the mesa-commit
mailing list