Mesa (main): nir/gcm: pin some instructions which require uniform sources
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Tue Aug 24 17:24:13 UTC 2021
Module: Mesa
Branch: main
Commit: 3d228b692679bc713fa037f8cf796d1cf361c920
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=3d228b692679bc713fa037f8cf796d1cf361c920
Author: Rhys Perry <pendingchaos02 at gmail.com>
Date: Wed Aug 18 10:52:20 2021 +0100
nir/gcm: pin some instructions which require uniform sources
fossil-db (Sienna Cichlid, GCM enabled):
Totals from 6192 (4.12% of 150170) affected shaders:
VGPRs: 548392 -> 542040 (-1.16%)
SpillSGPRs: 3702 -> 3990 (+7.78%); split: -0.54%, +8.32%
CodeSize: 62418488 -> 62481516 (+0.10%); split: -0.07%, +0.17%
MaxWaves: 70582 -> 71718 (+1.61%)
Instrs: 11768497 -> 11795079 (+0.23%); split: -0.07%, +0.30%
Latency: 445891848 -> 523561297 (+17.42%); split: -0.07%, +17.49%
InvThroughput: 115675481 -> 121494913 (+5.03%); split: -0.09%, +5.12%
VClause: 164914 -> 164934 (+0.01%); split: -0.05%, +0.06%
SClause: 405991 -> 395302 (-2.63%); split: -2.64%, +0.00%
Copies: 907216 -> 926429 (+2.12%); split: -1.11%, +3.23%
Branches: 456373 -> 457478 (+0.24%); split: -0.13%, +0.38%
PreSGPRs: 648030 -> 642953 (-0.78%); split: -0.88%, +0.10%
PreVGPRs: 522425 -> 516355 (-1.16%); split: -1.16%, +0.00%
Seems to affect Detroit: Become Human and Cyberpunk 2077. The Cyberpunk
2077 changes look like a fixed bug. At least some of the Detroit: Become
Human changes could probably be removed with better divergence analysis.
Signed-off-by: Rhys Perry <pendingchaos02 at gmail.com>
Reviewed-by: Timothy Arceri <tarceri at itsqueeze.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12444>
---
src/compiler/nir/nir_opt_gcm.c | 109 +++++++++++++++++++++++++++++++++++++----
1 file changed, 100 insertions(+), 9 deletions(-)
diff --git a/src/compiler/nir/nir_opt_gcm.c b/src/compiler/nir/nir_opt_gcm.c
index 853b630b224..48f1aed2a7e 100644
--- a/src/compiler/nir/nir_opt_gcm.c
+++ b/src/compiler/nir/nir_opt_gcm.c
@@ -222,6 +222,74 @@ is_src_scalarizable(nir_src *src)
}
}
+static bool
+is_binding_dynamically_uniform(nir_src src)
+{
+ nir_binding binding = nir_chase_binding(src);
+ if (!binding.success)
+ return false;
+
+ for (unsigned i = 0; i < binding.num_indices; i++) {
+ if (!nir_src_is_dynamically_uniform(binding.indices[i]))
+ return false;
+ }
+
+ return true;
+}
+
+static void
+pin_intrinsic(nir_intrinsic_instr *intrin)
+{
+ nir_instr *instr = &intrin->instr;
+
+ if (!nir_intrinsic_can_reorder(intrin)) {
+ instr->pass_flags = GCM_INSTR_PINNED;
+ return;
+ }
+
+ instr->pass_flags = 0;
+
+ /* If the intrinsic requires a uniform source, we can't safely move it across non-uniform
+ * control flow if that source is not uniform at the point where the source is defined.
+ * Stores and atomics can never be re-ordered, so we don't have to consider them here.
+ */
+ bool non_uniform = nir_intrinsic_has_access(intrin) &&
+ (nir_intrinsic_access(intrin) & ACCESS_NON_UNIFORM);
+ if (!non_uniform &&
+ (intrin->intrinsic == nir_intrinsic_load_ubo ||
+ intrin->intrinsic == nir_intrinsic_load_ssbo ||
+ intrin->intrinsic == nir_intrinsic_get_ubo_size ||
+ intrin->intrinsic == nir_intrinsic_get_ssbo_size ||
+ nir_intrinsic_has_image_dim(intrin) ||
+ ((intrin->intrinsic == nir_intrinsic_load_deref ||
+ intrin->intrinsic == nir_intrinsic_deref_buffer_array_length) &&
+ nir_deref_mode_may_be(nir_src_as_deref(intrin->src[0]),
+ nir_var_mem_ubo | nir_var_mem_ssbo)))) {
+ if (!is_binding_dynamically_uniform(intrin->src[0]))
+ instr->pass_flags = GCM_INSTR_PINNED;
+ } else if (intrin->intrinsic == nir_intrinsic_load_push_constant) {
+ if (!nir_src_is_dynamically_uniform(intrin->src[0]))
+ instr->pass_flags = GCM_INSTR_PINNED;
+ } else if (intrin->intrinsic == nir_intrinsic_load_deref &&
+ nir_deref_mode_is(nir_src_as_deref(intrin->src[0]),
+ nir_var_mem_push_const)) {
+ nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
+ while (deref->deref_type != nir_deref_type_var) {
+ if ((deref->deref_type == nir_deref_type_array ||
+ deref->deref_type == nir_deref_type_ptr_as_array) &&
+ !nir_src_is_dynamically_uniform(deref->arr.index)) {
+ instr->pass_flags = GCM_INSTR_PINNED;
+ return;
+ }
+ deref = nir_deref_instr_parent(deref);
+ if (!deref) {
+ instr->pass_flags = GCM_INSTR_PINNED;
+ return;
+ }
+ }
+ }
+}
+
/* Walks the instruction list and marks immovable instructions as pinned or
* placed.
*
@@ -265,24 +333,47 @@ gcm_pin_instructions(nir_function_impl *impl, struct gcm_state *state)
}
break;
- case nir_instr_type_tex:
- if (nir_tex_instr_has_implicit_derivative(nir_instr_as_tex(instr)))
+ case nir_instr_type_tex: {
+ nir_tex_instr *tex = nir_instr_as_tex(instr);
+ if (nir_tex_instr_has_implicit_derivative(tex))
instr->pass_flags = GCM_INSTR_SCHEDULE_EARLIER_ONLY;
+
+ for (unsigned i = 0; i < tex->num_srcs; i++) {
+ nir_tex_src *src = &tex->src[i];
+ switch (src->src_type) {
+ case nir_tex_src_texture_deref:
+ if (!tex->texture_non_uniform && !is_binding_dynamically_uniform(src->src))
+ instr->pass_flags = GCM_INSTR_PINNED;
+ break;
+ case nir_tex_src_sampler_deref:
+ if (!tex->sampler_non_uniform && !is_binding_dynamically_uniform(src->src))
+ instr->pass_flags = GCM_INSTR_PINNED;
+ break;
+ case nir_tex_src_texture_offset:
+ case nir_tex_src_texture_handle:
+ if (!tex->texture_non_uniform && !nir_src_is_dynamically_uniform(src->src))
+ instr->pass_flags = GCM_INSTR_PINNED;
+ break;
+ case nir_tex_src_sampler_offset:
+ case nir_tex_src_sampler_handle:
+ if (!tex->sampler_non_uniform && !nir_src_is_dynamically_uniform(src->src))
+ instr->pass_flags = GCM_INSTR_PINNED;
+ break;
+ default:
+ break;
+ }
+ }
break;
+ }
case nir_instr_type_deref:
case nir_instr_type_load_const:
instr->pass_flags = 0;
break;
- case nir_instr_type_intrinsic: {
- if (nir_intrinsic_can_reorder(nir_instr_as_intrinsic(instr))) {
- instr->pass_flags = 0;
- } else {
- instr->pass_flags = GCM_INSTR_PINNED;
- }
+ case nir_instr_type_intrinsic:
+ pin_intrinsic(nir_instr_as_intrinsic(instr));
break;
- }
case nir_instr_type_jump:
case nir_instr_type_ssa_undef:
More information about the mesa-commit mailing list