[Mesa-dev] [PATCH v2 11/11] intel/nir: Enable nir_opt_find_array_copies
Jason Ekstrand
jason at jlekstrand.net
Sun Jul 29 05:44:42 UTC 2018
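We have to be a bit careful with nir_opt_find_array_copies because we want
it to run in the optimization loop but only in the first brw_nir_optimize
call. Later calls assume that we've already lowered away copy_deref
instructions, and we don't want to introduce any more. To handle this,
brw_nir_optimize gains an allow_copies parameter that only the first call
sets to true.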
Shader-db results on Kaby Lake:
total instructions in shared programs: 15176942 -> 15176942 (0.00%)
instructions in affected programs: 0 -> 0
helped: 0
HURT: 0
In spite of the lack of any shader-db improvement, this patch completely
eliminates spilling in the Batman: Arkham City tessellation shaders.
This is because we are now able to detect that the temporary array
created by DXVK for storing TCS inputs is a copy of the input arrays, so
we can use indirect URB reads instead of making a copy of 4.5 KiB of input
data and then indirectly indexing into that copy with if-ladders.
---
src/intel/compiler/brw_nir.c | 16 +++++++++-------
src/intel/compiler/brw_nir.h | 3 ++-
2 files changed, 11 insertions(+), 8 deletions(-)
diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c
index 5e9da9e1ef2..2417c0cd618 100644
--- a/src/intel/compiler/brw_nir.c
+++ b/src/intel/compiler/brw_nir.c
@@ -533,7 +533,7 @@ brw_nir_no_indirect_mask(const struct brw_compiler *compiler,
nir_shader *
brw_nir_optimize(nir_shader *nir, const struct brw_compiler *compiler,
- bool is_scalar)
+ bool is_scalar, bool allow_copies)
{
nir_variable_mode indirect_mask =
brw_nir_no_indirect_mask(compiler, nir->info.stage);
@@ -544,6 +544,8 @@ brw_nir_optimize(nir_shader *nir, const struct brw_compiler *compiler,
OPT(nir_split_array_vars, nir_var_local);
OPT(nir_shrink_vec_array_vars, nir_var_local);
OPT(nir_lower_vars_to_ssa);
+ if (allow_copies)
+ OPT(nir_opt_find_array_copies);
OPT(nir_opt_copy_prop_vars);
if (is_scalar) {
@@ -664,7 +666,7 @@ brw_preprocess_nir(const struct brw_compiler *compiler, nir_shader *nir)
nir_lower_isign64 |
nir_lower_divmod64);
- nir = brw_nir_optimize(nir, compiler, is_scalar);
+ nir = brw_nir_optimize(nir, compiler, is_scalar, true);
/* This needs to be run after the first optimization pass but before we
* lower indirect derefs away
@@ -701,7 +703,7 @@ brw_preprocess_nir(const struct brw_compiler *compiler, nir_shader *nir)
nir_lower_indirect_derefs(nir, indirect_mask);
/* Get rid of split copies */
- nir = brw_nir_optimize(nir, compiler, is_scalar);
+ nir = brw_nir_optimize(nir, compiler, is_scalar, false);
OPT(nir_remove_dead_variables, nir_var_local);
@@ -730,11 +732,11 @@ brw_nir_link_shaders(const struct brw_compiler *compiler,
const bool p_is_scalar =
compiler->scalar_stage[(*producer)->info.stage];
- *producer = brw_nir_optimize(*producer, compiler, p_is_scalar);
+ *producer = brw_nir_optimize(*producer, compiler, p_is_scalar, false);
const bool c_is_scalar =
compiler->scalar_stage[(*producer)->info.stage];
- *consumer = brw_nir_optimize(*consumer, compiler, c_is_scalar);
+ *consumer = brw_nir_optimize(*consumer, compiler, c_is_scalar, false);
}
}
@@ -761,7 +763,7 @@ brw_postprocess_nir(nir_shader *nir, const struct brw_compiler *compiler,
OPT(nir_opt_algebraic_before_ffma);
} while (progress);
- nir = brw_nir_optimize(nir, compiler, is_scalar);
+ nir = brw_nir_optimize(nir, compiler, is_scalar, false);
if (devinfo->gen >= 6) {
/* Try and fuse multiply-adds */
@@ -857,7 +859,7 @@ brw_nir_apply_sampler_key(nir_shader *nir,
if (nir_lower_tex(nir, &tex_options)) {
nir_validate_shader(nir);
- nir = brw_nir_optimize(nir, compiler, is_scalar);
+ nir = brw_nir_optimize(nir, compiler, is_scalar, false);
}
return nir;
diff --git a/src/intel/compiler/brw_nir.h b/src/intel/compiler/brw_nir.h
index 7d82edafe46..0193457c577 100644
--- a/src/intel/compiler/brw_nir.h
+++ b/src/intel/compiler/brw_nir.h
@@ -155,7 +155,8 @@ bool brw_nir_opt_peephole_ffma(nir_shader *shader);
nir_shader *brw_nir_optimize(nir_shader *nir,
const struct brw_compiler *compiler,
- bool is_scalar);
+ bool is_scalar,
+ bool allow_copies);
#define BRW_NIR_FRAG_OUTPUT_INDEX_SHIFT 0
#define BRW_NIR_FRAG_OUTPUT_INDEX_MASK INTEL_MASK(0, 0)
--
2.17.1
More information about the mesa-dev
mailing list