Mesa (master): nir/lower_vec_to_movs: don't vectorize unsupported ops
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Mon Jan 11 13:31:36 UTC 2021
Module: Mesa
Branch: master
Commit: faaba0d6afe0c5f6985345c7c6226435658d196a
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=faaba0d6afe0c5f6985345c7c6226435658d196a
Author: Erico Nunes <nunes.erico at gmail.com>
Date: Sun Aug 30 15:07:23 2020 +0200
nir/lower_vec_to_movs: don't vectorize unsupported ops
If the instruction being coalesced would be vectorized but the target
doesn't support vectorizing that op, skip coalescing.
Reuse the callbacks from alu_to_scalar to describe which ops should not
be vectorized.
Signed-off-by: Erico Nunes <nunes.erico at gmail.com>
Reviewed-by: Jason Ekstrand <jason at jlekstrand.net>
Reviewed-by: Eric Anholt <eric at anholt.net>
Acked-by: Alyssa Rosenzweig <alyssa.rosenzweig at collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6506>
---
src/compiler/nir/nir.h | 11 ++++++++-
src/compiler/nir/nir_lower_vec_to_movs.c | 37 +++++++++++++++++++++++-----
src/gallium/auxiliary/nir/nir_to_tgsi.c | 2 +-
src/gallium/drivers/freedreno/a2xx/ir2_nir.c | 2 +-
src/gallium/drivers/lima/lima_program.c | 13 +++++++++-
src/intel/compiler/brw_nir.c | 2 +-
src/panfrost/midgard/midgard_compile.c | 2 +-
7 files changed, 57 insertions(+), 12 deletions(-)
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 81450cf62a2..f4963ef7060 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -4088,6 +4088,14 @@ static inline bool should_print_nir(nir_shader *shader) { return false; }
*/
typedef bool (*nir_instr_filter_cb)(const nir_instr *, const void *);
+/** An instruction filtering callback with writemask
+ *
+ * Returns true if the instruction should be processed with the associated
+ * writemask and false otherwise.
+ */
+typedef bool (*nir_instr_writemask_filter_cb)(const nir_instr *,
+ unsigned writemask, const void *);
+
/** A simple instruction lowering callback
*
* Many instruction lowering passes can be written as a simple function which
@@ -4457,7 +4465,8 @@ bool nir_lower_variable_initializers(nir_shader *shader,
nir_variable_mode modes);
bool nir_move_vec_src_uses_to_dest(nir_shader *shader);
-bool nir_lower_vec_to_movs(nir_shader *shader);
+bool nir_lower_vec_to_movs(nir_shader *shader, nir_instr_writemask_filter_cb cb,
+ const void *_data);
void nir_lower_alpha_test(nir_shader *shader, enum compare_func func,
bool alpha_to_one,
const gl_state_index16 *alpha_ref_state_tokens);
diff --git a/src/compiler/nir/nir_lower_vec_to_movs.c b/src/compiler/nir/nir_lower_vec_to_movs.c
index 29ce7e508a2..3efe709b39f 100644
--- a/src/compiler/nir/nir_lower_vec_to_movs.c
+++ b/src/compiler/nir/nir_lower_vec_to_movs.c
@@ -28,6 +28,11 @@
#include "nir.h"
#include "nir_builder.h"
+struct vec_to_movs_data {
+ nir_instr_writemask_filter_cb cb;
+ const void *data;
+};
+
/*
* Implements a simple pass that lowers vecN instructions to a series of
* moves with partial writes.
@@ -119,8 +124,10 @@ has_replicated_dest(nir_alu_instr *alu)
* can then call insert_mov as normal.
*/
static unsigned
-try_coalesce(nir_alu_instr *vec, unsigned start_idx)
+try_coalesce(nir_alu_instr *vec, unsigned start_idx, void *_data)
{
+ struct vec_to_movs_data *data = _data;
+
assert(start_idx < nir_op_infos[vec->op].num_inputs);
/* We will only even try if the source is SSA */
@@ -178,6 +185,7 @@ try_coalesce(nir_alu_instr *vec, unsigned start_idx)
for (unsigned i = 0; i < 4; i++)
swizzles[j][i] = src_alu->src[j].swizzle[i];
+ /* Generate the final write mask */
unsigned write_mask = 0;
for (unsigned i = start_idx; i < 4; i++) {
if (!(vec->dest.write_mask & (1 << i)))
@@ -187,10 +195,21 @@ try_coalesce(nir_alu_instr *vec, unsigned start_idx)
vec->src[i].src.ssa != &src_alu->dest.dest.ssa)
continue;
- /* At this point, the give vec source matchese up with the ALU
+ write_mask |= 1 << i;
+ }
+
+ /* If the instruction would be vectorized but the backend
+ * doesn't support vectorizing this op, abort. */
+ if (data->cb && !data->cb(&src_alu->instr, write_mask, data->data))
+ return 0;
+
+ for (unsigned i = start_idx; i < 4; i++) {
+ if (!(write_mask & (1 << i)))
+ continue;
+
+ /* At this point, the given vec source matches up with the ALU
* instruction so we can re-swizzle that component to match.
*/
- write_mask |= 1 << i;
if (has_replicated_dest(src_alu)) {
/* Since the destination is a single replicated value, we don't need
* to do any reswizzling
@@ -266,7 +285,7 @@ nir_lower_vec_to_movs_instr(nir_builder *b, nir_instr *instr, void *data)
* vecN had an SSA destination.
*/
if (vec_had_ssa_dest && !(finished_write_mask & (1 << i)))
- finished_write_mask |= try_coalesce(vec, i);
+ finished_write_mask |= try_coalesce(vec, i, data);
if (!(finished_write_mask & (1 << i)))
finished_write_mask |= insert_mov(vec, i, b->shader);
@@ -279,11 +298,17 @@ nir_lower_vec_to_movs_instr(nir_builder *b, nir_instr *instr, void *data)
}
bool
-nir_lower_vec_to_movs(nir_shader *shader)
+nir_lower_vec_to_movs(nir_shader *shader, nir_instr_writemask_filter_cb cb,
+ const void *_data)
{
+ struct vec_to_movs_data data = {
+ .cb = cb,
+ .data = _data,
+ };
+
return nir_shader_instructions_pass(shader,
nir_lower_vec_to_movs_instr,
nir_metadata_block_index |
nir_metadata_dominance,
- NULL);
+ &data);
}
diff --git a/src/gallium/auxiliary/nir/nir_to_tgsi.c b/src/gallium/auxiliary/nir/nir_to_tgsi.c
index 88ba7d4bec2..587c06074be 100644
--- a/src/gallium/auxiliary/nir/nir_to_tgsi.c
+++ b/src/gallium/auxiliary/nir/nir_to_tgsi.c
@@ -2622,7 +2622,7 @@ nir_to_tgsi(struct nir_shader *s,
nir_lower_float_source_mods |
nir_lower_int_source_mods); /* no doubles */
NIR_PASS_V(s, nir_convert_from_ssa, true);
- NIR_PASS_V(s, nir_lower_vec_to_movs);
+ NIR_PASS_V(s, nir_lower_vec_to_movs, NULL, NULL);
/* locals_to_regs will leave dead derefs that are good to clean up. */
NIR_PASS_V(s, nir_lower_locals_to_regs);
diff --git a/src/gallium/drivers/freedreno/a2xx/ir2_nir.c b/src/gallium/drivers/freedreno/a2xx/ir2_nir.c
index 4f25ad90688..be80e4bc504 100644
--- a/src/gallium/drivers/freedreno/a2xx/ir2_nir.c
+++ b/src/gallium/drivers/freedreno/a2xx/ir2_nir.c
@@ -1111,7 +1111,7 @@ ir2_nir_compile(struct ir2_context *ctx, bool binning)
OPT_V(ctx->nir, nir_convert_from_ssa, true);
OPT_V(ctx->nir, nir_move_vec_src_uses_to_dest);
- OPT_V(ctx->nir, nir_lower_vec_to_movs);
+ OPT_V(ctx->nir, nir_lower_vec_to_movs, NULL, NULL);
OPT_V(ctx->nir, nir_opt_dce);
diff --git a/src/gallium/drivers/lima/lima_program.c b/src/gallium/drivers/lima/lima_program.c
index 30a3f527181..5d58750eff1 100644
--- a/src/gallium/drivers/lima/lima_program.c
+++ b/src/gallium/drivers/lima/lima_program.c
@@ -191,6 +191,17 @@ lima_alu_to_scalar_filter_cb(const nir_instr *instr, const void *data)
return false;
}
+static bool
+lima_vec_to_movs_filter_cb(const nir_instr *instr, unsigned writemask,
+ const void *data)
+{
+ assert(writemask > 0);
+ if (util_bitcount(writemask) == 1)
+ return true;
+
+ return !lima_alu_to_scalar_filter_cb(instr, data);
+}
+
void
lima_program_optimize_fs_nir(struct nir_shader *s,
struct nir_lower_tex_options *tex_options)
@@ -252,7 +263,7 @@ lima_program_optimize_fs_nir(struct nir_shader *s,
NIR_PASS_V(s, nir_remove_dead_variables, nir_var_function_temp, NULL);
NIR_PASS_V(s, nir_move_vec_src_uses_to_dest);
- NIR_PASS_V(s, nir_lower_vec_to_movs);
+ NIR_PASS_V(s, nir_lower_vec_to_movs, lima_vec_to_movs_filter_cb, NULL);
NIR_PASS_V(s, lima_nir_duplicate_load_uniforms);
NIR_PASS_V(s, lima_nir_duplicate_load_inputs);
diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c
index 0b319d6afac..16ae9ccc04f 100644
--- a/src/intel/compiler/brw_nir.c
+++ b/src/intel/compiler/brw_nir.c
@@ -1183,7 +1183,7 @@ brw_postprocess_nir(nir_shader *nir, const struct brw_compiler *compiler,
if (!is_scalar) {
OPT(nir_move_vec_src_uses_to_dest);
- OPT(nir_lower_vec_to_movs);
+ OPT(nir_lower_vec_to_movs, NULL, NULL);
}
OPT(nir_opt_dce);
diff --git a/src/panfrost/midgard/midgard_compile.c b/src/panfrost/midgard/midgard_compile.c
index 631aae293b0..253cdedc53f 100644
--- a/src/panfrost/midgard/midgard_compile.c
+++ b/src/panfrost/midgard/midgard_compile.c
@@ -348,7 +348,7 @@ optimise_nir(nir_shader *nir, unsigned quirks, bool is_blend)
/* We are a vector architecture; write combine where possible */
NIR_PASS(progress, nir, nir_move_vec_src_uses_to_dest);
- NIR_PASS(progress, nir, nir_lower_vec_to_movs);
+ NIR_PASS(progress, nir, nir_lower_vec_to_movs, NULL, NULL);
NIR_PASS(progress, nir, nir_opt_dce);
}
More information about the mesa-commit
mailing list