[Mesa-dev] [PATCH 1/2] nir/lower_vec_to_movs: Generate the write mask early in try_coalesce
Jason Ekstrand
jason at jlekstrand.net
Wed Apr 4 05:23:36 UTC 2018
It will soon be useful to have this as a separate step.
---
src/compiler/nir/nir_lower_vec_to_movs.c | 20 +++++++++++++-------
1 file changed, 13 insertions(+), 7 deletions(-)
diff --git a/src/compiler/nir/nir_lower_vec_to_movs.c b/src/compiler/nir/nir_lower_vec_to_movs.c
index 8b24376..610a362 100644
--- a/src/compiler/nir/nir_lower_vec_to_movs.c
+++ b/src/compiler/nir/nir_lower_vec_to_movs.c
@@ -171,12 +171,6 @@ try_coalesce(nir_alu_instr *vec, unsigned start_idx)
return 0;
}
- /* Stash off all of the ALU instruction's swizzles. */
- uint8_t swizzles[4][4];
- for (unsigned j = 0; j < nir_op_infos[src_alu->op].num_inputs; j++)
- for (unsigned i = 0; i < 4; i++)
- swizzles[j][i] = src_alu->src[j].swizzle[i];
-
unsigned write_mask = 0;
for (unsigned i = start_idx; i < 4; i++) {
if (!(vec->dest.write_mask & (1 << i)))
@@ -186,10 +180,22 @@ try_coalesce(nir_alu_instr *vec, unsigned start_idx)
vec->src[i].src.ssa != &src_alu->dest.dest.ssa)
continue;
+ write_mask |= 1 << i;
+ }
+
+ /* Stash off all of the ALU instruction's swizzles. */
+ uint8_t swizzles[4][4];
+ for (unsigned j = 0; j < nir_op_infos[src_alu->op].num_inputs; j++)
+ for (unsigned i = 0; i < 4; i++)
+ swizzles[j][i] = src_alu->src[j].swizzle[i];
+
+ for (unsigned i = start_idx; i < 4; i++) {
+ if (!(write_mask & (1 << i)))
+ continue;
+
/* At this point, the given vec source matches up with the ALU
* instruction so we can re-swizzle that component to match.
*/
- write_mask |= 1 << i;
if (has_replicated_dest(src_alu)) {
/* Since the destination is a single replicated value, we don't need
* to do any reswizzling
--
2.5.0.400.gff86faf
More information about the mesa-dev
mailing list