[Mesa-dev] [PATCH 20/20] nir: find induction/limit vars in iand instructions
Timothy Arceri
tarceri at itsqueeze.com
Fri Dec 7 03:08:20 UTC 2018
This will be used to help find the trip count of loops that look
like the following:
while (a < x && i < 8) {
...
i++;
}
Where the NIR will end up looking something like this:
vec1 32 ssa_0 = load_const (0x00000000 /* 0.000000 */)
vec1 32 ssa_1 = load_const (0x00000008 /* 0.000000 */)
loop {
...
vec1 32 ssa_28 = ige ssa_26, ssa_3
vec1 32 ssa_29 = ige ssa_27, ssa_1
vec1 32 ssa_30 = iand ssa_29, ssa_28
vec1 ssa_31 = ieq ssa_30, ssa_0
if ssa_31 {
...
break
} else {
...
}
...
}
On RADV this unrolls a bunch of loops in F1-2017 shaders.
Totals from affected shaders:
SGPRS: 4112 -> 4032 (-1.95 %)
VGPRS: 4076 -> 3996 (-1.96 %)
Spilled SGPRs: 0 -> 0 (0.00 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 0 -> 0 (0.00 %) dwords per thread
Code Size: 510184 -> 589868 (15.62 %) bytes
LDS: 2 -> 2 (0.00 %) blocks
Max Waves: 200 -> 202 (1.00 %)
Wait states: 0 -> 0 (0.00 %)
It also unrolls a couple of loops in shader-db on radeonsi.
Totals from affected shaders:
SGPRS: 128 -> 128 (0.00 %)
VGPRS: 64 -> 64 (0.00 %)
Spilled SGPRs: 0 -> 0 (0.00 %)
Spilled VGPRs: 0 -> 0 (0.00 %)
Private memory VGPRs: 0 -> 0 (0.00 %)
Scratch size: 0 -> 0 (0.00 %) dwords per thread
Code Size: 6880 -> 9504 (38.14 %) bytes
LDS: 0 -> 0 (0.00 %) blocks
Max Waves: 16 -> 16 (0.00 %)
Wait states: 0 -> 0 (0.00 %)
---
src/compiler/nir/nir_loop_analyze.c | 71 ++++++++++++++++++++++++++++-
1 file changed, 70 insertions(+), 1 deletion(-)
diff --git a/src/compiler/nir/nir_loop_analyze.c b/src/compiler/nir/nir_loop_analyze.c
index ea20db9dbf..27f4ee427c 100644
--- a/src/compiler/nir/nir_loop_analyze.c
+++ b/src/compiler/nir/nir_loop_analyze.c
@@ -736,6 +736,59 @@ get_induction_and_limit_vars(nir_alu_instr *alu, nir_loop_variable **ind,
return limit_rhs;
}
+static void
+try_find_trip_count_vars_in_iand(nir_alu_instr **alu,
+ nir_loop_variable **ind,
+ nir_loop_variable **limit,
+ bool *limit_rhs,
+ loop_info_state *state)
+{
+ assert((*alu)->op == nir_op_ieq);
+
+ nir_ssa_def *iand_def = (*alu)->src[0].src.ssa;
+ nir_ssa_def *zero_def = (*alu)->src[1].src.ssa;
+
+ if (iand_def->parent_instr->type != nir_instr_type_alu ||
+ zero_def->parent_instr->type != nir_instr_type_load_const) {
+
+ /* Maybe we had it the wrong way, flip things around */
+ iand_def = (*alu)->src[1].src.ssa;
+ zero_def = (*alu)->src[0].src.ssa;
+
+ /* If we still didn't find what we need then return */
+ if (iand_def->parent_instr->type != nir_instr_type_alu ||
+ zero_def->parent_instr->type != nir_instr_type_load_const)
+ return;
+ }
+
+ /* If the loop is not breaking on (x && y) == 0 then return */
+ nir_alu_instr *iand = nir_instr_as_alu(iand_def->parent_instr);
+ nir_const_value zero =
+ nir_instr_as_load_const(zero_def->parent_instr)->value;
+ if (iand->op != nir_op_iand || zero.i32[0] != 0)
+ return;
+
+ /* Check if iand src is a terminator condition and try to get the
+ * induction var and trip limit var.
+ */
+ nir_ssa_def *src = iand->src[0].src.ssa;
+ if (src->parent_instr->type == nir_instr_type_alu) {
+ *alu = nir_instr_as_alu(src->parent_instr);
+ if (is_supported_terminator_condition(*alu))
+ *limit_rhs = get_induction_and_limit_vars(*alu, ind, limit, state);
+ }
+
+ /* Try the other iand src if needed */
+ if ((*ind)->type != basic_induction) {
+ src = iand->src[1].src.ssa;
+ if (src->parent_instr->type == nir_instr_type_alu) {
+ *alu = nir_instr_as_alu(src->parent_instr);
+ if (is_supported_terminator_condition(*alu))
+ *limit_rhs = get_induction_and_limit_vars(*alu, ind, limit, state);
+ }
+ }
+}
+
/* Run through each of the terminators of the loop and try to infer a possible
* trip-count. We need to check them all, and set the lowest trip-count as the
* trip-count of our loop. If one of the terminators has an undecidable
@@ -774,7 +827,21 @@ find_trip_count(loop_info_state *state)
nir_loop_variable *limit;
bool limit_rhs = get_induction_and_limit_vars(alu, &basic_ind, &limit,
state);
- terminator->induction_rhs = !limit_rhs;
+
+ if (basic_ind->type != basic_induction && alu->op == nir_op_ieq) {
+ trip_count_known = false;
+ terminator->exact_trip_count_unknown = true;
+
+ try_find_trip_count_vars_in_iand(&alu, &basic_ind, &limit,
+ &limit_rhs, state);
+
+ /* The loop is exiting on (x && y) == 0 so we need to get the
+ * inverse of x or y (i.e. whichever contained the induction var) in
+ * order to compute the trip count.
+ */
+ if (basic_ind->type == basic_induction)
+ alu_op = inverse_comparision(alu);
+ }
/* The comparison has to have a basic induction variable for us to be
* able to find trip counts.
@@ -784,6 +851,8 @@ find_trip_count(loop_info_state *state)
continue;
}
+ terminator->induction_rhs = !limit_rhs;
+
/* Attempt to find a constant limit for the loop */
nir_const_value limit_val;
if (is_var_constant(limit)) {
--
2.19.2
More information about the mesa-dev
mailing list