[Mesa-dev] [RFC] nir/algebraic: support for power-of-two optimizations

Sat May 7 17:06:18 UTC 2016

From: Rob Clark <robclark at freedesktop.org>

It was kinda sad that we couldn't optimize imul/idiv by power-of-two.
So I bashed my head against python for a while and this is what I came
up with.  In the search expression, you can use "#a^2" to match only
constants which are a power of two.  The rest is taken care of with the
normal replacement expression.  (It might be nice if we had an ilog2, to
avoid the float/int conversion stuff.)

Still a couple rough edges and things which should be split out.
---
 src/compiler/nir/nir_algebraic.py           |  9 ++++--
 src/compiler/nir/nir_opt_algebraic.py       |  5 ++++
 src/compiler/nir/nir_search.c               | 27 +++++++++++++++++
 src/compiler/nir/nir_search.h               |  9 +++++-
 src/gallium/drivers/freedreno/ir3/ir3_nir.c | 45 +++++++++++++++++++----------
 5 files changed, 77 insertions(+), 18 deletions(-)

diff --git a/src/compiler/nir/nir_algebraic.py b/src/compiler/nir/nir_algebraic.py
index 285f853..c2b47fd 100644
--- a/src/compiler/nir/nir_algebraic.py
+++ b/src/compiler/nir/nir_algebraic.py
@@ -83,6 +83,7 @@ static const ${val.c_type} ${val.name} = {
 % elif isinstance(val, Variable):
    ${val.index}, /* ${val.var_name} */
    ${'true' if val.is_constant else 'false'},
+   ${'true' if val.is_power_of_two else 'false'},
    ${val.type() or 'nir_type_invalid' },
 % elif isinstance(val, Expression):
    ${'true' if val.inexact else 'false'},
@@ -113,7 +114,7 @@ static const ${val.c_type} ${val.name} = {
                                     Variable=Variable,
                                     Expression=Expression)
 
-_constant_re = re.compile(r"(?P<value>[^@]+)(?:@(?P<bits>\d+))?")
+_constant_re = re.compile(r"(?P<value>[^@\^]+)(?P<PoT>\^2)?(?:@(?P<bits>\d+))?")
 
 class Constant(Value):
    def __init__(self, val, name):
@@ -123,6 +124,7 @@ class Constant(Value):
          m = _constant_re.match(val)
          self.value = ast.literal_eval(m.group('value'))
          self.bit_size = int(m.group('bits')) if m.group('bits') else 0
+         self.power_of_two = True if m.group('PoT') else False
       else:
          self.value = val
          self.bit_size = 0
@@ -149,7 +151,7 @@ class Constant(Value):
       elif isinstance(self.value, float):
          return "nir_type_float"
 
-_var_name_re = re.compile(r"(?P<const>#)?(?P<name>\w+)"
+_var_name_re = re.compile(r"(?P<const>#)?(?P<name>\w+)(?P<PoT>\^2)?"
                           r"(?:@(?P<type>int|uint|bool|float)?(?P<bits>\d+)?)?")
 
 class Variable(Value):
@@ -161,6 +163,9 @@ class Variable(Value):
 
       self.var_name = m.group('name')
       self.is_constant = m.group('const') is not None
+      self.is_power_of_two = m.group('PoT') is not None
+      if self.is_power_of_two:
+         assert self.is_constant
       self.required_type = m.group('type')
       self.bit_size = int(m.group('bits')) if m.group('bits') else 0
 
diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index 0a95725..e1381b2 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -62,6 +62,11 @@ d = 'd'
 # constructed value should have that bit-size.
 
 optimizations = [
+
+   # add 64b variants?
+   (('imul', a, '#b^2@32'), ('ishl', a, ('f2i', ('flog2', ('i2f', b))))),
+   (('idiv', a, '#b^2@32'), ('ishr', a, ('f2i', ('flog2', ('i2f', b))))),
+
    (('fneg', ('fneg', a)), a),
    (('ineg', ('ineg', a)), a),
    (('fabs', ('fabs', a)), ('fabs', a)),
diff --git a/src/compiler/nir/nir_search.c b/src/compiler/nir/nir_search.c
index 2c2fd92..92af521 100644
--- a/src/compiler/nir/nir_search.c
+++ b/src/compiler/nir/nir_search.c
@@ -70,6 +70,13 @@ alu_instr_is_bool(nir_alu_instr *instr)
    }
 }
 
+/* true iff exactly one bit of x is set, i.e. x is a nonzero power of two (is there a shared helper for this somewhere?) */
+static bool
+is_power_of_two(unsigned int x)
+{
+   return ((x != 0) && !(x & (x - 1)));
+}
+
 static bool
 match_value(const nir_search_value *value, nir_alu_instr *instr, unsigned src,
             unsigned num_components, const uint8_t *swizzle,
@@ -127,6 +134,26 @@ match_value(const nir_search_value *value, nir_alu_instr *instr, unsigned src,
              instr->src[src].src.ssa->parent_instr->type != nir_instr_type_load_const)
             return false;
 
+         if (var->is_power_of_two) {
+            assert(var->is_constant);
+            nir_const_value *val = nir_src_as_const_value(instr->src[src].src);
+            for (unsigned i = 0; i < num_components; i++) {
+               switch (nir_op_infos[instr->op].input_types[src]) {
+               // TODO handle other types??
+               case nir_type_int:
+                  if (!is_power_of_two(val->i32[new_swizzle[i]]))
+                     return false;
+                  break;
+               case nir_type_uint:
+                  if (!is_power_of_two(val->u32[new_swizzle[i]]))
+                     return false;
+                  break;
+               default:
+                  return false;
+               }
+            }
+         }
+
          if (var->type != nir_type_invalid) {
             if (instr->src[src].src.ssa->parent_instr->type != nir_instr_type_alu)
                return false;
diff --git a/src/compiler/nir/nir_search.h b/src/compiler/nir/nir_search.h
index c49eba7..32ed538 100644
--- a/src/compiler/nir/nir_search.h
+++ b/src/compiler/nir/nir_search.h
@@ -52,11 +52,18 @@ typedef struct {
 
    /** Indicates that the given variable must be a constant
     *
-    * This is only alloed in search expressions and indicates that the
+    * This is only allowed in search expressions and indicates that the
     * given variable is only allowed to match constant values.
     */
    bool is_constant;
 
+   /** Indicates that the given constant is a power of two
+    *
+    * This is only allowed in search expressions, and only for constant
+    * variables.
+    */
+   bool is_power_of_two;
+
    /** Indicates that the given variable must have a certain type
     *
     * This is only allowed in search expressions and indicates that the
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_nir.c
index 7e3ccc0..44c694a 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_nir.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_nir.c
@@ -77,6 +77,27 @@ ir3_key_lowers_nir(const struct ir3_shader_key *key)
 
 #define OPT_V(nir, pass, ...) NIR_PASS_V(nir, pass, ##__VA_ARGS__)
 
+static void
+ir3_optimize_loop(nir_shader *s)	/* repeats the pass list below on s until progress stays false */
+{
+	bool progress;	/* OR of the OPT() results from the current iteration */
+	do {
+		progress = false;
+
+		OPT_V(s, nir_lower_vars_to_ssa);	/* OPT_V passes: result is not folded into progress */
+		OPT_V(s, nir_lower_alu_to_scalar);
+		OPT_V(s, nir_lower_phis_to_scalar);
+
+		progress |= OPT(s, nir_copy_prop);
+		progress |= OPT(s, nir_opt_dce);
+		progress |= OPT(s, nir_opt_cse);
+		progress |= OPT(s, ir3_nir_lower_if_else);
+		progress |= OPT(s, nir_opt_algebraic);
+		progress |= OPT(s, nir_opt_constant_folding);
+
+	} while (progress);	/* any nonzero OPT() result triggers another full iteration */
+}
+
 struct nir_shader *
 ir3_optimize_nir(struct ir3_shader *shader, nir_shader *s,
 		const struct ir3_shader_key *key)
@@ -84,7 +105,6 @@ ir3_optimize_nir(struct ir3_shader *shader, nir_shader *s,
 	struct nir_lower_tex_options tex_options = {
 			.lower_rect = 0,
 	};
-	bool progress;
 
 	if (key) {
 		switch (shader->type) {
@@ -140,24 +160,19 @@ ir3_optimize_nir(struct ir3_shader *shader, nir_shader *s,
 	}
 
 	OPT_V(s, nir_lower_tex, &tex_options);
-	OPT_V(s, nir_lower_idiv);
 	OPT_V(s, nir_lower_load_const_to_scalar);
 
-	do {
-		progress = false;
-
-		OPT_V(s, nir_lower_vars_to_ssa);
-		OPT_V(s, nir_lower_alu_to_scalar);
-		OPT_V(s, nir_lower_phis_to_scalar);
+	ir3_optimize_loop(s);
 
-		progress |= OPT(s, nir_copy_prop);
-		progress |= OPT(s, nir_opt_dce);
-		progress |= OPT(s, nir_opt_cse);
-		progress |= OPT(s, ir3_nir_lower_if_else);
-		progress |= OPT(s, nir_opt_algebraic);
-		progress |= OPT(s, nir_opt_constant_folding);
+	/* do idiv lowering after first opt loop to give a chance for
+	 * divide by immed power-of-two to be caught first:
+	 *
+	 * XXX TODO nir_lower_idiv should return progress so we could
+	 * skip second loop..
+	 */
+	OPT_V(s, nir_lower_idiv);
 
-	} while (progress);
+	ir3_optimize_loop(s);
 
 	OPT_V(s, nir_remove_dead_variables, nir_var_local);
 
-- 
2.5.5