<div dir="ltr">This patch appears to cause regressions on Haswell.  I'm not sure why.  The fact that it's only haswell strikes me as a bit odd so it's possible it's not your patch's fault.  I'm looking into it.<br></div><div class="gmail_extra"><br><div class="gmail_quote">On Tue, Jan 10, 2017 at 1:41 AM, Timothy Arceri <span dir="ltr"><<a href="mailto:timothy.arceri@collabora.com" target="_blank">timothy.arceri@collabora.com</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">If we just check that we are not dealing with an identity swizzle<br>
in match_value() before calling match_expression() we can avoid<br>
a bunch of temp swizzle arrays and the passing it around and<br>
resetting craziness.<br>
---<br>
 src/compiler/nir/nir_search.c | 89 ++++++++++++++++++------------<wbr>-------------<br>
 1 file changed, 38 insertions(+), 51 deletions(-)<br>
<br>
diff --git a/src/compiler/nir/nir_search.<wbr>c b/src/compiler/nir/nir_search.<wbr>c<br>
index b34b13f..7a84b18 100644<br>
--- a/src/compiler/nir/nir_search.<wbr>c<br>
+++ b/src/compiler/nir/nir_search.<wbr>c<br>
@@ -37,8 +37,7 @@ struct match_state {<br>
<br>
 static bool<br>
 match_expression(const nir_search_expression *expr, nir_alu_instr *instr,<br>
-                 unsigned num_components, const uint8_t *swizzle,<br>
-                 struct match_state *state);<br>
+                 unsigned num_components, struct match_state *state);<br>
<br>
 static const uint8_t identity_swizzle[] = { 0, 1, 2, 3 };<br>
<br>
@@ -93,22 +92,15 @@ src_is_type(nir_src src, nir_alu_type type)<br>
<br>
 static bool<br>
 match_value(const nir_search_value *value, nir_alu_instr *instr, unsigned src,<br>
-            unsigned num_components, const uint8_t *swizzle,<br>
-            struct match_state *state)<br>
+            unsigned num_components, struct match_state *state)<br>
 {<br>
-   uint8_t new_swizzle[4];<br>
-<br>
    /* If the source is an explicitly sized source, then we need to reset<br>
-    * both the number of components and the swizzle.<br>
+    * the number of components.<br>
     */<br>
    if (nir_op_infos[instr->op].<wbr>input_sizes[src] != 0) {<br>
       num_components = nir_op_infos[instr->op].input_<wbr>sizes[src];<br>
-      swizzle = identity_swizzle;<br>
    }<br>
<br>
-   for (unsigned i = 0; i < num_components; ++i)<br>
-      new_swizzle[i] = instr->src[src].swizzle[<wbr>swizzle[i]];<br>
-<br>
    /* If the value has a specific bit size and it doesn't match, bail */<br>
    if (value->bit_size &&<br>
        nir_src_bit_size(instr->src[<wbr>src].src) != value->bit_size)<br>
@@ -122,9 +114,23 @@ match_value(const nir_search_value *value, nir_alu_instr *instr, unsigned src,<br>
       if (instr->src[src].src.ssa-><wbr>parent_instr->type != nir_instr_type_alu)<br>
          return false;<br>
<br>
+      /* If we have an explicitly sized destination, we can only handle the<br>
+       * identity swizzle.  While dot(vec3(a, b, c).zxy) is a valid<br>
+       * expression, we don't have the information right now to propagate that<br>
+       * swizzle through.  We can only properly propagate swizzles if the<br>
+       * instruction is vectorized.<br>
+       */<br>
+      nir_alu_instr *alu_instr =<br>
+         nir_instr_as_alu(instr->src[<wbr>src].src.ssa->parent_instr);<br>
+      if (nir_op_infos[alu_instr->op].<wbr>output_size != 0) {<br>
+         for (unsigned i = 0; i < num_components; i++) {<br>
+            if (instr->src[src].swizzle[i] != i)<br>
+               return false;<br>
+         }<br>
+      }<br>
+<br>
       return match_expression(nir_search_<wbr>value_as_expression(value),<br>
-                              nir_instr_as_alu(instr->src[<wbr>src].src.ssa->parent_instr),<br>
-                              num_components, new_swizzle, state);<br>
+                              alu_instr, num_components, state);<br>
<br>
    case nir_search_value_variable: {<br>
       nir_search_variable *var = nir_search_value_as_variable(<wbr>value);<br>
@@ -138,7 +144,8 @@ match_value(const nir_search_value *value, nir_alu_instr *instr, unsigned src,<br>
          assert(!instr->src[src].abs && !instr->src[src].negate);<br>
<br>
          for (unsigned i = 0; i < num_components; ++i) {<br>
-            if (state->variables[var-><wbr>variable].swizzle[i] != new_swizzle[i])<br>
+            if (state->variables[var-><wbr>variable].swizzle[i] !=<br>
+                instr->src[src].swizzle[i])<br>
                return false;<br>
          }<br>
<br>
@@ -148,7 +155,8 @@ match_value(const nir_search_value *value, nir_alu_instr *instr, unsigned src,<br>
              instr->src[src].src.ssa-><wbr>parent_instr->type != nir_instr_type_load_const)<br>
             return false;<br>
<br>
-         if (var->cond && !var->cond(instr, src, num_components, new_swizzle))<br>
+         if (var->cond && !var->cond(instr, src, num_components,<br>
+                                     instr->src[src].swizzle))<br>
             return false;<br>
<br>
          if (var->type != nir_type_invalid &&<br>
@@ -161,9 +169,10 @@ match_value(const nir_search_value *value, nir_alu_instr *instr, unsigned src,<br>
          state->variables[var-><wbr>variable].negate = false;<br>
<br>
          for (unsigned i = 0; i < 4; ++i) {<br>
-            if (i < num_components)<br>
-               state->variables[var-><wbr>variable].swizzle[i] = new_swizzle[i];<br>
-            else<br>
+            if (i < num_components) {<br>
+               state->variables[var-><wbr>variable].swizzle[i] =<br>
+                  instr->src[src].swizzle[i];<br>
+            } else<br>
                state->variables[var-><wbr>variable].swizzle[i] = 0;<br>
          }<br>
<br>
@@ -189,10 +198,10 @@ match_value(const nir_search_value *value, nir_alu_instr *instr, unsigned src,<br>
             double val;<br>
             switch (load->def.bit_size) {<br>
             case 32:<br>
-               val = load->value.f32[new_swizzle[i]<wbr>];<br>
+               val = load->value.f32[instr->src[<wbr>src].swizzle[i]];<br>
                break;<br>
             case 64:<br>
-               val = load->value.f64[new_swizzle[i]<wbr>];<br>
+               val = load->value.f64[instr->src[<wbr>src].swizzle[i]];<br>
                break;<br>
             default:<br>
                unreachable("unknown bit size");<br>
@@ -208,10 +217,10 @@ match_value(const nir_search_value *value, nir_alu_instr *instr, unsigned src,<br>
             int64_t val;<br>
             switch (load->def.bit_size) {<br>
             case 32:<br>
-               val = load->value.i32[new_swizzle[i]<wbr>];<br>
+               val = load->value.i32[instr->src[<wbr>src].swizzle[i]];<br>
                break;<br>
             case 64:<br>
-               val = load->value.i64[new_swizzle[i]<wbr>];<br>
+               val = load->value.i64[instr->src[<wbr>src].swizzle[i]];<br>
                break;<br>
             default:<br>
                unreachable("unknown bit size");<br>
@@ -228,10 +237,10 @@ match_value(const nir_search_value *value, nir_alu_instr *instr, unsigned src,<br>
             uint64_t val;<br>
             switch (load->def.bit_size) {<br>
             case 32:<br>
-               val = load->value.u32[new_swizzle[i]<wbr>];<br>
+               val = load->value.u32[instr->src[<wbr>src].swizzle[i]];<br>
                break;<br>
             case 64:<br>
-               val = load->value.u64[new_swizzle[i]<wbr>];<br>
+               val = load->value.u64[instr->src[<wbr>src].swizzle[i]];<br>
                break;<br>
             default:<br>
                unreachable("unknown bit size");<br>
@@ -254,8 +263,7 @@ match_value(const nir_search_value *value, nir_alu_instr *instr, unsigned src,<br>
<br>
 static bool<br>
 match_expression(const nir_search_expression *expr, nir_alu_instr *instr,<br>
-                 unsigned num_components, const uint8_t *swizzle,<br>
-                 struct match_state *state)<br>
+                 unsigned num_components, struct match_state *state)<br>
 {<br>
    if (instr->op != expr->opcode)<br>
       return false;<br>
@@ -274,19 +282,6 @@ match_expression(const nir_search_expression *expr, nir_alu_instr *instr,<br>
    assert(!instr->dest.saturate);<br>
    assert(nir_op_infos[instr->op]<wbr>.num_inputs > 0);<br>
<br>
-   /* If we have an explicitly sized destination, we can only handle the<br>
-    * identity swizzle.  While dot(vec3(a, b, c).zxy) is a valid<br>
-    * expression, we don't have the information right now to propagate that<br>
-    * swizzle through.  We can only properly propagate swizzles if the<br>
-    * instruction is vectorized.<br>
-    */<br>
-   if (nir_op_infos[instr->op].<wbr>output_size != 0) {<br>
-      for (unsigned i = 0; i < num_components; i++) {<br>
-         if (swizzle[i] != i)<br>
-            return false;<br>
-      }<br>
-   }<br>
-<br>
    /* Stash off the current variables_seen bitmask.  This way we can<br>
     * restore it prior to matching in the commutative case below.<br>
     */<br>
@@ -294,8 +289,7 @@ match_expression(const nir_search_expression *expr, nir_alu_instr *instr,<br>
<br>
    bool matched = true;<br>
    for (unsigned i = 0; i < nir_op_infos[instr->op].num_<wbr>inputs; i++) {<br>
-      if (!match_value(expr->srcs[i], instr, i, num_components,<br>
-                       swizzle, state)) {<br>
+      if (!match_value(expr->srcs[i], instr, i, num_components, state)) {<br>
          matched = false;<br>
          break;<br>
       }<br>
@@ -313,12 +307,10 @@ match_expression(const nir_search_expression *expr, nir_alu_instr *instr,<br>
        */<br>
       state->variables_seen = variables_seen_stash;<br>
<br>
-      if (!match_value(expr->srcs[0], instr, 1, num_components,<br>
-                       swizzle, state))<br>
+      if (!match_value(expr->srcs[0], instr, 1, num_components, state))<br>
          return false;<br>
<br>
-      return match_value(expr->srcs[1], instr, 0, num_components,<br>
-                         swizzle, state);<br>
+      return match_value(expr->srcs[1], instr, 0, num_components, state);<br>
    } else {<br>
       return false;<br>
    }<br>
@@ -578,11 +570,6 @@ nir_alu_instr *<br>
 nir_replace_instr(nir_alu_<wbr>instr *instr, const nir_search_expression *search,<br>
                   const nir_search_value *replace, void *mem_ctx)<br>
 {<br>
-   uint8_t swizzle[4] = { 0, 0, 0, 0 };<br>
-<br>
-   for (unsigned i = 0; i < instr->dest.dest.ssa.num_<wbr>components; ++i)<br>
-      swizzle[i] = i;<br>
-<br>
    assert(instr->dest.dest.is_<wbr>ssa);<br>
<br>
    struct match_state state;<br>
@@ -591,7 +578,7 @@ nir_replace_instr(nir_alu_<wbr>instr *instr, const nir_search_expression *search,<br>
    state.variables_seen = 0;<br>
<br>
    if (!match_expression(search, instr, instr->dest.dest.ssa.num_<wbr>components,<br>
-                         swizzle, &state))<br>
+                         &state))<br>
       return NULL;<br>
<br>
    void *bitsize_ctx = ralloc_context(NULL);<br>
<span class="HOEnZb"><font color="#888888">--<br>
2.9.3<br>
<br>
______________________________<wbr>_________________<br>
mesa-dev mailing list<br>
<a href="mailto:mesa-dev@lists.freedesktop.org">mesa-dev@lists.freedesktop.org</a><br>
<a href="https://lists.freedesktop.org/mailman/listinfo/mesa-dev" rel="noreferrer" target="_blank">https://lists.freedesktop.org/<wbr>mailman/listinfo/mesa-dev</a><br>
</font></span></blockquote></div><br></div>