<p dir="ltr">Reviewed-by: Jason Ekstrand &lt;<a href="mailto:jason.ekstrand@intel.com">jason.ekstrand@intel.com</a>&gt;</p>
<div class="gmail_quote">On May 28, 2015 12:06 AM, "Iago Toral Quiroga" &lt;<a href="mailto:itoral@igalia.com">itoral@igalia.com</a>&gt; wrote:<br type="attribution"><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">When we compute the output swizzle we want to consider the number of<br>
components in the add operation. So far we were using the writemask<br>
of the multiplication for this instead, which is not correct.<br>
---<br>
 src/glsl/nir/nir_opt_peephole_ffma.c | 19 +++++++++----------<br>
 1 file changed, 9 insertions(+), 10 deletions(-)<br>
<br>
diff --git a/src/glsl/nir/nir_opt_peephole_ffma.c b/src/glsl/nir/nir_opt_peephole_ffma.c<br>
index b430eac..798506b 100644<br>
--- a/src/glsl/nir/nir_opt_peephole_ffma.c<br>
+++ b/src/glsl/nir/nir_opt_peephole_ffma.c<br>
@@ -73,7 +73,8 @@ are_all_uses_fadd(nir_ssa_def *def)<br>
 }<br>
<br>
 static nir_alu_instr *<br>
-get_mul_for_src(nir_alu_src *src, uint8_t swizzle[4], bool *negate, bool *abs)<br>
+get_mul_for_src(nir_alu_src *src, int num_components,<br>
+                uint8_t swizzle[4], bool *negate, bool *abs)<br>
 {<br>
    assert(src->src.is_ssa && !src->abs && !src->negate);<br>
<br>
@@ -85,16 +86,16 @@ get_mul_for_src(nir_alu_src *src, uint8_t swizzle[4], bool *negate, bool *abs)<br>
    switch (alu->op) {<br>
    case nir_op_imov:<br>
    case nir_op_fmov:<br>
-      alu = get_mul_for_src(&alu->src[0], swizzle, negate, abs);<br>
+      alu = get_mul_for_src(&alu->src[0], num_components, swizzle, negate, abs);<br>
       break;<br>
<br>
    case nir_op_fneg:<br>
-      alu = get_mul_for_src(&alu->src[0], swizzle, negate, abs);<br>
+      alu = get_mul_for_src(&alu->src[0], num_components, swizzle, negate, abs);<br>
       *negate = !*negate;<br>
       break;<br>
<br>
    case nir_op_fabs:<br>
-      alu = get_mul_for_src(&alu->src[0], swizzle, negate, abs);<br>
+      alu = get_mul_for_src(&alu->src[0], num_components, swizzle, negate, abs);<br>
       *negate = false;<br>
       *abs = true;<br>
       break;<br>
@@ -115,12 +116,8 @@ get_mul_for_src(nir_alu_src *src, uint8_t swizzle[4], bool *negate, bool *abs)<br>
    if (!alu)<br>
       return NULL;<br>
<br>
-   for (unsigned i = 0; i < 4; i++) {<br>
-      if (!(alu->dest.write_mask & (1 << i)))<br>
-         break;<br>
-<br>
+   for (unsigned i = 0; i < num_components; i++)<br>
       swizzle[i] = swizzle[src->swizzle[i]];<br>
-   }<br>
<br>
    return alu;<br>
 }<br>
@@ -160,7 +157,9 @@ nir_opt_peephole_ffma_block(nir_block *block, void *void_state)<br>
          negate = false;<br>
          abs = false;<br>
<br>
-         mul = get_mul_for_src(&add->src[add_mul_src], swizzle, &negate, &abs);<br>
+         mul = get_mul_for_src(&add->src[add_mul_src],<br>
+                               add->dest.dest.ssa.num_components,<br>
+                               swizzle, &negate, &abs);<br>
<br>
          if (mul != NULL)<br>
             break;<br>
--<br>
1.9.1<br>
<br>
</blockquote></div>