<p dir="ltr"><br>
On Apr 8, 2015 4:38 PM, "Ian Romanick" <<a href="mailto:idr@freedesktop.org">idr@freedesktop.org</a>> wrote:<br>
><br>
> From: Ian Romanick <<a href="mailto:ian.d.romanick@intel.com">ian.d.romanick@intel.com</a>><br>
><br>
> This is similar to commit (47c4b38: i965/fs: Allow CSE to handle MULs<br>
> with negated arguments.), but it uses a slightly different approach.<br>
><br>
> Shader-db results:<br>
><br>
> GM45:<br>
> total instructions in shared programs: 4060813 -> 4060151 (-0.02%)<br>
> instructions in affected programs:     13448 -> 12786 (-4.92%)<br>
> helped:                                62<br>
> HURT:                                  9<br>
><br>
> All other results, except Broadwell, were identical to GM45 w/o NIR.<br>
> Since NIR isn't used for VEC4, this makes sense.<br>
><br>
> Broadwell:<br>
> total instructions in shared programs: 7284561 -> 7284540 (-0.00%)<br>
> instructions in affected programs:     1272 -> 1251 (-1.65%)<br>
> helped:                                12<br>
><br>
> Broadwell NIR:<br>
> total instructions in shared programs: 7500487 -> 7500487 (0.00%)<br>
> instructions in affected programs:     0 -> 0</p>
<p dir="ltr">The Broadwell numbers aren't interesting. The only reason they are different is that we use scalar for GLSL programs and vec4 for ARB.  With NIR we use scalar for everything, so it doesn't even touch this patch.</p>
<p dir="ltr">I don't know much about the vec4 backend, so take this with a grain of salt, but you can call it</p>
<p dir="ltr">Reviewed-by: Jason Ekstrand <<a href="mailto:jason.ekstrand@intel.com">jason.ekstrand@intel.com</a>></p>
<p dir="ltr">> Signed-off-by: Ian Romanick <<a href="mailto:ian.d.romanick@intel.com">ian.d.romanick@intel.com</a>><br>
> ---<br>
>  src/mesa/drivers/dri/i965/brw_vec4_cse.cpp | 32 +++++++++++++++++++++++++-----<br>
>  1 file changed, 27 insertions(+), 5 deletions(-)<br>
><br>
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp<br>
> index 100e511..49b50a7 100644<br>
> --- a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp<br>
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp<br>
> @@ -90,15 +90,34 @@ is_expression(const vec4_instruction *const inst)<br>
>  }<br>
><br>
>  static bool<br>
> -operands_match(const vec4_instruction *a, const vec4_instruction *b)<br>
> +operands_match(const vec4_instruction *a, const vec4_instruction *b,<br>
> +               bool *negate)<br>
>  {<br>
>     const src_reg *xs = a->src;<br>
>     const src_reg *ys = b->src;<br>
><br>
> +   *negate = false;<br>
> +<br>
>     if (a->opcode == BRW_OPCODE_MAD) {<br>
>        return xs[0].equals(ys[0]) &&<br>
>               ((xs[1].equals(ys[1]) && xs[2].equals(ys[2])) ||<br>
>                (xs[2].equals(ys[1]) && xs[1].equals(ys[2])));<br>
> +   } else if (a->opcode == BRW_OPCODE_MUL) {<br>
> +      if ((xs[0].equals(ys[0]) && xs[1].equals(ys[1])) ||<br>
> +          (xs[1].equals(ys[0]) && xs[0].equals(ys[1])) ||<br>
> +          (xs[0].negative_equals(ys[0]) && xs[1].negative_equals(ys[1])) ||<br>
> +          (xs[1].negative_equals(ys[0]) && xs[0].negative_equals(ys[1])))<br>
> +         return true;<br>
> +<br>
> +      if ((xs[0].equals(ys[0]) && xs[1].negative_equals(ys[1])) ||<br>
> +          (xs[1].equals(ys[0]) && xs[0].negative_equals(ys[1])) ||<br>
> +          (xs[0].negative_equals(ys[0]) && xs[1].equals(ys[1])) ||<br>
> +          (xs[1].negative_equals(ys[0]) && xs[0].equals(ys[1]))) {<br>
> +         *negate = true;<br>
> +         return true;<br>
> +      }<br>
> +<br>
> +      return false;<br>
>     } else if (!a->is_commutative()) {<br>
>        return xs[0].equals(ys[0]) && xs[1].equals(ys[1]) && xs[2].equals(ys[2]);<br>
>     } else {<br>
> @@ -108,7 +127,7 @@ operands_match(const vec4_instruction *a, const vec4_instruction *b)<br>
>  }<br>
><br>
>  static bool<br>
> -instructions_match(vec4_instruction *a, vec4_instruction *b)<br>
> +instructions_match(vec4_instruction *a, vec4_instruction *b, bool *negate)<br>
>  {<br>
>     return a->opcode == b->opcode &&<br>
>            a->saturate == b->saturate &&<br>
> @@ -117,7 +136,7 @@ instructions_match(vec4_instruction *a, vec4_instruction *b)<br>
>            a->dst.writemask == b->dst.writemask &&<br>
>            a->force_writemask_all == b->force_writemask_all &&<br>
>            a->regs_written == b->regs_written &&<br>
> -          operands_match(a, b);<br>
> +          operands_match(a, b, negate);<br>
>  }<br>
><br>
>  bool<br>
> @@ -135,11 +154,12 @@ vec4_visitor::opt_cse_local(bblock_t *block)<br>
>            (inst->dst.file != HW_REG || inst->dst.is_null()))<br>
>        {<br>
>           bool found = false;<br>
> +         bool negate;<br>
><br>
>           foreach_in_list_use_after(aeb_entry, entry, &aeb) {<br>
>              /* Match current instruction's expression against those in AEB. */<br>
>              if (!(entry->generator->dst.is_null() && !inst->dst.is_null()) &&<br>
> -                instructions_match(inst, entry->generator)) {<br>
> +                instructions_match(inst, entry->generator, &negate)) {<br>
>                 found = true;<br>
>                 progress = true;<br>
>                 break;<br>
> @@ -186,6 +206,7 @@ vec4_visitor::opt_cse_local(bblock_t *block)<br>
>                    vec4_instruction *copy = MOV(offset(inst->dst, i),<br>
>                                                 offset(entry->tmp, i));<br>
>                    copy->force_writemask_all = inst->force_writemask_all;<br>
> +                  copy->src[0].negate = negate;<br>
>                    inst->insert_before(block, copy);<br>
>                 }<br>
>              }<br>
> @@ -206,9 +227,10 @@ vec4_visitor::opt_cse_local(bblock_t *block)<br>
>            * the flag register if we just wrote it.<br>
>            */<br>
>           if (inst->writes_flag()) {<br>
> +            bool negate; /* dummy */<br>
>              if (entry->generator->reads_flag() ||<br>
>                  (entry->generator->writes_flag() &&<br>
> -                 !instructions_match(inst, entry->generator))) {<br>
> +                 !instructions_match(inst, entry->generator, &negate))) {<br>
>                 entry->remove();<br>
>                 ralloc_free(entry);<br>
>                 continue;<br>
> --<br>
> 2.1.0<br>
><br>
> _______________________________________________<br>
> mesa-dev mailing list<br>
> <a href="mailto:mesa-dev@lists.freedesktop.org">mesa-dev@lists.freedesktop.org</a><br>
> <a href="http://lists.freedesktop.org/mailman/listinfo/mesa-dev">http://lists.freedesktop.org/mailman/listinfo/mesa-dev</a><br>
</p>