[Mesa-dev] [PATCH 1/4] i965/vec4: Add basic common subexpression elimination.

Matt Turner mattst88 at gmail.com
Mon Jun 30 12:06:56 PDT 2014


On Mon, Jun 30, 2014 at 11:49 AM, Ian Romanick <idr at freedesktop.org> wrote:
> On 06/25/2014 02:12 PM, Matt Turner wrote:
>> From: Kenneth Graunke <kenneth at whitecape.org>
>>
>> [mattst88]: Modified to perform CSE on instructions with
>>             the same writemask. Offered no improvement before.
>>
>> total instructions in shared programs: 1995633 -> 1995185 (-0.02%)
>> instructions in affected programs:     14410 -> 13962 (-3.11%)
>>
>> Reviewed-by: Matt Turner <mattst88 at gmail.com>
>> Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
>> ---
>>  src/mesa/drivers/dri/i965/Makefile.sources |   1 +
>>  src/mesa/drivers/dri/i965/brw_vec4.cpp     |   1 +
>>  src/mesa/drivers/dri/i965/brw_vec4.h       |   2 +
>>  src/mesa/drivers/dri/i965/brw_vec4_cse.cpp | 237 +++++++++++++++++++++++++++++
>>  4 files changed, 241 insertions(+)
>>  create mode 100644 src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
>>
>> diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
>> index 2570059..8f1d272 100644
>> --- a/src/mesa/drivers/dri/i965/Makefile.sources
>> +++ b/src/mesa/drivers/dri/i965/Makefile.sources
>> @@ -102,6 +102,7 @@ i965_FILES = \
>>       brw_util.c \
>>       brw_vec4.cpp \
>>       brw_vec4_copy_propagation.cpp \
>> +     brw_vec4_cse.cpp \
>>       brw_vec4_generator.cpp \
>>       brw_vec4_gs.c \
>>       brw_vec4_gs_visitor.cpp \
>> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
>> index 24903f9..0d57399 100644
>> --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
>> @@ -1747,6 +1747,7 @@ vec4_visitor::run()
>>        progress = dead_control_flow_eliminate(this) || progress;
>>        progress = opt_copy_propagation() || progress;
>>        progress = opt_algebraic() || progress;
>> +      progress = opt_cse() || progress;
>>        progress = opt_register_coalesce() || progress;
>>     } while (progress);
>>
>> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
>> index 366198c..4a8eabb 100644
>> --- a/src/mesa/drivers/dri/i965/brw_vec4.h
>> +++ b/src/mesa/drivers/dri/i965/brw_vec4.h
>> @@ -426,6 +426,8 @@ public:
>>     bool dead_code_eliminate();
>>     bool virtual_grf_interferes(int a, int b);
>>     bool opt_copy_propagation();
>> +   bool opt_cse_local(bblock_t *, exec_list *);
>> +   bool opt_cse();
>>     bool opt_algebraic();
>>     bool opt_register_coalesce();
>>     void opt_set_dependency_control();
>> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
>> new file mode 100644
>> index 0000000..33c7430
>> --- /dev/null
>> +++ b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
>> @@ -0,0 +1,237 @@
>> +/*
>> + * Copyright © 2012, 2013, 2014 Intel Corporation
>> + *
>> + * Permission is hereby granted, free of charge, to any person obtaining a
>> + * copy of this software and associated documentation files (the "Software"),
>> + * to deal in the Software without restriction, including without limitation
>> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
>> + * and/or sell copies of the Software, and to permit persons to whom the
>> + * Software is furnished to do so, subject to the following conditions:
>> + *
>> + * The above copyright notice and this permission notice (including the next
>> + * paragraph) shall be included in all copies or substantial portions of the
>> + * Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
>> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
>> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
>> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
>> + * IN THE SOFTWARE.
>> + */
>> +
>> +#include "brw_vec4.h"
>> +#include "brw_cfg.h"
>> +
>> +using namespace brw;
>> +
>> +/** @file brw_vec4_cse.cpp
>> + *
>> + * Support for local common subexpression elimination.
>> + *
>> + * See Muchnick's Advanced Compiler Design and Implementation, section
>> + * 13.1 (p378).
>> + */
>> +
>> +namespace {
>> +struct aeb_entry : public exec_node {
>> +   /** The instruction that generates the expression value. */
>> +   vec4_instruction *generator;
>> +
>> +   /** The temporary where the value is stored. */
>> +   src_reg tmp;
>> +};
>> +}
>> +
>> +static bool
>> +is_expression(const vec4_instruction *const inst)
>
> is_expression seems like a weird name for this.  It's not obvious to a
> noob what this is doing.

Yeah, sort of is. I'd like to change this one and the fs CSE
implementation in sync.

Not sure what else to call it. can_cse_inst, something else?

>> +{
>> +   switch (inst->opcode) {
>> +   case BRW_OPCODE_SEL:
>> +   case BRW_OPCODE_NOT:
>> +   case BRW_OPCODE_AND:
>> +   case BRW_OPCODE_OR:
>> +   case BRW_OPCODE_XOR:
>> +   case BRW_OPCODE_SHR:
>> +   case BRW_OPCODE_SHL:
>> +   case BRW_OPCODE_ASR:
>> +   case BRW_OPCODE_ADD:
>> +   case BRW_OPCODE_MUL:
>> +   case BRW_OPCODE_FRC:
>> +   case BRW_OPCODE_RNDU:
>> +   case BRW_OPCODE_RNDD:
>> +   case BRW_OPCODE_RNDE:
>> +   case BRW_OPCODE_RNDZ:
>> +   case BRW_OPCODE_LINE:
>> +   case BRW_OPCODE_PLN:
>> +   case BRW_OPCODE_MAD:
>> +   case BRW_OPCODE_LRP:
>> +      return true;
>> +   case SHADER_OPCODE_RCP:
>> +   case SHADER_OPCODE_RSQ:
>> +   case SHADER_OPCODE_SQRT:
>> +   case SHADER_OPCODE_EXP2:
>> +   case SHADER_OPCODE_LOG2:
>> +   case SHADER_OPCODE_POW:
>> +   case SHADER_OPCODE_INT_QUOTIENT:
>> +   case SHADER_OPCODE_INT_REMAINDER:
>> +   case SHADER_OPCODE_SIN:
>> +   case SHADER_OPCODE_COS:
>> +      return inst->mlen == 0;
>> +   default:
>> +      return false;
>> +   }
>> +}
>> +
>> +static bool
>> +is_expression_commutative(enum opcode op)
>> +{
>> +   switch (op) {
>> +   case BRW_OPCODE_AND:
>> +   case BRW_OPCODE_OR:
>> +   case BRW_OPCODE_XOR:
>> +   case BRW_OPCODE_ADD:
>> +   case BRW_OPCODE_MUL:
>> +      return true;
>> +   default:
>> +      return false;
>> +   }
>> +}
>> +
>> +static bool
>> +operands_match(enum opcode op, src_reg *xs, src_reg *ys)
>> +{
>> +   if (!is_expression_commutative(op)) {
>> +      return xs[0].equals(ys[0]) && xs[1].equals(ys[1]) && xs[2].equals(ys[2]);
>> +   } else {
>> +      return (xs[0].equals(ys[0]) && xs[1].equals(ys[1])) ||
>> +             (xs[1].equals(ys[0]) && xs[0].equals(ys[1]));
>> +   }
>> +}
>> +
>> +static bool
>> +instructions_match(vec4_instruction *a, vec4_instruction *b)
>> +{
>> +   return a->opcode == b->opcode &&
>> +          a->saturate == b->saturate &&
>> +          a->conditional_mod == b->conditional_mod &&
>> +          a->dst.type == b->dst.type &&
>> +          a->dst.writemask == b->dst.writemask &&
>> +          operands_match(a->opcode, a->src, b->src);
>> +}
>> +
>> +bool
>> +vec4_visitor::opt_cse_local(bblock_t *block, exec_list *aeb)
>> +{
>> +   bool progress = false;
>> +
>> +   void *cse_ctx = ralloc_context(NULL);
>> +
>> +   for (vec4_instruction *inst = (vec4_instruction *)block->start;
>> +        inst != block->end->next;
>> +        inst = (vec4_instruction *) inst->next) {
>
> Are you planning to rebase this on your foreach_inst_in_block patch?

Yes.

>> +
>> +      /* Skip some cases. */
>> +      if (is_expression(inst) && !inst->predicate && inst->mlen == 0 &&
>
> Doesn't is_expression already check inst->mlen == 0 for the cases that
> count?

Yeah, looks like it. I'll remove the check here.

>> +          !inst->conditional_mod)
>> +      {
>
> { should go on previous line.

Okay.


More information about the mesa-dev mailing list