[Mesa-dev] [PATCH 1/4] i965/vec4: Add basic common subexpression elimination.
Matt Turner
mattst88 at gmail.com
Mon Jun 30 12:06:56 PDT 2014
On Mon, Jun 30, 2014 at 11:49 AM, Ian Romanick <idr at freedesktop.org> wrote:
> On 06/25/2014 02:12 PM, Matt Turner wrote:
>> From: Kenneth Graunke <kenneth at whitecape.org>
>>
>> [mattst88]: Modified to perform CSE on instructions with
>> the same writemask. Offered no improvement before.
>>
>> total instructions in shared programs: 1995633 -> 1995185 (-0.02%)
>> instructions in affected programs: 14410 -> 13962 (-3.11%)
>>
>> Reviewed-by: Matt Turner <mattst88 at gmail.com>
>> Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
>> ---
>> src/mesa/drivers/dri/i965/Makefile.sources | 1 +
>> src/mesa/drivers/dri/i965/brw_vec4.cpp | 1 +
>> src/mesa/drivers/dri/i965/brw_vec4.h | 2 +
>> src/mesa/drivers/dri/i965/brw_vec4_cse.cpp | 237 +++++++++++++++++++++++++++++
>> 4 files changed, 241 insertions(+)
>> create mode 100644 src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
>>
>> diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
>> index 2570059..8f1d272 100644
>> --- a/src/mesa/drivers/dri/i965/Makefile.sources
>> +++ b/src/mesa/drivers/dri/i965/Makefile.sources
>> @@ -102,6 +102,7 @@ i965_FILES = \
>> brw_util.c \
>> brw_vec4.cpp \
>> brw_vec4_copy_propagation.cpp \
>> + brw_vec4_cse.cpp \
>> brw_vec4_generator.cpp \
>> brw_vec4_gs.c \
>> brw_vec4_gs_visitor.cpp \
>> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
>> index 24903f9..0d57399 100644
>> --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
>> @@ -1747,6 +1747,7 @@ vec4_visitor::run()
>> progress = dead_control_flow_eliminate(this) || progress;
>> progress = opt_copy_propagation() || progress;
>> progress = opt_algebraic() || progress;
>> + progress = opt_cse() || progress;
>> progress = opt_register_coalesce() || progress;
>> } while (progress);
>>
>> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
>> index 366198c..4a8eabb 100644
>> --- a/src/mesa/drivers/dri/i965/brw_vec4.h
>> +++ b/src/mesa/drivers/dri/i965/brw_vec4.h
>> @@ -426,6 +426,8 @@ public:
>> bool dead_code_eliminate();
>> bool virtual_grf_interferes(int a, int b);
>> bool opt_copy_propagation();
>> + bool opt_cse_local(bblock_t *, exec_list *);
>> + bool opt_cse();
>> bool opt_algebraic();
>> bool opt_register_coalesce();
>> void opt_set_dependency_control();
>> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
>> new file mode 100644
>> index 0000000..33c7430
>> --- /dev/null
>> +++ b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
>> @@ -0,0 +1,237 @@
>> +/*
>> + * Copyright © 2012, 2013, 2014 Intel Corporation
>> + *
>> + * Permission is hereby granted, free of charge, to any person obtaining a
>> + * copy of this software and associated documentation files (the "Software"),
>> + * to deal in the Software without restriction, including without limitation
>> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
>> + * and/or sell copies of the Software, and to permit persons to whom the
>> + * Software is furnished to do so, subject to the following conditions:
>> + *
>> + * The above copyright notice and this permission notice (including the next
>> + * paragraph) shall be included in all copies or substantial portions of the
>> + * Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
>> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
>> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
>> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
>> + * IN THE SOFTWARE.
>> + */
>> +
>> +#include "brw_vec4.h"
>> +#include "brw_cfg.h"
>> +
>> +using namespace brw;
>> +
>> +/** @file brw_vec4_cse.cpp
>> + *
>> + * Support for local common subexpression elimination.
>> + *
>> + * See Muchnick's Advanced Compiler Design and Implementation, section
>> + * 13.1 (p378).
>> + */
>> +
>> +namespace {
>> +struct aeb_entry : public exec_node {
>> + /** The instruction that generates the expression value. */
>> + vec4_instruction *generator;
>> +
>> + /** The temporary where the value is stored. */
>> + src_reg tmp;
>> +};
>> +}
>> +
>> +static bool
>> +is_expression(const vec4_instruction *const inst)
>
> is_expression seems like a weird name for this. It's not obvious to a
> noob what this is doing.
Yeah, sort of is. I'd like to change this one and the fs CSE
implementation in sync.
Not sure what else to call it. can_cse_inst, something else?
>> +{
>> + switch (inst->opcode) {
>> + case BRW_OPCODE_SEL:
>> + case BRW_OPCODE_NOT:
>> + case BRW_OPCODE_AND:
>> + case BRW_OPCODE_OR:
>> + case BRW_OPCODE_XOR:
>> + case BRW_OPCODE_SHR:
>> + case BRW_OPCODE_SHL:
>> + case BRW_OPCODE_ASR:
>> + case BRW_OPCODE_ADD:
>> + case BRW_OPCODE_MUL:
>> + case BRW_OPCODE_FRC:
>> + case BRW_OPCODE_RNDU:
>> + case BRW_OPCODE_RNDD:
>> + case BRW_OPCODE_RNDE:
>> + case BRW_OPCODE_RNDZ:
>> + case BRW_OPCODE_LINE:
>> + case BRW_OPCODE_PLN:
>> + case BRW_OPCODE_MAD:
>> + case BRW_OPCODE_LRP:
>> + return true;
>> + case SHADER_OPCODE_RCP:
>> + case SHADER_OPCODE_RSQ:
>> + case SHADER_OPCODE_SQRT:
>> + case SHADER_OPCODE_EXP2:
>> + case SHADER_OPCODE_LOG2:
>> + case SHADER_OPCODE_POW:
>> + case SHADER_OPCODE_INT_QUOTIENT:
>> + case SHADER_OPCODE_INT_REMAINDER:
>> + case SHADER_OPCODE_SIN:
>> + case SHADER_OPCODE_COS:
>> + return inst->mlen == 0;
>> + default:
>> + return false;
>> + }
>> +}
>> +
>> +static bool
>> +is_expression_commutative(enum opcode op)
>> +{
>> + switch (op) {
>> + case BRW_OPCODE_AND:
>> + case BRW_OPCODE_OR:
>> + case BRW_OPCODE_XOR:
>> + case BRW_OPCODE_ADD:
>> + case BRW_OPCODE_MUL:
>> + return true;
>> + default:
>> + return false;
>> + }
>> +}
>> +
>> +static bool
>> +operands_match(enum opcode op, src_reg *xs, src_reg *ys)
>> +{
>> + if (!is_expression_commutative(op)) {
>> + return xs[0].equals(ys[0]) && xs[1].equals(ys[1]) && xs[2].equals(ys[2]);
>> + } else {
>> + return (xs[0].equals(ys[0]) && xs[1].equals(ys[1])) ||
>> + (xs[1].equals(ys[0]) && xs[0].equals(ys[1]));
>> + }
>> +}
>> +
>> +static bool
>> +instructions_match(vec4_instruction *a, vec4_instruction *b)
>> +{
>> + return a->opcode == b->opcode &&
>> + a->saturate == b->saturate &&
>> + a->conditional_mod == b->conditional_mod &&
>> + a->dst.type == b->dst.type &&
>> + a->dst.writemask == b->dst.writemask &&
>> + operands_match(a->opcode, a->src, b->src);
>> +}
>> +
>> +bool
>> +vec4_visitor::opt_cse_local(bblock_t *block, exec_list *aeb)
>> +{
>> + bool progress = false;
>> +
>> + void *cse_ctx = ralloc_context(NULL);
>> +
>> + for (vec4_instruction *inst = (vec4_instruction *)block->start;
>> + inst != block->end->next;
>> + inst = (vec4_instruction *) inst->next) {
>
> Are you planning to rebase this on your foreach_inst_in_block patch?
Yes.
>> +
>> + /* Skip some cases. */
>> + if (is_expression(inst) && !inst->predicate && inst->mlen == 0 &&
>
> Doesn't is_expression already check inst->mlen == 0 for the cases that
> count?
Yeah, looks like it. I'll remove the check here.
>> + !inst->conditional_mod)
>> + {
>
> { should go on previous line.
Okay.
More information about the mesa-dev
mailing list