[Mesa-dev] [PATCH 2/2] i965/vec4: Rewrite dead code elimination to use live in/out.
Kenneth Graunke
kenneth at whitecape.org
Sat Nov 8 22:18:12 PST 2014
On Monday, November 03, 2014 01:34:49 PM Matt Turner wrote:
> Improves 359 shaders by >=10%
> 114 shaders by >=20%
> 91 shaders by >=30%
> 82 shaders by >=40%
> 22 shaders by >=50%
> 4 shaders by >=60%
> 2 shaders by >=80%
>
> total instructions in shared programs: 5505182 -> 5482260 (-0.42%)
> instructions in affected programs: 364629 -> 341707 (-6.29%)
> ---
> src/mesa/drivers/dri/i965/Makefile.sources | 1 +
> src/mesa/drivers/dri/i965/brw_vec4.cpp | 155 -------------------
> .../dri/i965/brw_vec4_dead_code_eliminate.cpp | 169 +++++++++++++++++++++
> 3 files changed, 170 insertions(+), 155 deletions(-)
> create mode 100644 src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp
>
> diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
> index 711aabe..10be4f1 100644
> --- a/src/mesa/drivers/dri/i965/Makefile.sources
> +++ b/src/mesa/drivers/dri/i965/Makefile.sources
> @@ -102,6 +102,7 @@ i965_FILES = \
> brw_vec4.cpp \
> brw_vec4_copy_propagation.cpp \
> brw_vec4_cse.cpp \
> + brw_vec4_dead_code_eliminate.cpp \
> brw_vec4_generator.cpp \
> brw_vec4_gs_visitor.cpp \
> brw_vec4_live_variables.cpp \
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> index df589b8..6560351 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
> @@ -411,161 +411,6 @@ vec4_visitor::opt_reduce_swizzle()
> return progress;
> }
>
> -static bool
> -try_eliminate_instruction(vec4_instruction *inst, int new_writemask,
> - const struct brw_context *brw)
> -{
> - if (inst->has_side_effects())
> - return false;
> -
> - if (new_writemask == 0) {
> - /* Don't dead code eliminate instructions that write to the
> - * accumulator as a side-effect. Instead just set the destination
> - * to the null register to free it.
> - */
> - if (inst->writes_accumulator || inst->writes_flag()) {
> - inst->dst = dst_reg(retype(brw_null_reg(), inst->dst.type));
> - } else {
> - inst->opcode = BRW_OPCODE_NOP;
> - }
> -
> - return true;
> - } else if (inst->dst.writemask != new_writemask) {
> - switch (inst->opcode) {
> - case SHADER_OPCODE_TXF_CMS:
> - case SHADER_OPCODE_GEN4_SCRATCH_READ:
> - case VS_OPCODE_PULL_CONSTANT_LOAD:
> - case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
> - break;
> - default:
> - /* Do not set a writemask on Gen6 for math instructions, those are
> - * executed using align1 mode that does not support a destination mask.
> - */
> - if (!(brw->gen == 6 && inst->is_math()) && !inst->is_tex()) {
> - inst->dst.writemask = new_writemask;
> - return true;
> - }
> - }
> - }
> -
> - return false;
> -}
> -
> -/**
> - * Must be called after calculate_live_intervals() to remove unused
> - * writes to registers -- register allocation will fail otherwise
> - * because something deffed but not used won't be considered to
> - * interfere with other regs.
> - */
> -bool
> -vec4_visitor::dead_code_eliminate()
> -{
> - bool progress = false;
> - int pc = -1;
> -
> - calculate_live_intervals();
> -
> - foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
> - pc++;
> -
> - bool inst_writes_flag = false;
> - if (inst->dst.file != GRF) {
> - if (inst->dst.is_null() && inst->writes_flag()) {
> - inst_writes_flag = true;
> - } else {
> - continue;
> - }
> - }
> -
> - if (inst->dst.file == GRF) {
> - int write_mask = inst->dst.writemask;
> -
> - for (int c = 0; c < 4; c++) {
> - if (write_mask & (1 << c)) {
> - assert(this->virtual_grf_end[inst->dst.reg * 4 + c] >= pc);
> - if (this->virtual_grf_end[inst->dst.reg * 4 + c] == pc) {
> - write_mask &= ~(1 << c);
> - }
> - }
> - }
> -
> - progress = try_eliminate_instruction(inst, write_mask, brw) ||
> - progress;
> - }
> -
> - if (inst->predicate || inst->prev == NULL)
> - continue;
> -
> - int dead_channels;
> - if (inst_writes_flag) {
> -/* Arbitrarily chosen, other than not being an xyzw writemask. */
> -#define FLAG_WRITEMASK (1 << 5)
> - dead_channels = inst->reads_flag() ? 0 : FLAG_WRITEMASK;
> - } else {
> - dead_channels = inst->dst.writemask;
> -
> - for (int i = 0; i < 3; i++) {
> - if (inst->src[i].file != GRF ||
> - inst->src[i].reg != inst->dst.reg)
> - continue;
> -
> - for (int j = 0; j < 4; j++) {
> - int swiz = BRW_GET_SWZ(inst->src[i].swizzle, j);
> - dead_channels &= ~(1 << swiz);
> - }
> - }
> - }
> -
> - foreach_inst_in_block_reverse_starting_from(vec4_instruction, scan_inst,
> - inst, block) {
> - if (dead_channels == 0)
> - break;
> -
> - if (inst_writes_flag) {
> - if (scan_inst->dst.is_null() && scan_inst->writes_flag()) {
> - scan_inst->opcode = BRW_OPCODE_NOP;
> - progress = true;
> - continue;
> - } else if (scan_inst->reads_flag()) {
> - break;
> - }
> - }
> -
> - if (inst->dst.file == scan_inst->dst.file &&
> - inst->dst.reg == scan_inst->dst.reg &&
> - inst->dst.reg_offset == scan_inst->dst.reg_offset) {
> - int new_writemask = scan_inst->dst.writemask & ~dead_channels;
> -
> - progress = try_eliminate_instruction(scan_inst, new_writemask, brw) ||
> - progress;
> - }
> -
> - for (int i = 0; i < 3; i++) {
> - if (scan_inst->src[i].file != inst->dst.file ||
> - scan_inst->src[i].reg != inst->dst.reg)
> - continue;
> -
> - for (int j = 0; j < 4; j++) {
> - int swiz = BRW_GET_SWZ(scan_inst->src[i].swizzle, j);
> - dead_channels &= ~(1 << swiz);
> - }
> - }
> - }
> - }
> -
> - if (progress) {
> - foreach_block_and_inst_safe (block, backend_instruction, inst, cfg) {
> - if (inst->opcode == BRW_OPCODE_NOP) {
> - inst->remove(block);
> - }
> - }
> -
> - invalidate_live_intervals();
> - }
> -
> - return progress;
> -}
> -
> void
> vec4_visitor::split_uniform_registers()
> {
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp
> new file mode 100644
> index 0000000..b8370ba
> --- /dev/null
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_dead_code_eliminate.cpp
> @@ -0,0 +1,169 @@
> +/*
> + * Copyright © 2014 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + */
> +
> +#include "brw_vec4.h"
> +#include "brw_vec4_live_variables.h"
> +#include "brw_cfg.h"
> +
> +/** @file brw_vec4_dead_code_eliminate.cpp
> + *
> + * Dataflow-aware dead code elimination.
> + *
> + * Walks the instruction list from the bottom, removing instructions that
> + * have results that both aren't used in later blocks and haven't been read
> + * yet in the tail end of this block.
> + */
> +
> +using namespace brw;
> +
> +static bool
> +can_do_writemask(const struct brw_context *brw,
> + const vec4_instruction *inst)
> +{
> + switch (inst->opcode) {
> + case SHADER_OPCODE_GEN4_SCRATCH_READ:
> + case VS_OPCODE_PULL_CONSTANT_LOAD:
> + case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
> + return false;
> + default:
> + /* The MATH instruction on Gen6 only executes in align1 mode, which does
> + * not support writemasking.
> + */
> + if (brw->gen == 6 && inst->is_math())
> + return false;
> +
> + if (inst->is_tex())
> + return false;
I'd feel a lot more confident in this function if it were:
{
/* The MATH instruction on Gen6 only executes in align1 mode, which does
* not support writemasking.
*/
if (brw->gen == 6 && inst->is_math())
return false;
return inst->mlen == 0;
}
> +
> + return true;
> + }
> +}
> +
> +bool
> +vec4_visitor::dead_code_eliminate()
> +{
> + bool progress = false;
> +
> + calculate_live_intervals();
> +
> + int num_vars = live_intervals->num_vars;
> + BITSET_WORD *live = ralloc_array(NULL, BITSET_WORD, BITSET_WORDS(num_vars));
> + BITSET_WORD *flag_live = ralloc_array(NULL, BITSET_WORD, 1);
> +
> + foreach_block(block, cfg) {
> + memcpy(live, live_intervals->block_data[block->num].liveout,
> + sizeof(BITSET_WORD) * BITSET_WORDS(num_vars));
> + memcpy(flag_live, live_intervals->block_data[block->num].flag_liveout,
> + sizeof(BITSET_WORD));
> +
> + foreach_inst_in_block_reverse(vec4_instruction, inst, block) {
> + if (inst->dst.file == GRF && !inst->has_side_effects()) {
> + bool result_live[4] = { false };
> +
> + for (int c = 0; c < 4; c++) {
> + int var = inst->dst.reg * 4 + c;
> + result_live[c] = BITSET_TEST(live, var);
> + }
> +
> + /* If the instruction can't do writemasking, then it's all or
> + * nothing.
> + */
> + if (!can_do_writemask(brw, inst)) {
> + bool result = result_live[0] | result_live[1] |
> + result_live[2] | result_live[3];
A little strange to be doing bitwise-or on bools, but...it's not wrong...
> + result_live[0] = result;
> + result_live[1] = result;
> + result_live[2] = result;
> + result_live[3] = result;
> + }
> +
> + for (int c = 0; c < 4; c++) {
> + if (!result_live[c] && inst->dst.writemask & (1 << c)) {
> + inst->dst.writemask &= ~(1 << c);
> + progress = true;
> +
> + if (inst->dst.writemask == 0) {
> + if (inst->writes_accumulator) {
Same comment I had about the FS pass. I think you want:
if (inst->writes_accumulator || inst->writes_flag())
Otherwise, we stop generating the flag, which may still be necessary (we haven't checked).
With can_do_writemask changed, and this flag bug fixed, the patch would be:
Reviewed-by: Kenneth Graunke <kenneth at whitecape.org>
Great work as always.
> + inst->dst = dst_reg(retype(brw_null_reg(), inst->dst.type));
> + } else {
> + inst->opcode = BRW_OPCODE_NOP;
> + continue;
> + }
> + }
> + }
> + }
> + }
> +
> + if (inst->dst.is_null() && inst->writes_flag()) {
> + if (!BITSET_TEST(flag_live, 0)) {
> + inst->opcode = BRW_OPCODE_NOP;
> + progress = true;
> + continue;
> + }
> + }
> +
> + if (inst->dst.file == GRF && !inst->predicate) {
> + for (int c = 0; c < 4; c++) {
> + if (inst->dst.writemask & (1 << c)) {
> + int var = inst->dst.reg * 4 + c;
> + BITSET_CLEAR(live, var);
> + }
> + }
> + }
> +
> + if (inst->writes_flag()) {
> + BITSET_CLEAR(flag_live, 0);
> + }
> +
> + for (int i = 0; i < 3; i++) {
> + if (inst->src[i].file == GRF) {
> + for (int c = 0; c < 4; c++) {
> + int swiz = BRW_GET_SWZ(inst->src[i].swizzle, c);
> + int var = inst->src[i].reg * 4 + swiz;
> +
> + BITSET_SET(live, var);
> + }
> + }
> + }
> +
> + if (inst->reads_flag()) {
> + BITSET_SET(flag_live, 0);
> + }
> + }
> + }
> +
> + ralloc_free(live);
> + ralloc_free(flag_live);
> +
> + if (progress) {
> + foreach_block_and_inst_safe(block, backend_instruction, inst, cfg) {
> + if (inst->opcode == BRW_OPCODE_NOP) {
> + inst->remove(block);
> + }
> + }
> +
> + invalidate_live_intervals();
> + }
> +
> + return progress;
> +}
>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 819 bytes
Desc: This is a digitally signed message part.
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20141108/6c32a1c1/attachment-0001.sig>
More information about the mesa-dev
mailing list