[Mesa-dev] [PATCH 09/10] i965/fs: Move constant propagation to the same codebase as copy prop.
Kenneth Graunke
kenneth at whitecape.org
Sun Oct 7 18:52:25 PDT 2012
On 09/22/2012 02:04 PM, Eric Anholt wrote:
> This means that we don't get constant prop across into the first block after a
> BRW_OPCODE_IF or a BRW_OPCODE_DO, but we have hope for properly doing it
> across control flow at some point. More importantly, it avoids the O(n^2)
> with instruction count runtime for shaders that have many constant moves.
> ---
> src/mesa/drivers/dri/i965/brw_fs.cpp | 163 --------------------
> src/mesa/drivers/dri/i965/brw_fs.h | 2 +-
> .../drivers/dri/i965/brw_fs_copy_propagation.cpp | 124 ++++++++++++++-
> 3 files changed, 124 insertions(+), 165 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
> index 2701413..0545a74 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
> @@ -1249,168 +1249,6 @@ fs_visitor::setup_pull_constants()
> c->prog_data.nr_pull_params = pull_uniform_count;
> }
>
> -/**
> - * Attempts to move immediate constants into the immediate
> - * constant slot of following instructions.
> - *
> - * Immediate constants are a bit tricky -- they have to be in the last
> - * operand slot, you can't do abs/negate on them,
> - */
> -
> -bool
> -fs_visitor::propagate_constants()
> -{
> - bool progress = false;
> -
> - calculate_live_intervals();
> -
> - foreach_list(node, &this->instructions) {
> - fs_inst *inst = (fs_inst *)node;
> -
> - if (inst->opcode != BRW_OPCODE_MOV ||
> - inst->predicated ||
> - inst->dst.file != GRF || inst->src[0].file != IMM ||
> - inst->dst.type != inst->src[0].type ||
> - (c->dispatch_width == 16 &&
> - (inst->force_uncompressed || inst->force_sechalf)))
> - continue;
> -
> - /* Don't bother with cases where we should have had the
> - * operation on the constant folded in GLSL already.
> - */
> - if (inst->saturate)
> - continue;
> -
> - /* Found a move of a constant to a GRF. Find anything else using the GRF
> - * before it's written, and replace it with the constant if we can.
> - */
> - for (fs_inst *scan_inst = (fs_inst *)inst->next;
> - !scan_inst->is_tail_sentinel();
> - scan_inst = (fs_inst *)scan_inst->next) {
> - if (scan_inst->opcode == BRW_OPCODE_DO ||
> - scan_inst->opcode == BRW_OPCODE_WHILE ||
> - scan_inst->opcode == BRW_OPCODE_ELSE ||
> - scan_inst->opcode == BRW_OPCODE_ENDIF) {
> - break;
> - }
> -
> - for (int i = 2; i >= 0; i--) {
> - if (scan_inst->src[i].file != GRF ||
> - scan_inst->src[i].reg != inst->dst.reg ||
> - scan_inst->src[i].reg_offset != inst->dst.reg_offset)
> - continue;
> -
> - /* Don't bother with cases where we should have had the
> - * operation on the constant folded in GLSL already.
> - */
> - if (scan_inst->src[i].negate || scan_inst->src[i].abs)
> - continue;
> -
> - switch (scan_inst->opcode) {
> - case BRW_OPCODE_MOV:
> - scan_inst->src[i] = inst->src[0];
> - progress = true;
> - break;
> -
> - case BRW_OPCODE_MUL:
> - case BRW_OPCODE_ADD:
> - if (i == 1) {
> - scan_inst->src[i] = inst->src[0];
> - progress = true;
> - } else if (i == 0 && scan_inst->src[1].file != IMM) {
> - /* Fit this constant in by commuting the operands.
> - * Exception: we can't do this for 32-bit integer MUL
> - * because it's asymmetric.
> - */
> - if (scan_inst->opcode == BRW_OPCODE_MUL &&
> - (scan_inst->src[1].type == BRW_REGISTER_TYPE_D ||
> - scan_inst->src[1].type == BRW_REGISTER_TYPE_UD))
> - break;
> - scan_inst->src[0] = scan_inst->src[1];
> - scan_inst->src[1] = inst->src[0];
> - progress = true;
> - }
> - break;
> -
> - case BRW_OPCODE_CMP:
> - case BRW_OPCODE_IF:
> - if (i == 1) {
> - scan_inst->src[i] = inst->src[0];
> - progress = true;
> - } else if (i == 0 && scan_inst->src[1].file != IMM) {
> - uint32_t new_cmod;
> -
> - new_cmod = brw_swap_cmod(scan_inst->conditional_mod);
> - if (new_cmod != ~0u) {
> - /* Fit this constant in by swapping the operands and
> - * flipping the test
> - */
> - scan_inst->src[0] = scan_inst->src[1];
> - scan_inst->src[1] = inst->src[0];
> - scan_inst->conditional_mod = new_cmod;
> - progress = true;
> - }
> - }
> - break;
> -
> - case BRW_OPCODE_SEL:
> - if (i == 1) {
> - scan_inst->src[i] = inst->src[0];
> - progress = true;
> - } else if (i == 0 && scan_inst->src[1].file != IMM) {
> - scan_inst->src[0] = scan_inst->src[1];
> - scan_inst->src[1] = inst->src[0];
> -
> - /* If this was predicated, flipping operands means
> - * we also need to flip the predicate.
> - */
> - if (scan_inst->conditional_mod == BRW_CONDITIONAL_NONE) {
> - scan_inst->predicate_inverse =
> - !scan_inst->predicate_inverse;
> - }
> - progress = true;
> - }
> - break;
> -
> - case SHADER_OPCODE_RCP:
> - /* The hardware doesn't do math on immediate values
> - * (because why are you doing that, seriously?), but
> - * the correct answer is to just constant fold it
> - * anyway.
> - */
> - assert(i == 0);
> - if (inst->src[0].imm.f != 0.0f) {
> - scan_inst->opcode = BRW_OPCODE_MOV;
> - scan_inst->src[0] = inst->src[0];
> - scan_inst->src[0].imm.f = 1.0f / scan_inst->src[0].imm.f;
> - progress = true;
> - }
> - break;
> -
> - case FS_OPCODE_PULL_CONSTANT_LOAD:
> - scan_inst->src[i] = inst->src[0];
> - progress = true;
> - break;
> -
> - default:
> - break;
> - }
> - }
> -
> - if (scan_inst->dst.file == GRF &&
> - scan_inst->overwrites_reg(inst->dst)) {
> - break;
> - }
> - }
> - }
> -
> - if (progress)
> - this->live_intervals_valid = false;
> -
> - return progress;
> -}
> -
> -
> bool
> fs_visitor::opt_algebraic()
> {
> @@ -2025,7 +1863,6 @@ fs_visitor::run()
>
> progress = remove_duplicate_mrf_writes() || progress;
>
> - progress = propagate_constants() || progress;
> progress = opt_algebraic() || progress;
> progress = opt_cse() || progress;
> progress = opt_copy_propagate() || progress;
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
> index 9fbb8e5..59a0e50 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.h
> +++ b/src/mesa/drivers/dri/i965/brw_fs.h
> @@ -240,12 +240,12 @@ public:
> void split_virtual_grfs();
> void setup_pull_constants();
> void calculate_live_intervals();
> - bool propagate_constants();
> bool opt_algebraic();
> bool opt_cse();
> bool opt_cse_local(fs_bblock *block, exec_list *aeb);
> bool opt_copy_propagate();
> bool try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry);
> + bool try_constant_propagate(fs_inst *inst, acp_entry *entry);
> bool opt_copy_propagate_local(void *mem_ctx, fs_bblock *block,
> exec_list *acp);
> bool register_coalesce();
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
> index 1870f43..ad34657 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
> @@ -34,6 +34,9 @@ struct acp_entry : public exec_node {
> bool
> fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
> {
> + if (entry->src.file == IMM)
> + return false;
> +
> if (inst->src[arg].file != entry->dst.file ||
> inst->src[arg].reg != entry->dst.reg ||
> inst->src[arg].reg_offset != entry->dst.reg_offset) {
> @@ -64,6 +67,121 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
> return true;
> }
>
> +
> +bool
> +fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry)
> +{
> + bool progress = false;
> +
> + if (entry->src.file != IMM)
> + return false;
> +
> + for (int i = 2; i >= 0; i--) {
> + if (inst->src[i].file != entry->dst.file ||
> + inst->src[i].reg != entry->dst.reg ||
> + inst->src[i].reg_offset != entry->dst.reg_offset)
> + continue;
> +
> + /* Don't bother with cases where we should have had the
> + * operation on the constant folded in GLSL already.
> + */
Your English doesn't quite flow here. Also, I don't like the
overloading of "GLSL"...it's a language defined by Khronos. (Our
compiler and IR really could use names...)
What about:
/* Don't bother with cases which should have been taken care of
* by the GLSL compiler's constant folding pass.
*/
Tiny nits aside, I like this. It makes sense to share code here...it's
very straightforward, and ought to help us out in the future.
Reviewed-by: Kenneth Graunke <kenneth at whitecape.org>
> + if (inst->src[i].negate || inst->src[i].abs)
> + continue;
> +
> + switch (inst->opcode) {
> + case BRW_OPCODE_MOV:
> + inst->src[i] = entry->src;
> + progress = true;
> + break;
> +
> + case BRW_OPCODE_MUL:
> + case BRW_OPCODE_ADD:
> + if (i == 1) {
> + inst->src[i] = entry->src;
> + progress = true;
> + } else if (i == 0 && inst->src[1].file != IMM) {
> + /* Fit this constant in by commuting the operands.
> + * Exception: we can't do this for 32-bit integer MUL
> + * because it's asymmetric.
> + */
> + if (inst->opcode == BRW_OPCODE_MUL &&
> + (inst->src[1].type == BRW_REGISTER_TYPE_D ||
> + inst->src[1].type == BRW_REGISTER_TYPE_UD))
> + break;
> + inst->src[0] = inst->src[1];
> + inst->src[1] = entry->src;
> + progress = true;
> + }
> + break;
> +
> + case BRW_OPCODE_CMP:
> + case BRW_OPCODE_IF:
> + if (i == 1) {
> + inst->src[i] = entry->src;
> + progress = true;
> + } else if (i == 0 && inst->src[1].file != IMM) {
> + uint32_t new_cmod;
> +
> + new_cmod = brw_swap_cmod(inst->conditional_mod);
> + if (new_cmod != ~0u) {
> + /* Fit this constant in by swapping the operands and
> + * flipping the test
> + */
> + inst->src[0] = inst->src[1];
> + inst->src[1] = entry->src;
> + inst->conditional_mod = new_cmod;
> + progress = true;
> + }
> + }
> + break;
> +
> + case BRW_OPCODE_SEL:
> + if (i == 1) {
> + inst->src[i] = entry->src;
> + progress = true;
> + } else if (i == 0 && inst->src[1].file != IMM) {
> + inst->src[0] = inst->src[1];
> + inst->src[1] = entry->src;
> +
> + /* If this was predicated, flipping operands means
> + * we also need to flip the predicate.
> + */
> + if (inst->conditional_mod == BRW_CONDITIONAL_NONE) {
> + inst->predicate_inverse =
> + !inst->predicate_inverse;
> + }
> + progress = true;
> + }
> + break;
> +
> + case SHADER_OPCODE_RCP:
> + /* The hardware doesn't do math on immediate values
> + * (because why are you doing that, seriously?), but
> + * the correct answer is to just constant fold it
> + * anyway.
> + */
> + assert(i == 0);
> + if (inst->src[0].imm.f != 0.0f) {
> + inst->opcode = BRW_OPCODE_MOV;
> + inst->src[0] = entry->src;
> + inst->src[0].imm.f = 1.0f / inst->src[0].imm.f;
> + progress = true;
> + }
> + break;
> +
> + case FS_OPCODE_PULL_CONSTANT_LOAD:
> + inst->src[i] = entry->src;
> + progress = true;
> + break;
> +
> + default:
> + break;
> + }
> + }
> +
> + return progress;
> +}
> +
> /** @file brw_fs_copy_propagation.cpp
> *
> * Support for local copy propagation by walking the list of instructions
> @@ -90,6 +208,9 @@ fs_visitor::opt_copy_propagate_local(void *mem_ctx,
> foreach_list(entry_node, acp) {
> acp_entry *entry = (acp_entry *)entry_node;
>
> + if (try_constant_propagate(inst, entry))
> + progress = true;
> +
> for (int i = 0; i < 3; i++) {
> if (try_copy_propagate(inst, i, entry))
> progress = true;
> @@ -114,7 +235,8 @@ fs_visitor::opt_copy_propagate_local(void *mem_ctx,
> ((inst->src[0].file == GRF &&
> (inst->src[0].reg != inst->dst.reg ||
> inst->src[0].reg_offset != inst->dst.reg_offset)) ||
> - inst->src[0].file == UNIFORM) &&
> + inst->src[0].file == UNIFORM ||
> + inst->src[0].file == IMM) &&
> inst->src[0].type == inst->dst.type &&
> !inst->saturate &&
> !inst->predicated &&
>
More information about the mesa-dev
mailing list