[Mesa-dev] [PATCH 09/10] i965/fs: Move constant propagation to the same codebase as copy prop.
Eric Anholt
eric at anholt.net
Sat Sep 22 14:04:49 PDT 2012
This means that we don't get constant propagation into the first block after a
BRW_OPCODE_IF or a BRW_OPCODE_DO, but we have hope of doing it properly
across control flow at some point. More importantly, it avoids the O(n^2)
runtime (in instruction count) for shaders that have many constant moves.
---
src/mesa/drivers/dri/i965/brw_fs.cpp | 163 --------------------
src/mesa/drivers/dri/i965/brw_fs.h | 2 +-
.../drivers/dri/i965/brw_fs_copy_propagation.cpp | 124 ++++++++++++++-
3 files changed, 124 insertions(+), 165 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 2701413..0545a74 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -1249,168 +1249,6 @@ fs_visitor::setup_pull_constants()
c->prog_data.nr_pull_params = pull_uniform_count;
}
-/**
- * Attempts to move immediate constants into the immediate
- * constant slot of following instructions.
- *
- * Immediate constants are a bit tricky -- they have to be in the last
- * operand slot, you can't do abs/negate on them,
- */
-
-bool
-fs_visitor::propagate_constants()
-{
- bool progress = false;
-
- calculate_live_intervals();
-
- foreach_list(node, &this->instructions) {
- fs_inst *inst = (fs_inst *)node;
-
- if (inst->opcode != BRW_OPCODE_MOV ||
- inst->predicated ||
- inst->dst.file != GRF || inst->src[0].file != IMM ||
- inst->dst.type != inst->src[0].type ||
- (c->dispatch_width == 16 &&
- (inst->force_uncompressed || inst->force_sechalf)))
- continue;
-
- /* Don't bother with cases where we should have had the
- * operation on the constant folded in GLSL already.
- */
- if (inst->saturate)
- continue;
-
- /* Found a move of a constant to a GRF. Find anything else using the GRF
- * before it's written, and replace it with the constant if we can.
- */
- for (fs_inst *scan_inst = (fs_inst *)inst->next;
- !scan_inst->is_tail_sentinel();
- scan_inst = (fs_inst *)scan_inst->next) {
- if (scan_inst->opcode == BRW_OPCODE_DO ||
- scan_inst->opcode == BRW_OPCODE_WHILE ||
- scan_inst->opcode == BRW_OPCODE_ELSE ||
- scan_inst->opcode == BRW_OPCODE_ENDIF) {
- break;
- }
-
- for (int i = 2; i >= 0; i--) {
- if (scan_inst->src[i].file != GRF ||
- scan_inst->src[i].reg != inst->dst.reg ||
- scan_inst->src[i].reg_offset != inst->dst.reg_offset)
- continue;
-
- /* Don't bother with cases where we should have had the
- * operation on the constant folded in GLSL already.
- */
- if (scan_inst->src[i].negate || scan_inst->src[i].abs)
- continue;
-
- switch (scan_inst->opcode) {
- case BRW_OPCODE_MOV:
- scan_inst->src[i] = inst->src[0];
- progress = true;
- break;
-
- case BRW_OPCODE_MUL:
- case BRW_OPCODE_ADD:
- if (i == 1) {
- scan_inst->src[i] = inst->src[0];
- progress = true;
- } else if (i == 0 && scan_inst->src[1].file != IMM) {
- /* Fit this constant in by commuting the operands.
- * Exception: we can't do this for 32-bit integer MUL
- * because it's asymmetric.
- */
- if (scan_inst->opcode == BRW_OPCODE_MUL &&
- (scan_inst->src[1].type == BRW_REGISTER_TYPE_D ||
- scan_inst->src[1].type == BRW_REGISTER_TYPE_UD))
- break;
- scan_inst->src[0] = scan_inst->src[1];
- scan_inst->src[1] = inst->src[0];
- progress = true;
- }
- break;
-
- case BRW_OPCODE_CMP:
- case BRW_OPCODE_IF:
- if (i == 1) {
- scan_inst->src[i] = inst->src[0];
- progress = true;
- } else if (i == 0 && scan_inst->src[1].file != IMM) {
- uint32_t new_cmod;
-
- new_cmod = brw_swap_cmod(scan_inst->conditional_mod);
- if (new_cmod != ~0u) {
- /* Fit this constant in by swapping the operands and
- * flipping the test
- */
- scan_inst->src[0] = scan_inst->src[1];
- scan_inst->src[1] = inst->src[0];
- scan_inst->conditional_mod = new_cmod;
- progress = true;
- }
- }
- break;
-
- case BRW_OPCODE_SEL:
- if (i == 1) {
- scan_inst->src[i] = inst->src[0];
- progress = true;
- } else if (i == 0 && scan_inst->src[1].file != IMM) {
- scan_inst->src[0] = scan_inst->src[1];
- scan_inst->src[1] = inst->src[0];
-
- /* If this was predicated, flipping operands means
- * we also need to flip the predicate.
- */
- if (scan_inst->conditional_mod == BRW_CONDITIONAL_NONE) {
- scan_inst->predicate_inverse =
- !scan_inst->predicate_inverse;
- }
- progress = true;
- }
- break;
-
- case SHADER_OPCODE_RCP:
- /* The hardware doesn't do math on immediate values
- * (because why are you doing that, seriously?), but
- * the correct answer is to just constant fold it
- * anyway.
- */
- assert(i == 0);
- if (inst->src[0].imm.f != 0.0f) {
- scan_inst->opcode = BRW_OPCODE_MOV;
- scan_inst->src[0] = inst->src[0];
- scan_inst->src[0].imm.f = 1.0f / scan_inst->src[0].imm.f;
- progress = true;
- }
- break;
-
- case FS_OPCODE_PULL_CONSTANT_LOAD:
- scan_inst->src[i] = inst->src[0];
- progress = true;
- break;
-
- default:
- break;
- }
- }
-
- if (scan_inst->dst.file == GRF &&
- scan_inst->overwrites_reg(inst->dst)) {
- break;
- }
- }
- }
-
- if (progress)
- this->live_intervals_valid = false;
-
- return progress;
-}
-
-
bool
fs_visitor::opt_algebraic()
{
@@ -2025,7 +1863,6 @@ fs_visitor::run()
progress = remove_duplicate_mrf_writes() || progress;
- progress = propagate_constants() || progress;
progress = opt_algebraic() || progress;
progress = opt_cse() || progress;
progress = opt_copy_propagate() || progress;
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 9fbb8e5..59a0e50 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -240,12 +240,12 @@ public:
void split_virtual_grfs();
void setup_pull_constants();
void calculate_live_intervals();
- bool propagate_constants();
bool opt_algebraic();
bool opt_cse();
bool opt_cse_local(fs_bblock *block, exec_list *aeb);
bool opt_copy_propagate();
bool try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry);
+ bool try_constant_propagate(fs_inst *inst, acp_entry *entry);
bool opt_copy_propagate_local(void *mem_ctx, fs_bblock *block,
exec_list *acp);
bool register_coalesce();
diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
index 1870f43..ad34657 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
@@ -34,6 +34,9 @@ struct acp_entry : public exec_node {
bool
fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
{
+ if (entry->src.file == IMM)
+ return false;
+
if (inst->src[arg].file != entry->dst.file ||
inst->src[arg].reg != entry->dst.reg ||
inst->src[arg].reg_offset != entry->dst.reg_offset) {
@@ -64,6 +67,121 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
return true;
}
+
+bool
+fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry)
+{
+ bool progress = false;
+
+ if (entry->src.file != IMM)
+ return false;
+
+ for (int i = 2; i >= 0; i--) {
+ if (inst->src[i].file != entry->dst.file ||
+ inst->src[i].reg != entry->dst.reg ||
+ inst->src[i].reg_offset != entry->dst.reg_offset)
+ continue;
+
+ /* Don't bother with cases where we should have had the
+ * operation on the constant folded in GLSL already.
+ */
+ if (inst->src[i].negate || inst->src[i].abs)
+ continue;
+
+ switch (inst->opcode) {
+ case BRW_OPCODE_MOV:
+ inst->src[i] = entry->src;
+ progress = true;
+ break;
+
+ case BRW_OPCODE_MUL:
+ case BRW_OPCODE_ADD:
+ if (i == 1) {
+ inst->src[i] = entry->src;
+ progress = true;
+ } else if (i == 0 && inst->src[1].file != IMM) {
+ /* Fit this constant in by commuting the operands.
+ * Exception: we can't do this for 32-bit integer MUL
+ * because it's asymmetric.
+ */
+ if (inst->opcode == BRW_OPCODE_MUL &&
+ (inst->src[1].type == BRW_REGISTER_TYPE_D ||
+ inst->src[1].type == BRW_REGISTER_TYPE_UD))
+ break;
+ inst->src[0] = inst->src[1];
+ inst->src[1] = entry->src;
+ progress = true;
+ }
+ break;
+
+ case BRW_OPCODE_CMP:
+ case BRW_OPCODE_IF:
+ if (i == 1) {
+ inst->src[i] = entry->src;
+ progress = true;
+ } else if (i == 0 && inst->src[1].file != IMM) {
+ uint32_t new_cmod;
+
+ new_cmod = brw_swap_cmod(inst->conditional_mod);
+ if (new_cmod != ~0u) {
+ /* Fit this constant in by swapping the operands and
+ * flipping the test
+ */
+ inst->src[0] = inst->src[1];
+ inst->src[1] = entry->src;
+ inst->conditional_mod = new_cmod;
+ progress = true;
+ }
+ }
+ break;
+
+ case BRW_OPCODE_SEL:
+ if (i == 1) {
+ inst->src[i] = entry->src;
+ progress = true;
+ } else if (i == 0 && inst->src[1].file != IMM) {
+ inst->src[0] = inst->src[1];
+ inst->src[1] = entry->src;
+
+ /* If this was predicated, flipping operands means
+ * we also need to flip the predicate.
+ */
+ if (inst->conditional_mod == BRW_CONDITIONAL_NONE) {
+ inst->predicate_inverse =
+ !inst->predicate_inverse;
+ }
+ progress = true;
+ }
+ break;
+
+ case SHADER_OPCODE_RCP:
+ /* The hardware doesn't do math on immediate values
+ * (because why are you doing that, seriously?), but
+ * the correct answer is to just constant fold it
+ * anyway.
+ */
+ assert(i == 0);
+ if (inst->src[0].imm.f != 0.0f) {
+ inst->opcode = BRW_OPCODE_MOV;
+ inst->src[0] = entry->src;
+ inst->src[0].imm.f = 1.0f / inst->src[0].imm.f;
+ progress = true;
+ }
+ break;
+
+ case FS_OPCODE_PULL_CONSTANT_LOAD:
+ inst->src[i] = entry->src;
+ progress = true;
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ return progress;
+}
+
/** @file brw_fs_copy_propagation.cpp
*
* Support for local copy propagation by walking the list of instructions
@@ -90,6 +208,9 @@ fs_visitor::opt_copy_propagate_local(void *mem_ctx,
foreach_list(entry_node, acp) {
acp_entry *entry = (acp_entry *)entry_node;
+ if (try_constant_propagate(inst, entry))
+ progress = true;
+
for (int i = 0; i < 3; i++) {
if (try_copy_propagate(inst, i, entry))
progress = true;
@@ -114,7 +235,8 @@ fs_visitor::opt_copy_propagate_local(void *mem_ctx,
((inst->src[0].file == GRF &&
(inst->src[0].reg != inst->dst.reg ||
inst->src[0].reg_offset != inst->dst.reg_offset)) ||
- inst->src[0].file == UNIFORM) &&
+ inst->src[0].file == UNIFORM ||
+ inst->src[0].file == IMM) &&
inst->src[0].type == inst->dst.type &&
!inst->saturate &&
!inst->predicated &&
--
1.7.10.4
More information about the mesa-dev
mailing list