[Mesa-dev] [PATCH 4/6] i965/vec4: copy propagate 'NOT' instruction when used with logical operation
Abdiel Janulgue
abdiel.janulgue at linux.intel.com
Tue Jun 3 15:59:24 PDT 2014
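On Broadwell (gen >= 8), copy propagate a 'NOT' into the instruction that consumes
its result by folding it into that source's negate modifier. When the consumer is a
logical instruction, this reduces the two instructions to a single operation.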
Signed-off-by: Abdiel Janulgue <abdiel.janulgue at linux.intel.com>
---
src/mesa/drivers/dri/i965/brw_vec4.h | 4 +-
.../drivers/dri/i965/brw_vec4_copy_propagation.cpp | 62 +++++++++++++---------
2 files changed, 40 insertions(+), 26 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index fd58b3c..51da46c 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -228,6 +228,8 @@ writemask(dst_reg reg, unsigned mask)
return reg;
}
+struct copy_entry;
+
class vec4_instruction : public backend_instruction {
public:
DECLARE_RALLOC_CXX_OPERATORS(vec4_instruction)
@@ -498,7 +500,7 @@ public:
vec4_instruction *last_rhs_inst);
bool try_copy_propagation(vec4_instruction *inst, int arg,
- src_reg *values[4]);
+ struct copy_entry *entry);
/** Walks an exec_list of ir_instruction and sends it through this visitor. */
void visit_instructions(const exec_list *list);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
index 83cf191..ab50d00 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
@@ -36,10 +36,16 @@ extern "C" {
namespace brw {
+struct copy_entry {
+ src_reg *value[4];
+ enum opcode opcode;
+};
+
static bool
-is_direct_copy(vec4_instruction *inst)
+can_propagate_from(struct brw_context *brw, vec4_instruction *inst)
{
- return (inst->opcode == BRW_OPCODE_MOV &&
+ return ((inst->opcode == BRW_OPCODE_MOV ||
+ (inst->opcode == BRW_OPCODE_NOT && brw->gen >= 8)) &&
!inst->predicate &&
inst->dst.file == GRF &&
!inst->saturate &&
@@ -197,22 +203,22 @@ try_constant_propagation(vec4_instruction *inst, int arg, src_reg *values[4])
bool
vec4_visitor::try_copy_propagation(vec4_instruction *inst, int arg,
- src_reg *values[4])
+ struct copy_entry *entry)
{
/* For constant propagation, we only handle the same constant
* across all 4 channels. Some day, we should handle the 8-bit
* float vector format, which would let us constant propagate
* vectors better.
*/
- src_reg value = *values[0];
+ src_reg value = *(entry->value[0]);
for (int i = 1; i < 4; i++) {
/* This is equals() except we don't care about the swizzle. */
- if (value.file != values[i]->file ||
- value.reg != values[i]->reg ||
- value.reg_offset != values[i]->reg_offset ||
- value.type != values[i]->type ||
- value.negate != values[i]->negate ||
- value.abs != values[i]->abs) {
+ if (value.file != entry->value[i]->file ||
+ value.reg != entry->value[i]->reg ||
+ value.reg_offset != entry->value[i]->reg_offset ||
+ value.type != entry->value[i]->type ||
+ value.negate != entry->value[i]->negate ||
+ value.abs != entry->value[i]->abs) {
return false;
}
}
@@ -223,7 +229,7 @@ vec4_visitor::try_copy_propagation(vec4_instruction *inst, int arg,
*/
int s[4];
for (int i = 0; i < 4; i++) {
- s[i] = BRW_GET_SWZ(values[i]->swizzle,
+ s[i] = BRW_GET_SWZ(entry->value[i]->swizzle,
BRW_GET_SWZ(inst->src[arg].swizzle, i));
}
value.swizzle = BRW_SWIZZLE4(s[0], s[1], s[2], s[3]);
@@ -277,6 +283,10 @@ vec4_visitor::try_copy_propagation(vec4_instruction *inst, int arg,
value.type = inst->src[arg].type;
inst->src[arg] = value;
+
+ if (brw->gen >=8 && entry->opcode == BRW_OPCODE_NOT)
+ inst->src[arg].negate ^= !value.negate;
+
return true;
}
@@ -284,9 +294,9 @@ bool
vec4_visitor::opt_copy_propagation()
{
bool progress = false;
- src_reg *cur_value[virtual_grf_reg_count][4];
+ struct copy_entry entries[virtual_grf_reg_count];
- memset(&cur_value, 0, sizeof(cur_value));
+ memset(&entries, 0, sizeof(entries));
foreach_list(node, &this->instructions) {
vec4_instruction *inst = (vec4_instruction *)node;
@@ -299,7 +309,7 @@ vec4_visitor::opt_copy_propagation()
* src/glsl/opt_copy_propagation.cpp to track available copies.
*/
if (!is_dominated_by_previous_instruction(inst)) {
- memset(cur_value, 0, sizeof(cur_value));
+ memset(&entries, 0, sizeof(entries));
continue;
}
@@ -320,31 +330,32 @@ vec4_visitor::opt_copy_propagation()
/* Find the regs that each swizzle component came from.
*/
- src_reg *values[4];
+ struct copy_entry entry;
int c;
for (c = 0; c < 4; c++) {
- values[c] = cur_value[reg][BRW_GET_SWZ(inst->src[i].swizzle, c)];
+ entry.value[c] = entries[reg].value[BRW_GET_SWZ(inst->src[i].swizzle, c)];
/* If there's no available copy for this channel, bail.
* We could be more aggressive here -- some channels might
* not get used based on the destination writemask.
*/
- if (!values[c])
+ if (!entry.value[c])
break;
/* We'll only be able to copy propagate if the sources are
* all from the same file -- there's no ability to swizzle
* 0 or 1 constants in with source registers like in i915.
*/
- if (c > 0 && values[c - 1]->file != values[c]->file)
+ if (c > 0 && entry.value[c - 1]->file != entry.value[c]->file)
break;
}
if (c != 4)
continue;
- if (try_constant_propagation(inst, i, values) ||
- try_copy_propagation(inst, i, values))
+ entry.opcode = entries[reg].opcode;
+ if (try_constant_propagation(inst, i, entry.value) ||
+ try_copy_propagation(inst, i, &entry))
progress = true;
}
@@ -357,10 +368,11 @@ vec4_visitor::opt_copy_propagation()
* the value is the newly propagated source. Otherwise, we don't know
* the new value, so clear it.
*/
- bool direct_copy = is_direct_copy(inst);
+ bool prop = can_propagate_from(brw, inst);
for (int i = 0; i < 4; i++) {
if (inst->dst.writemask & (1 << i)) {
- cur_value[reg][i] = direct_copy ? &inst->src[0] : NULL;
+ entries[reg].value[i] = prop ? &inst->src[0] : NULL;
+ entries[reg].opcode = inst->opcode;
}
}
@@ -368,12 +380,12 @@ vec4_visitor::opt_copy_propagation()
* our destination's updated channels, as the two are no longer equal.
*/
if (inst->dst.reladdr)
- memset(cur_value, 0, sizeof(cur_value));
+ memset(&entries, 0, sizeof(entries));
else {
for (int i = 0; i < virtual_grf_reg_count; i++) {
for (int j = 0; j < 4; j++) {
- if (is_channel_updated(inst, cur_value[i], j)){
- cur_value[i][j] = NULL;
+ if (is_channel_updated(inst, entries[i].value, j)){
+ entries[i].value[j] = NULL;
}
}
}
--
1.9.1
More information about the mesa-dev
mailing list