[Mesa-dev] [PATCH v2 062/103] i965/vec4: do not emit 64-bit MAD
Iago Toral Quiroga
itoral at igalia.com
Tue Oct 11 09:02:06 UTC 2016
The previous patch made sure that we do not generate MAD instructions
for any 64-bit NIR ffma, but nothing prevents i965 from producing MAD
instructions as a result of lowering or optimization passes. This patch
makes sure that any 64-bit MAD produced inside the driver after
translating from NIR is also converted to MUL+ADD before we generate
code.
v2:
- Use a copy constructor to copy all relevant instruction fields from
the original mad into the add and mul instructions
---
src/mesa/drivers/dri/i965/brw_vec4.cpp | 44 ++++++++++++++++++++++++++++++++++
src/mesa/drivers/dri/i965/brw_vec4.h | 1 +
2 files changed, 45 insertions(+)
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 190581e..7af65ab 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -2255,6 +2255,49 @@ vec4_visitor::scalarize_df()
return progress;
}
+/**
+ * Replace every 64-bit MAD in the program with a MUL followed by an ADD.
+ *
+ * A MAD whose destination type is 8 bytes wide is split into:
+ *
+ *    mul  tmp, src[1], src[2]
+ *    add  dst, tmp,    src[0]
+ *
+ * where tmp is a freshly allocated dvec4 temporary. MADs with any other
+ * destination size are left untouched.
+ *
+ * \return true if at least one MAD was replaced, in which case the live
+ *         intervals have also been invalidated.
+ */
+bool
+vec4_visitor::translate_64bit_mad_to_mul_add()
+{
+   bool progress = false;
+
+   foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) {
+      if (inst->opcode != BRW_OPCODE_MAD)
+         continue;
+
+      if (type_sz(inst->dst.type) != 8)
+         continue;
+
+      dst_reg mul_dst = dst_reg(this, glsl_type::dvec4_type);
+
+      /* Use the copy constructor so we copy all relevant instruction fields
+       * from the original mad into the add and mul instructions
+       */
+      vec4_instruction *mul = new(mem_ctx) vec4_instruction(*inst);
+      mul->opcode = BRW_OPCODE_MUL;
+      mul->dst = mul_dst;
+      mul->src[0] = inst->src[1];
+      mul->src[1] = inst->src[2];
+      mul->src[2].file = BAD_FILE;
+
+      /* Saturation and the conditional modifier describe the final result of
+       * the MAD, so they must only be applied by the ADD. Leaving them on the
+       * MUL would clamp the intermediate product (changing the result) and
+       * make it write the flag register before the ADD runs.
+       */
+      mul->saturate = false;
+      mul->conditional_mod = BRW_CONDITIONAL_NONE;
+
+      /* The ADD keeps the original destination together with every modifier
+       * copied from the MAD; only the opcode and sources change.
+       */
+      vec4_instruction *add = new(mem_ctx) vec4_instruction(*inst);
+      add->opcode = BRW_OPCODE_ADD;
+      add->src[0] = src_reg(mul_dst);
+      add->src[1] = inst->src[0];
+      add->src[2].file = BAD_FILE;
+
+      /* Both insertions are relative to the MAD, so the resulting order is
+       * mul, add; the MAD itself is then dropped.
+       */
+      inst->insert_before(block, mul);
+      inst->insert_before(block, add);
+      inst->remove(block);
+
+      progress = true;
+   }
+
+   if (progress)
+      invalidate_live_intervals();
+
+   return progress;
+}
+
+
/* The align16 hardware can only do 32-bit swizzle channels, so we need to
* translate the logical 64-bit swizzle channels that we use in the Vec4 IR
* to 32-bit swizzle channels in hardware registers.
@@ -2414,6 +2457,7 @@ vec4_visitor::run()
if (failed)
return false;
+ OPT(translate_64bit_mad_to_mul_add);
OPT(scalarize_df);
setup_payload();
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 7e51c41..0af55c5 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -164,6 +164,7 @@ public:
bool lower_simd_width();
bool scalarize_df();
+ bool translate_64bit_mad_to_mul_add();
void apply_logical_swizzle(struct brw_reg *hw_reg,
vec4_instruction *inst, int arg);
--
2.7.4
More information about the mesa-dev
mailing list