Mesa (master): i965/vec4: add opportunistic behaviour to opt_vector_float()
Matt Turner
mattst88 at kemper.freedesktop.org
Sat Mar 5 03:16:53 UTC 2016
Module: Mesa
Branch: master
Commit: 2f76a9924e7b0b33a508ee3651b0cb2ab536a7dc
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=2f76a9924e7b0b33a508ee3651b0cb2ab536a7dc
Author: Juan A. Suarez Romero <jasuarez at igalia.com>
Date: Wed Mar 2 13:21:02 2016 +0100
i965/vec4: add opportunistic behaviour to opt_vector_float()
opt_vector_float() transforms several scalar MOV operations to a single
vectorial MOV.
This is done when those MOVs cover all the components of the destination
register. So something like:
mov vgrf3.0.xy:D, 0D
mov vgrf3.0.w:D, 1065353216D
mov vgrf3.0.z:D, 0D
is transformed into:
mov vgrf3.0:F, [0F, 0F, 0F, 1F]
But there are cases where not all the components are written. For
example, in:
mov vgrf2.0.x:D, 1073741824D
mov vgrf3.0.xy:D, 0D
mov vgrf3.0.w:D, 1065353216D
mov vgrf4.0.xy:D, 1065353216D
mov vgrf4.0.w:D, 0D
mov vgrf6.0:UD, u4.xyzw:UD
Neither vgrf3's nor vgrf4's .z component is written, so the optimization is
not applied.
But it could be applied anyway with the components covered, using a
writemask to select the ones written. So we could transform it into:
mov vgrf2.0.x:D, 1073741824D
mov vgrf3.0.xyw:F, [0F, 0F, 0F, 1F]
mov vgrf4.0.xyw:F, [1F, 1F, 0F, 0F]
mov vgrf6.0:UD, u4.xyzw:UD
This commit does precisely that: opportunistically apply
opt_vector_float() when possible.
total instructions in shared programs: 7124660 -> 7114784 (-0.14%)
instructions in affected programs: 443078 -> 433202 (-2.23%)
helped: 4998
HURT: 0
total cycles in shared programs: 64757760 -> 64728016 (-0.05%)
cycles in affected programs: 1401686 -> 1371942 (-2.12%)
helped: 3243
HURT: 38
v2: change vectorize_mov() signature (Matt).
v3: take into account predicates (Juan).
v4 [mattst88]: Update shader-db numbers. Fix some whitespace issues.
Reviewed-by: Matt Turner <mattst88 at gmail.com>
Signed-off-by: Juan A. Suarez Romero <jasuarez at igalia.com>
---
src/mesa/drivers/dri/i965/brw_vec4.cpp | 60 ++++++++++++++++++++++------------
src/mesa/drivers/dri/i965/brw_vec4.h | 4 +++
2 files changed, 43 insertions(+), 21 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 3618c72..cf62ed9 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -321,6 +321,28 @@ src_reg::equals(const src_reg &r) const
}
bool
+vec4_visitor::vectorize_mov(bblock_t *block, vec4_instruction *inst,
+ uint8_t imm[4], vec4_instruction *imm_inst[4],
+ int inst_count, unsigned writemask)
+{
+ if (inst_count < 2)
+ return false;
+
+ unsigned vf;
+ memcpy(&vf, imm, sizeof(vf));
+ vec4_instruction *mov = MOV(imm_inst[0]->dst, brw_imm_vf(vf));
+ mov->dst.type = BRW_REGISTER_TYPE_F;
+ mov->dst.writemask = writemask;
+ inst->insert_before(block, mov);
+
+ for (int i = 0; i < inst_count; i++) {
+ imm_inst[i]->remove(block);
+ }
+
+ return true;
+}
+
+bool
vec4_visitor::opt_vector_float()
{
bool progress = false;
@@ -328,27 +350,38 @@ vec4_visitor::opt_vector_float()
int last_reg = -1, last_reg_offset = -1;
enum brw_reg_file last_reg_file = BAD_FILE;
- int remaining_channels = 0;
- uint8_t imm[4];
+ uint8_t imm[4] = { 0 };
int inst_count = 0;
vec4_instruction *imm_inst[4];
+ unsigned writemask = 0;
foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) {
if (last_reg != inst->dst.nr ||
last_reg_offset != inst->dst.reg_offset ||
last_reg_file != inst->dst.file) {
+ progress |= vectorize_mov(block, inst, imm, imm_inst, inst_count,
+ writemask);
+ inst_count = 0;
+ writemask = 0;
last_reg = inst->dst.nr;
last_reg_offset = inst->dst.reg_offset;
last_reg_file = inst->dst.file;
- remaining_channels = WRITEMASK_XYZW;
- inst_count = 0;
+ for (int i = 0; i < 4; i++) {
+ imm[i] = 0;
+ }
}
if (inst->opcode != BRW_OPCODE_MOV ||
inst->dst.writemask == WRITEMASK_XYZW ||
- inst->src[0].file != IMM)
+ inst->src[0].file != IMM ||
+ inst->predicate != BRW_PREDICATE_NONE) {
+ progress |= vectorize_mov(block, inst, imm, imm_inst, inst_count,
+ writemask);
+ inst_count = 0;
+ last_reg = -1;
continue;
+ }
int vf = brw_float_to_vf(inst->src[0].f);
if (vf == -1)
@@ -363,23 +396,8 @@ vec4_visitor::opt_vector_float()
if ((inst->dst.writemask & WRITEMASK_W) != 0)
imm[3] = vf;
+ writemask |= inst->dst.writemask;
imm_inst[inst_count++] = inst;
-
- remaining_channels &= ~inst->dst.writemask;
- if (remaining_channels == 0) {
- unsigned vf;
- memcpy(&vf, imm, sizeof(vf));
- vec4_instruction *mov = MOV(inst->dst, brw_imm_vf(vf));
- mov->dst.type = BRW_REGISTER_TYPE_F;
- mov->dst.writemask = WRITEMASK_XYZW;
- inst->insert_after(block, mov);
- last_reg = -1;
-
- for (int i = 0; i < inst_count; i++) {
- imm_inst[i]->remove(block);
- }
- progress = true;
- }
}
if (progress)
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 633f13c..91771b8 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -369,6 +369,10 @@ protected:
virtual void gs_end_primitive();
private:
+ bool vectorize_mov(bblock_t *block, vec4_instruction *inst,
+ uint8_t imm[4], vec4_instruction *imm_inst[4],
+ int inst_count, unsigned writemask);
+
/**
* If true, then register allocation should fail instead of spilling.
*/
More information about the mesa-commit
mailing list