[Mesa-dev] [PATCH v2 050/103] i965/vec4: teach CSE about exec_size, group and doubles
Iago Toral Quiroga
itoral at igalia.com
Tue Oct 11 09:01:54 UTC 2016
---
src/mesa/drivers/dri/i965/brw_vec4_cse.cpp | 31 +++++++++++++++++++++++-------
1 file changed, 24 insertions(+), 7 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
index bef897a..229d7b2 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
@@ -130,6 +130,8 @@ instructions_match(vec4_instruction *a, vec4_instruction *b)
a->dst.writemask == b->dst.writemask &&
a->force_writemask_all == b->force_writemask_all &&
a->size_written == b->size_written &&
+ a->exec_size == b->exec_size &&
+ a->group == b->group &&
operands_match(a, b);
}
@@ -181,9 +183,17 @@ vec4_visitor::opt_cse_local(bblock_t *block)
regs_written(entry->generator)),
NULL), inst->dst.type);
- for (unsigned i = 0; i < regs_written(entry->generator); ++i) {
- vec4_instruction *copy = MOV(offset(entry->generator->dst, i),
- offset(entry->tmp, i));
+ unsigned type_scale = DIV_ROUND_UP(type_sz(entry->tmp.type), 4);
+ unsigned regs_per_mov =
+ DIV_ROUND_UP(type_scale * entry->generator->exec_size, 8);
+ unsigned num_copy_movs =
+ DIV_ROUND_UP(regs_written(entry->generator), regs_per_mov);
+ for (unsigned i = 0; i < num_copy_movs; ++i) {
+ vec4_instruction *copy =
+ MOV(offset(entry->generator->dst, i * regs_per_mov),
+ offset(entry->tmp, i * regs_per_mov));
+ copy->exec_size = entry->generator->exec_size;
+ copy->group = entry->generator->group;
copy->force_writemask_all =
entry->generator->force_writemask_all;
entry->generator->insert_after(block, copy);
@@ -195,10 +205,17 @@ vec4_visitor::opt_cse_local(bblock_t *block)
/* dest <- temp */
if (!inst->dst.is_null()) {
assert(inst->dst.type == entry->tmp.type);
-
- for (unsigned i = 0; i < regs_written(inst); ++i) {
- vec4_instruction *copy = MOV(offset(inst->dst, i),
- offset(entry->tmp, i));
+ unsigned type_scale = DIV_ROUND_UP(type_sz(inst->dst.type), 4);
+ unsigned regs_per_mov =
+ DIV_ROUND_UP(type_scale * inst->exec_size, 8);
+ unsigned num_copy_movs =
+ DIV_ROUND_UP(regs_written(inst), regs_per_mov);
+ for (unsigned i = 0; i < num_copy_movs; ++i) {
+ vec4_instruction *copy =
+ MOV(offset(inst->dst, i * regs_per_mov),
+ offset(entry->tmp, i * regs_per_mov));
+ copy->exec_size = inst->exec_size;
+ copy->group = inst->group;
copy->force_writemask_all = inst->force_writemask_all;
inst->insert_before(block, copy);
}
--
2.7.4
More information about the mesa-dev mailing list