[Mesa-dev] [PATCH 1/7] i965/fs: Schedule instructions both before and after register allocation.
Eric Anholt
eric at anholt.net
Fri Dec 7 14:58:12 PST 2012
---
src/mesa/drivers/dri/i965/brw_fs.cpp | 4 +-
src/mesa/drivers/dri/i965/brw_fs.h | 2 +-
.../dri/i965/brw_fs_schedule_instructions.cpp | 90 ++++++++++++++++----
3 files changed, 78 insertions(+), 18 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index d77a67e..4d8f06c 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -2465,7 +2465,7 @@ fs_visitor::run()
remove_dead_constants();
- schedule_instructions();
+ schedule_instructions(false);
assign_curb_setup();
assign_urb_setup();
@@ -2492,6 +2492,8 @@ fs_visitor::run()
if (failed)
return false;
+ schedule_instructions(true);
+
if (dispatch_width == 8) {
c->prog_data.reg_blocks = brw_register_blocks(grf_used);
} else {
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 51efc11..0dab222 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -303,7 +303,7 @@ public:
bool remove_dead_constants();
bool remove_duplicate_mrf_writes();
bool virtual_grf_interferes(int a, int b);
- void schedule_instructions();
+ void schedule_instructions(bool post_reg_alloc);
void fail(const char *msg, ...);
void push_force_uncompressed();
diff --git a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
index 6b7c412..f57b23f 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
@@ -112,13 +112,15 @@ public:
class instruction_scheduler {
public:
- instruction_scheduler(fs_visitor *v, void *mem_ctx, int virtual_grf_count)
+ instruction_scheduler(fs_visitor *v, void *mem_ctx, int grf_count,
+ bool post_reg_alloc)
{
this->v = v;
this->mem_ctx = ralloc_context(mem_ctx);
- this->virtual_grf_count = virtual_grf_count;
+ this->grf_count = grf_count;
this->instructions.make_empty();
this->instructions_to_schedule = 0;
+ this->post_reg_alloc = post_reg_alloc;
}
~instruction_scheduler()
@@ -137,8 +139,9 @@ public:
void *mem_ctx;
+ bool post_reg_alloc;
int instructions_to_schedule;
- int virtual_grf_count;
+ int grf_count;
exec_list instructions;
fs_visitor *v;
};
@@ -247,7 +250,12 @@ instruction_scheduler::is_compressed(fs_inst *inst)
void
instruction_scheduler::calculate_deps()
{
- schedule_node *last_grf_write[virtual_grf_count];
+ /* Pre-register-allocation, this tracks the last write per VGRF (so
+ * different reg_offsets within it can interfere when they shouldn't).
+ * After register allocation, reg_offsets are gone and we track individual
+ * GRF registers.
+ */
+ schedule_node *last_grf_write[grf_count];
schedule_node *last_mrf_write[BRW_MAX_MRF];
schedule_node *last_conditional_mod = NULL;
/* Fixed HW registers are assumed to be separate from the virtual
@@ -256,6 +264,7 @@ instruction_scheduler::calculate_deps()
* granular level.
*/
schedule_node *last_fixed_grf_write = NULL;
+ int reg_width = v->dispatch_width / 8;
/* The last instruction always needs to still be the last
* instruction. Either it's flow control (IF, ELSE, ENDIF, DO,
@@ -277,11 +286,21 @@ instruction_scheduler::calculate_deps()
/* read-after-write deps. */
for (int i = 0; i < 3; i++) {
if (inst->src[i].file == GRF) {
- add_dep(last_grf_write[inst->src[i].reg], n);
+ if (post_reg_alloc) {
+ for (int r = 0; r < reg_width; r++)
+ add_dep(last_grf_write[inst->src[i].reg + r], n);
+ } else {
+ add_dep(last_grf_write[inst->src[i].reg], n);
+ }
} else if (inst->src[i].file == FIXED_HW_REG &&
(inst->src[i].fixed_hw_reg.file ==
BRW_GENERAL_REGISTER_FILE)) {
- add_dep(last_fixed_grf_write, n);
+ if (post_reg_alloc) {
+ for (int r = 0; r < reg_width; r++)
+ add_dep(last_grf_write[inst->src[i].fixed_hw_reg.nr + r], n);
+ } else {
+ add_dep(last_fixed_grf_write, n);
+ }
} else if (inst->src[i].file != BAD_FILE &&
inst->src[i].file != IMM &&
inst->src[i].file != UNIFORM) {
@@ -305,8 +324,15 @@ instruction_scheduler::calculate_deps()
/* write-after-write deps. */
if (inst->dst.file == GRF) {
- add_dep(last_grf_write[inst->dst.reg], n);
- last_grf_write[inst->dst.reg] = n;
+ if (post_reg_alloc) {
+ for (int r = 0; r < inst->regs_written() * reg_width; r++) {
+ add_dep(last_grf_write[inst->dst.reg + r], n);
+ last_grf_write[inst->dst.reg + r] = n;
+ }
+ } else {
+ add_dep(last_grf_write[inst->dst.reg], n);
+ last_grf_write[inst->dst.reg] = n;
+ }
} else if (inst->dst.file == MRF) {
int reg = inst->dst.reg & ~BRW_MRF_COMPR4;
@@ -322,7 +348,12 @@ instruction_scheduler::calculate_deps()
}
} else if (inst->dst.file == FIXED_HW_REG &&
inst->dst.fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE) {
- last_fixed_grf_write = n;
+ if (post_reg_alloc) {
+ for (int r = 0; r < reg_width; r++)
+ last_grf_write[inst->dst.fixed_hw_reg.nr + r] = n;
+ } else {
+ last_fixed_grf_write = n;
+ }
} else if (inst->dst.file != BAD_FILE) {
add_barrier_deps(n);
}
@@ -361,12 +392,22 @@ instruction_scheduler::calculate_deps()
/* write-after-read deps. */
for (int i = 0; i < 3; i++) {
if (inst->src[i].file == GRF) {
- add_dep(n, last_grf_write[inst->src[i].reg]);
+ if (post_reg_alloc) {
+ for (int r = 0; r < reg_width; r++)
+ add_dep(n, last_grf_write[inst->src[i].reg + r]);
+ } else {
+ add_dep(n, last_grf_write[inst->src[i].reg]);
+ }
} else if (inst->src[i].file == FIXED_HW_REG &&
(inst->src[i].fixed_hw_reg.file ==
BRW_GENERAL_REGISTER_FILE)) {
- add_dep(n, last_fixed_grf_write);
- } else if (inst->src[i].file != BAD_FILE &&
+ if (post_reg_alloc) {
+ for (int r = 0; r < reg_width; r++)
+ add_dep(n, last_grf_write[inst->src[i].fixed_hw_reg.nr + r]);
+ } else {
+ add_dep(n, last_fixed_grf_write);
+ }
+ } else if (inst->src[i].file != BAD_FILE &&
inst->src[i].file != IMM &&
inst->src[i].file != UNIFORM) {
assert(inst->src[i].file != MRF);
@@ -390,7 +431,12 @@ instruction_scheduler::calculate_deps()
* can mark this as WAR dependency.
*/
if (inst->dst.file == GRF) {
- last_grf_write[inst->dst.reg] = n;
+ if (post_reg_alloc) {
+ for (int r = 0; r < inst->regs_written() * reg_width; r++)
+ last_grf_write[inst->dst.reg + r] = n;
+ } else {
+ last_grf_write[inst->dst.reg] = n;
+ }
} else if (inst->dst.file == MRF) {
int reg = inst->dst.reg & ~BRW_MRF_COMPR4;
@@ -406,7 +452,12 @@ instruction_scheduler::calculate_deps()
}
} else if (inst->dst.file == FIXED_HW_REG &&
inst->dst.fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE) {
- last_fixed_grf_write = n;
+ if (post_reg_alloc) {
+ for (int r = 0; r < reg_width; r++)
+ last_grf_write[inst->dst.fixed_hw_reg.nr + r] = n;
+ } else {
+ last_fixed_grf_write = n;
+ }
} else if (inst->dst.file != BAD_FILE) {
add_barrier_deps(n);
}
@@ -500,10 +551,17 @@ instruction_scheduler::schedule_instructions(fs_inst *next_block_header)
}
void
-fs_visitor::schedule_instructions()
+fs_visitor::schedule_instructions(bool post_reg_alloc)
{
fs_inst *next_block_header = (fs_inst *)instructions.head;
- instruction_scheduler sched(this, mem_ctx, this->virtual_grf_count);
+
+ int grf_count;
+ if (post_reg_alloc)
+ grf_count = grf_used;
+ else
+ grf_count = virtual_grf_count;
+
+ instruction_scheduler sched(this, mem_ctx, grf_count, post_reg_alloc);
while (!next_block_header->is_tail_sentinel()) {
/* Add things to be scheduled until we get to a new BB. */
--
1.7.10.4
More information about the mesa-dev
mailing list