[Mesa-dev] [PATCH 4/6] i965/fs: Reduce the interference between payload regs and virtual GRFs.
Eric Anholt
eric at anholt.net
Tue Oct 2 19:52:03 PDT 2012
Improves performance of the Lightsmark penumbra shadows scene by 15.7% +/-
1.0% (n=15), by eliminating register spilling. (Tested by editing the list of
scenes so that all other scenes have 0 duration. Note that this setup also
renders the scene description text, which normally doesn't appear in that
scene.)
---
src/mesa/drivers/dri/i965/brw_fs.h | 2 +
src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 86 +++++++++++++++++----
2 files changed, 74 insertions(+), 14 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 34747d3..56c5a27 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -235,6 +235,8 @@ public:
void assign_urb_setup();
bool assign_regs();
void assign_regs_trivial();
+ void setup_payload_interference(struct ra_graph *g, int payload_reg_count,
+ int first_payload_node);
int choose_spill_reg(struct ra_graph *g);
void spill_reg(int spill_reg);
void split_virtual_grfs();
diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
index 7b778d6..0510977 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -164,12 +164,78 @@ brw_alloc_reg_set(struct brw_context *brw, int reg_width, int base_reg_count)
* Sets up interference between thread payload registers and the virtual GRFs
* to be allocated for program temporaries.
*/
-static void
-brw_setup_payload_interference(struct ra_graph *g,
- int payload_reg_count,
- int first_payload_node,
- int reg_node_count)
+void
+fs_visitor::setup_payload_interference(struct ra_graph *g,
+ int payload_reg_count,
+ int first_payload_node)
{
+ int reg_width = c->dispatch_width / 8;
+ int last_loop_end = 0;
+ int first_loop_start = 0;
+
+ /* We don't track live intervals for payload regs in our live interval
+ * analysis. Do a really cheesy version in this function: payload regs are
+ * live from the start of the program (always true) until either their last
+ * use, or the end of looping.
+ */
+ int ip = 0;
+ foreach_list(node, &this->instructions) {
+ fs_inst *inst = (fs_inst *)node;
+
+ if (inst->opcode == BRW_OPCODE_DO && first_loop_start == 0)
+ first_loop_start = ip;
+ else if (inst->opcode == BRW_OPCODE_WHILE)
+ last_loop_end = ip;
+
+ ip++;
+ }
+
+ int payload_use_ip[payload_reg_count];
+ memset(payload_use_ip, 0, sizeof(payload_use_ip));
+ ip = 0;
+ foreach_list(node, &this->instructions) {
+ fs_inst *inst = (fs_inst *)node;
+
+ /* Note that UNIFORM args have been turned into FIXED_HW_REG by
+ * assign_curbe_setup(), and interpolation uses fixed hardware regs from
+ * the start (see interp_reg()).
+ */
+ for (int i = 0; i < 3; i++) {
+ if (inst->src[i].file == FIXED_HW_REG &&
+ inst->src[i].fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE) {
+ int reg_nr = inst->src[i].fixed_hw_reg.nr / reg_width;
+ if (reg_nr < payload_reg_count) {
+ if (ip < first_loop_start)
+ payload_use_ip[reg_nr] = ip;
+ else
+ payload_use_ip[reg_nr] = MAX2(ip, last_loop_end);
+ }
+ }
+ }
+ ip++;
+ }
+
+ /* g0/g1 are implicitly used by the FB_WRITE messages, but not present as regs
+ * in the various instructions. Similarly, other weird payload bits up
+ * until the first push constant might be used and I don't want to think
+ * about them right now.
+ */
+ for (unsigned int i = 0; i < c->nr_payload_regs; i++) {
+ payload_use_ip[i / reg_width] = ip;
+ }
+
+ for (int i = 0; i < payload_reg_count; i++) {
+ /* Mark the payload reg as interfering with any virtual grf that is live
+ * between the start of the program and our last use of the payload reg.
+ */
+ for (int j = 0; j < this->virtual_grf_count; j++) {
+ if (this->virtual_grf_def[j] <= payload_use_ip[i] ||
+ this->virtual_grf_use[j] <= payload_use_ip[i]) {
+ ra_add_node_interference(g, first_payload_node + i, j);
+ }
+ }
+ }
+
for (int i = 0; i < payload_reg_count; i++) {
/* Mark each payload reg node as being allocated to its physical register.
*
@@ -177,13 +243,6 @@ brw_setup_payload_interference(struct ra_graph *g,
* would just be silly.
*/
ra_set_node_reg(g, first_payload_node + i, i);
-
- /* For now, just mark each payload node as interfering with every other
- * node to be allocated.
- */
- for (int j = 0; j < reg_node_count; j++) {
- ra_add_node_interference(g, first_payload_node + i, j);
- }
}
}
@@ -240,8 +299,7 @@ fs_visitor::assign_regs()
}
}
- brw_setup_payload_interference(g, payload_reg_count, first_payload_node,
- this->virtual_grf_count);
+ setup_payload_interference(g, payload_reg_count, first_payload_node);
if (!ra_allocate_no_spills(g)) {
/* Failed to allocate registers. Spill a reg, and the caller will
--
1.7.10.4
More information about the mesa-dev
mailing list