[Mesa-dev] [PATCH v2 13/16] i965: Move fs_visitor ra pass to new fs_visitor::allocate_registers()
Kristian Høgsberg
krh at bitplanet.net
Thu Nov 13 16:28:19 PST 2014
This will be reused for the scalar VS pass.
Signed-off-by: Kristian Høgsberg <krh at bitplanet.net>
---
src/mesa/drivers/dri/i965/brw_fs.cpp | 132 +++++++++++++++++++----------------
src/mesa/drivers/dri/i965/brw_fs.h | 1 +
2 files changed, 71 insertions(+), 62 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index cb73b9f..4dce0a2 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -3538,11 +3538,79 @@ fs_visitor::optimize()
lower_uniform_pull_constant_loads();
}
+void
+fs_visitor::allocate_registers()
+{
+ bool allocated_without_spills;
+
+ static enum instruction_scheduler_mode pre_modes[] = {
+ SCHEDULE_PRE,
+ SCHEDULE_PRE_NON_LIFO,
+ SCHEDULE_PRE_LIFO,
+ };
+
+ /* Try each scheduling heuristic to see if it can successfully register
+ * allocate without spilling. They should be ordered by decreasing
+ * performance but increasing likelihood of allocating.
+ */
+ for (unsigned i = 0; i < ARRAY_SIZE(pre_modes); i++) {
+ schedule_instructions(pre_modes[i]);
+
+ if (0) {
+ assign_regs_trivial();
+ allocated_without_spills = true;
+ } else {
+ allocated_without_spills = assign_regs(false);
+ }
+ if (allocated_without_spills)
+ break;
+ }
+
+ if (!allocated_without_spills) {
+ /* We assume that any spilling is worse than just dropping back to
+ * SIMD8. There's probably actually some intermediate point where
+ * SIMD16 with a couple of spills is still better.
+ */
+ if (dispatch_width == 16) {
+ fail("Failure to register allocate. Reduce number of "
+ "live scalar values to avoid this.");
+ } else {
+ perf_debug("Fragment shader triggered register spilling. "
+ "Try reducing the number of live scalar values to "
+ "improve performance.\n");
+ }
+
+ /* Since we're out of heuristics, just go spill registers until we
+ * get an allocation.
+ */
+ while (!assign_regs(true)) {
+ if (failed)
+ break;
+ }
+ }
+
+ assert(force_uncompressed_stack == 0);
+
+ /* This must come after all optimization and register allocation, since
+ * it inserts dead code that happens to have side effects, and it does
+ * so based on the actual physical registers in use.
+ */
+ insert_gen4_send_dependency_workarounds();
+
+ if (failed)
+ return;
+
+ if (!allocated_without_spills)
+ schedule_instructions(SCHEDULE_POST);
+
+ if (last_scratch > 0)
+ prog_data->total_scratch = brw_get_scratch_size(last_scratch);
+}
+
bool
fs_visitor::run()
{
sanity_param_count = prog->Parameters->NumParameters;
- bool allocated_without_spills;
assign_binding_table_offsets();
@@ -3555,7 +3623,6 @@ fs_visitor::run()
emit_dummy_fs();
} else if (brw->use_rep_send && dispatch_width == 16) {
emit_repclear_shader();
- allocated_without_spills = true;
} else {
if (INTEL_DEBUG & DEBUG_SHADER_TIME)
emit_shader_time_begin();
@@ -3610,68 +3677,9 @@ fs_visitor::run()
assign_curb_setup();
assign_urb_setup();
- static enum instruction_scheduler_mode pre_modes[] = {
- SCHEDULE_PRE,
- SCHEDULE_PRE_NON_LIFO,
- SCHEDULE_PRE_LIFO,
- };
-
- /* Try each scheduling heuristic to see if it can successfully register
- * allocate without spilling. They should be ordered by decreasing
- * performance but increasing likelihood of allocating.
- */
- for (unsigned i = 0; i < ARRAY_SIZE(pre_modes); i++) {
- schedule_instructions(pre_modes[i]);
-
- if (0) {
- assign_regs_trivial();
- allocated_without_spills = true;
- } else {
- allocated_without_spills = assign_regs(false);
- }
- if (allocated_without_spills)
- break;
- }
-
- if (!allocated_without_spills) {
- /* We assume that any spilling is worse than just dropping back to
- * SIMD8. There's probably actually some intermediate point where
- * SIMD16 with a couple of spills is still better.
- */
- if (dispatch_width == 16) {
- fail("Failure to register allocate. Reduce number of "
- "live scalar values to avoid this.");
- } else {
- perf_debug("Fragment shader triggered register spilling. "
- "Try reducing the number of live scalar values to "
- "improve performance.\n");
- }
-
- /* Since we're out of heuristics, just go spill registers until we
- * get an allocation.
- */
- while (!assign_regs(true)) {
- if (failed)
- break;
- }
- }
-
- assert(force_uncompressed_stack == 0);
-
- /* This must come after all optimization and register allocation, since
- * it inserts dead code that happens to have side effects, and it does
- * so based on the actual physical registers in use.
- */
- insert_gen4_send_dependency_workarounds();
-
+ allocate_registers();
if (failed)
return false;
-
- if (!allocated_without_spills)
- schedule_instructions(SCHEDULE_POST);
-
- if (last_scratch > 0)
- prog_data->total_scratch = brw_get_scratch_size(last_scratch);
}
if (stage == MESA_SHADER_FRAGMENT) {
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 31c3001..bb6f767 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -407,6 +407,7 @@ public:
bool run();
void optimize();
+ void allocate_registers();
void assign_binding_table_offsets();
void setup_payload_gen4();
void setup_payload_gen6();
--
2.1.0
More information about the mesa-dev
mailing list