[Mesa-dev] [PATCH 4/7] i965/sched: switch to register pressure scheduling dynamically

Connor Abbott cwabbott0 at gmail.com
Fri Oct 30 18:02:55 PDT 2015


Now that we keep track of the register pressure and no longer need to
fall back to the LIFO heuristic, it's trivial to switch to a different
heuristic whenever the register pressure exceeds a certain threshold,
without having to commit to one heuristic for the whole shader. We
still need to try scheduling multiple times, though: for some shaders,
a high threshold will yield a better spill-free program than a low
threshold, while for other shaders we might not be able to allocate
without spilling until we drop the threshold quite low. The parameters
were determined by increasing the tolerance until there weren't any
more SIMD16 programs that failed to compile because they spilled.

total instructions in shared programs: 7393571 -> 7393504 (-0.00%)
instructions in affected programs: 32346 -> 32279 (-0.21%)
helped: 6
HURT: 11

total cycles in shared programs: 49065480 -> 48190694 (-1.78%)
cycles in affected programs: 5383866 -> 4509080 (-16.25%)
helped: 1580
HURT: 407

LOST:   10
GAINED: 45
Signed-off-by: Connor Abbott <cwabbott0 at gmail.com>
---
 src/mesa/drivers/dri/i965/brw_fs.cpp               | 34 ++++++++++++---------
 src/mesa/drivers/dri/i965/brw_fs.h                 |  6 +++-
 .../drivers/dri/i965/brw_schedule_instructions.cpp | 35 +++++++++++++++++-----
 3 files changed, 52 insertions(+), 23 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 72b3677..c922ce7 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -4927,17 +4927,23 @@ fs_visitor::allocate_registers()
 {
    bool allocated_without_spills;
 
-   static const enum instruction_scheduler_mode pre_modes[] = {
-      SCHEDULE_PRE,
-      SCHEDULE_PRE_NON_LIFO,
-   };
-
-   /* Try each scheduling heuristic to see if it can successfully register
-    * allocate without spilling.  They should be ordered by decreasing
-    * performance but increasing likelihood of allocating.
+   unsigned reg_pressure_threshold = 124;
+   unsigned max_reg_pressure = 200;
+   unsigned prev_max_reg_pressure;
+
+   /* the number of times we decreased the threshold and the actual register
+    * pressure didn't decrease
     */
-   for (unsigned i = 0; i < ARRAY_SIZE(pre_modes); i++) {
-      schedule_instructions(pre_modes[i]);
+   unsigned num_missed = 0; 
+   do {
+      prev_max_reg_pressure = max_reg_pressure;
+      max_reg_pressure = schedule_instructions(reg_pressure_threshold,
+                                               SCHEDULE_PRE);
+
+      if (max_reg_pressure >= prev_max_reg_pressure)
+         num_missed++;
+      else
+         num_missed = 0;
 
       if (0) {
          assign_regs_trivial();
@@ -4945,9 +4951,9 @@ fs_visitor::allocate_registers()
       } else {
          allocated_without_spills = assign_regs(false);
       }
-      if (allocated_without_spills)
-         break;
-   }
+
+      reg_pressure_threshold -= 8;
+   } while(!allocated_without_spills && num_missed < 8);
 
    if (!allocated_without_spills) {
       /* We assume that any spilling is worse than just dropping back to
@@ -4983,7 +4989,7 @@ fs_visitor::allocate_registers()
    if (failed)
       return;
 
-   schedule_instructions(SCHEDULE_POST);
+   schedule_instructions(0, SCHEDULE_POST);
 
    if (last_scratch > 0)
       prog_data->total_scratch = brw_get_scratch_size(last_scratch);
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 8058b34..10c0634 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -177,7 +177,11 @@ public:
 
    bool opt_sampler_eot();
    bool virtual_grf_interferes(int a, int b);
-   void schedule_instructions(instruction_scheduler_mode mode);
+
+   /* returns the maximum register pressure before register allocation */
+   int schedule_instructions(int reg_pressure_threshold,
+                             instruction_scheduler_mode mode);
+
    void insert_gen4_send_dependency_workarounds();
    void insert_gen4_pre_send_dependency_workarounds(bblock_t *block,
                                                     fs_inst *inst);
diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
index 56d91ee..55c28f6 100644
--- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
@@ -396,6 +396,7 @@ class instruction_scheduler {
 public:
    instruction_scheduler(backend_shader *s, int grf_count,
                          int hw_reg_count, int block_count,
+                         int pressure_threshold,
                          instruction_scheduler_mode mode)
    {
       this->bs = s;
@@ -405,6 +406,7 @@ public:
       this->instructions.make_empty();
       this->instructions_to_schedule = 0;
       this->post_reg_alloc = (mode == SCHEDULE_POST);
+      this->pressure_threshold = pressure_threshold;
       this->mode = mode;
       this->time = 0;
       if (!post_reg_alloc) {
@@ -430,6 +432,8 @@ public:
          this->reads_remaining = rzalloc_array(mem_ctx, int, grf_count);
 
          this->hw_reads_remaining = rzalloc_array(mem_ctx, int, hw_reg_count);
+
+         this->max_reg_pressure = 0;
       } else {
          this->reg_pressure_in = NULL;
          this->livein = NULL;
@@ -483,9 +487,15 @@ public:
    exec_list instructions;
    backend_shader *bs;
 
+   int pressure_threshold;
    instruction_scheduler_mode mode;
 
    /*
+    * The maximum register pressure encountered in the shader.
+    */
+   int max_reg_pressure;
+
+   /*
     * The register pressure at the beginning of each basic block.
     */
 
@@ -532,7 +542,7 @@ class fs_instruction_scheduler : public instruction_scheduler
 {
 public:
    fs_instruction_scheduler(fs_visitor *v, int grf_count, int hw_reg_count,
-                            int block_count,
+                            int block_count, int pressure_threshold,
                             instruction_scheduler_mode mode);
    void calculate_deps();
    bool is_compressed(fs_inst *inst);
@@ -549,8 +559,10 @@ public:
 fs_instruction_scheduler::fs_instruction_scheduler(fs_visitor *v,
                                                    int grf_count, int hw_reg_count,
                                                    int block_count,
+                                                   int pressure_threshold,
                                                    instruction_scheduler_mode mode)
-   : instruction_scheduler(v, grf_count, hw_reg_count, block_count, mode),
+   : instruction_scheduler(v, grf_count, hw_reg_count, block_count,
+                           pressure_threshold, mode),
      v(v)
 {
 }
@@ -727,7 +739,7 @@ public:
 
 vec4_instruction_scheduler::vec4_instruction_scheduler(vec4_visitor *v,
                                                        int grf_count)
-   : instruction_scheduler(v, grf_count, 0, 0, SCHEDULE_POST),
+   : instruction_scheduler(v, grf_count, 0, 0, 0, SCHEDULE_POST),
      v(v)
 {
 }
@@ -1388,7 +1400,7 @@ fs_instruction_scheduler::choose_instruction_to_schedule()
 {
    schedule_node *chosen = NULL;
 
-   if (mode == SCHEDULE_PRE || mode == SCHEDULE_POST) {
+   if (post_reg_alloc || reg_pressure < pressure_threshold) {
       int chosen_unblocked_time = 0, chosen_delay = 0;
 
       /* First, find the earliest instruction we can possibly schedule. Then,
@@ -1505,8 +1517,10 @@ instruction_scheduler::schedule_instructions(bblock_t *block)
    const struct brw_device_info *devinfo = bs->devinfo;
    backend_instruction *inst = block->end();
    time = 0;
-   if (!post_reg_alloc)
+   if (!post_reg_alloc) {
       reg_pressure = reg_pressure_in[block->num];
+      max_reg_pressure = MAX2(max_reg_pressure, reg_pressure);
+   }
    block_idx = block->num;
 
    /* Remove non-DAG heads from the list. */
@@ -1527,6 +1541,7 @@ instruction_scheduler::schedule_instructions(bblock_t *block)
       if (!post_reg_alloc) {
          reg_pressure -= get_register_pressure_benefit(chosen->inst);
          update_register_pressure(chosen->inst);
+         max_reg_pressure = MAX2(max_reg_pressure, reg_pressure);
       }
 
       /* If we expected a delay for scheduling, then bump the clock to reflect
@@ -1658,8 +1673,9 @@ instruction_scheduler::run(cfg_t *cfg)
    cfg->cycle_count = get_cycle_count(cfg);
 }
 
-void
-fs_visitor::schedule_instructions(instruction_scheduler_mode mode)
+int
+fs_visitor::schedule_instructions(int reg_pressure_threshold,
+                                  instruction_scheduler_mode mode)
 {
    calculate_live_intervals();
 
@@ -1670,7 +1686,8 @@ fs_visitor::schedule_instructions(instruction_scheduler_mode mode)
       grf_count = alloc.count;
 
    fs_instruction_scheduler sched(this, grf_count, first_non_payload_grf,
-                                  cfg->num_blocks, mode);
+                                  cfg->num_blocks, reg_pressure_threshold,
+                                  mode);
    sched.run(cfg);
 
    if (unlikely(debug_enabled) && mode == SCHEDULE_POST) {
@@ -1679,6 +1696,8 @@ fs_visitor::schedule_instructions(instruction_scheduler_mode mode)
    }
 
    invalidate_live_intervals();
+
+   return sched.max_reg_pressure;
 }
 
 void
-- 
2.4.3



More information about the mesa-dev mailing list