[Mesa-dev] [PATCH 4/6] i965: dump scheduling cycle estimates

Fri Oct 2 14:37:32 PDT 2015

The heuristic we're using is rather lame, since it assumes everything is
non-uniform and loops execute 50 times, but it should be enough for
measuring improvements in the scheduler that don't result in a change in
the number of instructions.

Signed-off-by: Connor Abbott <cwabbott0 at gmail.com>
---
 src/mesa/drivers/dri/i965/brw_cfg.h                  |  4 ++++
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp       | 11 ++++++-----
 .../drivers/dri/i965/brw_schedule_instructions.cpp   | 20 ++++++++++++++++++++
 src/mesa/drivers/dri/i965/brw_vec4_generator.cpp     |  9 +++++----
 4 files changed, 35 insertions(+), 9 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_cfg.h b/src/mesa/drivers/dri/i965/brw_cfg.h
index a094917..d0bdb00 100644
--- a/src/mesa/drivers/dri/i965/brw_cfg.h
+++ b/src/mesa/drivers/dri/i965/brw_cfg.h
@@ -90,6 +90,8 @@ struct bblock_t {
    struct exec_list parents;
    struct exec_list children;
    int num;
+
+   unsigned cycle_count;
 };
 
 static inline struct backend_instruction *
@@ -285,6 +287,8 @@ struct cfg_t {
    int num_blocks;
 
    bool idom_dirty;
+
+   unsigned cycle_count;
 };
 
 /* Note that this is implemented with a double for loop -- break will
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 6f8b75e..9540012 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -2181,9 +2181,9 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
 
    if (unlikely(debug_flag)) {
       fprintf(stderr, "Native code for %s\n"
-              "SIMD%d shader: %d instructions. %d loops. %d:%d spills:fills. Promoted %u constants. Compacted %d to %d"
+              "SIMD%d shader: %d instructions. %u cycles. %d loops. %d:%d spills:fills. Promoted %u constants. Compacted %d to %d"
               " bytes (%.0f%%)\n",
-              shader_name, dispatch_width, before_size / 16, loop_count,
+              shader_name, dispatch_width, before_size / 16, cfg->cycle_count, loop_count,
               spill_count, fill_count, promoted_constants, before_size, after_size,
               100.0f * (before_size - after_size) / before_size);
 
@@ -2193,12 +2193,13 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
    }
 
    compiler->shader_debug_log(log_data,
-                              "%s SIMD%d shader: %d inst, %d loops, "
+                              "%s SIMD%d shader: %d inst, %u cycles, %d loops, "
                               "%d:%d spills:fills, Promoted %u constants, "
                               "compacted %d to %d bytes.\n",
                               stage_abbrev, dispatch_width, before_size / 16,
-                              loop_count, spill_count, fill_count,
-                              promoted_constants, before_size, after_size);
+                              cfg->cycle_count, loop_count, spill_count,
+                              fill_count, promoted_constants, before_size,
+                              after_size);
 
    return start_offset;
 }
diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
index 1652261..22a493f 100644
--- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
@@ -1467,6 +1467,24 @@ instruction_scheduler::schedule_instructions(bblock_t *block)
    if (block->end()->opcode == BRW_OPCODE_NOP)
       block->end()->remove(block);
    assert(instructions_to_schedule == 0);
+
+   block->cycle_count = time;
+}
+
+static unsigned get_cycle_count(cfg_t *cfg)
+{
+   unsigned count = 0, multiplier = 1;
+   foreach_block(block, cfg) {
+      if (block->start()->opcode == BRW_OPCODE_DO)
+         multiplier *= 50; /* assume that loops have ~50 instructions */
+
+      count += block->cycle_count * multiplier;
+
+      if (block->end()->opcode == BRW_OPCODE_WHILE)
+         multiplier /= 50;
+   }
+
+   return count;
 }
 
 void
@@ -1507,6 +1525,8 @@ instruction_scheduler::run(cfg_t *cfg)
               post_reg_alloc);
       bs->dump_instructions();
    }
+
+   cfg->cycle_count = get_cycle_count(cfg);
 }
 
 void
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
index dcacc90..3010352 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
@@ -1656,10 +1656,10 @@ vec4_generator::generate_code(const cfg_t *cfg)
          fprintf(stderr, "Native code for %s program %d:\n", stage_name,
                  prog->Id);
       }
-      fprintf(stderr, "%s vec4 shader: %d instructions. %d loops. Compacted %d to %d"
+      fprintf(stderr, "%s vec4 shader: %d instructions. %u cycles. %d loops. Compacted %d to %d"
                       " bytes (%.0f%%)\n",
               stage_abbrev,
-              before_size / 16, loop_count, before_size, after_size,
+              before_size / 16, cfg->cycle_count, loop_count, before_size, after_size,
               100.0f * (before_size - after_size) / before_size);
 
       dump_assembly(p->store, annotation.ann_count, annotation.ann,
@@ -1668,9 +1668,10 @@ vec4_generator::generate_code(const cfg_t *cfg)
    }
 
    compiler->shader_debug_log(log_data,
-                              "%s vec4 shader: %d inst, %d loops, "
+                              "%s vec4 shader: %d inst, %u cycles, %d loops, "
                               "compacted %d to %d bytes.\n",
-                              stage_abbrev, before_size / 16, loop_count,
+                              stage_abbrev, before_size / 16,
+                              cfg->cycle_count, loop_count,
                               before_size, after_size);
 }
 
-- 
2.1.0