[Mesa-dev] [PATCH 4/6] i965: dump scheduling cycle estimates
Iago Toral
itoral at igalia.com
Mon Oct 5 04:16:32 PDT 2015
On Fri, 2015-10-02 at 17:43 -0400, Connor Abbott wrote:
> On Fri, Oct 2, 2015 at 5:37 PM, Connor Abbott <cwabbott0 at gmail.com> wrote:
> > The heuristic we're using is rather lame, since it assumes everything is
> > non-uniform and loops execute 50 times, but it should be enough for
> > measuring improvements in the scheduler that don't result in a change in
> > the number of instructions.
Our spilling code assumes that loops run 10 times when evaluating
spill costs. Shouldn't we use the same estimate (loop iteration count)
everywhere?
Iago
> > Signed-off-by: Connor Abbott <cwabbott0 at gmail.com>
> > ---
> > src/mesa/drivers/dri/i965/brw_cfg.h | 4 ++++
> > src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 11 ++++++-----
> > .../drivers/dri/i965/brw_schedule_instructions.cpp | 20 ++++++++++++++++++++
> > src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 9 +++++----
> > 4 files changed, 35 insertions(+), 9 deletions(-)
> >
> > diff --git a/src/mesa/drivers/dri/i965/brw_cfg.h b/src/mesa/drivers/dri/i965/brw_cfg.h
> > index a094917..d0bdb00 100644
> > --- a/src/mesa/drivers/dri/i965/brw_cfg.h
> > +++ b/src/mesa/drivers/dri/i965/brw_cfg.h
> > @@ -90,6 +90,8 @@ struct bblock_t {
> > struct exec_list parents;
> > struct exec_list children;
> > int num;
> > +
> > + unsigned cycle_count;
> > };
> >
> > static inline struct backend_instruction *
> > @@ -285,6 +287,8 @@ struct cfg_t {
> > int num_blocks;
> >
> > bool idom_dirty;
> > +
> > + unsigned cycle_count;
> > };
> >
> > /* Note that this is implemented with a double for loop -- break will
> > diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> > index 6f8b75e..9540012 100644
> > --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> > +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> > @@ -2181,9 +2181,9 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
> >
> > if (unlikely(debug_flag)) {
> > fprintf(stderr, "Native code for %s\n"
> > - "SIMD%d shader: %d instructions. %d loops. %d:%d spills:fills. Promoted %u constants. Compacted %d to %d"
> > + "SIMD%d shader: %d instructions. %u cycles. %d loops. %d:%d spills:fills. Promoted %u constants. Compacted %d to %d"
> > " bytes (%.0f%%)\n",
> > - shader_name, dispatch_width, before_size / 16, loop_count,
> > + shader_name, dispatch_width, before_size / 16, cfg->cycle_count, loop_count,
> > spill_count, fill_count, promoted_constants, before_size, after_size,
> > 100.0f * (before_size - after_size) / before_size);
> >
> > @@ -2193,12 +2193,13 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
> > }
> >
> > compiler->shader_debug_log(log_data,
> > - "%s SIMD%d shader: %d inst, %d loops, "
> > + "%s SIMD%d shader: %d inst, %u cycles, %d loops, "
> > "%d:%d spills:fills, Promoted %u constants, "
> > "compacted %d to %d bytes.\n",
> > stage_abbrev, dispatch_width, before_size / 16,
> > - loop_count, spill_count, fill_count,
> > - promoted_constants, before_size, after_size);
> > + cfg->cycle_count, loop_count, spill_count,
> > + fill_count, promoted_constants, before_size,
> > + after_size);
> >
> > return start_offset;
> > }
> > diff --git a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
> > index 1652261..22a493f 100644
> > --- a/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
> > +++ b/src/mesa/drivers/dri/i965/brw_schedule_instructions.cpp
> > @@ -1467,6 +1467,24 @@ instruction_scheduler::schedule_instructions(bblock_t *block)
> > if (block->end()->opcode == BRW_OPCODE_NOP)
> > block->end()->remove(block);
> > assert(instructions_to_schedule == 0);
> > +
> > + block->cycle_count = time;
> > +}
> > +
> > +static unsigned get_cycle_count(cfg_t *cfg)
> > +{
> > + unsigned count = 0, multiplier = 1;
> > + foreach_block(block, cfg) {
> > + if (block->start()->opcode == BRW_OPCODE_DO)
> > + multiplier *= 50; /* assume that loops have ~50 instructions */
>
> Whoops, this should say "assume that loops are run ~50 times"...
>
> > +
> > + count += block->cycle_count * multiplier;
> > +
> > + if (block->end()->opcode == BRW_OPCODE_WHILE)
> > + multiplier /= 50;
> > + }
> > +
> > + return count;
> > }
> >
> > void
> > @@ -1507,6 +1525,8 @@ instruction_scheduler::run(cfg_t *cfg)
> > post_reg_alloc);
> > bs->dump_instructions();
> > }
> > +
> > + cfg->cycle_count = get_cycle_count(cfg);
> > }
> >
> > void
> > diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
> > index dcacc90..3010352 100644
> > --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
> > +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
> > @@ -1656,10 +1656,10 @@ vec4_generator::generate_code(const cfg_t *cfg)
> > fprintf(stderr, "Native code for %s program %d:\n", stage_name,
> > prog->Id);
> > }
> > - fprintf(stderr, "%s vec4 shader: %d instructions. %d loops. Compacted %d to %d"
> > + fprintf(stderr, "%s vec4 shader: %d instructions. %u cycles. %d loops. Compacted %d to %d"
> > " bytes (%.0f%%)\n",
> > stage_abbrev,
> > - before_size / 16, loop_count, before_size, after_size,
> > + before_size / 16, cfg->cycle_count, loop_count, before_size, after_size,
> > 100.0f * (before_size - after_size) / before_size);
> >
> > dump_assembly(p->store, annotation.ann_count, annotation.ann,
> > @@ -1668,9 +1668,10 @@ vec4_generator::generate_code(const cfg_t *cfg)
> > }
> >
> > compiler->shader_debug_log(log_data,
> > - "%s vec4 shader: %d inst, %d loops, "
> > + "%s vec4 shader: %d inst, %u cycles, %d loops, "
> > "compacted %d to %d bytes.\n",
> > - stage_abbrev, before_size / 16, loop_count,
> > + stage_abbrev, before_size / 16,
> > + cfg->cycle_count, loop_count,
> > before_size, after_size);
> > }
> >
> > --
> > 2.1.0
> >
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev mailing list