[Mesa-dev] [PATCH 6/6] i965: Add a debug flag for counting cycles spent in each compiled shader.

Kenneth Graunke kenneth at whitecape.org
Fri Nov 30 14:54:55 PST 2012


On 11/30/2012 01:24 PM, Eric Anholt wrote:
> This can be used for two purposes: Using hand-coded shaders to determine
> per-instruction timings, or figuring out which shader to optimize in a
> whole application.  On the psychonauts trace, we see:
>
> type   ID      cycles spent                   % of total
> ...
> fs16  543:     339343898369 ( 339.34 Gcycles)       9.2%
> fs16  521:     532215110990 ( 532.22 Gcycles)      14.4%
> fs16  524:    1036231987390 (1036.23 Gcycles)      28.0%
>
> confirming our previous understanding that fragment shaders are where
> it's all at.  But on GLBenchmark 2.7, we get:
>
> fs16   69:     205928219888 ( 205.93 Gcycles)       7.5%
> fs16   75:     364066413095 ( 364.07 Gcycles)      13.2%
> vs     87:    1107217698878 (1107.22 Gcycles)      40.3%
>
> That's interesting.  I should look into that.
>
> Note that this doesn't cover the instructions that set up the message to
> the URB/FB write -- we'd need to convert the MRF usage in these
> instructions to GRFs so that our offsets/times don't overwrite our
> shader outputs.
> ---
>   src/mesa/drivers/dri/i965/brw_context.c          |    3 +
>   src/mesa/drivers/dri/i965/brw_context.h          |   28 ++++-
>   src/mesa/drivers/dri/i965/brw_defines.h          |   20 +++-
>   src/mesa/drivers/dri/i965/brw_eu.h               |    6 +-
>   src/mesa/drivers/dri/i965/brw_eu_emit.c          |   55 +++++++++-
>   src/mesa/drivers/dri/i965/brw_fs.cpp             |  101 +++++++++++++++++
>   src/mesa/drivers/dri/i965/brw_fs.h               |    7 ++
>   src/mesa/drivers/dri/i965/brw_fs_emit.cpp        |    4 +
>   src/mesa/drivers/dri/i965/brw_program.c          |  128 ++++++++++++++++++++++
>   src/mesa/drivers/dri/i965/brw_vec4.cpp           |   81 ++++++++++++++
>   src/mesa/drivers/dri/i965/brw_vec4.h             |    7 ++
>   src/mesa/drivers/dri/i965/brw_vec4_emit.cpp      |    4 +
>   src/mesa/drivers/dri/i965/brw_vs_surface_state.c |   10 ++
>   src/mesa/drivers/dri/i965/brw_vtbl.c             |   14 +++
>   src/mesa/drivers/dri/i965/brw_wm_surface_state.c |    7 ++
>   src/mesa/drivers/dri/intel/intel_context.c       |    6 +
>   src/mesa/drivers/dri/intel/intel_context.h       |    1 +
>   17 files changed, 475 insertions(+), 7 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
> index 4b1b247..5665a3a 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.c
> +++ b/src/mesa/drivers/dri/i965/brw_context.c
> @@ -383,6 +383,9 @@ brwCreateContext(int api,
>
>      brw_fs_alloc_reg_sets(brw);
>
> +   if (INTEL_DEBUG & DEBUG_SHADER_TIME)
> +      brw_init_shader_time(brw);
> +
>      return true;
>   }
>
> diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
> index 1abaee3..dc25cab 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.h
> +++ b/src/mesa/drivers/dri/i965/brw_context.h
> @@ -559,14 +559,15 @@ struct brw_vs_prog_data {
>   #define SURF_INDEX_FRAG_CONST_BUFFER (BRW_MAX_DRAW_BUFFERS + 1)
>   #define SURF_INDEX_TEXTURE(t)        (BRW_MAX_DRAW_BUFFERS + 2 + (t))
>   #define SURF_INDEX_WM_UBO(u)         (SURF_INDEX_TEXTURE(BRW_MAX_TEX_UNIT) + u)
> -
> +#define SURF_INDEX_WM_SHADER_TIME    (SURF_INDEX_WM_UBO(12))
>   /** Maximum size of the binding table. */
> -#define BRW_MAX_WM_SURFACES          SURF_INDEX_WM_UBO(BRW_MAX_WM_UBOS)
> +#define BRW_MAX_WM_SURFACES          (SURF_INDEX_WM_SHADER_TIME + 1)
>
>   #define SURF_INDEX_VERT_CONST_BUFFER (0)
>   #define SURF_INDEX_VS_TEXTURE(t)     (SURF_INDEX_VERT_CONST_BUFFER + 1 + (t))
>   #define SURF_INDEX_VS_UBO(u)         (SURF_INDEX_VS_TEXTURE(BRW_MAX_TEX_UNIT) + u)
> -#define BRW_MAX_VS_SURFACES          SURF_INDEX_VS_UBO(BRW_MAX_VS_UBOS)
> +#define SURF_INDEX_VS_SHADER_TIME    (SURF_INDEX_VS_UBO(12))
> +#define BRW_MAX_VS_SURFACES          (SURF_INDEX_VS_SHADER_TIME + 1)
>
>   #define SURF_INDEX_SOL_BINDING(t)    ((t))
>   #define BRW_MAX_GS_SURFACES          SURF_INDEX_SOL_BINDING(BRW_MAX_SOL_BINDINGS)
> @@ -651,6 +652,13 @@ struct brw_tracked_state {
>      void (*emit)( struct brw_context *brw );
>   };
>
> +enum shader_time_shader_type {
> +   ST_NONE,
> +   ST_VS,
> +   ST_FS8,
> +   ST_FS16,
> +};
> +
>   /* Flags for brw->state.cache.
>    */
>   #define CACHE_NEW_BLEND_STATE            (1<<BRW_BLEND_STATE)
> @@ -1089,6 +1097,16 @@ struct brw_context
>
>      uint32_t num_instances;
>      int basevertex;
> +
> +   struct {
> +      drm_intel_bo *bo;
> +      struct gl_shader_program **programs;
> +      enum shader_time_shader_type *types;
> +      uint64_t *cumulative;
> +      int num_entries;
> +      int max_entries;
> +      double report_time;
> +   } shader_time;
>   };
>
>   /*======================================================================
> @@ -1144,7 +1162,9 @@ void brwInitFragProgFuncs( struct dd_function_table *functions );
>   int brw_get_scratch_size(int size);
>   void brw_get_scratch_bo(struct intel_context *intel,
>   			drm_intel_bo **scratch_bo, int size);
> -
> +void brw_init_shader_time(struct brw_context *brw);
> +void brw_collect_and_report_shader_time(struct brw_context *brw);
> +void brw_destroy_shader_time(struct brw_context *brw);
>
>   /* brw_urb.c
>    */
> diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
> index 6dc4707..b84d8f9 100644
> --- a/src/mesa/drivers/dri/i965/brw_defines.h
> +++ b/src/mesa/drivers/dri/i965/brw_defines.h
> @@ -665,6 +665,8 @@ enum opcode {
>      SHADER_OPCODE_TXS,
>      FS_OPCODE_TXB,
>
> +   SHADER_OPCODE_SHADER_TIME_ADD,
> +
>      FS_OPCODE_DDX,
>      FS_OPCODE_DDY,
>      FS_OPCODE_PIXEL_X,
> @@ -729,6 +731,8 @@ enum opcode {
>   #define BRW_ARF_CONTROL               0x80
>   #define BRW_ARF_NOTIFICATION_COUNT    0x90
>   #define BRW_ARF_IP                    0xA0
> +#define BRW_ARF_TDR                   0xB0
> +#define BRW_ARF_TIMESTAMP             0xC0
>
>   #define BRW_MRF_COMPR4			(1 << 7)
>
> @@ -956,7 +960,21 @@ enum brw_message_target {
>   #define BRW_SCRATCH_SPACE_SIZE_1M     10
>   #define BRW_SCRATCH_SPACE_SIZE_2M     11
>

Maybe add a /** URB Atomic Operations */ comment here?

> -
> +#define BRW_AOP_AND                   1
> +#define BRW_AOP_OR                    2
> +#define BRW_AOP_XOR                   3
> +#define BRW_AOP_MOV                   4
> +#define BRW_AOP_INC                   5
> +#define BRW_AOP_DEC                   6
> +#define BRW_AOP_ADD                   7
> +#define BRW_AOP_SUB                   8
> +#define BRW_AOP_REVSUB                9
> +#define BRW_AOP_IMAX                  10
> +#define BRW_AOP_IMIN                  11
> +#define BRW_AOP_UMAX                  12
> +#define BRW_AOP_UMIN                  13
> +#define BRW_AOP_CMPWR                 14
> +#define BRW_AOP_PREDEC                15

This looks awesome.  Series is:
Reviewed-by: Kenneth Graunke <kenneth at whitecape.org>



More information about the mesa-dev mailing list