[Mesa-dev] [PATCH 6/6] i965: Add a debug flag for counting cycles spent in each compiled shader.
Kenneth Graunke
kenneth at whitecape.org
Fri Nov 30 14:54:55 PST 2012
On 11/30/2012 01:24 PM, Eric Anholt wrote:
> This can be used for two purposes: Using hand-coded shaders to determine
> per-instruction timings, or figuring out which shader to optimize in a
> whole application. On the psychonauts trace, we see:
>
> type ID cycles spent % of total
> ...
> fs16 543: 339343898369 ( 339.34 Gcycles) 9.2%
> fs16 521: 532215110990 ( 532.22 Gcycles) 14.4%
> fs16 524: 1036231987390 (1036.23 Gcycles) 28.0%
>
> confirming our previous understanding that fragment shaders are where
> it's all at. But on GLBenchmark 2.7, we get:
>
> fs16 69: 205928219888 ( 205.93 Gcycles) 7.5%
> fs16 75: 364066413095 ( 364.07 Gcycles) 13.2%
> vs 87: 1107217698878 (1107.22 Gcycles) 40.3%
>
> That's interesting. I should look into that.
>
> Note that this doesn't cover the instructions that set up the message to
> the URB/FB write -- we'd need to convert the MRF usage in these
> instructions to GRFs so that our offsets/times don't overwrite our
> shader outputs.
> ---
> src/mesa/drivers/dri/i965/brw_context.c | 3 +
> src/mesa/drivers/dri/i965/brw_context.h | 28 ++++-
> src/mesa/drivers/dri/i965/brw_defines.h | 20 +++-
> src/mesa/drivers/dri/i965/brw_eu.h | 6 +-
> src/mesa/drivers/dri/i965/brw_eu_emit.c | 55 +++++++++-
> src/mesa/drivers/dri/i965/brw_fs.cpp | 101 +++++++++++++++++
> src/mesa/drivers/dri/i965/brw_fs.h | 7 ++
> src/mesa/drivers/dri/i965/brw_fs_emit.cpp | 4 +
> src/mesa/drivers/dri/i965/brw_program.c | 128 ++++++++++++++++++++++
> src/mesa/drivers/dri/i965/brw_vec4.cpp | 81 ++++++++++++++
> src/mesa/drivers/dri/i965/brw_vec4.h | 7 ++
> src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 4 +
> src/mesa/drivers/dri/i965/brw_vs_surface_state.c | 10 ++
> src/mesa/drivers/dri/i965/brw_vtbl.c | 14 +++
> src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 7 ++
> src/mesa/drivers/dri/intel/intel_context.c | 6 +
> src/mesa/drivers/dri/intel/intel_context.h | 1 +
> 17 files changed, 475 insertions(+), 7 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
> index 4b1b247..5665a3a 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.c
> +++ b/src/mesa/drivers/dri/i965/brw_context.c
> @@ -383,6 +383,9 @@ brwCreateContext(int api,
>
> brw_fs_alloc_reg_sets(brw);
>
> + if (INTEL_DEBUG & DEBUG_SHADER_TIME)
> + brw_init_shader_time(brw);
> +
> return true;
> }
>
> diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
> index 1abaee3..dc25cab 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.h
> +++ b/src/mesa/drivers/dri/i965/brw_context.h
> @@ -559,14 +559,15 @@ struct brw_vs_prog_data {
> #define SURF_INDEX_FRAG_CONST_BUFFER (BRW_MAX_DRAW_BUFFERS + 1)
> #define SURF_INDEX_TEXTURE(t) (BRW_MAX_DRAW_BUFFERS + 2 + (t))
> #define SURF_INDEX_WM_UBO(u) (SURF_INDEX_TEXTURE(BRW_MAX_TEX_UNIT) + u)
> -
> +#define SURF_INDEX_WM_SHADER_TIME (SURF_INDEX_WM_UBO(12))
> /** Maximum size of the binding table. */
> -#define BRW_MAX_WM_SURFACES SURF_INDEX_WM_UBO(BRW_MAX_WM_UBOS)
> +#define BRW_MAX_WM_SURFACES (SURF_INDEX_WM_SHADER_TIME + 1)
>
> #define SURF_INDEX_VERT_CONST_BUFFER (0)
> #define SURF_INDEX_VS_TEXTURE(t) (SURF_INDEX_VERT_CONST_BUFFER + 1 + (t))
> #define SURF_INDEX_VS_UBO(u) (SURF_INDEX_VS_TEXTURE(BRW_MAX_TEX_UNIT) + u)
> -#define BRW_MAX_VS_SURFACES SURF_INDEX_VS_UBO(BRW_MAX_VS_UBOS)
> +#define SURF_INDEX_VS_SHADER_TIME (SURF_INDEX_VS_UBO(12))
> +#define BRW_MAX_VS_SURFACES (SURF_INDEX_VS_SHADER_TIME + 1)
>
> #define SURF_INDEX_SOL_BINDING(t) ((t))
> #define BRW_MAX_GS_SURFACES SURF_INDEX_SOL_BINDING(BRW_MAX_SOL_BINDINGS)
> @@ -651,6 +652,13 @@ struct brw_tracked_state {
> void (*emit)( struct brw_context *brw );
> };
>
> +enum shader_time_shader_type {
> + ST_NONE,
> + ST_VS,
> + ST_FS8,
> + ST_FS16,
> +};
> +
> /* Flags for brw->state.cache.
> */
> #define CACHE_NEW_BLEND_STATE (1<<BRW_BLEND_STATE)
> @@ -1089,6 +1097,16 @@ struct brw_context
>
> uint32_t num_instances;
> int basevertex;
> +
> + struct {
> + drm_intel_bo *bo;
> + struct gl_shader_program **programs;
> + enum shader_time_shader_type *types;
> + uint64_t *cumulative;
> + int num_entries;
> + int max_entries;
> + double report_time;
> + } shader_time;
> };
>
> /*======================================================================
> @@ -1144,7 +1162,9 @@ void brwInitFragProgFuncs( struct dd_function_table *functions );
> int brw_get_scratch_size(int size);
> void brw_get_scratch_bo(struct intel_context *intel,
> drm_intel_bo **scratch_bo, int size);
> -
> +void brw_init_shader_time(struct brw_context *brw);
> +void brw_collect_and_report_shader_time(struct brw_context *brw);
> +void brw_destroy_shader_time(struct brw_context *brw);
>
> /* brw_urb.c
> */
> diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
> index 6dc4707..b84d8f9 100644
> --- a/src/mesa/drivers/dri/i965/brw_defines.h
> +++ b/src/mesa/drivers/dri/i965/brw_defines.h
> @@ -665,6 +665,8 @@ enum opcode {
> SHADER_OPCODE_TXS,
> FS_OPCODE_TXB,
>
> + SHADER_OPCODE_SHADER_TIME_ADD,
> +
> FS_OPCODE_DDX,
> FS_OPCODE_DDY,
> FS_OPCODE_PIXEL_X,
> @@ -729,6 +731,8 @@ enum opcode {
> #define BRW_ARF_CONTROL 0x80
> #define BRW_ARF_NOTIFICATION_COUNT 0x90
> #define BRW_ARF_IP 0xA0
> +#define BRW_ARF_TDR 0xB0
> +#define BRW_ARF_TIMESTAMP 0xC0
>
> #define BRW_MRF_COMPR4 (1 << 7)
>
> @@ -956,7 +960,21 @@ enum brw_message_target {
> #define BRW_SCRATCH_SPACE_SIZE_1M 10
> #define BRW_SCRATCH_SPACE_SIZE_2M 11
>
Maybe add an /** URB Atomic Operations */ comment here?
> -
> +#define BRW_AOP_AND 1
> +#define BRW_AOP_OR 2
> +#define BRW_AOP_XOR 3
> +#define BRW_AOP_MOV 4
> +#define BRW_AOP_INC 5
> +#define BRW_AOP_DEC 6
> +#define BRW_AOP_ADD 7
> +#define BRW_AOP_SUB 8
> +#define BRW_AOP_REVSUB 9
> +#define BRW_AOP_IMAX 10
> +#define BRW_AOP_IMIN 11
> +#define BRW_AOP_UMAX 12
> +#define BRW_AOP_UMIN 13
> +#define BRW_AOP_CMPWR 14
> +#define BRW_AOP_PREDEC 15
This looks awesome. Series is:
Reviewed-by: Kenneth Graunke <kenneth at whitecape.org>
More information about the mesa-dev mailing list