[Mesa-dev] [PATCH 14/20] radeonsi: add HUD queries for counting VS/PS/CS partial flushes
Edward O'Callaghan
funfunctor at folklore1984.net
Tue Aug 30 09:04:58 UTC 2016
Reviewed-by: Edward O'Callaghan <funfunctor at folklore1984.net>
On 08/30/2016 01:28 AM, Marek Olšák wrote:
> From: Marek Olšák <marek.olsak at amd.com>
>
> ---
> src/gallium/drivers/radeon/r600_pipe_common.h | 3 +++
> src/gallium/drivers/radeon/r600_query.c | 21 +++++++++++++++++++++
> src/gallium/drivers/radeon/r600_query.h | 3 +++
> src/gallium/drivers/radeonsi/si_state_draw.c | 8 ++++++++
> 4 files changed, 35 insertions(+)
>
> diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
> index 624dea3..d821eaa 100644
> --- a/src/gallium/drivers/radeon/r600_pipe_common.h
> +++ b/src/gallium/drivers/radeon/r600_pipe_common.h
> @@ -551,20 +551,23 @@ struct r600_common_context {
> unsigned num_cs_dw_queries_suspend;
> /* Additional hardware info. */
> unsigned backend_mask;
> unsigned max_db; /* for OQ */
> /* Misc stats. */
> unsigned num_draw_calls;
> unsigned num_spill_draw_calls;
> unsigned num_compute_calls;
> unsigned num_spill_compute_calls;
> unsigned num_dma_calls;
> + unsigned num_vs_flushes;
> + unsigned num_ps_flushes;
> + unsigned num_cs_flushes;
> uint64_t num_alloc_tex_transfer_bytes;
> unsigned last_tex_ps_draw_ratio; /* for query */
>
> /* Render condition. */
> struct r600_atom render_cond_atom;
> struct pipe_query *render_cond;
> unsigned render_cond_mode;
> bool render_cond_invert;
> bool render_cond_force_off; /* for u_blitter */
>
> diff --git a/src/gallium/drivers/radeon/r600_query.c b/src/gallium/drivers/radeon/r600_query.c
> index 29ad249..2c3d530 100644
> --- a/src/gallium/drivers/radeon/r600_query.c
> +++ b/src/gallium/drivers/radeon/r600_query.c
> @@ -83,20 +83,29 @@ static bool r600_query_sw_begin(struct r600_common_context *rctx,
> break;
> case R600_QUERY_COMPUTE_CALLS:
> query->begin_result = rctx->num_compute_calls;
> break;
> case R600_QUERY_SPILL_COMPUTE_CALLS:
> query->begin_result = rctx->num_spill_compute_calls;
> break;
> case R600_QUERY_DMA_CALLS:
> query->begin_result = rctx->num_dma_calls;
> break;
> + case R600_QUERY_NUM_VS_FLUSHES:
> + query->begin_result = rctx->num_vs_flushes;
> + break;
> + case R600_QUERY_NUM_PS_FLUSHES:
> + query->begin_result = rctx->num_ps_flushes;
> + break;
> + case R600_QUERY_NUM_CS_FLUSHES:
> + query->begin_result = rctx->num_cs_flushes;
> + break;
> case R600_QUERY_REQUESTED_VRAM:
> case R600_QUERY_REQUESTED_GTT:
> case R600_QUERY_MAPPED_VRAM:
> case R600_QUERY_MAPPED_GTT:
> case R600_QUERY_VRAM_USAGE:
> case R600_QUERY_GTT_USAGE:
> case R600_QUERY_GPU_TEMPERATURE:
> case R600_QUERY_CURRENT_GPU_SCLK:
> case R600_QUERY_CURRENT_GPU_MCLK:
> case R600_QUERY_BACK_BUFFER_PS_DRAW_RATIO:
> @@ -151,20 +160,29 @@ static bool r600_query_sw_end(struct r600_common_context *rctx,
> break;
> case R600_QUERY_COMPUTE_CALLS:
> query->end_result = rctx->num_compute_calls;
> break;
> case R600_QUERY_SPILL_COMPUTE_CALLS:
> query->end_result = rctx->num_spill_compute_calls;
> break;
> case R600_QUERY_DMA_CALLS:
> query->end_result = rctx->num_dma_calls;
> break;
> + case R600_QUERY_NUM_VS_FLUSHES:
> + query->end_result = rctx->num_vs_flushes;
> + break;
> + case R600_QUERY_NUM_PS_FLUSHES:
> + query->end_result = rctx->num_ps_flushes;
> + break;
> + case R600_QUERY_NUM_CS_FLUSHES:
> + query->end_result = rctx->num_cs_flushes;
> + break;
> case R600_QUERY_REQUESTED_VRAM:
> case R600_QUERY_REQUESTED_GTT:
> case R600_QUERY_MAPPED_VRAM:
> case R600_QUERY_MAPPED_GTT:
> case R600_QUERY_VRAM_USAGE:
> case R600_QUERY_GTT_USAGE:
> case R600_QUERY_GPU_TEMPERATURE:
> case R600_QUERY_CURRENT_GPU_SCLK:
> case R600_QUERY_CURRENT_GPU_MCLK:
> case R600_QUERY_BUFFER_WAIT_TIME:
> @@ -1175,20 +1193,23 @@ err:
> XFULL(name_, query_type_, type_, result_type_, R600_QUERY_GROUP_##group_)
>
> static struct pipe_driver_query_info r600_driver_query_list[] = {
> X("num-compilations", NUM_COMPILATIONS, UINT64, CUMULATIVE),
> X("num-shaders-created", NUM_SHADERS_CREATED, UINT64, CUMULATIVE),
> X("draw-calls", DRAW_CALLS, UINT64, AVERAGE),
> X("spill-draw-calls", SPILL_DRAW_CALLS, UINT64, AVERAGE),
> X("compute-calls", COMPUTE_CALLS, UINT64, AVERAGE),
> X("spill-compute-calls", SPILL_COMPUTE_CALLS, UINT64, AVERAGE),
> X("dma-calls", DMA_CALLS, UINT64, AVERAGE),
> + X("num-vs-flushes", NUM_VS_FLUSHES, UINT64, AVERAGE),
> + X("num-ps-flushes", NUM_PS_FLUSHES, UINT64, AVERAGE),
> + X("num-cs-flushes", NUM_CS_FLUSHES, UINT64, AVERAGE),
> X("requested-VRAM", REQUESTED_VRAM, BYTES, AVERAGE),
> X("requested-GTT", REQUESTED_GTT, BYTES, AVERAGE),
> X("mapped-VRAM", MAPPED_VRAM, BYTES, AVERAGE),
> X("mapped-GTT", MAPPED_GTT, BYTES, AVERAGE),
> X("buffer-wait-time", BUFFER_WAIT_TIME, MICROSECONDS, CUMULATIVE),
> X("num-ctx-flushes", NUM_CTX_FLUSHES, UINT64, AVERAGE),
> X("num-bytes-moved", NUM_BYTES_MOVED, BYTES, CUMULATIVE),
> X("num-evictions", NUM_EVICTIONS, UINT64, CUMULATIVE),
> X("VRAM-usage", VRAM_USAGE, BYTES, AVERAGE),
> X("GTT-usage", GTT_USAGE, BYTES, AVERAGE),
> diff --git a/src/gallium/drivers/radeon/r600_query.h b/src/gallium/drivers/radeon/r600_query.h
> index 89f5516..0cd1a02 100644
> --- a/src/gallium/drivers/radeon/r600_query.h
> +++ b/src/gallium/drivers/radeon/r600_query.h
> @@ -39,20 +39,23 @@ struct r600_common_screen;
> struct r600_query;
> struct r600_query_hw;
> struct r600_resource;
>
> enum {
> R600_QUERY_DRAW_CALLS = PIPE_QUERY_DRIVER_SPECIFIC,
> R600_QUERY_SPILL_DRAW_CALLS,
> R600_QUERY_COMPUTE_CALLS,
> R600_QUERY_SPILL_COMPUTE_CALLS,
> R600_QUERY_DMA_CALLS,
> + R600_QUERY_NUM_VS_FLUSHES,
> + R600_QUERY_NUM_PS_FLUSHES,
> + R600_QUERY_NUM_CS_FLUSHES,
> R600_QUERY_REQUESTED_VRAM,
> R600_QUERY_REQUESTED_GTT,
> R600_QUERY_MAPPED_VRAM,
> R600_QUERY_MAPPED_GTT,
> R600_QUERY_BUFFER_WAIT_TIME,
> R600_QUERY_NUM_CTX_FLUSHES,
> R600_QUERY_NUM_BYTES_MOVED,
> R600_QUERY_NUM_EVICTIONS,
> R600_QUERY_VRAM_USAGE,
> R600_QUERY_GTT_USAGE,
> diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
> index 60cc3f0..9e50bb2 100644
> --- a/src/gallium/drivers/radeonsi/si_state_draw.c
> +++ b/src/gallium/drivers/radeonsi/si_state_draw.c
> @@ -766,28 +766,36 @@ void si_emit_cache_flush(struct si_context *si_ctx, struct r600_atom *atom)
>
> /* Wait for shader engines to go idle.
> * VS and PS waits are unnecessary if SURFACE_SYNC is going to wait
> * for everything including CB/DB cache flushes.
> */
> if (!(sctx->flags & (SI_CONTEXT_FLUSH_AND_INV_CB |
> SI_CONTEXT_FLUSH_AND_INV_DB))) {
> if (sctx->flags & SI_CONTEXT_PS_PARTIAL_FLUSH) {
> radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
> radeon_emit(cs, EVENT_TYPE(V_028A90_PS_PARTIAL_FLUSH) | EVENT_INDEX(4));
> + /* Only count explicit shader flushes, not implicit ones
> + * done by SURFACE_SYNC.
> + */
> + sctx->num_vs_flushes++;
> + sctx->num_ps_flushes++;
> } else if (sctx->flags & SI_CONTEXT_VS_PARTIAL_FLUSH) {
> radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
> radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
> + sctx->num_vs_flushes++;
> }
> }
> +
> if (sctx->flags & SI_CONTEXT_CS_PARTIAL_FLUSH) {
> radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
> radeon_emit(cs, EVENT_TYPE(V_028A90_CS_PARTIAL_FLUSH | EVENT_INDEX(4)));
> + sctx->num_cs_flushes++;
> }
>
> /* VGT state synchronization. */
> if (sctx->flags & SI_CONTEXT_VGT_FLUSH) {
> radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
> radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
> }
> if (sctx->flags & SI_CONTEXT_VGT_STREAMOUT_SYNC) {
> radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
> radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_STREAMOUT_SYNC) | EVENT_INDEX(0));
>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 819 bytes
Desc: OpenPGP digital signature
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20160830/56a78990/attachment-0001.sig>
More information about the mesa-dev mailing list