[Mesa-dev] [PATCH 7/9] radeonsi: add HUD queries for cache flush stats
Marek Olšák
maraeo at gmail.com
Mon Jan 2 20:17:04 UTC 2017
From: Marek Olšák <marek.olsak at amd.com>
---
src/gallium/drivers/radeon/r600_pipe_common.h | 3 +++
src/gallium/drivers/radeon/r600_query.c | 21 +++++++++++++++++++++
src/gallium/drivers/radeon/r600_query.h | 3 +++
src/gallium/drivers/radeonsi/si_state_draw.c | 5 +++++
4 files changed, 32 insertions(+)
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index 25d40da..8ebaed7 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -567,20 +567,23 @@ struct r600_common_context {
/* Misc stats. */
unsigned num_draw_calls;
unsigned num_spill_draw_calls;
unsigned num_compute_calls;
unsigned num_spill_compute_calls;
unsigned num_dma_calls;
unsigned num_cp_dma_calls;
unsigned num_vs_flushes;
unsigned num_ps_flushes;
unsigned num_cs_flushes;
+ unsigned num_fb_cache_flushes;
+ unsigned num_L2_invalidates;
+ unsigned num_L2_writebacks;
uint64_t num_alloc_tex_transfer_bytes;
unsigned last_tex_ps_draw_ratio; /* for query */
/* Render condition. */
struct r600_atom render_cond_atom;
struct pipe_query *render_cond;
unsigned render_cond_mode;
bool render_cond_invert;
bool render_cond_force_off; /* for u_blitter */
diff --git a/src/gallium/drivers/radeon/r600_query.c b/src/gallium/drivers/radeon/r600_query.c
index 0e4270a..6b93329 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -106,20 +106,29 @@ static bool r600_query_sw_begin(struct r600_common_context *rctx,
break;
case R600_QUERY_NUM_VS_FLUSHES:
query->begin_result = rctx->num_vs_flushes;
break;
case R600_QUERY_NUM_PS_FLUSHES:
query->begin_result = rctx->num_ps_flushes;
break;
case R600_QUERY_NUM_CS_FLUSHES:
query->begin_result = rctx->num_cs_flushes;
break;
+ case R600_QUERY_NUM_FB_CACHE_FLUSHES:
+ query->begin_result = rctx->num_fb_cache_flushes;
+ break;
+ case R600_QUERY_NUM_L2_INVALIDATES:
+ query->begin_result = rctx->num_L2_invalidates;
+ break;
+ case R600_QUERY_NUM_L2_WRITEBACKS:
+ query->begin_result = rctx->num_L2_writebacks;
+ break;
case R600_QUERY_REQUESTED_VRAM:
case R600_QUERY_REQUESTED_GTT:
case R600_QUERY_MAPPED_VRAM:
case R600_QUERY_MAPPED_GTT:
case R600_QUERY_VRAM_USAGE:
case R600_QUERY_GTT_USAGE:
case R600_QUERY_GPU_TEMPERATURE:
case R600_QUERY_CURRENT_GPU_SCLK:
case R600_QUERY_CURRENT_GPU_MCLK:
case R600_QUERY_BACK_BUFFER_PS_DRAW_RATIO:
@@ -190,20 +199,29 @@ static bool r600_query_sw_end(struct r600_common_context *rctx,
break;
case R600_QUERY_NUM_VS_FLUSHES:
query->end_result = rctx->num_vs_flushes;
break;
case R600_QUERY_NUM_PS_FLUSHES:
query->end_result = rctx->num_ps_flushes;
break;
case R600_QUERY_NUM_CS_FLUSHES:
query->end_result = rctx->num_cs_flushes;
break;
+ case R600_QUERY_NUM_FB_CACHE_FLUSHES:
+ query->end_result = rctx->num_fb_cache_flushes;
+ break;
+ case R600_QUERY_NUM_L2_INVALIDATES:
+ query->end_result = rctx->num_L2_invalidates;
+ break;
+ case R600_QUERY_NUM_L2_WRITEBACKS:
+ query->end_result = rctx->num_L2_writebacks;
+ break;
case R600_QUERY_REQUESTED_VRAM:
case R600_QUERY_REQUESTED_GTT:
case R600_QUERY_MAPPED_VRAM:
case R600_QUERY_MAPPED_GTT:
case R600_QUERY_VRAM_USAGE:
case R600_QUERY_GTT_USAGE:
case R600_QUERY_GPU_TEMPERATURE:
case R600_QUERY_CURRENT_GPU_SCLK:
case R600_QUERY_CURRENT_GPU_MCLK:
case R600_QUERY_BUFFER_WAIT_TIME:
@@ -1658,20 +1676,23 @@ static struct pipe_driver_query_info r600_driver_query_list[] = {
X("num-shader-cache-hits", NUM_SHADER_CACHE_HITS, UINT64, CUMULATIVE),
X("draw-calls", DRAW_CALLS, UINT64, AVERAGE),
X("spill-draw-calls", SPILL_DRAW_CALLS, UINT64, AVERAGE),
X("compute-calls", COMPUTE_CALLS, UINT64, AVERAGE),
X("spill-compute-calls", SPILL_COMPUTE_CALLS, UINT64, AVERAGE),
X("dma-calls", DMA_CALLS, UINT64, AVERAGE),
X("cp-dma-calls", CP_DMA_CALLS, UINT64, AVERAGE),
X("num-vs-flushes", NUM_VS_FLUSHES, UINT64, AVERAGE),
X("num-ps-flushes", NUM_PS_FLUSHES, UINT64, AVERAGE),
X("num-cs-flushes", NUM_CS_FLUSHES, UINT64, AVERAGE),
+ X("num-fb-cache-flushes", NUM_FB_CACHE_FLUSHES, UINT64, AVERAGE),
+ X("num-L2-invalidates", NUM_L2_INVALIDATES, UINT64, AVERAGE),
+ X("num-L2-writebacks", NUM_L2_WRITEBACKS, UINT64, AVERAGE),
X("requested-VRAM", REQUESTED_VRAM, BYTES, AVERAGE),
X("requested-GTT", REQUESTED_GTT, BYTES, AVERAGE),
X("mapped-VRAM", MAPPED_VRAM, BYTES, AVERAGE),
X("mapped-GTT", MAPPED_GTT, BYTES, AVERAGE),
X("buffer-wait-time", BUFFER_WAIT_TIME, MICROSECONDS, CUMULATIVE),
X("num-ctx-flushes", NUM_CTX_FLUSHES, UINT64, AVERAGE),
X("num-bytes-moved", NUM_BYTES_MOVED, BYTES, CUMULATIVE),
X("num-evictions", NUM_EVICTIONS, UINT64, CUMULATIVE),
X("VRAM-usage", VRAM_USAGE, BYTES, AVERAGE),
X("GTT-usage", GTT_USAGE, BYTES, AVERAGE),
diff --git a/src/gallium/drivers/radeon/r600_query.h b/src/gallium/drivers/radeon/r600_query.h
index 2ff586a..af434fa 100644
--- a/src/gallium/drivers/radeon/r600_query.h
+++ b/src/gallium/drivers/radeon/r600_query.h
@@ -45,20 +45,23 @@ struct r600_resource;
enum {
R600_QUERY_DRAW_CALLS = PIPE_QUERY_DRIVER_SPECIFIC,
R600_QUERY_SPILL_DRAW_CALLS,
R600_QUERY_COMPUTE_CALLS,
R600_QUERY_SPILL_COMPUTE_CALLS,
R600_QUERY_DMA_CALLS,
R600_QUERY_CP_DMA_CALLS,
R600_QUERY_NUM_VS_FLUSHES,
R600_QUERY_NUM_PS_FLUSHES,
R600_QUERY_NUM_CS_FLUSHES,
+ R600_QUERY_NUM_FB_CACHE_FLUSHES,
+ R600_QUERY_NUM_L2_INVALIDATES,
+ R600_QUERY_NUM_L2_WRITEBACKS,
R600_QUERY_REQUESTED_VRAM,
R600_QUERY_REQUESTED_GTT,
R600_QUERY_MAPPED_VRAM,
R600_QUERY_MAPPED_GTT,
R600_QUERY_BUFFER_WAIT_TIME,
R600_QUERY_NUM_CTX_FLUSHES,
R600_QUERY_NUM_BYTES_MOVED,
R600_QUERY_NUM_EVICTIONS,
R600_QUERY_VRAM_USAGE,
R600_QUERY_GTT_USAGE,
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index cae19dc..b3f664e 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -728,20 +728,23 @@ static void si_emit_surface_sync(struct r600_common_context *rctx,
radeon_emit(cs, 0); /* CP_COHER_BASE */
radeon_emit(cs, 0x0000000A); /* POLL_INTERVAL */
}
void si_emit_cache_flush(struct si_context *sctx)
{
struct r600_common_context *rctx = &sctx->b;
struct radeon_winsys_cs *cs = rctx->gfx.cs;
uint32_t cp_coher_cntl = 0;
+ if (rctx->flags & SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER)
+ sctx->b.num_fb_cache_flushes++;
+
/* SI has a bug that it always flushes ICACHE and KCACHE if either
* bit is set. An alternative way is to write SQC_CACHES, but that
* doesn't seem to work reliably. Since the bug doesn't affect
* correctness (it only does more work than necessary) and
* the performance impact is likely negligible, there is no plan
* to add a workaround for it.
*/
if (rctx->flags & SI_CONTEXT_INV_ICACHE)
cp_coher_cntl |= S_0085F0_SH_ICACHE_ACTION_ENA(1);
@@ -845,35 +848,37 @@ void si_emit_cache_flush(struct si_context *sctx)
if (rctx->flags & SI_CONTEXT_INV_GLOBAL_L2 ||
(rctx->chip_class <= CIK &&
(rctx->flags & SI_CONTEXT_WRITEBACK_GLOBAL_L2))) {
/* Invalidate L1 & L2. (L1 is always invalidated)
* WB must be set on VI+ when TC_ACTION is set.
*/
si_emit_surface_sync(rctx, cp_coher_cntl |
S_0085F0_TC_ACTION_ENA(1) |
S_0301F0_TC_WB_ACTION_ENA(rctx->chip_class >= VI));
cp_coher_cntl = 0;
+ sctx->b.num_L2_invalidates++;
} else {
/* L1 invalidation and L2 writeback must be done separately,
* because both operations can't be done together.
*/
if (rctx->flags & SI_CONTEXT_WRITEBACK_GLOBAL_L2) {
/* WB = write-back
* NC = apply to non-coherent MTYPEs
* (i.e. MTYPE <= 1, which is what we use everywhere)
*
* WB doesn't work without NC.
*/
si_emit_surface_sync(rctx, cp_coher_cntl |
S_0301F0_TC_WB_ACTION_ENA(1) |
S_0301F0_TC_NC_ACTION_ENA(1));
cp_coher_cntl = 0;
+ sctx->b.num_L2_writebacks++;
}
if (rctx->flags & SI_CONTEXT_INV_VMEM_L1) {
/* Invalidate per-CU VMEM L1. */
si_emit_surface_sync(rctx, cp_coher_cntl |
S_0085F0_TCL1_ACTION_ENA(1));
cp_coher_cntl = 0;
}
}
/* If TC flushes haven't cleared this... */
--
2.7.4
More information about the mesa-dev
mailing list