[Mesa-dev] [PATCH 1/9] radeonsi: add separate HUD counters for CB and DB cache flushes

Marek Olšák maraeo at gmail.com
Fri Jun 16 12:57:57 UTC 2017


From: Marek Olšák <marek.olsak at amd.com>

---
 src/gallium/drivers/radeon/r600_pipe_common.h |  3 ++-
 src/gallium/drivers/radeon/r600_query.c       | 17 ++++++++++++-----
 src/gallium/drivers/radeon/r600_query.h       |  3 ++-
 src/gallium/drivers/radeonsi/si_state_draw.c  |  7 ++++---
 4 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index 45ed5ba..887c111 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -588,21 +588,22 @@ struct r600_common_context {
 	unsigned			num_draw_calls;
 	unsigned			num_prim_restart_calls;
 	unsigned			num_spill_draw_calls;
 	unsigned			num_compute_calls;
 	unsigned			num_spill_compute_calls;
 	unsigned			num_dma_calls;
 	unsigned			num_cp_dma_calls;
 	unsigned			num_vs_flushes;
 	unsigned			num_ps_flushes;
 	unsigned			num_cs_flushes;
-	unsigned			num_fb_cache_flushes;
+	unsigned			num_cb_cache_flushes;
+	unsigned			num_db_cache_flushes;
 	unsigned			num_L2_invalidates;
 	unsigned			num_L2_writebacks;
 	uint64_t			num_alloc_tex_transfer_bytes;
 	unsigned			last_tex_ps_draw_ratio; /* for query */
 
 	/* Render condition. */
 	struct r600_atom		render_cond_atom;
 	struct pipe_query		*render_cond;
 	unsigned			render_cond_mode;
 	bool				render_cond_invert;
diff --git a/src/gallium/drivers/radeon/r600_query.c b/src/gallium/drivers/radeon/r600_query.c
index bce4317..ca8bab0 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -118,22 +118,25 @@ static bool r600_query_sw_begin(struct r600_common_context *rctx,
 		break;
 	case R600_QUERY_NUM_VS_FLUSHES:
 		query->begin_result = rctx->num_vs_flushes;
 		break;
 	case R600_QUERY_NUM_PS_FLUSHES:
 		query->begin_result = rctx->num_ps_flushes;
 		break;
 	case R600_QUERY_NUM_CS_FLUSHES:
 		query->begin_result = rctx->num_cs_flushes;
 		break;
-	case R600_QUERY_NUM_FB_CACHE_FLUSHES:
-		query->begin_result = rctx->num_fb_cache_flushes;
+	case R600_QUERY_NUM_CB_CACHE_FLUSHES:
+		query->begin_result = rctx->num_cb_cache_flushes;
+		break;
+	case R600_QUERY_NUM_DB_CACHE_FLUSHES:
+		query->begin_result = rctx->num_db_cache_flushes;
 		break;
 	case R600_QUERY_NUM_L2_INVALIDATES:
 		query->begin_result = rctx->num_L2_invalidates;
 		break;
 	case R600_QUERY_NUM_L2_WRITEBACKS:
 		query->begin_result = rctx->num_L2_writebacks;
 		break;
 	case R600_QUERY_TC_OFFLOADED_SLOTS:
 		query->begin_result = rctx->tc ? rctx->tc->num_offloaded_slots : 0;
 		break;
@@ -260,22 +263,25 @@ static bool r600_query_sw_end(struct r600_common_context *rctx,
 		break;
 	case R600_QUERY_NUM_VS_FLUSHES:
 		query->end_result = rctx->num_vs_flushes;
 		break;
 	case R600_QUERY_NUM_PS_FLUSHES:
 		query->end_result = rctx->num_ps_flushes;
 		break;
 	case R600_QUERY_NUM_CS_FLUSHES:
 		query->end_result = rctx->num_cs_flushes;
 		break;
-	case R600_QUERY_NUM_FB_CACHE_FLUSHES:
-		query->end_result = rctx->num_fb_cache_flushes;
+	case R600_QUERY_NUM_CB_CACHE_FLUSHES:
+		query->end_result = rctx->num_cb_cache_flushes;
+		break;
+	case R600_QUERY_NUM_DB_CACHE_FLUSHES:
+		query->end_result = rctx->num_db_cache_flushes;
 		break;
 	case R600_QUERY_NUM_L2_INVALIDATES:
 		query->end_result = rctx->num_L2_invalidates;
 		break;
 	case R600_QUERY_NUM_L2_WRITEBACKS:
 		query->end_result = rctx->num_L2_writebacks;
 		break;
 	case R600_QUERY_TC_OFFLOADED_SLOTS:
 		query->end_result = rctx->tc ? rctx->tc->num_offloaded_slots : 0;
 		break;
@@ -1824,21 +1830,22 @@ static struct pipe_driver_query_info r600_driver_query_list[] = {
 	X("draw-calls",			DRAW_CALLS,		UINT64, AVERAGE),
 	X("prim-restart-calls",		PRIM_RESTART_CALLS,	UINT64, AVERAGE),
 	X("spill-draw-calls",		SPILL_DRAW_CALLS,	UINT64, AVERAGE),
 	X("compute-calls",		COMPUTE_CALLS,		UINT64, AVERAGE),
 	X("spill-compute-calls",	SPILL_COMPUTE_CALLS,	UINT64, AVERAGE),
 	X("dma-calls",			DMA_CALLS,		UINT64, AVERAGE),
 	X("cp-dma-calls",		CP_DMA_CALLS,		UINT64, AVERAGE),
 	X("num-vs-flushes",		NUM_VS_FLUSHES,		UINT64, AVERAGE),
 	X("num-ps-flushes",		NUM_PS_FLUSHES,		UINT64, AVERAGE),
 	X("num-cs-flushes",		NUM_CS_FLUSHES,		UINT64, AVERAGE),
-	X("num-fb-cache-flushes",	NUM_FB_CACHE_FLUSHES,	UINT64, AVERAGE),
+	X("num-CB-cache-flushes",	NUM_CB_CACHE_FLUSHES,	UINT64, AVERAGE),
+	X("num-DB-cache-flushes",	NUM_DB_CACHE_FLUSHES,	UINT64, AVERAGE),
 	X("num-L2-invalidates",		NUM_L2_INVALIDATES,	UINT64, AVERAGE),
 	X("num-L2-writebacks",		NUM_L2_WRITEBACKS,	UINT64, AVERAGE),
 	X("tc-offloaded-slots",		TC_OFFLOADED_SLOTS,     UINT64, AVERAGE),
 	X("tc-direct-slots",		TC_DIRECT_SLOTS,	UINT64, AVERAGE),
 	X("tc-num-syncs",		TC_NUM_SYNCS,		UINT64, AVERAGE),
 	X("CS-thread-busy",		CS_THREAD_BUSY,		UINT64, AVERAGE),
 	X("gallium-thread-busy",	GALLIUM_THREAD_BUSY,	UINT64, AVERAGE),
 	X("requested-VRAM",		REQUESTED_VRAM,		BYTES, AVERAGE),
 	X("requested-GTT",		REQUESTED_GTT,		BYTES, AVERAGE),
 	X("mapped-VRAM",		MAPPED_VRAM,		BYTES, AVERAGE),
diff --git a/src/gallium/drivers/radeon/r600_query.h b/src/gallium/drivers/radeon/r600_query.h
index ed607ec..70866f4 100644
--- a/src/gallium/drivers/radeon/r600_query.h
+++ b/src/gallium/drivers/radeon/r600_query.h
@@ -44,21 +44,22 @@ enum {
 	R600_QUERY_DRAW_CALLS = PIPE_QUERY_DRIVER_SPECIFIC,
 	R600_QUERY_PRIM_RESTART_CALLS,
 	R600_QUERY_SPILL_DRAW_CALLS,
 	R600_QUERY_COMPUTE_CALLS,
 	R600_QUERY_SPILL_COMPUTE_CALLS,
 	R600_QUERY_DMA_CALLS,
 	R600_QUERY_CP_DMA_CALLS,
 	R600_QUERY_NUM_VS_FLUSHES,
 	R600_QUERY_NUM_PS_FLUSHES,
 	R600_QUERY_NUM_CS_FLUSHES,
-	R600_QUERY_NUM_FB_CACHE_FLUSHES,
+	R600_QUERY_NUM_CB_CACHE_FLUSHES,
+	R600_QUERY_NUM_DB_CACHE_FLUSHES,
 	R600_QUERY_NUM_L2_INVALIDATES,
 	R600_QUERY_NUM_L2_WRITEBACKS,
 	R600_QUERY_TC_OFFLOADED_SLOTS,
 	R600_QUERY_TC_DIRECT_SLOTS,
 	R600_QUERY_TC_NUM_SYNCS,
 	R600_QUERY_CS_THREAD_BUSY,
 	R600_QUERY_GALLIUM_THREAD_BUSY,
 	R600_QUERY_REQUESTED_VRAM,
 	R600_QUERY_REQUESTED_GTT,
 	R600_QUERY_MAPPED_VRAM,
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index ec564c1..d039e01 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -854,23 +854,24 @@ static void si_emit_surface_sync(struct r600_common_context *rctx,
 }
 
 void si_emit_cache_flush(struct si_context *sctx)
 {
 	struct r600_common_context *rctx = &sctx->b;
 	struct radeon_winsys_cs *cs = rctx->gfx.cs;
 	uint32_t cp_coher_cntl = 0;
 	uint32_t flush_cb_db = rctx->flags & (SI_CONTEXT_FLUSH_AND_INV_CB |
 					      SI_CONTEXT_FLUSH_AND_INV_DB);
 
-	if (rctx->flags & (SI_CONTEXT_FLUSH_AND_INV_CB |
-			   SI_CONTEXT_FLUSH_AND_INV_DB))
-		sctx->b.num_fb_cache_flushes++;
+	if (rctx->flags & SI_CONTEXT_FLUSH_AND_INV_CB)
+		sctx->b.num_cb_cache_flushes++;
+	if (rctx->flags & SI_CONTEXT_FLUSH_AND_INV_DB)
+		sctx->b.num_db_cache_flushes++;
 
 	/* SI has a bug that it always flushes ICACHE and KCACHE if either
 	 * bit is set. An alternative way is to write SQC_CACHES, but that
 	 * doesn't seem to work reliably. Since the bug doesn't affect
 	 * correctness (it only does more work than necessary) and
 	 * the performance impact is likely negligible, there is no plan
 	 * to add a workaround for it.
 	 */
 
 	if (rctx->flags & SI_CONTEXT_INV_ICACHE)
-- 
2.7.4



More information about the mesa-dev mailing list