[Mesa-dev] [PATCH 02/10] radeonsi: add a driver query for counting CP DMA calls

Marek Olšák maraeo at gmail.com
Sat Oct 29 11:17:17 UTC 2016


From: Marek Olšák <marek.olsak at amd.com>

CP DMA calls are synchronous with regard to shaders, but can be made
asynchronous if needed.
---
 src/gallium/drivers/radeon/r600_pipe_common.h | 1 +
 src/gallium/drivers/radeon/r600_query.c       | 7 +++++++
 src/gallium/drivers/radeon/r600_query.h       | 1 +
 src/gallium/drivers/radeonsi/si_cp_dma.c      | 4 ++++
 4 files changed, 13 insertions(+)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index 9356274..a33e290 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -563,20 +563,21 @@ struct r600_common_context {
 	unsigned			num_cs_dw_queries_suspend;
 	/* Additional hardware info. */
 	unsigned			backend_mask;
 	unsigned			max_db; /* for OQ */
 	/* Misc stats. */
 	unsigned			num_draw_calls;
 	unsigned			num_spill_draw_calls;
 	unsigned			num_compute_calls;
 	unsigned			num_spill_compute_calls;
 	unsigned			num_dma_calls;
+	unsigned			num_cp_dma_calls;
 	unsigned			num_vs_flushes;
 	unsigned			num_ps_flushes;
 	unsigned			num_cs_flushes;
 	uint64_t			num_alloc_tex_transfer_bytes;
 	unsigned			last_tex_ps_draw_ratio; /* for query */
 
 	/* Render condition. */
 	struct r600_atom		render_cond_atom;
 	struct pipe_query		*render_cond;
 	unsigned			render_cond_mode;
diff --git a/src/gallium/drivers/radeon/r600_query.c b/src/gallium/drivers/radeon/r600_query.c
index 9c9694a..0e4270a 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -94,20 +94,23 @@ static bool r600_query_sw_begin(struct r600_common_context *rctx,
 		break;
 	case R600_QUERY_COMPUTE_CALLS:
 		query->begin_result = rctx->num_compute_calls;
 		break;
 	case R600_QUERY_SPILL_COMPUTE_CALLS:
 		query->begin_result = rctx->num_spill_compute_calls;
 		break;
 	case R600_QUERY_DMA_CALLS:
 		query->begin_result = rctx->num_dma_calls;
 		break;
+	case R600_QUERY_CP_DMA_CALLS:
+		query->begin_result = rctx->num_cp_dma_calls;
+		break;
 	case R600_QUERY_NUM_VS_FLUSHES:
 		query->begin_result = rctx->num_vs_flushes;
 		break;
 	case R600_QUERY_NUM_PS_FLUSHES:
 		query->begin_result = rctx->num_ps_flushes;
 		break;
 	case R600_QUERY_NUM_CS_FLUSHES:
 		query->begin_result = rctx->num_cs_flushes;
 		break;
 	case R600_QUERY_REQUESTED_VRAM:
@@ -175,20 +178,23 @@ static bool r600_query_sw_end(struct r600_common_context *rctx,
 		break;
 	case R600_QUERY_COMPUTE_CALLS:
 		query->end_result = rctx->num_compute_calls;
 		break;
 	case R600_QUERY_SPILL_COMPUTE_CALLS:
 		query->end_result = rctx->num_spill_compute_calls;
 		break;
 	case R600_QUERY_DMA_CALLS:
 		query->end_result = rctx->num_dma_calls;
 		break;
+	case R600_QUERY_CP_DMA_CALLS:
+		query->end_result = rctx->num_cp_dma_calls;
+		break;
 	case R600_QUERY_NUM_VS_FLUSHES:
 		query->end_result = rctx->num_vs_flushes;
 		break;
 	case R600_QUERY_NUM_PS_FLUSHES:
 		query->end_result = rctx->num_ps_flushes;
 		break;
 	case R600_QUERY_NUM_CS_FLUSHES:
 		query->end_result = rctx->num_cs_flushes;
 		break;
 	case R600_QUERY_REQUESTED_VRAM:
@@ -1648,20 +1654,21 @@ err:
 
 static struct pipe_driver_query_info r600_driver_query_list[] = {
 	X("num-compilations",		NUM_COMPILATIONS,	UINT64, CUMULATIVE),
 	X("num-shaders-created",	NUM_SHADERS_CREATED,	UINT64, CUMULATIVE),
 	X("num-shader-cache-hits",	NUM_SHADER_CACHE_HITS,	UINT64, CUMULATIVE),
 	X("draw-calls",			DRAW_CALLS,		UINT64, AVERAGE),
 	X("spill-draw-calls",		SPILL_DRAW_CALLS,	UINT64, AVERAGE),
 	X("compute-calls",		COMPUTE_CALLS,		UINT64, AVERAGE),
 	X("spill-compute-calls",	SPILL_COMPUTE_CALLS,	UINT64, AVERAGE),
 	X("dma-calls",			DMA_CALLS,		UINT64, AVERAGE),
+	X("cp-dma-calls",		CP_DMA_CALLS,		UINT64, AVERAGE),
 	X("num-vs-flushes",		NUM_VS_FLUSHES,		UINT64, AVERAGE),
 	X("num-ps-flushes",		NUM_PS_FLUSHES,		UINT64, AVERAGE),
 	X("num-cs-flushes",		NUM_CS_FLUSHES,		UINT64, AVERAGE),
 	X("requested-VRAM",		REQUESTED_VRAM,		BYTES, AVERAGE),
 	X("requested-GTT",		REQUESTED_GTT,		BYTES, AVERAGE),
 	X("mapped-VRAM",		MAPPED_VRAM,		BYTES, AVERAGE),
 	X("mapped-GTT",			MAPPED_GTT,		BYTES, AVERAGE),
 	X("buffer-wait-time",		BUFFER_WAIT_TIME,	MICROSECONDS, CUMULATIVE),
 	X("num-ctx-flushes",		NUM_CTX_FLUSHES,	UINT64, AVERAGE),
 	X("num-bytes-moved",		NUM_BYTES_MOVED,	BYTES, CUMULATIVE),
diff --git a/src/gallium/drivers/radeon/r600_query.h b/src/gallium/drivers/radeon/r600_query.h
index 0e14753..2ff586a 100644
--- a/src/gallium/drivers/radeon/r600_query.h
+++ b/src/gallium/drivers/radeon/r600_query.h
@@ -41,20 +41,21 @@ struct r600_common_screen;
 struct r600_query;
 struct r600_query_hw;
 struct r600_resource;
 
 enum {
 	R600_QUERY_DRAW_CALLS = PIPE_QUERY_DRIVER_SPECIFIC,
 	R600_QUERY_SPILL_DRAW_CALLS,
 	R600_QUERY_COMPUTE_CALLS,
 	R600_QUERY_SPILL_COMPUTE_CALLS,
 	R600_QUERY_DMA_CALLS,
+	R600_QUERY_CP_DMA_CALLS,
 	R600_QUERY_NUM_VS_FLUSHES,
 	R600_QUERY_NUM_PS_FLUSHES,
 	R600_QUERY_NUM_CS_FLUSHES,
 	R600_QUERY_REQUESTED_VRAM,
 	R600_QUERY_REQUESTED_GTT,
 	R600_QUERY_MAPPED_VRAM,
 	R600_QUERY_MAPPED_GTT,
 	R600_QUERY_BUFFER_WAIT_TIME,
 	R600_QUERY_NUM_CTX_FLUSHES,
 	R600_QUERY_NUM_BYTES_MOVED,
diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c
index 6667ae3..5809317 100644
--- a/src/gallium/drivers/radeonsi/si_cp_dma.c
+++ b/src/gallium/drivers/radeonsi/si_cp_dma.c
@@ -207,20 +207,22 @@ static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst,
 
 		/* Emit the clear packet. */
 		si_emit_cp_dma(sctx, va, value, byte_count, dma_flags, coher);
 
 		size -= byte_count;
 		va += byte_count;
 	}
 
 	if (tc_l2_flag)
 		r600_resource(dst)->TC_L2_dirty = true;
+
+	sctx->b.num_cp_dma_calls++;
 }
 
 /**
  * Realign the CP DMA engine. This must be done after a copy with an unaligned
  * size.
  *
  * \param size  Remaining size to the CP DMA alignment.
  */
 static void si_cp_dma_realign_engine(struct si_context *sctx, unsigned size)
 {
@@ -331,16 +333,18 @@ void si_copy_buffer(struct si_context *sctx,
 		si_emit_cp_dma(sctx, dst_offset, src_offset, skipped_size,
 			       dma_flags, R600_COHERENCY_SHADER);
 	}
 
 	/* Finally, realign the engine if the size wasn't aligned. */
 	if (realign_size)
 		si_cp_dma_realign_engine(sctx, realign_size);
 
 	if (tc_l2_flag)
 		r600_resource(dst)->TC_L2_dirty = true;
+
+	sctx->b.num_cp_dma_calls++;
 }
 
 void si_init_cp_dma_functions(struct si_context *sctx)
 {
 	sctx->b.clear_buffer = si_clear_buffer;
 }
-- 
2.7.4



More information about the mesa-dev mailing list