[Mesa-dev] [PATCH 02/10] radeonsi: add a driver query for counting CP DMA calls
Marek Olšák
maraeo at gmail.com
Sat Oct 29 11:17:17 UTC 2016
From: Marek Olšák <marek.olsak at amd.com>
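Add a "cp-dma-calls" driver query backed by a new num_cp_dma_calls counter,
which is incremented in si_clear_buffer and si_copy_buffer.
CP DMA calls are synchronous with regard to shaders, but can be made
asynchronous if needed.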
---
src/gallium/drivers/radeon/r600_pipe_common.h | 1 +
src/gallium/drivers/radeon/r600_query.c | 7 +++++++
src/gallium/drivers/radeon/r600_query.h | 1 +
src/gallium/drivers/radeonsi/si_cp_dma.c | 4 ++++
4 files changed, 13 insertions(+)
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index 9356274..a33e290 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -563,20 +563,21 @@ struct r600_common_context {
unsigned num_cs_dw_queries_suspend;
/* Additional hardware info. */
unsigned backend_mask;
unsigned max_db; /* for OQ */
/* Misc stats. */
unsigned num_draw_calls;
unsigned num_spill_draw_calls;
unsigned num_compute_calls;
unsigned num_spill_compute_calls;
unsigned num_dma_calls;
+ unsigned num_cp_dma_calls;
unsigned num_vs_flushes;
unsigned num_ps_flushes;
unsigned num_cs_flushes;
uint64_t num_alloc_tex_transfer_bytes;
unsigned last_tex_ps_draw_ratio; /* for query */
/* Render condition. */
struct r600_atom render_cond_atom;
struct pipe_query *render_cond;
unsigned render_cond_mode;
diff --git a/src/gallium/drivers/radeon/r600_query.c b/src/gallium/drivers/radeon/r600_query.c
index 9c9694a..0e4270a 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -94,20 +94,23 @@ static bool r600_query_sw_begin(struct r600_common_context *rctx,
break;
case R600_QUERY_COMPUTE_CALLS:
query->begin_result = rctx->num_compute_calls;
break;
case R600_QUERY_SPILL_COMPUTE_CALLS:
query->begin_result = rctx->num_spill_compute_calls;
break;
case R600_QUERY_DMA_CALLS:
query->begin_result = rctx->num_dma_calls;
break;
+ case R600_QUERY_CP_DMA_CALLS:
+ query->begin_result = rctx->num_cp_dma_calls;
+ break;
case R600_QUERY_NUM_VS_FLUSHES:
query->begin_result = rctx->num_vs_flushes;
break;
case R600_QUERY_NUM_PS_FLUSHES:
query->begin_result = rctx->num_ps_flushes;
break;
case R600_QUERY_NUM_CS_FLUSHES:
query->begin_result = rctx->num_cs_flushes;
break;
case R600_QUERY_REQUESTED_VRAM:
@@ -175,20 +178,23 @@ static bool r600_query_sw_end(struct r600_common_context *rctx,
break;
case R600_QUERY_COMPUTE_CALLS:
query->end_result = rctx->num_compute_calls;
break;
case R600_QUERY_SPILL_COMPUTE_CALLS:
query->end_result = rctx->num_spill_compute_calls;
break;
case R600_QUERY_DMA_CALLS:
query->end_result = rctx->num_dma_calls;
break;
+ case R600_QUERY_CP_DMA_CALLS:
+ query->end_result = rctx->num_cp_dma_calls;
+ break;
case R600_QUERY_NUM_VS_FLUSHES:
query->end_result = rctx->num_vs_flushes;
break;
case R600_QUERY_NUM_PS_FLUSHES:
query->end_result = rctx->num_ps_flushes;
break;
case R600_QUERY_NUM_CS_FLUSHES:
query->end_result = rctx->num_cs_flushes;
break;
case R600_QUERY_REQUESTED_VRAM:
@@ -1648,20 +1654,21 @@ err:
static struct pipe_driver_query_info r600_driver_query_list[] = {
X("num-compilations", NUM_COMPILATIONS, UINT64, CUMULATIVE),
X("num-shaders-created", NUM_SHADERS_CREATED, UINT64, CUMULATIVE),
X("num-shader-cache-hits", NUM_SHADER_CACHE_HITS, UINT64, CUMULATIVE),
X("draw-calls", DRAW_CALLS, UINT64, AVERAGE),
X("spill-draw-calls", SPILL_DRAW_CALLS, UINT64, AVERAGE),
X("compute-calls", COMPUTE_CALLS, UINT64, AVERAGE),
X("spill-compute-calls", SPILL_COMPUTE_CALLS, UINT64, AVERAGE),
X("dma-calls", DMA_CALLS, UINT64, AVERAGE),
+ X("cp-dma-calls", CP_DMA_CALLS, UINT64, AVERAGE),
X("num-vs-flushes", NUM_VS_FLUSHES, UINT64, AVERAGE),
X("num-ps-flushes", NUM_PS_FLUSHES, UINT64, AVERAGE),
X("num-cs-flushes", NUM_CS_FLUSHES, UINT64, AVERAGE),
X("requested-VRAM", REQUESTED_VRAM, BYTES, AVERAGE),
X("requested-GTT", REQUESTED_GTT, BYTES, AVERAGE),
X("mapped-VRAM", MAPPED_VRAM, BYTES, AVERAGE),
X("mapped-GTT", MAPPED_GTT, BYTES, AVERAGE),
X("buffer-wait-time", BUFFER_WAIT_TIME, MICROSECONDS, CUMULATIVE),
X("num-ctx-flushes", NUM_CTX_FLUSHES, UINT64, AVERAGE),
X("num-bytes-moved", NUM_BYTES_MOVED, BYTES, CUMULATIVE),
diff --git a/src/gallium/drivers/radeon/r600_query.h b/src/gallium/drivers/radeon/r600_query.h
index 0e14753..2ff586a 100644
--- a/src/gallium/drivers/radeon/r600_query.h
+++ b/src/gallium/drivers/radeon/r600_query.h
@@ -41,20 +41,21 @@ struct r600_common_screen;
struct r600_query;
struct r600_query_hw;
struct r600_resource;
enum {
R600_QUERY_DRAW_CALLS = PIPE_QUERY_DRIVER_SPECIFIC,
R600_QUERY_SPILL_DRAW_CALLS,
R600_QUERY_COMPUTE_CALLS,
R600_QUERY_SPILL_COMPUTE_CALLS,
R600_QUERY_DMA_CALLS,
+ R600_QUERY_CP_DMA_CALLS,
R600_QUERY_NUM_VS_FLUSHES,
R600_QUERY_NUM_PS_FLUSHES,
R600_QUERY_NUM_CS_FLUSHES,
R600_QUERY_REQUESTED_VRAM,
R600_QUERY_REQUESTED_GTT,
R600_QUERY_MAPPED_VRAM,
R600_QUERY_MAPPED_GTT,
R600_QUERY_BUFFER_WAIT_TIME,
R600_QUERY_NUM_CTX_FLUSHES,
R600_QUERY_NUM_BYTES_MOVED,
diff --git a/src/gallium/drivers/radeonsi/si_cp_dma.c b/src/gallium/drivers/radeonsi/si_cp_dma.c
index 6667ae3..5809317 100644
--- a/src/gallium/drivers/radeonsi/si_cp_dma.c
+++ b/src/gallium/drivers/radeonsi/si_cp_dma.c
@@ -207,20 +207,22 @@ static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst,
/* Emit the clear packet. */
si_emit_cp_dma(sctx, va, value, byte_count, dma_flags, coher);
size -= byte_count;
va += byte_count;
}
if (tc_l2_flag)
r600_resource(dst)->TC_L2_dirty = true;
+
+ sctx->b.num_cp_dma_calls++;
}
/**
* Realign the CP DMA engine. This must be done after a copy with an unaligned
* size.
*
* \param size Remaining size to the CP DMA alignment.
*/
static void si_cp_dma_realign_engine(struct si_context *sctx, unsigned size)
{
@@ -331,16 +333,18 @@ void si_copy_buffer(struct si_context *sctx,
si_emit_cp_dma(sctx, dst_offset, src_offset, skipped_size,
dma_flags, R600_COHERENCY_SHADER);
}
/* Finally, realign the engine if the size wasn't aligned. */
if (realign_size)
si_cp_dma_realign_engine(sctx, realign_size);
if (tc_l2_flag)
r600_resource(dst)->TC_L2_dirty = true;
+
+ sctx->b.num_cp_dma_calls++;
}
void si_init_cp_dma_functions(struct si_context *sctx)
{
sctx->b.clear_buffer = si_clear_buffer;
}
--
2.7.4
More information about the mesa-dev
mailing list