[Mesa-dev] [PATCH 3/4] radeonsi: re-order the SQ_xx performance counter blocks

Nicolai Hähnle nhaehnle at gmail.com
Wed Feb 3 13:52:10 UTC 2016


From: Nicolai Hähnle <nicolai.haehnle at amd.com>

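This is yet another change made to accommodate AMD GPUPerfStudio's
hardcoded performance counter group numbers. The order of the SQ
shader-type groups and their enable bits are no longer fixed in the
common radeon code; they now come from tables supplied by radeonsi.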
---
 src/gallium/drivers/radeon/r600_perfcounter.c | 38 ++++++++++++---------------
 src/gallium/drivers/radeon/r600_query.h       | 22 +++-------------
 src/gallium/drivers/radeonsi/si_perfcounter.c | 21 +++++++++++++++
 3 files changed, 42 insertions(+), 39 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_perfcounter.c b/src/gallium/drivers/radeon/r600_perfcounter.c
index fad7bde..f3529a1 100644
--- a/src/gallium/drivers/radeon/r600_perfcounter.c
+++ b/src/gallium/drivers/radeon/r600_perfcounter.c
@@ -33,10 +33,6 @@
 /* Max counters per HW block */
 #define R600_QUERY_MAX_COUNTERS 16
 
-static const char * const r600_pc_shader_suffix[] = {
-	"", "_PS", "_VS", "_GS", "_ES", "_HS", "_LS", "_CS"
-};
-
 static struct r600_perfcounter_block *
 lookup_counter(struct r600_perfcounters *pc, unsigned index,
 	       unsigned *base_gid, unsigned *sub_index)
@@ -92,6 +88,8 @@ struct r600_pc_counter {
 	unsigned stride;
 };
 
+#define R600_PC_SHADERS_WINDOWING (1 << 31)
+
 struct r600_query_pc {
 	struct r600_query_hw b;
 
@@ -246,32 +244,29 @@ static struct r600_pc_group *get_group_state(struct r600_common_screen *screen,
 	if (block->flags & R600_PC_BLOCK_SHADER) {
 		unsigned sub_gids = block->num_instances;
 		unsigned shader_id;
-		unsigned shader_mask;
-		unsigned query_shader_mask;
+		unsigned shaders;
+		unsigned query_shaders;
 
 		if (block->flags & R600_PC_BLOCK_SE_GROUPS)
 			sub_gids = sub_gids * screen->info.max_se;
 		shader_id = sub_gid / sub_gids;
 		sub_gid = sub_gid % sub_gids;
 
-		if (shader_id == 0)
-			shader_mask = R600_PC_SHADER_ALL;
-		else
-			shader_mask = 1 << (shader_id - 1);
+		shaders = screen->perfcounters->shader_type_bits[shader_id];
 
-		query_shader_mask = query->shaders & R600_PC_SHADER_ALL;
-		if (query_shader_mask && query_shader_mask != shader_mask) {
+		query_shaders = query->shaders & ~R600_PC_SHADERS_WINDOWING;
+		if (query_shaders && query_shaders != shaders) {
 			fprintf(stderr, "r600_perfcounter: incompatible shader groups\n");
 			FREE(group);
 			return NULL;
 		}
-		query->shaders |= shader_mask;
+		query->shaders = shaders;
 	}
 
-	if (block->flags & R600_PC_BLOCK_SHADER_WINDOWED) {
+	if (block->flags & R600_PC_BLOCK_SHADER_WINDOWED && !query->shaders) {
 		// A non-zero value in query->shaders ensures that the shader
 		// masking is reset unless the user explicitly requests one.
-		query->shaders |= R600_PC_SHADER_WINDOWING;
+		query->shaders = R600_PC_SHADERS_WINDOWING;
 	}
 
 	if (block->flags & R600_PC_BLOCK_SE_GROUPS) {
@@ -379,8 +374,8 @@ struct pipe_query *r600_create_batch_query(struct pipe_context *ctx,
 	}
 
 	if (query->shaders) {
-		if ((query->shaders & R600_PC_SHADER_ALL) == 0)
-			query->shaders |= R600_PC_SHADER_ALL;
+		if (query->shaders == R600_PC_SHADERS_WINDOWING)
+			query->shaders = 0xffffffff;
 		query->b.num_cs_dw_begin += pc->num_shaders_cs_dwords;
 	}
 
@@ -438,7 +433,7 @@ static boolean r600_init_block_names(struct r600_common_screen *screen,
 	if (block->flags & R600_PC_BLOCK_SE_GROUPS)
 		groups_se = screen->info.max_se;
 	if (block->flags & R600_PC_BLOCK_SHADER)
-		groups_shader = ARRAY_SIZE(r600_pc_shader_suffix);
+		groups_shader = screen->perfcounters->num_shader_types;
 
 	namelen = strlen(block->basename);
 	block->group_name_stride = namelen + 1;
@@ -462,14 +457,15 @@ static boolean r600_init_block_names(struct r600_common_screen *screen,
 
 	groupname = block->group_names;
 	for (i = 0; i < groups_shader; ++i) {
-		unsigned shaderlen = strlen(r600_pc_shader_suffix[i]);
+		const char *shader_suffix = screen->perfcounters->shader_type_suffixes[i];
+		unsigned shaderlen = strlen(shader_suffix);
 		for (j = 0; j < groups_se; ++j) {
 			for (k = 0; k < groups_instance; ++k) {
 				strcpy(groupname, block->basename);
 				p = groupname + namelen;
 
 				if (block->flags & R600_PC_BLOCK_SHADER) {
-					strcpy(p, r600_pc_shader_suffix[i]);
+					strcpy(p, shader_suffix);
 					p += shaderlen;
 				}
 
@@ -626,7 +622,7 @@ void r600_perfcounters_add_block(struct r600_common_screen *rscreen,
 	if (block->flags & R600_PC_BLOCK_SE_GROUPS)
 		block->num_groups *= rscreen->info.max_se;
 	if (block->flags & R600_PC_BLOCK_SHADER)
-		block->num_groups *= ARRAY_SIZE(r600_pc_shader_suffix);
+		block->num_groups *= pc->num_shader_types;
 
 	++pc->num_blocks;
 	pc->num_groups += block->num_groups;
diff --git a/src/gallium/drivers/radeon/r600_query.h b/src/gallium/drivers/radeon/r600_query.h
index dbc950f..8b2c4e3 100644
--- a/src/gallium/drivers/radeon/r600_query.h
+++ b/src/gallium/drivers/radeon/r600_query.h
@@ -166,24 +166,6 @@ enum {
 	R600_PC_BLOCK_SHADER_WINDOWED = (1 << 4),
 };
 
-/* Shader enable bits. Chosen to coincide with SQ_PERFCOUNTER_CTRL values */
-enum {
-	R600_PC_SHADER_PS = (1 << 0),
-	R600_PC_SHADER_VS = (1 << 1),
-	R600_PC_SHADER_GS = (1 << 2),
-	R600_PC_SHADER_ES = (1 << 3),
-	R600_PC_SHADER_HS = (1 << 4),
-	R600_PC_SHADER_LS = (1 << 5),
-	R600_PC_SHADER_CS = (1 << 6),
-
-	R600_PC_SHADER_ALL = R600_PC_SHADER_PS | R600_PC_SHADER_VS |
-			     R600_PC_SHADER_GS | R600_PC_SHADER_ES |
-			     R600_PC_SHADER_HS | R600_PC_SHADER_LS |
-			     R600_PC_SHADER_CS,
-
-	R600_PC_SHADER_WINDOWING = (1 << 31),
-};
-
 /* Describes a hardware block with performance counters. Multiple instances of
  * each block, possibly per-SE, may exist on the chip. Depending on the block
  * and on the user's configuration, we either
@@ -220,6 +202,10 @@ struct r600_perfcounters {
 	unsigned num_instance_cs_dwords;
 	unsigned num_shaders_cs_dwords;
 
+	unsigned num_shader_types;
+	const char * const *shader_type_suffixes;
+	const unsigned *shader_type_bits;
+
 	void (*get_size)(struct r600_perfcounter_block *,
 			 unsigned count, unsigned *selectors,
 			 unsigned *num_select_dw, unsigned *num_read_dw);
diff --git a/src/gallium/drivers/radeonsi/si_perfcounter.c b/src/gallium/drivers/radeonsi/si_perfcounter.c
index b658866..f944a52 100644
--- a/src/gallium/drivers/radeonsi/si_perfcounter.c
+++ b/src/gallium/drivers/radeonsi/si_perfcounter.c
@@ -79,6 +79,23 @@ struct si_pc_block {
 	unsigned instances;
 };
 
+/* The order is chosen to be compatible with GPUPerfStudio's hardcoding of
+ * performance counter group IDs.
+ */
+static const char * const si_pc_shader_type_suffixes[] = {
+	"", "_ES", "_GS", "_VS", "_PS", "_LS", "_HS", "_CS"
+};
+
+static const unsigned si_pc_shader_type_bits[] = {
+	0x7f,
+	S_036780_ES_EN(1),
+	S_036780_GS_EN(1),
+	S_036780_VS_EN(1),
+	S_036780_PS_EN(1),
+	S_036780_LS_EN(1),
+	S_036780_HS_EN(1),
+	S_036780_CS_EN(1),
+};
 
 static struct si_pc_block_base cik_CB = {
 	.name = "CB",
@@ -662,6 +679,10 @@ void si_init_perfcounters(struct si_screen *screen)
 		pc->num_stop_cs_dwords += 6;
 	}
 
+	pc->num_shader_types = ARRAY_SIZE(si_pc_shader_type_bits);
+	pc->shader_type_suffixes = si_pc_shader_type_suffixes;
+	pc->shader_type_bits = si_pc_shader_type_bits;
+
 	pc->get_size = si_pc_get_size;
 	pc->emit_instance = si_pc_emit_instance;
 	pc->emit_shaders = si_pc_emit_shaders;
-- 
2.5.0



More information about the mesa-dev mailing list