[Mesa-dev] [PATCH] radv: add support for shader stats dump

Tue Nov 22 04:44:59 UTC 2016

From: Dave Airlie <airlied at redhat.com>

I've started working on a shader-db-alike for Vulkan. It is based on
vktrace and records pipelines. This patch adds support for dumping
the shader stats (enabled with RADV_SHADER_STATS) exactly like
radeonsi does, so I can reuse the shader-db scripts it uses.

Signed-off-by: Dave Airlie <airlied at redhat.com>
---
 src/amd/vulkan/radv_device.c   |  2 +-
 src/amd/vulkan/radv_pipeline.c | 84 ++++++++++++++++++++++++++++++++++++++++++
 src/amd/vulkan/radv_private.h  |  3 ++
 3 files changed, 88 insertions(+), 1 deletion(-)

diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index 5acaf56..313d7a5 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -682,7 +682,7 @@ VkResult radv_CreateDevice(
 	}
 	device->allow_fast_clears = env_var_as_boolean("RADV_FAST_CLEARS", false);
 	device->allow_dcc = !env_var_as_boolean("RADV_DCC_DISABLE", true);
-
+	device->shader_stats_dump = env_var_as_boolean("RADV_SHADER_STATS", false);
 	if (device->allow_fast_clears && device->allow_dcc)
 		radv_finishme("DCC fast clears have not been tested\n");
 
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index fca0173..5f3ebb3 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -257,6 +257,81 @@ radv_shader_compile_to_nir(struct radv_device *device,
 	return nir;
 }
 
+/* Return the heading printed above a stage's stats dump; 'var' is unused. */
+static const char *radv_get_shader_name(struct radv_shader_variant *var,
+					gl_shader_stage stage)
+{
+	if (stage == MESA_SHADER_VERTEX)
+		return "Vertex Shader as VS";
+	if (stage == MESA_SHADER_FRAGMENT)
+		return "Pixel Shader";
+	if (stage == MESA_SHADER_COMPUTE)
+		return "Compute Shader";
+	return "Unknown shader";
+}
+/* Print each pipeline stage's shader stats and estimated max waves to stderr. */
+static void radv_dump_pipeline_stats(struct radv_device *device, struct radv_pipeline *pipeline)
+{
+	unsigned lds_increment = device->instance->physicalDevice.rad_info.chip_class >= CIK ? 512 : 256;
+	FILE *file = stderr;
+	int i;
+
+	for (i = 0; i < MESA_SHADER_STAGES; i++) {
+		struct radv_shader_variant *var = pipeline->shaders[i];
+		struct ac_shader_config *conf;
+		/* Wave limits are per shader: reset them for every stage so one
+		 * stage's register/LDS use does not cap the next stage's result.
+		 */
+		unsigned max_simd_waves = 10;
+		unsigned lds_per_wave = 0;
+
+		if (!var)
+			continue;
+		conf = &var->config;
+
+		if (i == MESA_SHADER_FRAGMENT)
+			lds_per_wave = conf->lds_size * lds_increment +
+				align(var->info.fs.num_interp * 48, lds_increment);
+
+		if (conf->num_sgprs) {
+			if (device->instance->physicalDevice.rad_info.chip_class >= VI)
+				max_simd_waves = MIN2(max_simd_waves, 800 / conf->num_sgprs);
+			else
+				max_simd_waves = MIN2(max_simd_waves, 512 / conf->num_sgprs);
+		}
+
+		if (conf->num_vgprs)
+			max_simd_waves = MIN2(max_simd_waves, 256 / conf->num_vgprs);
+
+		/* LDS is 64KB per CU (4 SIMDs), divided into 16KB blocks per SIMD
+		 * that PS can use.
+		 */
+		if (lds_per_wave)
+			max_simd_waves = MIN2(max_simd_waves, 16384 / lds_per_wave);
+
+		fprintf(file, "\n%s:\n", radv_get_shader_name(var, i));
+		if (i == MESA_SHADER_FRAGMENT) {
+			fprintf(file, "*** SHADER CONFIG ***\n"
+				"SPI_PS_INPUT_ADDR = 0x%04x\n"
+				"SPI_PS_INPUT_ENA  = 0x%04x\n",
+				conf->spi_ps_input_addr, conf->spi_ps_input_ena);
+		}
+		fprintf(file, "*** SHADER STATS ***\n"
+			"SGPRS: %d\n"
+			"VGPRS: %d\n"
+			"Spilled SGPRs: %d\n"
+			"Spilled VGPRs: %d\n"
+			"Code Size: %d bytes\n"
+			"LDS: %d blocks\n"
+			"Scratch: %d bytes per wave\n"
+			"Max Waves: %d\n"
+			"********************\n\n\n",
+			conf->num_sgprs, conf->num_vgprs, conf->spilled_sgprs,
+			conf->spilled_vgprs, var->code_size, conf->lds_size,
+			conf->scratch_bytes_per_wave, max_simd_waves);
+	}
+}
+
 void radv_shader_variant_destroy(struct radv_device *device,
                                  struct radv_shader_variant *variant)
 {
@@ -297,6 +372,7 @@ struct radv_shader_variant *radv_shader_variant_create(struct radv_device *devic
 			      &variant->info, shader, &options, dump);
 	LLVMDisposeTargetMachine(tm);
 
+	variant->code_size = binary.code_size;
 	bool scratch_enabled = variant->config.scratch_bytes_per_wave > 0;
 	unsigned vgpr_comp_cnt = 0;
 
@@ -1336,6 +1412,10 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
 		pipeline->binding_stride[desc->binding] = desc->stride;
 	}
 
+	if (device->shader_stats_dump) {
+		radv_dump_pipeline_stats(device, pipeline);
+	}
+
 	return VK_SUCCESS;
 }
 
@@ -1429,6 +1509,10 @@ static VkResult radv_compute_pipeline_create(
 				       pipeline->layout, NULL, dump);
 
 	*pPipeline = radv_pipeline_to_handle(pipeline);
+
+	if (device->shader_stats_dump) {
+		radv_dump_pipeline_stats(device, pipeline);
+	}
 	return VK_SUCCESS;
 }
 VkResult radv_CreateComputePipelines(
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index c66ff5d..1999fe8 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -444,6 +444,8 @@ struct radv_device {
 	bool allow_fast_clears;
 	bool allow_dcc;
 
+	bool shader_stats_dump;
+
 	uint32_t scratch_waves;
 	/* MSAA sample locations.
 	 * The first index is the sample index.
@@ -804,6 +806,7 @@ struct radv_shader_variant {
 	struct ac_shader_variant_info info;
 	unsigned rsrc1;
 	unsigned rsrc2;
+	uint32_t code_size;
 };
 
 struct radv_depth_stencil_state {
-- 
2.7.4