<div dir="ltr"><div class="gmail_extra"><div class="gmail_quote">On 25 October 2017 at 12:46, Samuel Pitoiset <span dir="ltr"><<a href="mailto:samuel.pitoiset@gmail.com" target="_blank">samuel.pitoiset@gmail.com</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">I have something similar on my local tree (started on monday).<br>
<br>
Though, I don't like the way we expose the number of VGPRS/SGPRS because we can't really figure out the number of spilled ones.</blockquote><div><br></div><div>My assumption was that if we've spilled then we've used all available registers, so if numUsed{V,S}gprs is greater than the number available, then you'd know that the number spilled is the difference between the two. Can we have spilling when num_{v,s}gprs is less than the number available?</div><div><br></div><div>Alex</div><div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex"><div class="gmail-HOEnZb"><div class="gmail-h5"><br>
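
To make that concrete, here's a rough sketch (illustration only, untested) of
how a tool consuming these statistics could recover the spill count under that
assumption:

#include <vulkan/vulkan.h>

/* Illustration only: recover the VGPR spill count from the reported
 * statistics, assuming numUsedVgprs includes spilled registers (as in the
 * patch below) and that spilling only happens once all available VGPRs
 * are in use. Returns 0 when nothing spilled. */
static uint32_t
spilled_vgprs(const VkShaderStatisticsInfoAMD *stats)
{
        if (stats->resourceUsage.numUsedVgprs > stats->numAvailableVgprs)
                return stats->resourceUsage.numUsedVgprs - stats->numAvailableVgprs;
        return 0;
}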

Alex

On 10/25/2017 01:18 PM, Alex Smith wrote:
This allows an app to query shader statistics and get a disassembly of
a shader. RenderDoc git has support for it, so this allows you to view
shader disassembly from a capture.

When this extension is enabled on a device (or when tracing), we now
disable pipeline caching, since we don't get the shader debug info when
we retrieve cached shaders.
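
For reference, usage from an application is the standard Vulkan two-call
idiom. A rough sketch, purely illustrative and not part of this patch (the
helper name is made up):

#include <stdio.h>
#include <stdlib.h>
#include <vulkan/vulkan.h>

/* Illustration only: dump the fragment shader disassembly for an
 * already-created pipeline. The extension entry point is loaded through
 * vkGetDeviceProcAddr; the first call queries the size, the second fills
 * the buffer. */
static void
dump_fs_disassembly(VkDevice device, VkPipeline pipeline)
{
        PFN_vkGetShaderInfoAMD get_shader_info =
                (PFN_vkGetShaderInfoAMD)vkGetDeviceProcAddr(device, "vkGetShaderInfoAMD");
        size_t size = 0;

        if (!get_shader_info ||
            get_shader_info(device, pipeline, VK_SHADER_STAGE_FRAGMENT_BIT,
                            VK_SHADER_INFO_TYPE_DISASSEMBLY_AMD, &size, NULL) != VK_SUCCESS)
                return;

        /* Over-allocate by one zeroed byte so the result can be printed as a
         * NUL-terminated string. */
        char *disasm = calloc(1, size + 1);
        if (!disasm)
                return;

        if (get_shader_info(device, pipeline, VK_SHADER_STAGE_FRAGMENT_BIT,
                            VK_SHADER_INFO_TYPE_DISASSEMBLY_AMD, &size, disasm) == VK_SUCCESS)
                printf("%s\n", disasm);

        free(disasm);
}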

Signed-off-by: Alex Smith <asmith@feralinteractive.com>
---
 src/amd/vulkan/radv_device.c         |   9 ++
 src/amd/vulkan/radv_extensions.py    |   1 +
 src/amd/vulkan/radv_pipeline.c       |   2 +-
 src/amd/vulkan/radv_pipeline_cache.c |  11 ++-
 src/amd/vulkan/radv_private.h        |   3 +
 src/amd/vulkan/radv_shader.c         | 163 ++++++++++++++++++++++++++++-------
 6 files changed, 154 insertions(+), 35 deletions(-)

diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index c4e25222ea..5603551680 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -943,10 +943,15 @@ VkResult radv_CreateDevice(
         VkResult result;
         struct radv_device *device;
 
+        bool keep_shader_info = false;
+
         for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
                 const char *ext_name = pCreateInfo->ppEnabledExtensionNames[i];
                 if (!radv_physical_device_extension_supported(physical_device, ext_name))
                         return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
+
+                if (strcmp(ext_name, VK_AMD_SHADER_INFO_EXTENSION_NAME) == 0)
+                        keep_shader_info = true;
         }
 
         /* Check enabled features */
@@ -1040,10 +1045,14 @@ VkResult radv_CreateDevice(
                 device->physical_device->rad_info.max_se >= 2;
 
         if (getenv("RADV_TRACE_FILE")) {
+                keep_shader_info = true;
+
                 if (!radv_init_trace(device))
                         goto fail;
         }
 
+        device->keep_shader_info = keep_shader_info;
+
         result = radv_device_init_meta(device);
         if (result != VK_SUCCESS)
                 goto fail;
diff --git a/src/amd/vulkan/radv_extensions.py b/src/amd/vulkan/radv_extensions.py
index dfeb2880fc..eeb679d65a 100644
--- a/src/amd/vulkan/radv_extensions.py
+++ b/src/amd/vulkan/radv_extensions.py
@@ -81,6 +81,7 @@ EXTENSIONS = [
     Extension('VK_EXT_global_priority',     1, 'device->rad_info.has_ctx_priority'),
     Extension('VK_AMD_draw_indirect_count', 1, True),
     Extension('VK_AMD_rasterization_order', 1, 'device->rad_info.chip_class >= VI && device->rad_info.max_se >= 2'),
+    Extension('VK_AMD_shader_info',         1, True),
 ]
 
 class VkVersion:
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index d6b33a5327..2df03a83cf 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -1874,7 +1874,7 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
                         if (device->instance->debug_flags & RADV_DEBUG_DUMP_SHADERS)
                                 nir_print_shader(nir[i], stderr);
 
-                        if (!pipeline->device->trace_bo)
+                        if (!pipeline->device->keep_shader_info)
                                 ralloc_free(nir[i]);
                 }
         }
diff --git a/src/amd/vulkan/radv_pipeline_cache.c b/src/amd/vulkan/radv_pipeline_cache.c
index 9ba9a3b61b..46198799a7 100644
--- a/src/amd/vulkan/radv_pipeline_cache.c
+++ b/src/amd/vulkan/radv_pipeline_cache.c
@@ -62,9 +62,11 @@ radv_pipeline_cache_init(struct radv_pipeline_cache *cache,
         cache->hash_table = malloc(byte_size);
 
         /* We don't consider allocation failure fatal, we just start with a 0-sized
-         * cache. */
+         * cache. Disable caching when we want to keep shader debug info, since
+         * we don't get the debug info on cached shaders. */
         if (cache->hash_table == NULL ||
-            (device->instance->debug_flags & RADV_DEBUG_NO_CACHE))
+            (device->instance->debug_flags & RADV_DEBUG_NO_CACHE) ||
+            device->keep_shader_info)
                 cache->table_size = 0;
         else
                 memset(cache->hash_table, 0, byte_size);
@@ -186,8 +188,11 @@ radv_create_shader_variants_from_pipeline_cache(struct radv_device *device,
         entry = radv_pipeline_cache_search_unlocked(cache, sha1);
 
         if (!entry) {
+                /* Again, don't cache when we want debug info, since this isn't
+                 * present in the cache. */
                 if (!device->physical_device->disk_cache ||
-                    (device->instance->debug_flags & RADV_DEBUG_NO_CACHE)) {
+                    (device->instance->debug_flags & RADV_DEBUG_NO_CACHE) ||
+                    device->keep_shader_info) {
                         pthread_mutex_unlock(&cache->mutex);
                         return false;
                 }
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index a4e52b2530..169df5f37b 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -552,6 +552,9 @@ struct radv_device {
         struct radeon_winsys_bo *trace_bo;
         uint32_t *trace_id_ptr;
 
+        /* Whether to keep shader debug info, for tracing or VK_AMD_shader_info */
+        bool keep_shader_info;
+
         struct radv_physical_device *physical_device;
 
         /* Backup in-memory cache to be used if the app doesn't provide one */
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index 5903917068..7f2f0fd750 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -46,6 +46,8 @@
 #include "util/debug.h"
 #include "ac_exp_param.h"
 
+#include "util/string_buffer.h"
+
 static const struct nir_shader_compiler_options nir_options = {
         .vertex_id_zero_based = true,
         .lower_scmp = true,
@@ -471,7 +473,7 @@ shader_variant_create(struct radv_device *device,
         free(binary.relocs);
         variant->ref_count = 1;
 
-        if (device->trace_bo) {
+        if (device->keep_shader_info) {
                 variant->disasm_string = binary.disasm_string;
                 if (!gs_copy_shader && !module->nir) {
                         variant->nir = *shaders;
@@ -593,11 +595,20 @@ radv_get_shader_name(struct radv_shader_variant *var, gl_shader_stage stage)
         };
 }
 
-void
-radv_shader_dump_stats(struct radv_device *device,
-                       struct radv_shader_variant *variant,
-                       gl_shader_stage stage,
-                       FILE *file)
+static uint32_t
+get_total_sgprs(struct radv_device *device)
+{
+        if (device->physical_device->rad_info.chip_class >= VI)
+                return 800;
+        else
+                return 512;
+}
+
+static void
+generate_shader_stats(struct radv_device *device,
+                      struct radv_shader_variant *variant,
+                      gl_shader_stage stage,
+                      struct _mesa_string_buffer *buf)
 {
         unsigned lds_increment = device->physical_device->rad_info.chip_class >= CIK ? 512 : 256;
         struct ac_shader_config *conf;
@@ -623,12 +634,8 @@ radv_shader_dump_stats(struct radv_device *device,
                                 lds_increment);
         }
 
-        if (conf->num_sgprs) {
-                if (device->physical_device->rad_info.chip_class >= VI)
-                        max_simd_waves = MIN2(max_simd_waves, 800 / conf->num_sgprs);
-                else
-                        max_simd_waves = MIN2(max_simd_waves, 512 / conf->num_sgprs);
-        }
+        if (conf->num_sgprs)
+                max_simd_waves = MIN2(max_simd_waves, get_total_sgprs(device) / conf->num_sgprs);
 
         if (conf->num_vgprs)
                 max_simd_waves = MIN2(max_simd_waves, 256 / conf->num_vgprs);
@@ -639,27 +646,121 @@ radv_shader_dump_stats(struct radv_device *device,
         if (lds_per_wave)
                 max_simd_waves = MIN2(max_simd_waves, 16384 / lds_per_wave);
 
+        if (stage == MESA_SHADER_FRAGMENT) {
+                _mesa_string_buffer_printf(buf, "*** SHADER CONFIG ***\n"
+                        "SPI_PS_INPUT_ADDR = 0x%04x\n"
+                        "SPI_PS_INPUT_ENA = 0x%04x\n",
+                        conf->spi_ps_input_addr, conf->spi_ps_input_ena);
+        }
+
+        _mesa_string_buffer_printf(buf, "*** SHADER STATS ***\n"
+                "SGPRS: %d\n"
+                "VGPRS: %d\n"
+                "Spilled SGPRs: %d\n"
+                "Spilled VGPRs: %d\n"
+                "Code Size: %d bytes\n"
+                "LDS: %d blocks\n"
+                "Scratch: %d bytes per wave\n"
+                "Max Waves: %d\n"
+                "********************\n\n\n",
+                conf->num_sgprs, conf->num_vgprs,
+                conf->spilled_sgprs, conf->spilled_vgprs, variant->code_size,
+                conf->lds_size, conf->scratch_bytes_per_wave,
+                max_simd_waves);
+}
+
+void
+radv_shader_dump_stats(struct radv_device *device,
+                       struct radv_shader_variant *variant,
+                       gl_shader_stage stage,
+                       FILE *file)
+{
+        struct _mesa_string_buffer *buf = _mesa_string_buffer_create(NULL, 256);
+
+        generate_shader_stats(device, variant, stage, buf);
+
         fprintf(file, "\n%s:\n", radv_get_shader_name(variant, stage));
+        fprintf(file, buf->buf);
 
-        if (stage == MESA_SHADER_FRAGMENT) {
-                fprintf(file, "*** SHADER CONFIG ***\n"
-                        "SPI_PS_INPUT_ADDR = 0x%04x\n"
-                        "SPI_PS_INPUT_ENA = 0x%04x\n",
-                        conf->spi_ps_input_addr, conf->spi_ps_input_ena);
+        _mesa_string_buffer_destroy(buf);
+}
+
+VkResult
+radv_GetShaderInfoAMD(VkDevice _device,
+                      VkPipeline _pipeline,
+                      VkShaderStageFlagBits shaderStage,
+                      VkShaderInfoTypeAMD infoType,
+                      size_t* pInfoSize,
+                      void* pInfo)
+{
+        RADV_FROM_HANDLE(radv_device, device, _device);
+        RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);
+        gl_shader_stage stage = vk_to_mesa_shader_stage(shaderStage);
+        struct radv_shader_variant *variant = pipeline->shaders[stage];
+        struct _mesa_string_buffer *buf;
+        VkResult result = VK_SUCCESS;
+
+        /* Spec doesn't indicate what to do if the stage is invalid, so just
+         * return no info for this. */
+        if (!variant)
+                return VK_ERROR_FEATURE_NOT_PRESENT;
+
+        switch (infoType) {
+        case VK_SHADER_INFO_TYPE_STATISTICS_AMD:
+                if (!pInfo) {
+                        *pInfoSize = sizeof(VkShaderStatisticsInfoAMD);
+                } else {
+                        struct ac_shader_config *conf = &variant->config;
+
+                        VkShaderStatisticsInfoAMD statistics = {};
+                        statistics.shaderStageMask = shaderStage;
+                        statistics.resourceUsage.numUsedVgprs = conf->num_vgprs + conf->spilled_vgprs;
+                        statistics.resourceUsage.numUsedSgprs = conf->num_sgprs + conf->spilled_sgprs;
+                        statistics.resourceUsage.ldsSizePerLocalWorkGroup = 16384;
+                        statistics.resourceUsage.ldsUsageSizeInBytes = conf->lds_size;
+                        statistics.resourceUsage.scratchMemUsageInBytes = conf->scratch_bytes_per_wave;
+                        statistics.numPhysicalVgprs = statistics.numAvailableVgprs = 256;
+                        statistics.numPhysicalSgprs = statistics.numAvailableSgprs = get_total_sgprs(device);
+                        statistics.computeWorkGroupSize[0] = variant->nir->info.cs.local_size[0];
+                        statistics.computeWorkGroupSize[1] = variant->nir->info.cs.local_size[1];
+                        statistics.computeWorkGroupSize[2] = variant->nir->info.cs.local_size[2];
+
+                        size_t size = *pInfoSize;
+                        *pInfoSize = sizeof(statistics);
+
+                        memcpy(pInfo, &statistics, MIN2(size, *pInfoSize));
+
+                        if (size < *pInfoSize)
+                                result = VK_INCOMPLETE;
+                }
+
+                break;
+        case VK_SHADER_INFO_TYPE_DISASSEMBLY_AMD:
+                buf = _mesa_string_buffer_create(NULL, 1024);
+
+                _mesa_string_buffer_printf(buf, "%s:\n", radv_get_shader_name(variant, stage));
+                _mesa_string_buffer_printf(buf, "%s\n\n", variant->disasm_string);
+                generate_shader_stats(device, variant, stage, buf);
+
+                if (!pInfo) {
+                        *pInfoSize = buf->length;
+                } else {
+                        size_t size = *pInfoSize;
+                        *pInfoSize = buf->length;
+
+                        memcpy(pInfo, buf->buf, MIN2(size, buf->length));
+
+                        if (size < buf->length)
+                                result = VK_INCOMPLETE;
+                }
+
+                _mesa_string_buffer_destroy(buf);
+                break;
+        default:
+                /* VK_SHADER_INFO_TYPE_BINARY_AMD unimplemented for now. */
+                result = VK_ERROR_FEATURE_NOT_PRESENT;
+                break;
         }
 
-        fprintf(file, "*** SHADER STATS ***\n"
-                "SGPRS: %d\n"
-                "VGPRS: %d\n"
-                "Spilled SGPRs: %d\n"
-                "Spilled VGPRs: %d\n"
-                "Code Size: %d bytes\n"
-                "LDS: %d blocks\n"
-                "Scratch: %d bytes per wave\n"
-                "Max Waves: %d\n"
-                "********************\n\n\n",
-                conf->num_sgprs, conf->num_vgprs,
-                conf->spilled_sgprs, conf->spilled_vgprs, variant->code_size,
-                conf->lds_size, conf->scratch_bytes_per_wave,
-                max_simd_waves);
+        return result;
 }