<div dir="ltr"><div class="gmail_extra"><div class="gmail_quote">On 25 October 2017 at 12:46, Samuel Pitoiset <span dir="ltr"><<a href="mailto:samuel.pitoiset@gmail.com" target="_blank">samuel.pitoiset@gmail.com</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">I have something similar in my local tree (started on Monday).<br>
<br>
Though, I don't like the way we expose the number of VGPRS/SGPRS because we can't really figure out the number of spilled ones.</blockquote><div><br></div><div>My assumption was that if we've spilled then we've used all available registers, so if numUsed{V,S}gprs is greater than the number available, then you'd know that the number spilled is the difference between the two. Can we have spilling when num_{v,s}gprs is less than the number available?</div><div><br></div><div>Alex</div><div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex"><div class="gmail-HOEnZb"><div class="gmail-h5"><br>
<br>
On 10/25/2017 01:18 PM, Alex Smith wrote:<br>
<blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
This allows an app to query shader statistics and get a disassembly of<br>
a shader. RenderDoc git has support for it, so this allows you to view<br>
shader disassembly from a capture.<br>
<br>
When this extension is enabled on a device (or when tracing), we now<br>
disable pipeline caching, since we don't get the shader debug info when<br>
we retrieve cached shaders.<br>
<br>
Signed-off-by: Alex Smith <<a href="mailto:asmith@feralinteractive.com" target="_blank">asmith@feralinteractive.com</a>><br>
---<br>
  src/amd/vulkan/radv_device.c         |   9 ++<br>
  src/amd/vulkan/radv_extensions<wbr>.py    |   1 +<br>
  src/amd/vulkan/radv_pipeline.<wbr>c       |   2 +-<br>
  src/amd/vulkan/radv_pipeline_c<wbr>ache.c |  11 ++-<br>
  src/amd/vulkan/radv_private.h        |   3 +<br>
  src/amd/vulkan/radv_shader.c         | 163 ++++++++++++++++++++++++++++--<wbr>-----<br>
  6 files changed, 154 insertions(+), 35 deletions(-)<br>
<br>
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c<br>
index c4e25222ea..5603551680 100644<br>
--- a/src/amd/vulkan/radv_device.c<br>
+++ b/src/amd/vulkan/radv_device.c<br>
@@ -943,10 +943,15 @@ VkResult radv_CreateDevice(<br>
        VkResult result;<br>
        struct radv_device *device;<br>
  +     bool keep_shader_info = false;<br>
+<br>
        for (uint32_t i = 0; i < pCreateInfo->enabledExtensionC<wbr>ount; i++) {<br>
                const char *ext_name = pCreateInfo->ppEnabledExtensio<wbr>nNames[i];<br>
                if (!radv_physical_device_extensi<wbr>on_supported(physical_device, ext_name))<br>
                        return vk_error(VK_ERROR_EXTENSION_NO<wbr>T_PRESENT);<br>
+<br>
+               if (strcmp(ext_name, VK_AMD_SHADER_INFO_EXTENSION_N<wbr>AME) == 0)<br>
+                       keep_shader_info = true;<br>
        }<br>
        /* Check enabled features */<br>
@@ -1040,10 +1045,14 @@ VkResult radv_CreateDevice(<br>
                device->physical_device->rad_i<wbr>nfo.max_se >= 2;<br>
        if (getenv("RADV_TRACE_FILE")) {<br>
+               keep_shader_info = true;<br>
+<br>
                if (!radv_init_trace(device))<br>
                        goto fail;<br>
        }<br>
  +     device->keep_shader_info = keep_shader_info;<br>
+<br>
        result = radv_device_init_meta(device);<br>
        if (result != VK_SUCCESS)<br>
                goto fail;<br>
diff --git a/src/amd/vulkan/radv_extensio<wbr>ns.py b/src/amd/vulkan/radv_extensio<wbr>ns.py<br>
index dfeb2880fc..eeb679d65a 100644<br>
--- a/src/amd/vulkan/radv_extensio<wbr>ns.py<br>
+++ b/src/amd/vulkan/radv_extensio<wbr>ns.py<br>
@@ -81,6 +81,7 @@ EXTENSIONS = [<br>
      Extension('VK_EXT_global_prior<wbr>ity',                   1, 'device->rad_info.has_ctx_prio<wbr>rity'),<br>
      Extension('VK_AMD_draw_indirec<wbr>t_count',               1, True),<br>
      Extension('VK_AMD_rasterizatio<wbr>n_order',               1, 'device->rad_info.chip_class >= VI && device->rad_info.max_se >= 2'),<br>
+    Extension('VK_AMD_shader_info'<wbr>,                       1, True),<br>
  ]<br>
    class VkVersion:<br>
diff --git a/src/amd/vulkan/radv_pipeline<wbr>.c b/src/amd/vulkan/radv_pipeline<wbr>.c<br>
index d6b33a5327..2df03a83cf 100644<br>
--- a/src/amd/vulkan/radv_pipeline<wbr>.c<br>
+++ b/src/amd/vulkan/radv_pipeline<wbr>.c<br>
@@ -1874,7 +1874,7 @@ void radv_create_shaders(struct radv_pipeline *pipeline,<br>
                        if (device->instance->debug_flags & RADV_DEBUG_DUMP_SHADERS)<br>
                                nir_print_shader(nir[i], stderr);<br>
  -                     if (!pipeline->device->trace_bo)<br>
+                       if (!pipeline->device->keep_shade<wbr>r_info)<br>
                                ralloc_free(nir[i]);<br>
                }<br>
        }<br>
diff --git a/src/amd/vulkan/radv_pipeline<wbr>_cache.c b/src/amd/vulkan/radv_pipeline<wbr>_cache.c<br>
index 9ba9a3b61b..46198799a7 100644<br>
--- a/src/amd/vulkan/radv_pipeline<wbr>_cache.c<br>
+++ b/src/amd/vulkan/radv_pipeline<wbr>_cache.c<br>
@@ -62,9 +62,11 @@ radv_pipeline_cache_init(struc<wbr>t radv_pipeline_cache *cache,<br>
        cache->hash_table = malloc(byte_size);<br>
        /* We don't consider allocation failure fatal, we just start with a 0-sized<br>
-        * cache. */<br>
+        * cache. Disable caching when we want to keep shader debug info, since<br>
+        * we don't get the debug info on cached shaders. */<br>
        if (cache->hash_table == NULL ||<br>
-           (device->instance->debug_<wbr>flags & RADV_DEBUG_NO_CACHE))<br>
+           (device->instance->debug_<wbr>flags & RADV_DEBUG_NO_CACHE) ||<br>
+           device->keep_shader_info)<br>
                cache->table_size = 0;<br>
        else<br>
                memset(cache->hash_table, 0, byte_size);<br>
@@ -186,8 +188,11 @@ radv_create_shader_variants_fr<wbr>om_pipeline_cache(struct radv_device *device,<br>
        entry = radv_pipeline_cache_search_unl<wbr>ocked(cache, sha1);<br>
        if (!entry) {<br>
+               /* Again, don't cache when we want debug info, since this isn't<br>
+                * present in the cache. */<br>
                if (!device->physical_device->dis<wbr>k_cache ||<br>
-                   (device->instance->debug_<wbr>flags & RADV_DEBUG_NO_CACHE)) {<br>
+                   (device->instance->debug_<wbr>flags & RADV_DEBUG_NO_CACHE) ||<br>
+                   device->keep_shader_info) {<br>
                        pthread_mutex_unlock(&cache->m<wbr>utex);<br>
                        return false;<br>
                }<br>
diff --git a/src/amd/vulkan/radv_private.<wbr>h b/src/amd/vulkan/radv_private.<wbr>h<br>
index a4e52b2530..169df5f37b 100644<br>
--- a/src/amd/vulkan/radv_private.<wbr>h<br>
+++ b/src/amd/vulkan/radv_private.<wbr>h<br>
@@ -552,6 +552,9 @@ struct radv_device {<br>
        struct radeon_winsys_bo                      *trace_bo;<br>
        uint32_t                                     *trace_id_ptr;<br>
  +     /* Whether to keep shader debug info, for tracing or VK_AMD_shader_info */<br>
+       bool                                         keep_shader_info;<br>
+<br>
        struct radv_physical_device                  *physical_device;<br>
        /* Backup in-memory cache to be used if the app doesn't provide one */<br>
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c<br>
index 5903917068..7f2f0fd750 100644<br>
--- a/src/amd/vulkan/radv_shader.c<br>
+++ b/src/amd/vulkan/radv_shader.c<br>
@@ -46,6 +46,8 @@<br>
  #include "util/debug.h"<br>
  #include "ac_exp_param.h"<br>
  +#include "util/string_buffer.h"<br>
+<br>
  static const struct nir_shader_compiler_options nir_options = {<br>
        .vertex_id_zero_based = true,<br>
        .lower_scmp = true,<br>
@@ -471,7 +473,7 @@ shader_variant_create(struct radv_device *device,<br>
        free(binary.relocs);<br>
        variant->ref_count = 1;<br>
  -     if (device->trace_bo) {<br>
+       if (device->keep_shader_info) {<br>
                variant->disasm_string = binary.disasm_string;<br>
                if (!gs_copy_shader && !module->nir) {<br>
                        variant->nir = *shaders;<br>
@@ -593,11 +595,20 @@ radv_get_shader_name(struct radv_shader_variant *var, gl_shader_stage stage)<br>
        };<br>
  }<br>
  -void<br>
-radv_shader_dump_stats(struct radv_device *device,<br>
-                      struct radv_shader_variant *variant,<br>
-                      gl_shader_stage stage,<br>
-                      FILE *file)<br>
+static uint32_t<br>
+get_total_sgprs(struct radv_device *device)<br>
+{<br>
+       if (device->physical_device->rad_<wbr>info.chip_class >= VI)<br>
+               return 800;<br>
+       else<br>
+               return 512;<br>
+}<br>
+<br>
+static void<br>
+generate_shader_stats(struct radv_device *device,<br>
+                     struct radv_shader_variant *variant,<br>
+                     gl_shader_stage stage,<br>
+                     struct _mesa_string_buffer *buf)<br>
  {<br>
        unsigned lds_increment = device->physical_device->rad_i<wbr>nfo.chip_class >= CIK ? 512 : 256;<br>
        struct ac_shader_config *conf;<br>
@@ -623,12 +634,8 @@ radv_shader_dump_stats(struct radv_device *device,<br>
                                     lds_increment);<br>
        }<br>
  -     if (conf->num_sgprs) {<br>
-               if (device->physical_device->rad_<wbr>info.chip_class >= VI)<br>
-                       max_simd_waves = MIN2(max_simd_waves, 800 / conf->num_sgprs);<br>
-               else<br>
-                       max_simd_waves = MIN2(max_simd_waves, 512 / conf->num_sgprs);<br>
-       }<br>
+       if (conf->num_sgprs)<br>
+               max_simd_waves = MIN2(max_simd_waves, get_total_sgprs(device) / conf->num_sgprs);<br>
        if (conf->num_vgprs)<br>
                max_simd_waves = MIN2(max_simd_waves, 256 / conf->num_vgprs);<br>
@@ -639,27 +646,121 @@ radv_shader_dump_stats(struct radv_device *device,<br>
        if (lds_per_wave)<br>
                max_simd_waves = MIN2(max_simd_waves, 16384 / lds_per_wave);<br>
  +     if (stage == MESA_SHADER_FRAGMENT) {<br>
+               _mesa_string_buffer_printf(bu<wbr>f, "*** SHADER CONFIG ***\n"<br>
+                                          "SPI_PS_INPUT_ADDR = 0x%04x\n"<br>
+                                          "SPI_PS_INPUT_ENA  = 0x%04x\n",<br>
+                                          conf->spi_ps_input_addr, conf->spi_ps_input_ena);<br>
+       }<br>
+<br>
+       _mesa_string_buffer_printf(bu<wbr>f, "*** SHADER STATS ***\n"<br>
+                                  "SGPRS: %d\n"<br>
+                                  "VGPRS: %d\n"<br>
+                                  "Spilled SGPRs: %d\n"<br>
+                                  "Spilled VGPRs: %d\n"<br>
+                                  "Code Size: %d bytes\n"<br>
+                                  "LDS: %d blocks\n"<br>
+                                  "Scratch: %d bytes per wave\n"<br>
+                                  "Max Waves: %d\n"<br>
+                                  "********************\n\n\n",<br>
+                                  conf->num_sgprs, conf->num_vgprs,<br>
+                                  conf->spilled_sgprs, conf->spilled_vgprs, variant->code_size,<br>
+                                  conf->lds_size, conf->scratch_bytes_per_wave,<br>
+                                  max_simd_waves);<br>
+}<br>
+<br>
+void<br>
+radv_shader_dump_stats(struct radv_device *device,<br>
+                      struct radv_shader_variant *variant,<br>
+                      gl_shader_stage stage,<br>
+                      FILE *file)<br>
+{<br>
+       struct _mesa_string_buffer *buf = _mesa_string_buffer_create(NUL<wbr>L, 256);<br>
+<br>
+       generate_shader_stats(device, variant, stage, buf);<br>
+<br>
        fprintf(file, "\n%s:\n", radv_get_shader_name(variant, stage));<br>
+       fprintf(file, buf->buf);<br>
  -     if (stage == MESA_SHADER_FRAGMENT) {<br>
-               fprintf(file, "*** SHADER CONFIG ***\n"<br>
-                       "SPI_PS_INPUT_ADDR = 0x%04x\n"<br>
-                       "SPI_PS_INPUT_ENA  = 0x%04x\n",<br>
-                       conf->spi_ps_input_addr, conf->spi_ps_input_ena);<br>
+       _mesa_string_buffer_destroy(b<wbr>uf);<br>
+}<br>
+<br>
+VkResult<br>
+radv_GetShaderInfoAMD(VkDevic<wbr>e _device,<br>
+                     VkPipeline _pipeline,<br>
+                     VkShaderStageFlagBits shaderStage,<br>
+                     VkShaderInfoTypeAMD infoType,<br>
+                     size_t* pInfoSize,<br>
+                     void* pInfo)<br>
+{<br>
+       RADV_FROM_HANDLE(radv_device, device, _device);<br>
+       RADV_FROM_HANDLE(radv_pipelin<wbr>e, pipeline, _pipeline);<br>
+       gl_shader_stage stage = vk_to_mesa_shader_stage(shader<wbr>Stage);<br>
+       struct radv_shader_variant *variant = pipeline->shaders[stage];<br>
+       struct _mesa_string_buffer *buf;<br>
+       VkResult result = VK_SUCCESS;<br>
+<br>
+       /* Spec doesn't indicate what to do if the stage is invalid, so just<br>
+        * return no info for this. */<br>
+       if (!variant)<br>
+               return VK_ERROR_FEATURE_NOT_PRESENT;<br>
+<br>
+       switch (infoType) {<br>
+       case VK_SHADER_INFO_TYPE_STATISTICS<wbr>_AMD:<br>
+               if (!pInfo) {<br>
+                       *pInfoSize = sizeof(VkShaderStatisticsInfoA<wbr>MD);<br>
+               } else {<br>
+                       struct ac_shader_config *conf = &variant->config;<br>
+<br>
+                       VkShaderStatisticsInfoAMD statistics = {};<br>
+                       statistics.shaderStageMask = shaderStage;<br>
+                       statistics.resourceUsage.numU<wbr>sedVgprs = conf->num_vgprs + conf->spilled_vgprs;<br>
+                       statistics.resourceUsage.numU<wbr>sedSgprs = conf->num_sgprs + conf->spilled_sgprs;<br>
+                       statistics.resourceUsage.ldsS<wbr>izePerLocalWorkGroup = 16384;<br>
+                       statistics.resourceUsage.ldsU<wbr>sageSizeInBytes = conf->lds_size;<br>
+                       statistics.resourceUsage.scra<wbr>tchMemUsageInBytes = conf->scratch_bytes_per_wave;<br>
+                       statistics.numPhysicalVgprs = statistics.numAvailableVgprs = 256;<br>
+                       statistics.numPhysicalSgprs = statistics.numAvailableSgprs = get_total_sgprs(device);<br>
+                       statistics.computeWorkGroupSi<wbr>ze[0] = variant->nir->info.cs.local_si<wbr>ze[0];<br>
+                       statistics.computeWorkGroupSi<wbr>ze[1] = variant->nir->info.cs.local_si<wbr>ze[1];<br>
+                       statistics.computeWorkGroupSi<wbr>ze[2] = variant->nir->info.cs.local_si<wbr>ze[2];<br>
+<br>
+                       size_t size = *pInfoSize;<br>
+                       *pInfoSize = sizeof(statistics);<br>
+<br>
+                       memcpy(pInfo, &statistics, MIN2(size, *pInfoSize));<br>
+<br>
+                       if (size < *pInfoSize)<br>
+                               result = VK_INCOMPLETE;<br>
+               }<br>
+<br>
+               break;<br>
+       case VK_SHADER_INFO_TYPE_DISASSEMBL<wbr>Y_AMD:<br>
+               buf = _mesa_string_buffer_create(NUL<wbr>L, 1024);<br>
+<br>
+               _mesa_string_buffer_printf(bu<wbr>f, "%s:\n", radv_get_shader_name(variant, stage));<br>
+               _mesa_string_buffer_printf(bu<wbr>f, "%s\n\n", variant->disasm_string);<br>
+               generate_shader_stats(device, variant, stage, buf);<br>
+<br>
+               if (!pInfo) {<br>
+                       *pInfoSize = buf->length;<br>
+               } else {<br>
+                       size_t size = *pInfoSize;<br>
+                       *pInfoSize = buf->length;<br>
+<br>
+                       memcpy(pInfo, buf->buf, MIN2(size, buf->length));<br>
+<br>
+                       if (size < buf->length)<br>
+                               result = VK_INCOMPLETE;<br>
+               }<br>
+<br>
+               _mesa_string_buffer_destroy(b<wbr>uf);<br>
+               break;<br>
+       default:<br>
+               /* VK_SHADER_INFO_TYPE_BINARY_AMD unimplemented for now. */<br>
+               result = VK_ERROR_FEATURE_NOT_PRESENT;<br>
+               break;<br>
        }<br>
  -     fprintf(file, "*** SHADER STATS ***\n"<br>
-               "SGPRS: %d\n"<br>
-               "VGPRS: %d\n"<br>
-               "Spilled SGPRs: %d\n"<br>
-               "Spilled VGPRs: %d\n"<br>
-               "Code Size: %d bytes\n"<br>
-               "LDS: %d blocks\n"<br>
-               "Scratch: %d bytes per wave\n"<br>
-               "Max Waves: %d\n"<br>
-               "********************\n\n\n",<br>
-               conf->num_sgprs, conf->num_vgprs,<br>
-               conf->spilled_sgprs, conf->spilled_vgprs, variant->code_size,<br>
-               conf->lds_size, conf->scratch_bytes_per_wave,<br>
-               max_simd_waves);<br>
+       return result;<br>
  }<br>
<br>
</blockquote>
</div></div></blockquote></div><br></div></div>