[Mesa-dev] [PATCH 2/2] radv: Implement VK_AMD_shader_info

Wed Oct 25 14:03:24 UTC 2017

On 10/25/2017 02:20 PM, Alex Smith wrote:
> On 25 October 2017 at 12:46, Samuel Pitoiset <samuel.pitoiset at gmail.com 
> <mailto:samuel.pitoiset at gmail.com>> wrote:
> 
>     I have something similar on my local tree (started on monday).
> 
>     Though, I don't like the way we expose the number of VGPRS/SGPRS
>     because we can't really figure out the number of spilled ones.
> 
> 
> My assumption was that if we've spilled then we've used all available 
> registers, so if numUsed{V,S}gprs is greater than the number available, 
> then you'd know that the number spilled is the difference between the 
> two. Can we have spilling when num_{v,s}gprs is less than the number 
> available?

Assuming the number of waves per CU is 4, I would go with:

num_available_vgprs = num_physical_vgprs (ie. 256) / max_simd_waves 
(aligned down to 4).

(or we can just set num_available_vgprs to conf->num_vgprs and return 
num_used_vgprs = conf->num_vgprs + conf->num_spilled_sgprs).

That way, if num_used_vgprs is greater than num_available_vgprs we know 
that we are spilling some vgprs.

For the number of available SGPRs, I think we can just hardcode the 
value to 104 for now.

Also with this, we can easily re-compute the maximum number of waves.

> 
> Alex
> 
> 
> 
>     On 10/25/2017 01:18 PM, Alex Smith wrote:
> 
>         This allows an app to query shader statistics and get a
>         disassembly of
>         a shader. RenderDoc git has support for it, so this allows you
>         to view
>         shader disassembly from a capture.
> 
>         When this extension is enabled on a device (or when tracing), we now
>         disable pipeline caching, since we don't get the shader debug
>         info when
>         we retrieve cached shaders.
> 
>         Signed-off-by: Alex Smith <asmith at feralinteractive.com
>         <mailto:asmith at feralinteractive.com>>
>         ---
>            src/amd/vulkan/radv_device.c         |   9 ++
>            src/amd/vulkan/radv_extensions.py    |   1 +
>            src/amd/vulkan/radv_pipeline.c       |   2 +-
>            src/amd/vulkan/radv_pipeline_cache.c |  11 ++-
>            src/amd/vulkan/radv_private.h        |   3 +
>            src/amd/vulkan/radv_shader.c         | 163
>         ++++++++++++++++++++++++++++-------
>            6 files changed, 154 insertions(+), 35 deletions(-)
> 
>         diff --git a/src/amd/vulkan/radv_device.c
>         b/src/amd/vulkan/radv_device.c
>         index c4e25222ea..5603551680 100644
>         --- a/src/amd/vulkan/radv_device.c
>         +++ b/src/amd/vulkan/radv_device.c
>         @@ -943,10 +943,15 @@ VkResult radv_CreateDevice(
>                  VkResult result;
>                  struct radv_device *device;
>            +     bool keep_shader_info = false;
>         +
>                  for (uint32_t i = 0; i <
>         pCreateInfo->enabledExtensionCount; i++) {
>                          const char *ext_name =
>         pCreateInfo->ppEnabledExtensionNames[i];
>                          if
>         (!radv_physical_device_extension_supported(physical_device,
>         ext_name))
>                                  return
>         vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
>         +
>         +               if (strcmp(ext_name,
>         VK_AMD_SHADER_INFO_EXTENSION_NAME) == 0)
>         +                       keep_shader_info = true;
>                  }
>                  /* Check enabled features */
>         @@ -1040,10 +1045,14 @@ VkResult radv_CreateDevice(
>                          device->physical_device->rad_info.max_se >= 2;
>                  if (getenv("RADV_TRACE_FILE")) {
>         +               keep_shader_info = true;
>         +
>                          if (!radv_init_trace(device))
>                                  goto fail;
>                  }
>            +     device->keep_shader_info = keep_shader_info;
>         +
>                  result = radv_device_init_meta(device);
>                  if (result != VK_SUCCESS)
>                          goto fail;
>         diff --git a/src/amd/vulkan/radv_extensions.py
>         b/src/amd/vulkan/radv_extensions.py
>         index dfeb2880fc..eeb679d65a 100644
>         --- a/src/amd/vulkan/radv_extensions.py
>         +++ b/src/amd/vulkan/radv_extensions.py
>         @@ -81,6 +81,7 @@ EXTENSIONS = [
>                Extension('VK_EXT_global_priority',                   1,
>         'device->rad_info.has_ctx_priority'),
>                Extension('VK_AMD_draw_indirect_count',               1,
>         True),
>                Extension('VK_AMD_rasterization_order',               1,
>         'device->rad_info.chip_class >= VI && device->rad_info.max_se >=
>         2'),
>         +    Extension('VK_AMD_shader_info',                       1, True),
>            ]
>              class VkVersion:
>         diff --git a/src/amd/vulkan/radv_pipeline.c
>         b/src/amd/vulkan/radv_pipeline.c
>         index d6b33a5327..2df03a83cf 100644
>         --- a/src/amd/vulkan/radv_pipeline.c
>         +++ b/src/amd/vulkan/radv_pipeline.c
>         @@ -1874,7 +1874,7 @@ void radv_create_shaders(struct
>         radv_pipeline *pipeline,
>                                  if (device->instance->debug_flags &
>         RADV_DEBUG_DUMP_SHADERS)
>                                          nir_print_shader(nir[i], stderr);
>            -                     if (!pipeline->device->trace_bo)
>         +                       if (!pipeline->device->keep_shader_info)
>                                          ralloc_free(nir[i]);
>                          }
>                  }
>         diff --git a/src/amd/vulkan/radv_pipeline_cache.c
>         b/src/amd/vulkan/radv_pipeline_cache.c
>         index 9ba9a3b61b..46198799a7 100644
>         --- a/src/amd/vulkan/radv_pipeline_cache.c
>         +++ b/src/amd/vulkan/radv_pipeline_cache.c
>         @@ -62,9 +62,11 @@ radv_pipeline_cache_init(struct
>         radv_pipeline_cache *cache,
>                  cache->hash_table = malloc(byte_size);
>                  /* We don't consider allocation failure fatal, we just
>         start with a 0-sized
>         -        * cache. */
>         +        * cache. Disable caching when we want to keep shader
>         debug info, since
>         +        * we don't get the debug info on cached shaders. */
>                  if (cache->hash_table == NULL ||
>         -           (device->instance->debug_flags & RADV_DEBUG_NO_CACHE))
>         +           (device->instance->debug_flags & RADV_DEBUG_NO_CACHE) ||
>         +           device->keep_shader_info)
>                          cache->table_size = 0;
>                  else
>                          memset(cache->hash_table, 0, byte_size);
>         @@ -186,8 +188,11 @@
>         radv_create_shader_variants_from_pipeline_cache(struct
>         radv_device *device,
>                  entry = radv_pipeline_cache_search_unlocked(cache, sha1);
>                  if (!entry) {
>         +               /* Again, don't cache when we want debug info,
>         since this isn't
>         +                * present in the cache. */
>                          if (!device->physical_device->disk_cache ||
>         -                   (device->instance->debug_flags &
>         RADV_DEBUG_NO_CACHE)) {
>         +                   (device->instance->debug_flags &
>         RADV_DEBUG_NO_CACHE) ||
>         +                   device->keep_shader_info) {
>                                  pthread_mutex_unlock(&cache->mutex);
>                                  return false;
>                          }
>         diff --git a/src/amd/vulkan/radv_private.h
>         b/src/amd/vulkan/radv_private.h
>         index a4e52b2530..169df5f37b 100644
>         --- a/src/amd/vulkan/radv_private.h
>         +++ b/src/amd/vulkan/radv_private.h
>         @@ -552,6 +552,9 @@ struct radv_device {
>                  struct radeon_winsys_bo                      *trace_bo;
>                  uint32_t                                     *trace_id_ptr;
>            +     /* Whether to keep shader debug info, for tracing or
>         VK_AMD_shader_info */
>         +       bool                                       
>           keep_shader_info;
>         +
>                  struct radv_physical_device                 
>         *physical_device;
>                  /* Backup in-memory cache to be used if the app doesn't
>         provide one */
>         diff --git a/src/amd/vulkan/radv_shader.c
>         b/src/amd/vulkan/radv_shader.c
>         index 5903917068..7f2f0fd750 100644
>         --- a/src/amd/vulkan/radv_shader.c
>         +++ b/src/amd/vulkan/radv_shader.c
>         @@ -46,6 +46,8 @@
>            #include "util/debug.h"
>            #include "ac_exp_param.h"
>            +#include "util/string_buffer.h"
>         +
>            static const struct nir_shader_compiler_options nir_options = {
>                  .vertex_id_zero_based = true,
>                  .lower_scmp = true,
>         @@ -471,7 +473,7 @@ shader_variant_create(struct radv_device
>         *device,
>                  free(binary.relocs);
>                  variant->ref_count = 1;
>            -     if (device->trace_bo) {
>         +       if (device->keep_shader_info) {
>                          variant->disasm_string = binary.disasm_string;
>                          if (!gs_copy_shader && !module->nir) {
>                                  variant->nir = *shaders;
>         @@ -593,11 +595,20 @@ radv_get_shader_name(struct
>         radv_shader_variant *var, gl_shader_stage stage)
>                  };
>            }
>            -void
>         -radv_shader_dump_stats(struct radv_device *device,
>         -                      struct radv_shader_variant *variant,
>         -                      gl_shader_stage stage,
>         -                      FILE *file)
>         +static uint32_t
>         +get_total_sgprs(struct radv_device *device)
>         +{
>         +       if (device->physical_device->rad_info.chip_class >= VI)
>         +               return 800;
>         +       else
>         +               return 512;
>         +}
>         +
>         +static void
>         +generate_shader_stats(struct radv_device *device,
>         +                     struct radv_shader_variant *variant,
>         +                     gl_shader_stage stage,
>         +                     struct _mesa_string_buffer *buf)
>            {
>                  unsigned lds_increment =
>         device->physical_device->rad_info.chip_class >= CIK ? 512 : 256;
>                  struct ac_shader_config *conf;
>         @@ -623,12 +634,8 @@ radv_shader_dump_stats(struct radv_device
>         *device,
>                                               lds_increment);
>                  }
>            -     if (conf->num_sgprs) {
>         -               if (device->physical_device->rad_info.chip_class
>          >= VI)
>         -                       max_simd_waves = MIN2(max_simd_waves,
>         800 / conf->num_sgprs);
>         -               else
>         -                       max_simd_waves = MIN2(max_simd_waves,
>         512 / conf->num_sgprs);
>         -       }
>         +       if (conf->num_sgprs)
>         +               max_simd_waves = MIN2(max_simd_waves,
>         get_total_sgprs(device) / conf->num_sgprs);
>                  if (conf->num_vgprs)
>                          max_simd_waves = MIN2(max_simd_waves, 256 /
>         conf->num_vgprs);
>         @@ -639,27 +646,121 @@ radv_shader_dump_stats(struct radv_device
>         *device,
>                  if (lds_per_wave)
>                          max_simd_waves = MIN2(max_simd_waves, 16384 /
>         lds_per_wave);
>            +     if (stage == MESA_SHADER_FRAGMENT) {
>         +               _mesa_string_buffer_printf(buf, "*** SHADER
>         CONFIG ***\n"
>         +                                          "SPI_PS_INPUT_ADDR =
>         0x%04x\n"
>         +                                          "SPI_PS_INPUT_ENA  =
>         0x%04x\n",
>         +                                         
>         conf->spi_ps_input_addr, conf->spi_ps_input_ena);
>         +       }
>         +
>         +       _mesa_string_buffer_printf(buf, "*** SHADER STATS ***\n"
>         +                                  "SGPRS: %d\n"
>         +                                  "VGPRS: %d\n"
>         +                                  "Spilled SGPRs: %d\n"
>         +                                  "Spilled VGPRs: %d\n"
>         +                                  "Code Size: %d bytes\n"
>         +                                  "LDS: %d blocks\n"
>         +                                  "Scratch: %d bytes per wave\n"
>         +                                  "Max Waves: %d\n"
>         +                                  "********************\n\n\n",
>         +                                  conf->num_sgprs, conf->num_vgprs,
>         +                                  conf->spilled_sgprs,
>         conf->spilled_vgprs, variant->code_size,
>         +                                  conf->lds_size,
>         conf->scratch_bytes_per_wave,
>         +                                  max_simd_waves);
>         +}
>         +
>         +void
>         +radv_shader_dump_stats(struct radv_device *device,
>         +                      struct radv_shader_variant *variant,
>         +                      gl_shader_stage stage,
>         +                      FILE *file)
>         +{
>         +       struct _mesa_string_buffer *buf =
>         _mesa_string_buffer_create(NULL, 256);
>         +
>         +       generate_shader_stats(device, variant, stage, buf);
>         +
>                  fprintf(file, "\n%s:\n", radv_get_shader_name(variant,
>         stage));
>         +       fprintf(file, buf->buf);
>            -     if (stage == MESA_SHADER_FRAGMENT) {
>         -               fprintf(file, "*** SHADER CONFIG ***\n"
>         -                       "SPI_PS_INPUT_ADDR = 0x%04x\n"
>         -                       "SPI_PS_INPUT_ENA  = 0x%04x\n",
>         -                       conf->spi_ps_input_addr,
>         conf->spi_ps_input_ena);
>         +       _mesa_string_buffer_destroy(buf);
>         +}
>         +
>         +VkResult
>         +radv_GetShaderInfoAMD(VkDevice _device,
>         +                     VkPipeline _pipeline,
>         +                     VkShaderStageFlagBits shaderStage,
>         +                     VkShaderInfoTypeAMD infoType,
>         +                     size_t* pInfoSize,
>         +                     void* pInfo)
>         +{
>         +       RADV_FROM_HANDLE(radv_device, device, _device);
>         +       RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);
>         +       gl_shader_stage stage =
>         vk_to_mesa_shader_stage(shaderStage);
>         +       struct radv_shader_variant *variant =
>         pipeline->shaders[stage];
>         +       struct _mesa_string_buffer *buf;
>         +       VkResult result = VK_SUCCESS;
>         +
>         +       /* Spec doesn't indicate what to do if the stage is
>         invalid, so just
>         +        * return no info for this. */
>         +       if (!variant)
>         +               return VK_ERROR_FEATURE_NOT_PRESENT;
>         +
>         +       switch (infoType) {
>         +       case VK_SHADER_INFO_TYPE_STATISTICS_AMD:
>         +               if (!pInfo) {
>         +                       *pInfoSize =
>         sizeof(VkShaderStatisticsInfoAMD);
>         +               } else {
>         +                       struct ac_shader_config *conf =
>         &variant->config;
>         +
>         +                       VkShaderStatisticsInfoAMD statistics = {};
>         +                       statistics.shaderStageMask = shaderStage;
>         +                       statistics.resourceUsage.numUsedVgprs =
>         conf->num_vgprs + conf->spilled_vgprs;
>         +                       statistics.resourceUsage.numUsedSgprs =
>         conf->num_sgprs + conf->spilled_sgprs;
>         +                     
>           statistics.resourceUsage.ldsSizePerLocalWorkGroup = 16384;
>         +                     
>           statistics.resourceUsage.ldsUsageSizeInBytes = conf->lds_size;
>         +                     
>           statistics.resourceUsage.scratchMemUsageInBytes =
>         conf->scratch_bytes_per_wave;
>         +                       statistics.numPhysicalVgprs =
>         statistics.numAvailableVgprs = 256;
>         +                       statistics.numPhysicalSgprs =
>         statistics.numAvailableSgprs = get_total_sgprs(device);
>         +                       statistics.computeWorkGroupSize[0] =
>         variant->nir->info.cs.local_size[0];
>         +                       statistics.computeWorkGroupSize[1] =
>         variant->nir->info.cs.local_size[1];
>         +                       statistics.computeWorkGroupSize[2] =
>         variant->nir->info.cs.local_size[2];
>         +
>         +                       size_t size = *pInfoSize;
>         +                       *pInfoSize = sizeof(statistics);
>         +
>         +                       memcpy(pInfo, &statistics, MIN2(size,
>         *pInfoSize));
>         +
>         +                       if (size < *pInfoSize)
>         +                               result = VK_INCOMPLETE;
>         +               }
>         +
>         +               break;
>         +       case VK_SHADER_INFO_TYPE_DISASSEMBLY_AMD:
>         +               buf = _mesa_string_buffer_create(NULL, 1024);
>         +
>         +               _mesa_string_buffer_printf(buf, "%s:\n",
>         radv_get_shader_name(variant, stage));
>         +               _mesa_string_buffer_printf(buf, "%s\n\n",
>         variant->disasm_string);
>         +               generate_shader_stats(device, variant, stage, buf);
>         +
>         +               if (!pInfo) {
>         +                       *pInfoSize = buf->length;
>         +               } else {
>         +                       size_t size = *pInfoSize;
>         +                       *pInfoSize = buf->length;
>         +
>         +                       memcpy(pInfo, buf->buf, MIN2(size,
>         buf->length));
>         +
>         +                       if (size < buf->length)
>         +                               result = VK_INCOMPLETE;
>         +               }
>         +
>         +               _mesa_string_buffer_destroy(buf);
>         +               break;
>         +       default:
>         +               /* VK_SHADER_INFO_TYPE_BINARY_AMD unimplemented
>         for now. */
>         +               result = VK_ERROR_FEATURE_NOT_PRESENT;
>         +               break;
>                  }
>            -     fprintf(file, "*** SHADER STATS ***\n"
>         -               "SGPRS: %d\n"
>         -               "VGPRS: %d\n"
>         -               "Spilled SGPRs: %d\n"
>         -               "Spilled VGPRs: %d\n"
>         -               "Code Size: %d bytes\n"
>         -               "LDS: %d blocks\n"
>         -               "Scratch: %d bytes per wave\n"
>         -               "Max Waves: %d\n"
>         -               "********************\n\n\n",
>         -               conf->num_sgprs, conf->num_vgprs,
>         -               conf->spilled_sgprs, conf->spilled_vgprs,
>         variant->code_size,
>         -               conf->lds_size, conf->scratch_bytes_per_wave,
>         -               max_simd_waves);
>         +       return result;
>            }
> 
>