[Mesa-dev] [PATCH 2/2] radv: Implement VK_AMD_shader_info

Wed Oct 25 12:20:51 UTC 2017

On 25 October 2017 at 12:46, Samuel Pitoiset <samuel.pitoiset at gmail.com>
wrote:

> I have something similar in my local tree (started on Monday).
>
> However, I don't like the way we expose the number of VGPRs/SGPRs, because
> we can't really figure out how many of them were spilled.

My assumption was that if we've spilled, then we've used all available
registers. So if numUsed{V,S}gprs is greater than the number available,
you know that the number spilled is the difference between the two.
Can we have spilling when num_{v,s}gprs is less than the number available?

Alex

>
>
> On 10/25/2017 01:18 PM, Alex Smith wrote:
>
>> This allows an app to query shader statistics and get a disassembly of
>> a shader. RenderDoc git has support for it, so this allows you to view
>> shader disassembly from a capture.
>>
>> When this extension is enabled on a device (or when tracing), we now
>> disable pipeline caching, since we don't get the shader debug info when
>> we retrieve cached shaders.
>>
>> Signed-off-by: Alex Smith <asmith at feralinteractive.com>
>> ---
>>   src/amd/vulkan/radv_device.c         |   9 ++
>>   src/amd/vulkan/radv_extensions.py    |   1 +
>>   src/amd/vulkan/radv_pipeline.c       |   2 +-
>>   src/amd/vulkan/radv_pipeline_cache.c |  11 ++-
>>   src/amd/vulkan/radv_private.h        |   3 +
>>   src/amd/vulkan/radv_shader.c         | 163
>> ++++++++++++++++++++++++++++-------
>>   6 files changed, 154 insertions(+), 35 deletions(-)
>>
>> diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
>> index c4e25222ea..5603551680 100644
>> --- a/src/amd/vulkan/radv_device.c
>> +++ b/src/amd/vulkan/radv_device.c
>> @@ -943,10 +943,15 @@ VkResult radv_CreateDevice(
>>         VkResult result;
>>         struct radv_device *device;
>>   +     bool keep_shader_info = false;
>> +
>>         for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount;
>> i++) {
>>                 const char *ext_name = pCreateInfo->ppEnabledExtensio
>> nNames[i];
>>                 if (!radv_physical_device_extension_supported(physical_device,
>> ext_name))
>>                         return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
>> +
>> +               if (strcmp(ext_name, VK_AMD_SHADER_INFO_EXTENSION_NAME)
>> == 0)
>> +                       keep_shader_info = true;
>>         }
>>         /* Check enabled features */
>> @@ -1040,10 +1045,14 @@ VkResult radv_CreateDevice(
>>                 device->physical_device->rad_info.max_se >= 2;
>>         if (getenv("RADV_TRACE_FILE")) {
>> +               keep_shader_info = true;
>> +
>>                 if (!radv_init_trace(device))
>>                         goto fail;
>>         }
>>   +     device->keep_shader_info = keep_shader_info;
>> +
>>         result = radv_device_init_meta(device);
>>         if (result != VK_SUCCESS)
>>                 goto fail;
>> diff --git a/src/amd/vulkan/radv_extensions.py
>> b/src/amd/vulkan/radv_extensions.py
>> index dfeb2880fc..eeb679d65a 100644
>> --- a/src/amd/vulkan/radv_extensions.py
>> +++ b/src/amd/vulkan/radv_extensions.py
>> @@ -81,6 +81,7 @@ EXTENSIONS = [
>>       Extension('VK_EXT_global_priority',                   1,
>> 'device->rad_info.has_ctx_priority'),
>>       Extension('VK_AMD_draw_indirect_count',               1, True),
>>       Extension('VK_AMD_rasterization_order',               1,
>> 'device->rad_info.chip_class >= VI && device->rad_info.max_se >= 2'),
>> +    Extension('VK_AMD_shader_info',                       1, True),
>>   ]
>>     class VkVersion:
>> diff --git a/src/amd/vulkan/radv_pipeline.c
>> b/src/amd/vulkan/radv_pipeline.c
>> index d6b33a5327..2df03a83cf 100644
>> --- a/src/amd/vulkan/radv_pipeline.c
>> +++ b/src/amd/vulkan/radv_pipeline.c
>> @@ -1874,7 +1874,7 @@ void radv_create_shaders(struct radv_pipeline
>> *pipeline,
>>                         if (device->instance->debug_flags &
>> RADV_DEBUG_DUMP_SHADERS)
>>                                 nir_print_shader(nir[i], stderr);
>>   -                     if (!pipeline->device->trace_bo)
>> +                       if (!pipeline->device->keep_shader_info)
>>                                 ralloc_free(nir[i]);
>>                 }
>>         }
>> diff --git a/src/amd/vulkan/radv_pipeline_cache.c
>> b/src/amd/vulkan/radv_pipeline_cache.c
>> index 9ba9a3b61b..46198799a7 100644
>> --- a/src/amd/vulkan/radv_pipeline_cache.c
>> +++ b/src/amd/vulkan/radv_pipeline_cache.c
>> @@ -62,9 +62,11 @@ radv_pipeline_cache_init(struct radv_pipeline_cache
>> *cache,
>>         cache->hash_table = malloc(byte_size);
>>         /* We don't consider allocation failure fatal, we just start with
>> a 0-sized
>> -        * cache. */
>> +        * cache. Disable caching when we want to keep shader debug info,
>> since
>> +        * we don't get the debug info on cached shaders. */
>>         if (cache->hash_table == NULL ||
>> -           (device->instance->debug_flags & RADV_DEBUG_NO_CACHE))
>> +           (device->instance->debug_flags & RADV_DEBUG_NO_CACHE) ||
>> +           device->keep_shader_info)
>>                 cache->table_size = 0;
>>         else
>>                 memset(cache->hash_table, 0, byte_size);
>> @@ -186,8 +188,11 @@ radv_create_shader_variants_from_pipeline_cache(struct
>> radv_device *device,
>>         entry = radv_pipeline_cache_search_unlocked(cache, sha1);
>>         if (!entry) {
>> +               /* Again, don't cache when we want debug info, since this
>> isn't
>> +                * present in the cache. */
>>                 if (!device->physical_device->disk_cache ||
>> -                   (device->instance->debug_flags &
>> RADV_DEBUG_NO_CACHE)) {
>> +                   (device->instance->debug_flags &
>> RADV_DEBUG_NO_CACHE) ||
>> +                   device->keep_shader_info) {
>>                         pthread_mutex_unlock(&cache->mutex);
>>                         return false;
>>                 }
>> diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.
>> h
>> index a4e52b2530..169df5f37b 100644
>> --- a/src/amd/vulkan/radv_private.h
>> +++ b/src/amd/vulkan/radv_private.h
>> @@ -552,6 +552,9 @@ struct radv_device {
>>         struct radeon_winsys_bo                      *trace_bo;
>>         uint32_t                                     *trace_id_ptr;
>>   +     /* Whether to keep shader debug info, for tracing or
>> VK_AMD_shader_info */
>> +       bool                                         keep_shader_info;
>> +
>>         struct radv_physical_device                  *physical_device;
>>         /* Backup in-memory cache to be used if the app doesn't provide
>> one */
>> diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
>> index 5903917068..7f2f0fd750 100644
>> --- a/src/amd/vulkan/radv_shader.c
>> +++ b/src/amd/vulkan/radv_shader.c
>> @@ -46,6 +46,8 @@
>>   #include "util/debug.h"
>>   #include "ac_exp_param.h"
>>   +#include "util/string_buffer.h"
>> +
>>   static const struct nir_shader_compiler_options nir_options = {
>>         .vertex_id_zero_based = true,
>>         .lower_scmp = true,
>> @@ -471,7 +473,7 @@ shader_variant_create(struct radv_device *device,
>>         free(binary.relocs);
>>         variant->ref_count = 1;
>>   -     if (device->trace_bo) {
>> +       if (device->keep_shader_info) {
>>                 variant->disasm_string = binary.disasm_string;
>>                 if (!gs_copy_shader && !module->nir) {
>>                         variant->nir = *shaders;
>> @@ -593,11 +595,20 @@ radv_get_shader_name(struct radv_shader_variant
>> *var, gl_shader_stage stage)
>>         };
>>   }
>>   -void
>> -radv_shader_dump_stats(struct radv_device *device,
>> -                      struct radv_shader_variant *variant,
>> -                      gl_shader_stage stage,
>> -                      FILE *file)
>> +static uint32_t
>> +get_total_sgprs(struct radv_device *device)
>> +{
>> +       if (device->physical_device->rad_info.chip_class >= VI)
>> +               return 800;
>> +       else
>> +               return 512;
>> +}
>> +
>> +static void
>> +generate_shader_stats(struct radv_device *device,
>> +                     struct radv_shader_variant *variant,
>> +                     gl_shader_stage stage,
>> +                     struct _mesa_string_buffer *buf)
>>   {
>>         unsigned lds_increment = device->physical_device->rad_info.chip_class
>> >= CIK ? 512 : 256;
>>         struct ac_shader_config *conf;
>> @@ -623,12 +634,8 @@ radv_shader_dump_stats(struct radv_device *device,
>>                                      lds_increment);
>>         }
>>   -     if (conf->num_sgprs) {
>> -               if (device->physical_device->rad_info.chip_class >= VI)
>> -                       max_simd_waves = MIN2(max_simd_waves, 800 /
>> conf->num_sgprs);
>> -               else
>> -                       max_simd_waves = MIN2(max_simd_waves, 512 /
>> conf->num_sgprs);
>> -       }
>> +       if (conf->num_sgprs)
>> +               max_simd_waves = MIN2(max_simd_waves,
>> get_total_sgprs(device) / conf->num_sgprs);
>>         if (conf->num_vgprs)
>>                 max_simd_waves = MIN2(max_simd_waves, 256 /
>> conf->num_vgprs);
>> @@ -639,27 +646,121 @@ radv_shader_dump_stats(struct radv_device *device,
>>         if (lds_per_wave)
>>                 max_simd_waves = MIN2(max_simd_waves, 16384 /
>> lds_per_wave);
>>   +     if (stage == MESA_SHADER_FRAGMENT) {
>> +               _mesa_string_buffer_printf(buf, "*** SHADER CONFIG ***\n"
>> +                                          "SPI_PS_INPUT_ADDR = 0x%04x\n"
>> +                                          "SPI_PS_INPUT_ENA  = 0x%04x\n",
>> +                                          conf->spi_ps_input_addr,
>> conf->spi_ps_input_ena);
>> +       }
>> +
>> +       _mesa_string_buffer_printf(buf, "*** SHADER STATS ***\n"
>> +                                  "SGPRS: %d\n"
>> +                                  "VGPRS: %d\n"
>> +                                  "Spilled SGPRs: %d\n"
>> +                                  "Spilled VGPRs: %d\n"
>> +                                  "Code Size: %d bytes\n"
>> +                                  "LDS: %d blocks\n"
>> +                                  "Scratch: %d bytes per wave\n"
>> +                                  "Max Waves: %d\n"
>> +                                  "********************\n\n\n",
>> +                                  conf->num_sgprs, conf->num_vgprs,
>> +                                  conf->spilled_sgprs,
>> conf->spilled_vgprs, variant->code_size,
>> +                                  conf->lds_size,
>> conf->scratch_bytes_per_wave,
>> +                                  max_simd_waves);
>> +}
>> +
>> +void
>> +radv_shader_dump_stats(struct radv_device *device,
>> +                      struct radv_shader_variant *variant,
>> +                      gl_shader_stage stage,
>> +                      FILE *file)
>> +{
>> +       struct _mesa_string_buffer *buf = _mesa_string_buffer_create(NULL,
>> 256);
>> +
>> +       generate_shader_stats(device, variant, stage, buf);
>> +
>>         fprintf(file, "\n%s:\n", radv_get_shader_name(variant, stage));
>> +       fprintf(file, buf->buf);
>>   -     if (stage == MESA_SHADER_FRAGMENT) {
>> -               fprintf(file, "*** SHADER CONFIG ***\n"
>> -                       "SPI_PS_INPUT_ADDR = 0x%04x\n"
>> -                       "SPI_PS_INPUT_ENA  = 0x%04x\n",
>> -                       conf->spi_ps_input_addr, conf->spi_ps_input_ena);
>> +       _mesa_string_buffer_destroy(buf);
>> +}
>> +
>> +VkResult
>> +radv_GetShaderInfoAMD(VkDevice _device,
>> +                     VkPipeline _pipeline,
>> +                     VkShaderStageFlagBits shaderStage,
>> +                     VkShaderInfoTypeAMD infoType,
>> +                     size_t* pInfoSize,
>> +                     void* pInfo)
>> +{
>> +       RADV_FROM_HANDLE(radv_device, device, _device);
>> +       RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);
>> +       gl_shader_stage stage = vk_to_mesa_shader_stage(shaderStage);
>> +       struct radv_shader_variant *variant = pipeline->shaders[stage];
>> +       struct _mesa_string_buffer *buf;
>> +       VkResult result = VK_SUCCESS;
>> +
>> +       /* Spec doesn't indicate what to do if the stage is invalid, so
>> just
>> +        * return no info for this. */
>> +       if (!variant)
>> +               return VK_ERROR_FEATURE_NOT_PRESENT;
>> +
>> +       switch (infoType) {
>> +       case VK_SHADER_INFO_TYPE_STATISTICS_AMD:
>> +               if (!pInfo) {
>> +                       *pInfoSize = sizeof(VkShaderStatisticsInfoAMD);
>> +               } else {
>> +                       struct ac_shader_config *conf = &variant->config;
>> +
>> +                       VkShaderStatisticsInfoAMD statistics = {};
>> +                       statistics.shaderStageMask = shaderStage;
>> +                       statistics.resourceUsage.numUsedVgprs =
>> conf->num_vgprs + conf->spilled_vgprs;
>> +                       statistics.resourceUsage.numUsedSgprs =
>> conf->num_sgprs + conf->spilled_sgprs;
>> +                       statistics.resourceUsage.ldsSizePerLocalWorkGroup
>> = 16384;
>> +                       statistics.resourceUsage.ldsUsageSizeInBytes =
>> conf->lds_size;
>> +                       statistics.resourceUsage.scratchMemUsageInBytes
>> = conf->scratch_bytes_per_wave;
>> +                       statistics.numPhysicalVgprs =
>> statistics.numAvailableVgprs = 256;
>> +                       statistics.numPhysicalSgprs =
>> statistics.numAvailableSgprs = get_total_sgprs(device);
>> +                       statistics.computeWorkGroupSize[0] =
>> variant->nir->info.cs.local_size[0];
>> +                       statistics.computeWorkGroupSize[1] =
>> variant->nir->info.cs.local_size[1];
>> +                       statistics.computeWorkGroupSize[2] =
>> variant->nir->info.cs.local_size[2];
>> +
>> +                       size_t size = *pInfoSize;
>> +                       *pInfoSize = sizeof(statistics);
>> +
>> +                       memcpy(pInfo, &statistics, MIN2(size,
>> *pInfoSize));
>> +
>> +                       if (size < *pInfoSize)
>> +                               result = VK_INCOMPLETE;
>> +               }
>> +
>> +               break;
>> +       case VK_SHADER_INFO_TYPE_DISASSEMBLY_AMD:
>> +               buf = _mesa_string_buffer_create(NULL, 1024);
>> +
>> +               _mesa_string_buffer_printf(buf, "%s:\n",
>> radv_get_shader_name(variant, stage));
>> +               _mesa_string_buffer_printf(buf, "%s\n\n",
>> variant->disasm_string);
>> +               generate_shader_stats(device, variant, stage, buf);
>> +
>> +               if (!pInfo) {
>> +                       *pInfoSize = buf->length;
>> +               } else {
>> +                       size_t size = *pInfoSize;
>> +                       *pInfoSize = buf->length;
>> +
>> +                       memcpy(pInfo, buf->buf, MIN2(size, buf->length));
>> +
>> +                       if (size < buf->length)
>> +                               result = VK_INCOMPLETE;
>> +               }
>> +
>> +               _mesa_string_buffer_destroy(buf);
>> +               break;
>> +       default:
>> +               /* VK_SHADER_INFO_TYPE_BINARY_AMD unimplemented for now.
>> */
>> +               result = VK_ERROR_FEATURE_NOT_PRESENT;
>> +               break;
>>         }
>>   -     fprintf(file, "*** SHADER STATS ***\n"
>> -               "SGPRS: %d\n"
>> -               "VGPRS: %d\n"
>> -               "Spilled SGPRs: %d\n"
>> -               "Spilled VGPRs: %d\n"
>> -               "Code Size: %d bytes\n"
>> -               "LDS: %d blocks\n"
>> -               "Scratch: %d bytes per wave\n"
>> -               "Max Waves: %d\n"
>> -               "********************\n\n\n",
>> -               conf->num_sgprs, conf->num_vgprs,
>> -               conf->spilled_sgprs, conf->spilled_vgprs,
>> variant->code_size,
>> -               conf->lds_size, conf->scratch_bytes_per_wave,
>> -               max_simd_waves);
>> +       return result;
>>   }
>>
>>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20171025/c66692bb/attachment-0001.html>