[Mesa-dev] [PATCH] radv: add support for shader stats dump

Bas Nieuwenhuizen bas at basnieuwenhuizen.nl
Tue Nov 22 06:57:36 UTC 2016


Reviewed-by: Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl>

On Tue, Nov 22, 2016 at 5:44 AM, Dave Airlie <airlied at gmail.com> wrote:
> From: Dave Airlie <airlied at redhat.com>
>
> I've started working on a shader-db-alike for Vulkan. It's
> based on vktrace and records pipelines. This patch adds
> support for dumping the shader stats exactly like radeonsi
> does, so I can reuse the shader-db scripts it uses.
>
> Signed-off-by: Dave Airlie <airlied at redhat.com>
> ---
>  src/amd/vulkan/radv_device.c   |  2 +-
>  src/amd/vulkan/radv_pipeline.c | 84 ++++++++++++++++++++++++++++++++++++++++++
>  src/amd/vulkan/radv_private.h  |  3 ++
>  3 files changed, 88 insertions(+), 1 deletion(-)
>
> diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
> index 5acaf56..313d7a5 100644
> --- a/src/amd/vulkan/radv_device.c
> +++ b/src/amd/vulkan/radv_device.c
> @@ -682,7 +682,7 @@ VkResult radv_CreateDevice(
>         }
>         device->allow_fast_clears = env_var_as_boolean("RADV_FAST_CLEARS", false);
>         device->allow_dcc = !env_var_as_boolean("RADV_DCC_DISABLE", true);
> -
> +       device->shader_stats_dump = env_var_as_boolean("RADV_SHADER_STATS", false);
>         if (device->allow_fast_clears && device->allow_dcc)
>                 radv_finishme("DCC fast clears have not been tested\n");
>
> diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
> index fca0173..5f3ebb3 100644
> --- a/src/amd/vulkan/radv_pipeline.c
> +++ b/src/amd/vulkan/radv_pipeline.c
> @@ -257,6 +257,81 @@ radv_shader_compile_to_nir(struct radv_device *device,
>         return nir;
>  }
>
> +static const char *radv_get_shader_name(struct radv_shader_variant *var,
> +                                       gl_shader_stage stage)
> +{
> +       switch (stage) {
> +       case MESA_SHADER_VERTEX: return "Vertex Shader as VS";
> +       case MESA_SHADER_FRAGMENT: return "Pixel Shader";
> +       case MESA_SHADER_COMPUTE: return "Compute Shader";
> +       default:
> +               return "Unknown shader";
> +       };
> +
> +}
> +static void radv_dump_pipeline_stats(struct radv_device *device, struct radv_pipeline *pipeline)
> +{
> +       unsigned lds_increment = device->instance->physicalDevice.rad_info.chip_class >= CIK ? 512 : 256;
> +       struct radv_shader_variant *var;
> +       struct ac_shader_config *conf;
> +       int i;
> +       FILE *file = stderr;
> +       unsigned max_simd_waves = 10;
> +       unsigned lds_per_wave = 0;
> +
> +       for (i = 0; i < MESA_SHADER_STAGES; i++) {
> +               if (!pipeline->shaders[i])
> +                       continue;
> +               var = pipeline->shaders[i];
> +
> +               conf = &var->config;
> +
> +               if (i == MESA_SHADER_FRAGMENT) {
> +                       lds_per_wave = conf->lds_size * lds_increment +
> +                               align(var->info.fs.num_interp * 48, lds_increment);
> +               }
> +
> +               if (conf->num_sgprs) {
> +                       if (device->instance->physicalDevice.rad_info.chip_class >= VI)
> +                               max_simd_waves = MIN2(max_simd_waves, 800 / conf->num_sgprs);
> +                       else
> +                               max_simd_waves = MIN2(max_simd_waves, 512 / conf->num_sgprs);
> +               }
> +
> +               if (conf->num_vgprs)
> +                       max_simd_waves = MIN2(max_simd_waves, 256 / conf->num_vgprs);
> +
> +               /* LDS is 64KB per CU (4 SIMDs), divided into 16KB blocks per SIMD
> +                * that PS can use.
> +                */
> +               if (lds_per_wave)
> +                       max_simd_waves = MIN2(max_simd_waves, 16384 / lds_per_wave);

Didn't we have 32 KiB/CU on SI? Not that I expect it to matter, since we
don't really use LDS for PS.

> +
> +               fprintf(file, "\n%s:\n",
> +                       radv_get_shader_name(var, i));
> +               if (i == MESA_SHADER_FRAGMENT) {
> +                       fprintf(file, "*** SHADER CONFIG ***\n"
> +                               "SPI_PS_INPUT_ADDR = 0x%04x\n"
> +                               "SPI_PS_INPUT_ENA  = 0x%04x\n",
> +                               conf->spi_ps_input_addr, conf->spi_ps_input_ena);
> +               }
> +               fprintf(file, "*** SHADER STATS ***\n"
> +                       "SGPRS: %d\n"
> +                       "VGPRS: %d\n"
> +                       "Spilled SGPRs: %d\n"
> +                       "Spilled VGPRs: %d\n"
> +                       "Code Size: %d bytes\n"
> +                       "LDS: %d blocks\n"
> +                       "Scratch: %d bytes per wave\n"
> +                       "Max Waves: %d\n"
> +                       "********************\n\n\n",
> +                       conf->num_sgprs, conf->num_vgprs,
> +                       conf->spilled_sgprs, conf->spilled_vgprs, var->code_size,
> +                       conf->lds_size, conf->scratch_bytes_per_wave,
> +                       max_simd_waves);
> +       }
> +}
> +
>  void radv_shader_variant_destroy(struct radv_device *device,
>                                   struct radv_shader_variant *variant)
>  {
> @@ -297,6 +372,7 @@ struct radv_shader_variant *radv_shader_variant_create(struct radv_device *devic
>                               &variant->info, shader, &options, dump);
>         LLVMDisposeTargetMachine(tm);
>
> +       variant->code_size = binary.code_size;
>         bool scratch_enabled = variant->config.scratch_bytes_per_wave > 0;
>         unsigned vgpr_comp_cnt = 0;
>
> @@ -1336,6 +1412,10 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
>                 pipeline->binding_stride[desc->binding] = desc->stride;
>         }
>
> +       if (device->shader_stats_dump) {
> +               radv_dump_pipeline_stats(device, pipeline);
> +       }
> +
>         return VK_SUCCESS;
>  }
>
> @@ -1429,6 +1509,10 @@ static VkResult radv_compute_pipeline_create(
>                                        pipeline->layout, NULL, dump);
>
>         *pPipeline = radv_pipeline_to_handle(pipeline);
> +
> +       if (device->shader_stats_dump) {
> +               radv_dump_pipeline_stats(device, pipeline);
> +       }
>         return VK_SUCCESS;
>  }
>  VkResult radv_CreateComputePipelines(
> diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
> index c66ff5d..1999fe8 100644
> --- a/src/amd/vulkan/radv_private.h
> +++ b/src/amd/vulkan/radv_private.h
> @@ -444,6 +444,8 @@ struct radv_device {
>         bool allow_fast_clears;
>         bool allow_dcc;
>
> +       bool shader_stats_dump;
> +
>         uint32_t scratch_waves;
>         /* MSAA sample locations.
>          * The first index is the sample index.
> @@ -804,6 +806,7 @@ struct radv_shader_variant {
>         struct ac_shader_variant_info info;
>         unsigned rsrc1;
>         unsigned rsrc2;
> +       uint32_t code_size;
>  };
>
>  struct radv_depth_stencil_state {
> --
> 2.7.4
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list