Mesa (main): anv: Support workgroup memory in other shaders

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Tue Jun 8 19:19:00 UTC 2021


Module: Mesa
Branch: main
Commit: 7c1c9e935e6a645fa47e47f784879a7b28bde785
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=7c1c9e935e6a645fa47e47f784879a7b28bde785

Author: Caio Marcelo de Oliveira Filho <caio.oliveira at intel.com>
Date:   Mon Jun  7 14:17:12 2021 -0700

anv: Support workgroup memory in other shaders

Mesh and Task shaders can use workgroup memory, so generalize its
handling in anv by moving it from anv_pipeline_compile_cs() to
anv_pipeline_lower_nir().

Update Pipeline Statistics accordingly.

Reviewed-by: Marcin Ślusarz <marcin.slusarz at intel.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11230>

---

 src/intel/vulkan/anv_pipeline.c | 76 +++++++++++++++++++++--------------------
 1 file changed, 39 insertions(+), 37 deletions(-)
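
For reference, the size/align rule that shared_type_info() encodes can be
restated on its own: boolean components occupy 4 bytes regardless of bit
size, and a 3-component vector is aligned as if it had 4 components (the
usual vec3-as-vec4 rule). The sketch below is illustrative only;
vector_size_align() is a hypothetical stand-in that takes plain integers
instead of a real glsl_type:

#include <assert.h>
#include <stdint.h>

/* Hypothetical stand-in for shared_type_info(): same arithmetic,
 * plain-integer inputs instead of a glsl_type. */
static void
vector_size_align(unsigned bit_size, int is_bool, unsigned components,
                  unsigned *size, unsigned *align)
{
   uint32_t comp_size = is_bool ? 4 : bit_size / 8;
   *size = comp_size * components;
   /* A 3-component vector is aligned like a 4-component one. */
   *align = comp_size * (components == 3 ? 4 : components);
}

int
main(void)
{
   unsigned size, align;

   vector_size_align(32, 0, 3, &size, &align);   /* vec3 of floats */
   assert(size == 12 && align == 16);

   vector_size_align(32, 1, 2, &size, &align);   /* bvec2 */
   assert(size == 8 && align == 8);

   return 0;
}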

diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c
index 3eb75f725e2..c7b3a721065 100644
--- a/src/intel/vulkan/anv_pipeline.c
+++ b/src/intel/vulkan/anv_pipeline.c
@@ -734,6 +734,18 @@ anv_pipeline_stage_get_nir(struct anv_pipeline *pipeline,
    return NULL;
 }
 
+static void
+shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align)
+{
+   assert(glsl_type_is_vector_or_scalar(type));
+
+   uint32_t comp_size = glsl_type_is_boolean(type)
+      ? 4 : glsl_get_bit_size(type) / 8;
+   unsigned length = glsl_get_vector_elements(type);
+   *size = comp_size * length;
+   *align = comp_size * (length == 3 ? 4 : length);
+}
+
 static void
 anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
                        void *mem_ctx,
@@ -813,6 +825,31 @@ anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
    anv_nir_compute_push_layout(pdevice, pipeline->device->robust_buffer_access,
                                nir, prog_data, &stage->bind_map, mem_ctx);
 
+   if (gl_shader_stage_uses_workgroup(nir->info.stage)) {
+      if (!nir->info.shared_memory_explicit_layout) {
+         NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
+                    nir_var_mem_shared, shared_type_info);
+      }
+
+      NIR_PASS_V(nir, nir_lower_explicit_io,
+                 nir_var_mem_shared, nir_address_format_32bit_offset);
+
+      if (nir->info.zero_initialize_shared_memory &&
+          nir->info.shared_size > 0) {
+         /* The effective Shared Local Memory size is at least 1024 bytes and
+          * is always rounded to a power of two, so it is OK to align the size
+          * used by the shader to chunk_size -- which does simplify the logic.
+          */
+         const unsigned chunk_size = 16;
+         const unsigned shared_size = ALIGN(nir->info.shared_size, chunk_size);
+         assert(shared_size <=
+                intel_calculate_slm_size(compiler->devinfo->ver, nir->info.shared_size));
+
+         NIR_PASS_V(nir, nir_zero_initialize_shared_memory,
+                    shared_size, chunk_size);
+      }
+   }
+
    stage->nir = nir;
 }
 
@@ -1701,18 +1738,6 @@ fail:
    return result;
 }
 
-static void
-shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align)
-{
-   assert(glsl_type_is_vector_or_scalar(type));
-
-   uint32_t comp_size = glsl_type_is_boolean(type)
-      ? 4 : glsl_get_bit_size(type) / 8;
-   unsigned length = glsl_get_vector_elements(type);
-   *size = comp_size * length;
-   *align = comp_size * (length == 3 ? 4 : length);
-}
-
 VkResult
 anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline,
                         struct anv_pipeline_cache *cache,
@@ -1800,29 +1825,6 @@ anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline,
 
       anv_pipeline_lower_nir(&pipeline->base, mem_ctx, &stage, layout);
 
-      if (!stage.nir->info.shared_memory_explicit_layout) {
-         NIR_PASS_V(stage.nir, nir_lower_vars_to_explicit_types,
-                    nir_var_mem_shared, shared_type_info);
-      }
-
-      NIR_PASS_V(stage.nir, nir_lower_explicit_io,
-                 nir_var_mem_shared, nir_address_format_32bit_offset);
-
-      if (stage.nir->info.zero_initialize_shared_memory &&
-          stage.nir->info.shared_size > 0) {
-         /* The effective Shared Local Memory size is at least 1024 bytes and
-          * is always rounded to a power of two, so it is OK to align the size
-          * used by the shader to chunk_size -- which does simplify the logic.
-          */
-         const unsigned chunk_size = 16;
-         const unsigned shared_size = ALIGN(stage.nir->info.shared_size, chunk_size);
-         assert(shared_size <=
-                intel_calculate_slm_size(compiler->devinfo->ver, stage.nir->info.shared_size));
-
-         NIR_PASS_V(stage.nir, nir_zero_initialize_shared_memory,
-                    shared_size, chunk_size);
-      }
-
       NIR_PASS_V(stage.nir, brw_nir_lower_cs_intrinsics);
 
       stage.num_stats = 1;
@@ -2553,12 +2555,12 @@ VkResult anv_GetPipelineExecutableStatisticsKHR(
       stat->value.u64 = prog_data->total_scratch;
    }
 
-   if (exe->stage == MESA_SHADER_COMPUTE) {
+   if (gl_shader_stage_uses_workgroup(exe->stage)) {
       vk_outarray_append(&out, stat) {
          WRITE_STR(stat->name, "Workgroup Memory Size");
          WRITE_STR(stat->description,
                    "Number of bytes of workgroup shared memory used by this "
-                   "compute shader including any padding.");
+                   "shader including any padding.");
          stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
          stat->value.u64 = prog_data->total_shared;
       }
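
A note on the rounding assumption in the new zero-initialization block:
the shader's shared size is aligned up to the 16-byte chunks that
nir_zero_initialize_shared_memory clears in, and the assert relies on the
actual SLM allocation (at least 1024 bytes and rounded to a power of two,
as the code comment says) always covering that aligned size. The sketch
below models the relationship; slm_size() is a simplified hypothetical
stand-in for intel_calculate_slm_size(), which in reality also depends on
the hardware generation:

#include <assert.h>

#define ALIGN(v, a) (((v) + (a) - 1) & ~((a) - 1))

/* Hypothetical model of the SLM rounding the code comment describes:
 * at least 1024 bytes, always a power of two. */
static unsigned
slm_size(unsigned bytes)
{
   unsigned size = 1024;
   while (size < bytes)
      size *= 2;
   return size;
}

int
main(void)
{
   const unsigned chunk_size = 16;

   /* For any shared size, rounding up to the chunk size stays within
    * the power-of-two SLM allocation, so the assert in the driver code
    * can never fire. */
   for (unsigned shared = 1; shared <= 64 * 1024; shared++)
      assert(ALIGN(shared, chunk_size) <= slm_size(shared));

   return 0;
}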



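Finally, on the guard itself: gl_shader_stage_uses_workgroup() (from
Mesa's shader_enums.h) replaces the old compute-only path, which is what
lets the same lowering serve Task and Mesh shaders. A self-contained
restatement follows; the exact stage set is inferred from the commit
message plus OpenCL kernels, so treat it as an assumption:

/* Assumed stage set: compute shaders and OpenCL kernels, plus the Task
 * and Mesh stages this commit prepares for. */
enum stage {
   STAGE_VERTEX, STAGE_FRAGMENT,
   STAGE_COMPUTE, STAGE_KERNEL, STAGE_TASK, STAGE_MESH
};

static int
stage_uses_workgroup(enum stage s)
{
   return s == STAGE_COMPUTE || s == STAGE_KERNEL ||
          s == STAGE_TASK || s == STAGE_MESH;
}

int
main(void)
{
   /* Workgroup-memory lowering applies to Mesh but not Fragment. */
   return stage_uses_workgroup(STAGE_MESH) &&
          !stage_uses_workgroup(STAGE_FRAGMENT) ? 0 : 1;
}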