Mesa (main): radv: Remove superfluous workgroup size calculations.

Thu Aug 26 10:11:32 UTC 2021

Module: Mesa
Branch: main
Commit: c4ca08548b0647d448a221b9c8c93a5bb3da990e
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=c4ca08548b0647d448a221b9c8c93a5bb3da990e

Author: Timur Kristóf <timur.kristof at gmail.com>
Date:   Wed Aug 11 08:53:55 2021 +0200

radv: Remove superfluous workgroup size calculations.

Signed-off-by: Timur Kristóf <timur.kristof at gmail.com>
Reviewed-by: Daniel Schürmann <daniel at schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12321>

---

 src/amd/vulkan/radv_nir_to_llvm.c | 18 +-----------------
 src/amd/vulkan/radv_private.h     |  3 ---
 src/amd/vulkan/radv_shader.c      | 40 ++++-----------------------------------
 src/amd/vulkan/radv_shader.h      |  3 ---
 4 files changed, 5 insertions(+), 59 deletions(-)

diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c
index 04a8753bb03..08506d980c9 100644
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -2905,17 +2905,6 @@ ac_setup_rings(struct radv_shader_context *ctx)
    }
 }
 
-unsigned
-radv_nir_get_max_workgroup_size(enum chip_class chip_class, gl_shader_stage stage,
-                                const struct nir_shader *nir)
-{
-   const unsigned backup_sizes[] = {chip_class >= GFX9 ? 128 : 64, 1, 1};
-   unsigned sizes[3];
-   for (unsigned i = 0; i < 3; i++)
-      sizes[i] = nir ? nir->info.workgroup_size[i] : backup_sizes[i];
-   return radv_get_max_workgroup_size(chip_class, stage, sizes);
-}
-
 /* Fixup the HW not emitting the TCS regs if there are no HS threads. */
 static void
 ac_nir_fixup_ls_hs_input_vgprs(struct radv_shader_context *ctx)
@@ -2989,12 +2978,7 @@ ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm, struct nir_shader *co
                         args->shader_info->ballot_bit_size);
    ctx.context = ctx.ac.context;
 
-   ctx.max_workgroup_size = 0;
-   for (int i = 0; i < shader_count; ++i) {
-      ctx.max_workgroup_size = MAX2(
-         ctx.max_workgroup_size, radv_nir_get_max_workgroup_size(
-                                    args->options->chip_class, shaders[i]->info.stage, shaders[i]));
-   }
+   ctx.max_workgroup_size = args->shader_info->workgroup_size;
 
    if (ctx.ac.chip_class >= GFX10) {
       if (is_pre_gs_stage(shaders[0]->info.stage) && args->options->key.vs_common_out.as_ngg) {
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 1815cac0a99..adeea405b0d 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -2559,9 +2559,6 @@ void llvm_compile_shader(struct radv_device *device, unsigned shader_count,
                          struct nir_shader *const *shaders, struct radv_shader_binary **binary,
                          struct radv_shader_args *args);
 
-unsigned radv_nir_get_max_workgroup_size(enum chip_class chip_class, gl_shader_stage stage,
-                                         const struct nir_shader *nir);
-
 /* radv_shader_info.h */
 struct radv_shader_info;
 struct radv_shader_variant_key;
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index f12726e4c81..4931ab0f08f 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -925,7 +925,6 @@ void radv_lower_ngg(struct radv_device *device, struct nir_shader *nir,
 
    ac_nir_ngg_config out_conf = {0};
    const struct gfx10_ngg_info *ngg_info = &info->ngg_info;
-   unsigned num_gs_invocations = (nir->info.stage != MESA_SHADER_GEOMETRY || ngg_info->max_vert_out_per_gs_instance) ? 1 : info->gs.invocations;
    unsigned num_vertices_per_prim = 3;
 
    /* Get the number of vertices per input primitive */
@@ -955,17 +954,6 @@ void radv_lower_ngg(struct radv_device *device, struct nir_shader *nir,
 
    /* Invocations that process an input vertex */
    unsigned max_vtx_in = MIN2(256, ngg_info->enable_vertex_grouping ? ngg_info->hw_max_esverts : num_vertices_per_prim * ngg_info->max_gsprims);
-   /* Invocations that export an output vertex */
-   unsigned max_vtx_out = ngg_info->max_out_verts;
-   /* Invocations that process an input primitive */
-   unsigned max_prm_in = ngg_info->max_gsprims * num_gs_invocations;
-   /* Invocations that produce an output primitive */
-   unsigned max_prm_out = ngg_info->max_gsprims * num_gs_invocations * ngg_info->prim_amp_factor;
-
-   unsigned max_workgroup_size = MAX4(max_vtx_in, max_vtx_out, max_prm_in, max_prm_out);
-
-   /* Maximum HW limit for NGG workgroups */
-   max_workgroup_size = MIN2(256, max_workgroup_size);
 
    if (nir->info.stage == MESA_SHADER_VERTEX ||
        nir->info.stage == MESA_SHADER_TESS_EVAL) {
@@ -979,7 +967,7 @@ void radv_lower_ngg(struct radv_device *device, struct nir_shader *nir,
             nir,
             max_vtx_in,
             num_vertices_per_prim,
-            max_workgroup_size,
+            info->workgroup_size,
             info->wave_size,
             consider_culling,
             key->vs_common_out.as_ngg_passthrough,
@@ -994,7 +982,7 @@ void radv_lower_ngg(struct radv_device *device, struct nir_shader *nir,
    } else if (nir->info.stage == MESA_SHADER_GEOMETRY) {
       assert(info->is_ngg);
       ac_nir_lower_ngg_gs(
-         nir, info->wave_size, max_workgroup_size,
+         nir, info->wave_size, info->workgroup_size,
          info->ngg_info.esgs_ring_size,
          info->gs.gsvs_vertex_size,
          info->ngg_info.ngg_emit_size * 4u,
@@ -1747,25 +1735,6 @@ radv_get_shader_name(struct radv_shader_info *info, gl_shader_stage stage)
    };
 }
 
-unsigned
-radv_get_max_workgroup_size(enum chip_class chip_class, gl_shader_stage stage,
-                            const unsigned *sizes)
-{
-   switch (stage) {
-   case MESA_SHADER_TESS_CTRL:
-      return chip_class >= GFX7 ? 128 : 64;
-   case MESA_SHADER_GEOMETRY:
-      return chip_class >= GFX9 ? 128 : 64;
-   case MESA_SHADER_COMPUTE:
-      break;
-   default:
-      return 0;
-   }
-
-   unsigned max_workgroup_size = sizes[0] * sizes[1] * sizes[2];
-   return max_workgroup_size;
-}
-
 unsigned
 radv_get_max_waves(struct radv_device *device, struct radv_shader_variant *variant,
                    gl_shader_stage stage)
@@ -1784,8 +1753,7 @@ radv_get_max_waves(struct radv_device *device, struct radv_shader_variant *varia
          conf->lds_size * info->lds_encode_granularity + variant->info.ps.num_interp * 48;
       lds_per_wave = align(lds_per_wave, info->lds_alloc_granularity);
    } else if (stage == MESA_SHADER_COMPUTE) {
-      unsigned max_workgroup_size =
-         radv_get_max_workgroup_size(chip_class, stage, variant->info.cs.block_size);
+      unsigned max_workgroup_size = variant->info.workgroup_size;
       lds_per_wave =
          align(conf->lds_size * info->lds_encode_granularity, info->lds_alloc_granularity);
       lds_per_wave /= DIV_ROUND_UP(max_workgroup_size, wave_size);
@@ -1848,7 +1816,7 @@ radv_GetShaderInfoAMD(VkDevice _device, VkPipeline _pipeline, VkShaderStageFlagB
 
          if (stage == MESA_SHADER_COMPUTE) {
             unsigned *local_size = variant->info.cs.block_size;
-            unsigned workgroup_size = local_size[0] * local_size[1] * local_size[2];
+            unsigned workgroup_size = pipeline->shaders[MESA_SHADER_COMPUTE]->info.workgroup_size;
 
             statistics.numAvailableVgprs =
                statistics.numPhysicalVgprs /
diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h
index 10747f57fa5..3a022d8140e 100644
--- a/src/amd/vulkan/radv_shader.h
+++ b/src/amd/vulkan/radv_shader.h
@@ -473,9 +473,6 @@ void radv_shader_variant_destroy(struct radv_device *device, struct radv_shader_
 unsigned radv_get_max_waves(struct radv_device *device, struct radv_shader_variant *variant,
                             gl_shader_stage stage);
 
-unsigned radv_get_max_workgroup_size(enum chip_class chip_class, gl_shader_stage stage,
-                                     const unsigned *sizes);
-
 const char *radv_get_shader_name(struct radv_shader_info *info, gl_shader_stage stage);
 
 bool radv_can_dump_shader(struct radv_device *device, struct vk_shader_module *module,