Mesa (main): nouveau/nvc0: disable GLSL IR loop unrolling

Sat Jun 4 16:47:33 UTC 2022

Module: Mesa
Branch: main
Commit: bc0f8455e5dd969bdcc01bb03060c3259330e100
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=bc0f8455e5dd969bdcc01bb03060c3259330e100

Author: Timothy Arceri <tarceri at itsqueeze.com>
Date:   Fri May  6 11:52:31 2022 +1000

nouveau/nvc0: disable GLSL IR loop unrolling

NIR loop unrolling is already enabled, so just let it do its job instead of also unrolling loops in GLSL IR.

Shader-db results (nv120):

total gpr in shared programs: 893490 -> 893898 (0.05%)
gpr in affected programs: 15338 -> 15746 (2.66%)
total instructions in shared programs: 6243205 -> 6237068 (-0.10%)
instructions in affected programs: 71160 -> 65023 (-8.62%)
total bytes in shared programs: 66729616 -> 66664760 (-0.10%)
bytes in affected programs: 759328 -> 694472 (-8.54%)

Reviewed-by: Emma Anholt <emma at anholt.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16366>

---

 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c |  3 +-
 src/nouveau/codegen/nv50_ir_from_nir.cpp       | 43 +++++++++++++++++++++-----
 2 files changed, 36 insertions(+), 10 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index 67036444b68..2a1dbf0a70a 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -552,6 +552,7 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen,
    case PIPE_SHADER_CAP_GLSL_16BIT_CONSTS:
    case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS:
    case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS:
+   case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
       return 0;
    case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:
       return NVC0_MAX_BUFFERS;
@@ -559,8 +560,6 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen,
       return (class_3d >= NVE4_3D_CLASS) ? 32 : 16;
    case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
       return (class_3d >= NVE4_3D_CLASS) ? 32 : 16;
-   case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
-      return 32;
    case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:
       if (class_3d >= NVE4_3D_CLASS)
          return NVC0_MAX_IMAGES;
diff --git a/src/nouveau/codegen/nv50_ir_from_nir.cpp b/src/nouveau/codegen/nv50_ir_from_nir.cpp
index 56d6a288176..d8ccdecf9c8 100644
--- a/src/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -3379,8 +3379,15 @@ nvir_nir_shader_compiler_options(int chipset, uint8_t shader_type)
    op.lower_rotate = (chipset < NVISA_GV100_CHIPSET);
    op.has_imul24 = false;
    op.intel_vec4 = false;
-   op.force_indirect_unrolling = (nir_variable_mode)
-      ((shader_type == PIPE_SHADER_FRAGMENT) ? nir_var_shader_out : 0);
+   op.force_indirect_unrolling = (nir_variable_mode) (
+      ((shader_type == PIPE_SHADER_FRAGMENT) ? nir_var_shader_out : 0) |
+      /* HW doesn't support indirect addressing of fragment program inputs
+       * on Volta.  The binary driver generates a function to handle every
+       * possible indirection, and indirectly calls the function to handle
+       * this instead.
+       */
+      ((chipset >= NVISA_GV100_CHIPSET && shader_type == PIPE_SHADER_FRAGMENT) ? nir_var_shader_in : 0)
+   );
    op.force_indirect_unrolling_sampler = (chipset < NVISA_GF100_CHIPSET),
    op.max_unroll_iterations = 32;
    op.lower_int64_options = (nir_lower_int64_options) (
@@ -3417,20 +3424,40 @@ static const nir_shader_compiler_options g80_fs_nir_shader_compiler_options =
 nvir_nir_shader_compiler_options(NVISA_G80_CHIPSET, PIPE_SHADER_FRAGMENT);
 static const nir_shader_compiler_options gf100_nir_shader_compiler_options =
 nvir_nir_shader_compiler_options(NVISA_GF100_CHIPSET, PIPE_SHADER_TYPES);
+static const nir_shader_compiler_options gf100_fs_nir_shader_compiler_options =
+nvir_nir_shader_compiler_options(NVISA_GF100_CHIPSET, PIPE_SHADER_FRAGMENT);
 static const nir_shader_compiler_options gm107_nir_shader_compiler_options =
 nvir_nir_shader_compiler_options(NVISA_GM107_CHIPSET, PIPE_SHADER_TYPES);
+static const nir_shader_compiler_options gm107_fs_nir_shader_compiler_options =
+nvir_nir_shader_compiler_options(NVISA_GM107_CHIPSET, PIPE_SHADER_FRAGMENT);
 static const nir_shader_compiler_options gv100_nir_shader_compiler_options =
 nvir_nir_shader_compiler_options(NVISA_GV100_CHIPSET, PIPE_SHADER_TYPES);
+static const nir_shader_compiler_options gv100_fs_nir_shader_compiler_options =
+nvir_nir_shader_compiler_options(NVISA_GV100_CHIPSET, PIPE_SHADER_FRAGMENT);
 
 const nir_shader_compiler_options *
 nv50_ir_nir_shader_compiler_options(int chipset,  uint8_t shader_type)
 {
-   if (chipset >= NVISA_GV100_CHIPSET)
-      return &gv100_nir_shader_compiler_options;
-   if (chipset >= NVISA_GM107_CHIPSET)
-      return &gm107_nir_shader_compiler_options;
-   if (chipset >= NVISA_GF100_CHIPSET)
-      return &gf100_nir_shader_compiler_options;
+   if (chipset >= NVISA_GV100_CHIPSET) {
+      if (shader_type == PIPE_SHADER_FRAGMENT)
+         return &gv100_fs_nir_shader_compiler_options;
+      else
+         return &gv100_nir_shader_compiler_options;
+   }
+
+   if (chipset >= NVISA_GM107_CHIPSET) {
+      if (shader_type == PIPE_SHADER_FRAGMENT)
+         return &gm107_fs_nir_shader_compiler_options;
+      else
+         return &gm107_nir_shader_compiler_options;
+   }
+
+   if (chipset >= NVISA_GF100_CHIPSET) {
+      if (shader_type == PIPE_SHADER_FRAGMENT)
+         return &gf100_fs_nir_shader_compiler_options;
+      else
+         return &gf100_nir_shader_compiler_options;
+   }
 
    if (shader_type == PIPE_SHADER_FRAGMENT)
       return &g80_fs_nir_shader_compiler_options;