Mesa (main): nir: add indirect loop unrolling to compiler options

Tue Aug 3 11:25:30 UTC 2021

Module: Mesa
Branch: main
Commit: a9ed4538abf1247b82adce5cfff9f2bde0b76a05
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=a9ed4538abf1247b82adce5cfff9f2bde0b76a05

Author: Timothy Arceri <tarceri at itsqueeze.com>
Date:   Thu Jul 29 19:34:26 2021 +1000

nir: add indirect loop unrolling to compiler options

This is where it should be rather than having to pass it into the
optimisation pass every time.

It also allows us to call the loop analysis pass without having to
duplicate these options which we will do later in this series.

Reviewed-by: Ian Romanick <ian.d.romanick at intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12064>

---

 src/amd/vulkan/radv_shader.c                       |  2 +-
 src/asahi/compiler/agx_compile.c                   |  5 +-
 src/asahi/compiler/agx_compile.h                   |  2 +
 src/broadcom/compiler/nir_to_vir.c                 |  4 +-
 src/broadcom/vulkan/v3dv_pipeline.c                |  1 +
 src/compiler/nir/nir.h                             |  8 +++-
 src/compiler/nir/nir_opt_loop_unroll.c             |  3 +-
 src/freedreno/ir3/ir3_nir.c                        |  3 +-
 src/gallium/auxiliary/nir/nir_to_tgsi.c            |  9 ++--
 src/gallium/auxiliary/nir/tgsi_to_nir.c            |  2 +-
 src/gallium/drivers/etnaviv/etnaviv_compiler_nir.c |  2 +-
 src/gallium/drivers/etnaviv/etnaviv_screen.c       |  1 +
 src/gallium/drivers/freedreno/a2xx/ir2_nir.c       |  3 +-
 src/gallium/drivers/lima/lima_program.c            | 12 ++---
 src/gallium/drivers/radeonsi/si_shader_nir.c       |  2 +-
 src/gallium/drivers/vc4/vc4_program.c              |  6 +--
 src/intel/compiler/brw_compiler.c                  |  6 ++-
 src/intel/compiler/brw_nir.c                       | 55 +---------------------
 src/intel/compiler/brw_shader.h                    | 44 +++++++++++++++++
 src/mesa/state_tracker/st_glsl_to_nir.cpp          |  2 +-
 src/panfrost/bifrost/bifrost_compile.c             |  5 +-
 src/panfrost/bifrost/bifrost_compile.h             |  1 +
 src/panfrost/midgard/midgard_compile.c             |  5 +-
 src/panfrost/midgard/midgard_compile.h             |  1 +
 24 files changed, 90 insertions(+), 94 deletions(-)

diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index c9f402870d8..82dade3dee4 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -181,7 +181,7 @@ radv_optimize_nir(const struct radv_device *device, struct nir_shader *shader,
       NIR_PASS(progress, shader, nir_opt_shrink_vectors,
                !device->instance->disable_shrink_image_store);
       if (shader->options->max_unroll_iterations) {
-         NIR_PASS(progress, shader, nir_opt_loop_unroll, 0);
+         NIR_PASS(progress, shader, nir_opt_loop_unroll);
       }
    } while (progress && !optimize_conservatively);
 
diff --git a/src/asahi/compiler/agx_compile.c b/src/asahi/compiler/agx_compile.c
index e0458ba4c1c..1066ed4e490 100644
--- a/src/asahi/compiler/agx_compile.c
+++ b/src/asahi/compiler/agx_compile.c
@@ -1235,10 +1235,7 @@ agx_optimize_nir(nir_shader *nir)
       NIR_PASS(progress, nir, nir_opt_undef);
       NIR_PASS(progress, nir, nir_lower_undef_to_zero);
 
-      NIR_PASS(progress, nir, nir_opt_loop_unroll,
-               nir_var_shader_in |
-               nir_var_shader_out |
-               nir_var_function_temp);
+      NIR_PASS(progress, nir, nir_opt_loop_unroll);
    } while (progress);
 
    NIR_PASS_V(nir, nir_opt_algebraic_late);
diff --git a/src/asahi/compiler/agx_compile.h b/src/asahi/compiler/agx_compile.h
index 6e8b14ad795..b874ee1b6a1 100644
--- a/src/asahi/compiler/agx_compile.h
+++ b/src/asahi/compiler/agx_compile.h
@@ -213,6 +213,8 @@ static const nir_shader_compiler_options agx_nir_options = {
    .lower_doubles_options = nir_lower_dmod,
    .lower_int64_options = ~(nir_lower_iadd64 | nir_lower_imul_2x32_64),
 
+   .force_indirect_unrolling = (nir_var_shader_in | nir_var_shader_out | nir_var_function_temp),
+
    .has_fsub = true,
    .has_isub = true,
    .has_cs_global_id = true,
diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c
index 190d7c3a2e9..b0cc4977c0b 100644
--- a/src/broadcom/compiler/nir_to_vir.c
+++ b/src/broadcom/compiler/nir_to_vir.c
@@ -1877,9 +1877,7 @@ v3d_optimize_nir(struct v3d_compile *c, struct nir_shader *s)
                 if (c && !c->disable_loop_unrolling &&
                     s->options->max_unroll_iterations > 0) {
                        bool local_progress = false;
-                       NIR_PASS(local_progress, s, nir_opt_loop_unroll,
-                                nir_var_shader_in |
-                                nir_var_function_temp);
+                       NIR_PASS(local_progress, s, nir_opt_loop_unroll);
                        c->unrolled_any_loops |= local_progress;
                        progress |= local_progress;
                 }
diff --git a/src/broadcom/vulkan/v3dv_pipeline.c b/src/broadcom/vulkan/v3dv_pipeline.c
index 4447e4d0d84..4e692bbf389 100644
--- a/src/broadcom/vulkan/v3dv_pipeline.c
+++ b/src/broadcom/vulkan/v3dv_pipeline.c
@@ -248,6 +248,7 @@ const nir_shader_compiler_options v3dv_nir_options = {
                                    * needs to be supported */
    .lower_interpolate_at = true,
    .max_unroll_iterations = 16,
+   .force_indirect_unrolling = (nir_var_shader_in | nir_var_function_temp),
    .divergence_analysis_options =
       nir_divergence_multiple_workgroup_per_compute_subgroup
 };
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 63bdc6fcf38..76ca1f94a9d 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -3690,6 +3690,12 @@ typedef struct nir_shader_compiler_options {
     * vectorized IO can pack more varyings when linking. */
    bool linker_ignore_precision;
 
+   /**
+    * Specifies which type of indirectly accessed variables should force
+    * loop unrolling.
+    */
+   nir_variable_mode force_indirect_unrolling;
+
    nir_lower_int64_options lower_int64_options;
    nir_lower_doubles_options lower_doubles_options;
    nir_divergence_options divergence_analysis_options;
@@ -5436,7 +5442,7 @@ bool nir_opt_large_constants(nir_shader *shader,
                              glsl_type_size_align_func size_align,
                              unsigned threshold);
 
-bool nir_opt_loop_unroll(nir_shader *shader, nir_variable_mode indirect_mask);
+bool nir_opt_loop_unroll(nir_shader *shader);
 
 typedef enum {
     nir_move_const_undef = (1 << 0),
diff --git a/src/compiler/nir/nir_opt_loop_unroll.c b/src/compiler/nir/nir_opt_loop_unroll.c
index c4c76742d79..3dec5e15e56 100644
--- a/src/compiler/nir/nir_opt_loop_unroll.c
+++ b/src/compiler/nir/nir_opt_loop_unroll.c
@@ -1058,10 +1058,11 @@ nir_opt_loop_unroll_impl(nir_function_impl *impl,
  * should force loop unrolling.
  */
 bool
-nir_opt_loop_unroll(nir_shader *shader, nir_variable_mode indirect_mask)
+nir_opt_loop_unroll(nir_shader *shader)
 {
    bool progress = false;
 
+   nir_variable_mode indirect_mask = shader->options->force_indirect_unrolling;
    nir_foreach_function(function, shader) {
       if (function->impl) {
          progress |= nir_opt_loop_unroll_impl(function->impl, indirect_mask);
diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c
index 2b651676a6b..c0c9a7b90b8 100644
--- a/src/freedreno/ir3/ir3_nir.c
+++ b/src/freedreno/ir3/ir3_nir.c
@@ -132,6 +132,7 @@ static const nir_shader_compiler_options options_a6xx = {
    .has_fsub = true,
    .has_isub = true,
    .max_unroll_iterations = 32,
+   .force_indirect_unrolling = nir_var_all,
    .lower_wpos_pntc = true,
    .lower_cs_local_index_from_id = true,
 
@@ -269,7 +270,7 @@ ir3_optimize_loop(struct ir3_compiler *compiler, nir_shader *s)
          OPT(s, nir_opt_dce);
       }
       progress |= OPT(s, nir_opt_if, false);
-      progress |= OPT(s, nir_opt_loop_unroll, nir_var_all);
+      progress |= OPT(s, nir_opt_loop_unroll);
       progress |= OPT(s, nir_opt_remove_phis);
       progress |= OPT(s, nir_opt_undef);
    } while (progress);
diff --git a/src/gallium/auxiliary/nir/nir_to_tgsi.c b/src/gallium/auxiliary/nir/nir_to_tgsi.c
index c8b0b9ca982..135793dbde7 100644
--- a/src/gallium/auxiliary/nir/nir_to_tgsi.c
+++ b/src/gallium/auxiliary/nir/nir_to_tgsi.c
@@ -2458,7 +2458,6 @@ static void
 ntt_optimize_nir(struct nir_shader *s, struct pipe_screen *screen)
 {
    bool progress;
-   nir_variable_mode no_indirects_mask = ntt_no_indirects_mask(s, screen);
    unsigned pipe_stage = pipe_shader_type_from_mesa(s->info.stage);
    unsigned control_flow_depth =
       screen->get_shader_param(screen, pipe_stage,
@@ -2492,7 +2491,7 @@ ntt_optimize_nir(struct nir_shader *s, struct pipe_screen *screen)
       NIR_PASS(progress, s, nir_opt_trivial_continues);
       NIR_PASS(progress, s, nir_opt_vectorize, ntt_should_vectorize_instr, NULL);
       NIR_PASS(progress, s, nir_opt_undef);
-      NIR_PASS(progress, s, nir_opt_loop_unroll, no_indirects_mask);
+      NIR_PASS(progress, s, nir_opt_loop_unroll);
 
    } while (progress);
 }
@@ -2802,6 +2801,8 @@ ntt_fix_nir_options(struct pipe_screen *screen, struct nir_shader *s)
       !screen->get_shader_param(screen, pipe_shader_type_from_mesa(s->info.stage),
                                 PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED);
 
+   nir_variable_mode no_indirects_mask = ntt_no_indirects_mask(s, screen);
+
    if (!options->lower_extract_byte ||
        !options->lower_extract_word ||
        !options->lower_insert_byte ||
@@ -2812,7 +2813,8 @@ ntt_fix_nir_options(struct pipe_screen *screen, struct nir_shader *s)
        !options->lower_rotate ||
        !options->lower_uniforms_to_ubo ||
        !options->lower_vector_cmp ||
-       options->lower_fsqrt != lower_fsqrt) {
+       options->lower_fsqrt != lower_fsqrt ||
+       options->force_indirect_unrolling != no_indirects_mask) {
       nir_shader_compiler_options *new_options = ralloc(s, nir_shader_compiler_options);
       *new_options = *s->options;
 
@@ -2827,6 +2829,7 @@ ntt_fix_nir_options(struct pipe_screen *screen, struct nir_shader *s)
       new_options->lower_uniforms_to_ubo = true,
       new_options->lower_vector_cmp = true;
       new_options->lower_fsqrt = lower_fsqrt;
+      new_options->force_indirect_unrolling = no_indirects_mask;
 
       s->options = new_options;
    }
diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c
index cf9a49bf329..29f110e9f5e 100644
--- a/src/gallium/auxiliary/nir/tgsi_to_nir.c
+++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c
@@ -2444,7 +2444,7 @@ ttn_optimize_nir(nir_shader *nir)
       NIR_PASS(progress, nir, nir_opt_conditional_discard);
 
       if (nir->options->max_unroll_iterations) {
-         NIR_PASS(progress, nir, nir_opt_loop_unroll, (nir_variable_mode)0);
+         NIR_PASS(progress, nir, nir_opt_loop_unroll);
       }
 
    } while (progress);
diff --git a/src/gallium/drivers/etnaviv/etnaviv_compiler_nir.c b/src/gallium/drivers/etnaviv/etnaviv_compiler_nir.c
index 6d28fee391e..b1ca2e0ddb4 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_compiler_nir.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_compiler_nir.c
@@ -164,7 +164,7 @@ etna_optimize_loop(nir_shader *s)
          OPT(s, nir_copy_prop);
          OPT(s, nir_opt_dce);
       }
-      progress |= OPT(s, nir_opt_loop_unroll, nir_var_all);
+      progress |= OPT(s, nir_opt_loop_unroll);
       progress |= OPT(s, nir_opt_if, false);
       progress |= OPT(s, nir_opt_remove_phis);
       progress |= OPT(s, nir_opt_undef);
diff --git a/src/gallium/drivers/etnaviv/etnaviv_screen.c b/src/gallium/drivers/etnaviv/etnaviv_screen.c
index 82a0e152bfc..45fede5ca00 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_screen.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_screen.c
@@ -1076,6 +1076,7 @@ etna_screen_create(struct etna_device *dev, struct etna_gpu *gpu,
       .lower_fsqrt = !screen->specs.has_sin_cos_sqrt,
       .lower_sincos = !screen->specs.has_sin_cos_sqrt,
       .lower_uniforms_to_ubo = screen->specs.halti >= 2,
+      .force_indirect_unrolling = nir_var_all,
    };
 
    /* apply debug options that disable individual features */
diff --git a/src/gallium/drivers/freedreno/a2xx/ir2_nir.c b/src/gallium/drivers/freedreno/a2xx/ir2_nir.c
index 46c7ad7ff0f..74331275300 100644
--- a/src/gallium/drivers/freedreno/a2xx/ir2_nir.c
+++ b/src/gallium/drivers/freedreno/a2xx/ir2_nir.c
@@ -49,6 +49,7 @@ static const nir_shader_compiler_options options = {
    .has_isub = true,
    .lower_insert_byte = true,
    .lower_insert_word = true,
+   .force_indirect_unrolling = nir_var_all,
 };
 
 const nir_shader_compiler_options *
@@ -92,7 +93,7 @@ ir2_optimize_loop(nir_shader *s)
          OPT(s, nir_copy_prop);
          OPT(s, nir_opt_dce);
       }
-      progress |= OPT(s, nir_opt_loop_unroll, nir_var_all);
+      progress |= OPT(s, nir_opt_loop_unroll);
       progress |= OPT(s, nir_opt_if, false);
       progress |= OPT(s, nir_opt_remove_phis);
       progress |= OPT(s, nir_opt_undef);
diff --git a/src/gallium/drivers/lima/lima_program.c b/src/gallium/drivers/lima/lima_program.c
index 562586b851e..e43fa9e629e 100644
--- a/src/gallium/drivers/lima/lima_program.c
+++ b/src/gallium/drivers/lima/lima_program.c
@@ -61,6 +61,7 @@ static const nir_shader_compiler_options vs_nir_options = {
    .lower_fceil = true,
    .lower_insert_byte = true,
    .lower_insert_word = true,
+   .force_indirect_unrolling = (nir_var_shader_in | nir_var_shader_out | nir_var_function_temp),
 };
 
 static const nir_shader_compiler_options fs_nir_options = {
@@ -80,6 +81,7 @@ static const nir_shader_compiler_options fs_nir_options = {
    .lower_insert_word = true,
    .lower_bitops = true,
    .lower_vector_cmp = true,
+   .force_indirect_unrolling = (nir_var_shader_in | nir_var_shader_out | nir_var_function_temp),
 };
 
 const void *
@@ -131,10 +133,7 @@ lima_program_optimize_vs_nir(struct nir_shader *s)
       NIR_PASS(progress, s, lima_nir_lower_ftrunc);
       NIR_PASS(progress, s, nir_opt_constant_folding);
       NIR_PASS(progress, s, nir_opt_undef);
-      NIR_PASS(progress, s, nir_opt_loop_unroll,
-               nir_var_shader_in |
-               nir_var_shader_out |
-               nir_var_function_temp);
+      NIR_PASS(progress, s, nir_opt_loop_unroll);
    } while (progress);
 
    NIR_PASS_V(s, nir_lower_int_to_float);
@@ -236,10 +235,7 @@ lima_program_optimize_fs_nir(struct nir_shader *s,
       NIR_PASS(progress, s, nir_opt_algebraic);
       NIR_PASS(progress, s, nir_opt_constant_folding);
       NIR_PASS(progress, s, nir_opt_undef);
-      NIR_PASS(progress, s, nir_opt_loop_unroll,
-               nir_var_shader_in |
-               nir_var_shader_out |
-               nir_var_function_temp);
+      NIR_PASS(progress, s, nir_opt_loop_unroll);
       NIR_PASS(progress, s, lima_nir_split_load_input);
    } while (progress);
 
diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c b/src/gallium/drivers/radeonsi/si_shader_nir.c
index 5573cc7065c..044c1ffeb99 100644
--- a/src/gallium/drivers/radeonsi/si_shader_nir.c
+++ b/src/gallium/drivers/radeonsi/si_shader_nir.c
@@ -596,7 +596,7 @@ void si_nir_opts(struct si_screen *sscreen, struct nir_shader *nir, bool first)
       NIR_PASS(progress, nir, nir_opt_undef);
       NIR_PASS(progress, nir, nir_opt_conditional_discard);
       if (nir->options->max_unroll_iterations) {
-         NIR_PASS(progress, nir, nir_opt_loop_unroll, 0);
+         NIR_PASS(progress, nir, nir_opt_loop_unroll);
       }
 
       if (nir->info.stage == MESA_SHADER_FRAGMENT)
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index 0c4ff754d4d..782127f5c33 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -1548,10 +1548,7 @@ vc4_optimize_nir(struct nir_shader *s)
                 }
 
                 NIR_PASS(progress, s, nir_opt_undef);
-                NIR_PASS(progress, s, nir_opt_loop_unroll,
-                         nir_var_shader_in |
-                         nir_var_shader_out |
-                         nir_var_function_temp);
+                NIR_PASS(progress, s, nir_opt_loop_unroll);
         } while (progress);
 }
 
@@ -2195,6 +2192,7 @@ static const nir_shader_compiler_options nir_options = {
         .has_fsub = true,
         .has_isub = true,
         .max_unroll_iterations = 32,
+        .force_indirect_unrolling = (nir_var_shader_in | nir_var_shader_out | nir_var_function_temp),
 };
 
 const void *
diff --git a/src/intel/compiler/brw_compiler.c b/src/intel/compiler/brw_compiler.c
index c1c4f869f9c..4336ff73a08 100644
--- a/src/intel/compiler/brw_compiler.c
+++ b/src/intel/compiler/brw_compiler.c
@@ -68,7 +68,8 @@
    .lower_usub_sat64 = true,                                                  \
    .lower_hadd64 = true,                                                      \
    .lower_bfe_with_two_constants = true,                                      \
-   .max_unroll_iterations = 32
+   .max_unroll_iterations = 32,                                               \
+   .force_indirect_unrolling = nir_var_function_temp
 
 static const struct nir_shader_compiler_options scalar_nir_options = {
    COMMON_OPTIONS,
@@ -196,6 +197,9 @@ brw_compiler_create(void *mem_ctx, const struct intel_device_info *devinfo)
 
       nir_options->unify_interfaces = i < MESA_SHADER_FRAGMENT;
 
+      nir_options->force_indirect_unrolling |=
+         brw_nir_no_indirect_mask(compiler, i);
+
       compiler->glsl_compiler_options[i].NirOptions = nir_options;
 
       compiler->glsl_compiler_options[i].ClampBlockIndicesToArrayBounds = true;
diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c
index dc09dcbd24c..a50a76e73aa 100644
--- a/src/intel/compiler/brw_nir.c
+++ b/src/intel/compiler/brw_nir.c
@@ -520,63 +520,10 @@ brw_nir_lower_fs_outputs(nir_shader *nir)
    this_progress;                                          \
 })
 
-static nir_variable_mode
-brw_nir_no_indirect_mask(const struct brw_compiler *compiler,
-                         gl_shader_stage stage)
-{
-   const struct intel_device_info *devinfo = compiler->devinfo;
-   const bool is_scalar = compiler->scalar_stage[stage];
-   nir_variable_mode indirect_mask = 0;
-
-   switch (stage) {
-   case MESA_SHADER_VERTEX:
-   case MESA_SHADER_FRAGMENT:
-      indirect_mask |= nir_var_shader_in;
-      break;
-
-   case MESA_SHADER_GEOMETRY:
-      if (!is_scalar)
-         indirect_mask |= nir_var_shader_in;
-      break;
-
-   default:
-      /* Everything else can handle indirect inputs */
-      break;
-   }
-
-   if (is_scalar && stage != MESA_SHADER_TESS_CTRL)
-      indirect_mask |= nir_var_shader_out;
-
-   /* On HSW+, we allow indirects in scalar shaders.  They get implemented
-    * using nir_lower_vars_to_explicit_types and nir_lower_explicit_io in
-    * brw_postprocess_nir.
-    *
-    * We haven't plumbed through the indirect scratch messages on gfx6 or
-    * earlier so doing indirects via scratch doesn't work there. On gfx7 and
-    * earlier the scratch space size is limited to 12kB.  If we allowed
-    * indirects as scratch all the time, we may easily exceed this limit
-    * without having any fallback.
-    */
-   if (is_scalar && devinfo->verx10 <= 70)
-      indirect_mask |= nir_var_function_temp;
-
-   return indirect_mask;
-}
-
 void
 brw_nir_optimize(nir_shader *nir, const struct brw_compiler *compiler,
                  bool is_scalar, bool allow_copies)
 {
-   nir_variable_mode loop_indirect_mask =
-      brw_nir_no_indirect_mask(compiler, nir->info.stage);
-
-   /* We can handle indirects via scratch messages.  However, they are
-    * expensive so we'd rather not if we can avoid it.  Have loop unrolling
-    * try to get rid of them.
-    */
-   if (is_scalar)
-      loop_indirect_mask |= nir_var_function_temp;
-
    bool progress;
    unsigned lower_flrp =
       (nir->options->lower_flrp16 ? 16 : 0) |
@@ -671,7 +618,7 @@ brw_nir_optimize(nir_shader *nir, const struct brw_compiler *compiler,
       OPT(nir_opt_if, false);
       OPT(nir_opt_conditional_discard);
       if (nir->options->max_unroll_iterations != 0) {
-         OPT(nir_opt_loop_unroll, loop_indirect_mask);
+         OPT(nir_opt_loop_unroll);
       }
       OPT(nir_opt_remove_phis);
       OPT(nir_opt_gcm, false);
diff --git a/src/intel/compiler/brw_shader.h b/src/intel/compiler/brw_shader.h
index 20e9281b7d0..8d0c9c6b164 100644
--- a/src/intel/compiler/brw_shader.h
+++ b/src/intel/compiler/brw_shader.h
@@ -127,6 +127,50 @@ brw_get_scratch_size(int size)
    return MAX2(1024, util_next_power_of_two(size));
 }
 
+
+static inline nir_variable_mode
+brw_nir_no_indirect_mask(const struct brw_compiler *compiler,
+                         gl_shader_stage stage)
+{
+   const struct intel_device_info *devinfo = compiler->devinfo;
+   const bool is_scalar = compiler->scalar_stage[stage];
+   nir_variable_mode indirect_mask = (nir_variable_mode) 0;
+
+   switch (stage) {
+   case MESA_SHADER_VERTEX:
+   case MESA_SHADER_FRAGMENT:
+      indirect_mask |= nir_var_shader_in;
+      break;
+
+   case MESA_SHADER_GEOMETRY:
+      if (!is_scalar)
+         indirect_mask |= nir_var_shader_in;
+      break;
+
+   default:
+      /* Everything else can handle indirect inputs */
+      break;
+   }
+
+   if (is_scalar && stage != MESA_SHADER_TESS_CTRL)
+      indirect_mask |= nir_var_shader_out;
+
+   /* On HSW+, we allow indirects in scalar shaders.  They get implemented
+    * using nir_lower_vars_to_explicit_types and nir_lower_explicit_io in
+    * brw_postprocess_nir.
+    *
+    * We haven't plumbed through the indirect scratch messages on gfx6 or
+    * earlier so doing indirects via scratch doesn't work there. On gfx7 and
+    * earlier the scratch space size is limited to 12kB.  If we allowed
+    * indirects as scratch all the time, we may easily exceed this limit
+    * without having any fallback.
+    */
+   if (is_scalar && devinfo->verx10 <= 70)
+      indirect_mask |= nir_var_function_temp;
+
+   return indirect_mask;
+}
+
 bool brw_texture_offset(const nir_tex_instr *tex, unsigned src,
                         uint32_t *offset_bits);
 
diff --git a/src/mesa/state_tracker/st_glsl_to_nir.cpp b/src/mesa/state_tracker/st_glsl_to_nir.cpp
index 182644bda31..caeb195518a 100644
--- a/src/mesa/state_tracker/st_glsl_to_nir.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_nir.cpp
@@ -324,7 +324,7 @@ st_nir_opts(nir_shader *nir)
       NIR_PASS(progress, nir, nir_opt_undef);
       NIR_PASS(progress, nir, nir_opt_conditional_discard);
       if (nir->options->max_unroll_iterations) {
-         NIR_PASS(progress, nir, nir_opt_loop_unroll, (nir_variable_mode)0);
+         NIR_PASS(progress, nir, nir_opt_loop_unroll);
       }
    } while (progress);
 }
diff --git a/src/panfrost/bifrost/bifrost_compile.c b/src/panfrost/bifrost/bifrost_compile.c
index afacef28831..a8df91075dd 100644
--- a/src/panfrost/bifrost/bifrost_compile.c
+++ b/src/panfrost/bifrost/bifrost_compile.c
@@ -3306,10 +3306,7 @@ bi_optimize_nir(nir_shader *nir, unsigned gpu_id, bool is_blend)
                 NIR_PASS(progress, nir, nir_opt_undef);
                 NIR_PASS(progress, nir, nir_lower_undef_to_zero);
 
-                NIR_PASS(progress, nir, nir_opt_loop_unroll,
-                         nir_var_shader_in |
-                         nir_var_shader_out |
-                         nir_var_function_temp);
+                NIR_PASS(progress, nir, nir_opt_loop_unroll);
         } while (progress);
 
         /* TODO: Why is 64-bit getting rematerialized?
diff --git a/src/panfrost/bifrost/bifrost_compile.h b/src/panfrost/bifrost/bifrost_compile.h
index 45bfd522131..9b320d74e26 100644
--- a/src/panfrost/bifrost/bifrost_compile.h
+++ b/src/panfrost/bifrost/bifrost_compile.h
@@ -93,6 +93,7 @@ static const nir_shader_compiler_options bifrost_nir_options = {
         .vertex_id_zero_based = true,
         .lower_cs_local_index_from_id = true,
         .max_unroll_iterations = 32,
+        .force_indirect_unrolling = (nir_var_shader_in | nir_var_shader_out | nir_var_function_temp),
 };
 
 #endif
diff --git a/src/panfrost/midgard/midgard_compile.c b/src/panfrost/midgard/midgard_compile.c
index 183e2991497..609db5086c3 100644
--- a/src/panfrost/midgard/midgard_compile.c
+++ b/src/panfrost/midgard/midgard_compile.c
@@ -369,10 +369,7 @@ optimise_nir(nir_shader *nir, unsigned quirks, bool is_blend)
                 NIR_PASS(progress, nir, nir_opt_undef);
                 NIR_PASS(progress, nir, nir_lower_undef_to_zero);
 
-                NIR_PASS(progress, nir, nir_opt_loop_unroll,
-                         nir_var_shader_in |
-                         nir_var_shader_out |
-                         nir_var_function_temp);
+                NIR_PASS(progress, nir, nir_opt_loop_unroll);
 
                 NIR_PASS(progress, nir, nir_opt_vectorize,
                          midgard_vectorize_filter, NULL);
diff --git a/src/panfrost/midgard/midgard_compile.h b/src/panfrost/midgard/midgard_compile.h
index d3bce99c8ce..65c9becd28e 100644
--- a/src/panfrost/midgard/midgard_compile.h
+++ b/src/panfrost/midgard/midgard_compile.h
@@ -97,6 +97,7 @@ static const nir_shader_compiler_options midgard_nir_options = {
         .has_cs_global_id = true,
         .lower_cs_local_index_from_id = true,
         .max_unroll_iterations = 32,
+        .force_indirect_unrolling = (nir_var_shader_in | nir_var_shader_out | nir_var_function_temp),
 };
 
 #endif