[Mesa-dev] [PATCH 11/12] i965: Create NIR during LinkShader() and ProgramStringNotify().

Kenneth Graunke kenneth at whitecape.org
Wed Apr 8 00:06:34 PDT 2015


Previously, we translated into NIR and did all the optimizations and
lowering as part of running fs_visitor.  This meant that we did all of
that work twice for fragment shaders - once for SIMD8, and again for
SIMD16.  We also had to redo it every time we hit a state-based
recompile.

We now generate NIR once at link time.  ARB programs don't have linking,
so we instead generate it at ProgramStringNotify time.

Mesa's fixed-function vertex program handling doesn't bother to inform
the driver about new programs at all (which is rather mean), so we
generate NIR at the last minute (in brw_vs_emit), if it hasn't happened
already.

shader-db runs ~9.4% faster on my i7-5600U, with a release build.

Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
---
 src/mesa/drivers/dri/i965/Makefile.sources |   1 +
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp   | 174 +----------------------
 src/mesa/drivers/dri/i965/brw_nir.c        | 213 +++++++++++++++++++++++++++++
 src/mesa/drivers/dri/i965/brw_nir.h        |   6 +
 src/mesa/drivers/dri/i965/brw_program.c    |   7 +
 src/mesa/drivers/dri/i965/brw_shader.cpp   |   6 +
 src/mesa/drivers/dri/i965/brw_vec4.cpp     |  17 ++-
 src/mesa/main/mtypes.h                     |   2 +
 src/mesa/program/program.c                 |   5 +
 9 files changed, 255 insertions(+), 176 deletions(-)
 create mode 100644 src/mesa/drivers/dri/i965/brw_nir.c

diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
index 498d5a7..6d4659f 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -77,6 +77,7 @@ i965_FILES = \
 	brw_misc_state.c \
 	brw_multisample_state.h \
 	brw_nir.h \
+	brw_nir.c \
 	brw_nir_analyze_boolean_resolves.c \
 	brw_object_purgeable.c \
 	brw_packed_float.c \
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 034b79a..ccffd5d 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -28,175 +28,10 @@
 #include "brw_fs.h"
 #include "brw_nir.h"
 
-static void
-nir_optimize(nir_shader *nir)
-{
-   bool progress;
-   do {
-      progress = false;
-      nir_lower_vars_to_ssa(nir);
-      nir_validate_shader(nir);
-      nir_lower_alu_to_scalar(nir);
-      nir_validate_shader(nir);
-      progress |= nir_copy_prop(nir);
-      nir_validate_shader(nir);
-      nir_lower_phis_to_scalar(nir);
-      nir_validate_shader(nir);
-      progress |= nir_copy_prop(nir);
-      nir_validate_shader(nir);
-      progress |= nir_opt_dce(nir);
-      nir_validate_shader(nir);
-      progress |= nir_opt_cse(nir);
-      nir_validate_shader(nir);
-      progress |= nir_opt_peephole_select(nir);
-      nir_validate_shader(nir);
-      progress |= nir_opt_algebraic(nir);
-      nir_validate_shader(nir);
-      progress |= nir_opt_constant_folding(nir);
-      nir_validate_shader(nir);
-      progress |= nir_opt_remove_phis(nir);
-      nir_validate_shader(nir);
-   } while (progress);
-}
-
-static bool
-count_nir_instrs_in_block(nir_block *block, void *state)
-{
-   int *count = (int *) state;
-   nir_foreach_instr(block, instr) {
-      *count = *count + 1;
-   }
-   return true;
-}
-
-static int
-count_nir_instrs(nir_shader *nir)
-{
-   int count = 0;
-   nir_foreach_overload(nir, overload) {
-      if (!overload->impl)
-         continue;
-      nir_foreach_block(overload->impl, count_nir_instrs_in_block, &count);
-   }
-   return count;
-}
-
 void
 fs_visitor::emit_nir_code()
 {
-   const nir_shader_compiler_options *options =
-      ctx->Const.ShaderCompilerOptions[stage].NirOptions;
-
-   nir_shader *nir;
-   /* First, lower the GLSL IR or Mesa IR to NIR */
-   if (shader_prog) {
-      nir = glsl_to_nir(&shader->base, options);
-   } else {
-      nir = prog_to_nir(prog, options);
-      nir_convert_to_ssa(nir); /* turn registers into SSA */
-   }
-   nir_validate_shader(nir);
-
-   nir_lower_global_vars_to_local(nir);
-   nir_validate_shader(nir);
-
-   nir_lower_tex_projector(nir);
-   nir_validate_shader(nir);
-
-   nir_normalize_cubemap_coords(nir);
-   nir_validate_shader(nir);
-
-   nir_split_var_copies(nir);
-   nir_validate_shader(nir);
-
-   nir_optimize(nir);
-
-   /* Lower a bunch of stuff */
-   nir_lower_var_copies(nir);
-   nir_validate_shader(nir);
-
-   /* Get rid of split copies */
-   nir_optimize(nir);
-
-   if (shader_prog) {
-      nir_assign_var_locations_scalar_direct_first(nir, &nir->uniforms,
-                                                   &nir->num_direct_uniforms,
-                                                   &nir->num_uniforms);
-   } else {
-      /* ARB programs generally create a giant array of "uniform" data, and allow
-       * indirect addressing without any boundaries.  In the absence of bounds
-       * analysis, it's all or nothing.  num_direct_uniforms is only useful when
-       * we have some direct and some indirect access; it doesn't matter here.
-       */
-      nir->num_direct_uniforms = 0;
-   }
-   nir_assign_var_locations_scalar(&nir->inputs, &nir->num_inputs);
-   nir_assign_var_locations_scalar(&nir->outputs, &nir->num_outputs);
-
-   nir_lower_io(nir);
-   nir_validate_shader(nir);
-
-   nir_remove_dead_variables(nir);
-   nir_validate_shader(nir);
-
-   if (shader_prog) {
-      nir_lower_samplers(nir, shader_prog, stage);
-      nir_validate_shader(nir);
-   }
-
-   nir_lower_system_values(nir);
-   nir_validate_shader(nir);
-
-   nir_lower_atomics(nir);
-   nir_validate_shader(nir);
-
-   nir_optimize(nir);
-
-   if (brw->gen >= 6) {
-      /* Try and fuse multiply-adds */
-      nir_opt_peephole_ffma(nir);
-      nir_validate_shader(nir);
-   }
-
-   nir_opt_algebraic_late(nir);
-   nir_validate_shader(nir);
-
-   nir_lower_locals_to_regs(nir);
-   nir_validate_shader(nir);
-
-   nir_lower_to_source_mods(nir);
-   nir_validate_shader(nir);
-   nir_copy_prop(nir);
-   nir_validate_shader(nir);
-   nir_opt_dce(nir);
-   nir_validate_shader(nir);
-
-   if (unlikely(debug_enabled)) {
-      fprintf(stderr, "NIR (SSA form) for %s shader:\n", stage_name);
-      nir_print_shader(nir, stderr);
-   }
-
-   if (dispatch_width == 8) {
-      static GLuint msg_id = 0;
-      _mesa_gl_debug(&brw->ctx, &msg_id,
-                     MESA_DEBUG_SOURCE_SHADER_COMPILER,
-                     MESA_DEBUG_TYPE_OTHER,
-                     MESA_DEBUG_SEVERITY_NOTIFICATION,
-                     "%s NIR shader: %d inst\n",
-                     stage_abbrev,
-                     count_nir_instrs(nir));
-   }
-
-   nir_convert_from_ssa(nir);
-   nir_validate_shader(nir);
-
-   /* This is the last pass we run before we start emitting stuff.  It
-    * determines when we need to insert boolean resolves on Gen <= 5.  We
-    * run it last because it stashes data in instr->pass_flags and we don't
-    * want that to be squashed by other NIR passes.
-    */
-   if (brw->gen <= 5)
-      brw_nir_analyze_boolean_resolves(nir);
+   nir_shader *nir = prog->nir;
 
    /* emit the arrays used for inputs and outputs - load/store intrinsics will
     * be converted to reads/writes of these arrays
@@ -232,13 +67,6 @@ fs_visitor::emit_nir_code()
       assert(overload->impl);
       nir_emit_impl(overload->impl);
    }
-
-   if (unlikely(debug_enabled)) {
-      fprintf(stderr, "NIR (final form) for %s shader:\n", stage_name);
-      nir_print_shader(nir, stderr);
-   }
-
-   ralloc_free(nir);
 }
 
 void
diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c
new file mode 100644
index 0000000..de4d7aa
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_nir.c
@@ -0,0 +1,213 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "brw_nir.h"
+#include "glsl/glsl_parser_extras.h"
+#include "glsl/nir/glsl_to_nir.h"
+#include "program/prog_to_nir.h"
+
+static void
+nir_optimize(nir_shader *nir)
+{
+   bool progress;
+   do {
+      progress = false;
+      nir_lower_vars_to_ssa(nir);
+      nir_validate_shader(nir);
+      nir_lower_alu_to_scalar(nir);
+      nir_validate_shader(nir);
+      progress |= nir_copy_prop(nir);
+      nir_validate_shader(nir);
+      nir_lower_phis_to_scalar(nir);
+      nir_validate_shader(nir);
+      progress |= nir_copy_prop(nir);
+      nir_validate_shader(nir);
+      progress |= nir_opt_dce(nir);
+      nir_validate_shader(nir);
+      progress |= nir_opt_cse(nir);
+      nir_validate_shader(nir);
+      progress |= nir_opt_peephole_select(nir);
+      nir_validate_shader(nir);
+      progress |= nir_opt_algebraic(nir);
+      nir_validate_shader(nir);
+      progress |= nir_opt_constant_folding(nir);
+      nir_validate_shader(nir);
+      progress |= nir_opt_remove_phis(nir);
+      nir_validate_shader(nir);
+   } while (progress);
+}
+
+static bool
+count_nir_instrs_in_block(nir_block *block, void *state)
+{
+   int *count = (int *) state;
+   nir_foreach_instr(block, instr) {
+      *count = *count + 1;
+   }
+   return true;
+}
+
+static int
+count_nir_instrs(nir_shader *nir)
+{
+   int count = 0;
+   nir_foreach_overload(nir, overload) {
+      if (!overload->impl)
+         continue;
+      nir_foreach_block(overload->impl, count_nir_instrs_in_block, &count);
+   }
+   return count;
+}
+
+nir_shader *
+brw_create_nir(struct brw_context *brw,
+               const struct gl_shader_program *shader_prog,
+               const struct gl_program *prog,
+               gl_shader_stage stage)
+{
+   struct gl_context *ctx = &brw->ctx;
+   const nir_shader_compiler_options *options =
+      ctx->Const.ShaderCompilerOptions[stage].NirOptions;
+   struct gl_shader *shader = shader_prog ? shader_prog->_LinkedShaders[stage] : NULL;
+   bool debug_enabled = INTEL_DEBUG & intel_debug_flag_for_shader_stage(stage);
+   nir_shader *nir;
+
+   /* First, lower the GLSL IR or Mesa IR to NIR */
+   if (shader_prog) {
+      nir = glsl_to_nir(shader, options);
+   } else {
+      nir = prog_to_nir(prog, options);
+      nir_convert_to_ssa(nir); /* turn registers into SSA */
+   }
+   nir_validate_shader(nir);
+
+   nir_lower_global_vars_to_local(nir);
+   nir_validate_shader(nir);
+
+   nir_lower_tex_projector(nir);
+   nir_validate_shader(nir);
+
+   nir_normalize_cubemap_coords(nir);
+   nir_validate_shader(nir);
+
+   nir_split_var_copies(nir);
+   nir_validate_shader(nir);
+
+   nir_optimize(nir);
+
+   /* Lower a bunch of stuff */
+   nir_lower_var_copies(nir);
+   nir_validate_shader(nir);
+
+   /* Get rid of split copies */
+   nir_optimize(nir);
+
+   if (shader_prog) {
+      nir_assign_var_locations_scalar_direct_first(nir, &nir->uniforms,
+                                                   &nir->num_direct_uniforms,
+                                                   &nir->num_uniforms);
+   } else {
+      /* ARB programs generally create a giant array of "uniform" data, and allow
+       * indirect addressing without any boundaries.  In the absence of bounds
+       * analysis, it's all or nothing.  num_direct_uniforms is only useful when
+       * we have some direct and some indirect access; it doesn't matter here.
+       */
+      nir->num_direct_uniforms = 0;
+   }
+   nir_assign_var_locations_scalar(&nir->inputs, &nir->num_inputs);
+   nir_assign_var_locations_scalar(&nir->outputs, &nir->num_outputs);
+
+   nir_lower_io(nir);
+   nir_validate_shader(nir);
+
+   nir_remove_dead_variables(nir);
+   nir_validate_shader(nir);
+
+   if (shader_prog) {
+      nir_lower_samplers(nir, shader_prog, stage);
+      nir_validate_shader(nir);
+   }
+
+   nir_lower_system_values(nir);
+   nir_validate_shader(nir);
+
+   nir_lower_atomics(nir);
+   nir_validate_shader(nir);
+
+   nir_optimize(nir);
+
+   if (brw->gen >= 6) {
+      /* Try and fuse multiply-adds */
+      nir_opt_peephole_ffma(nir);
+      nir_validate_shader(nir);
+   }
+
+   nir_opt_algebraic_late(nir);
+   nir_validate_shader(nir);
+
+   nir_lower_locals_to_regs(nir);
+   nir_validate_shader(nir);
+
+   nir_lower_to_source_mods(nir);
+   nir_validate_shader(nir);
+   nir_copy_prop(nir);
+   nir_validate_shader(nir);
+   nir_opt_dce(nir);
+   nir_validate_shader(nir);
+
+   if (unlikely(debug_enabled)) {
+      fprintf(stderr, "NIR (SSA form) for %s shader:\n",
+              _mesa_shader_stage_to_string(stage));
+      nir_print_shader(nir, stderr);
+   }
+
+   static GLuint msg_id = 0;
+   _mesa_gl_debug(&brw->ctx, &msg_id,
+                  MESA_DEBUG_SOURCE_SHADER_COMPILER,
+                  MESA_DEBUG_TYPE_OTHER,
+                  MESA_DEBUG_SEVERITY_NOTIFICATION,
+                  "%s NIR shader: %d inst\n",
+                  _mesa_shader_stage_to_abbrev(stage),
+                  count_nir_instrs(nir));
+
+   nir_convert_from_ssa(nir);
+   nir_validate_shader(nir);
+
+   /* This is the last pass we run before we start emitting stuff.  It
+    * determines when we need to insert boolean resolves on Gen <= 5.  We
+    * run it last because it stashes data in instr->pass_flags and we don't
+    * want that to be squashed by other NIR passes.
+    */
+   if (brw->gen <= 5)
+      brw_nir_analyze_boolean_resolves(nir);
+
+   nir_sweep(nir);
+
+   if (unlikely(debug_enabled)) {
+      fprintf(stderr, "NIR (final form) for %s shader:\n",
+              _mesa_shader_stage_to_string(stage));
+      nir_print_shader(nir, stderr);
+   }
+
+   return nir;
+}
diff --git a/src/mesa/drivers/dri/i965/brw_nir.h b/src/mesa/drivers/dri/i965/brw_nir.h
index 27782a3..3131109 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.h
+++ b/src/mesa/drivers/dri/i965/brw_nir.h
@@ -23,6 +23,7 @@
 
 #pragma once
 
+#include "brw_context.h"
 #include "glsl/nir/nir.h"
 
 #ifdef __cplusplus
@@ -73,6 +74,11 @@ enum {
 
 void brw_nir_analyze_boolean_resolves(nir_shader *nir);
 
+nir_shader *brw_create_nir(struct brw_context *brw,
+                           const struct gl_shader_program *shader_prog,
+                           const struct gl_program *prog,
+                           gl_shader_stage stage);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c
index 8920c34..389491c 100644
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -43,6 +43,7 @@
 
 #include "brw_context.h"
 #include "brw_shader.h"
+#include "brw_nir.h"
 #include "brw_wm.h"
 #include "intel_batchbuffer.h"
 
@@ -141,6 +142,9 @@ brwProgramStringNotify(struct gl_context *ctx,
 
       brw_add_texrect_params(prog);
 
+      prog->nir = brw_create_nir(brw, NULL, prog,
+                                 _mesa_program_enum_to_shader_stage(target));
+
       brw_fs_precompile(ctx, NULL, prog);
       break;
    }
@@ -163,6 +167,9 @@ brwProgramStringNotify(struct gl_context *ctx,
 
       brw_add_texrect_params(prog);
 
+      prog->nir = brw_create_nir(brw, NULL, prog,
+                                 _mesa_program_enum_to_shader_stage(target));
+
       brw_vs_precompile(ctx, NULL, prog);
       break;
    }
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 8700077..335a800 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -27,6 +27,7 @@
 #include "brw_gs.h"
 #include "brw_fs.h"
 #include "brw_cfg.h"
+#include "brw_nir.h"
 #include "glsl/ir_optimization.h"
 #include "glsl/glsl_parser_extras.h"
 #include "main/shaderapi.h"
@@ -229,6 +230,8 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg)
 
    for (stage = 0; stage < ARRAY_SIZE(shProg->_LinkedShaders); stage++) {
       struct gl_shader *shader = shProg->_LinkedShaders[stage];
+      const struct gl_shader_compiler_options *options =
+         &ctx->Const.ShaderCompilerOptions[stage];
 
       if (!shader)
 	 continue;
@@ -277,6 +280,9 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg)
 
       brw_add_texrect_params(prog);
 
+      if (options->NirOptions)
+         prog->nir = brw_create_nir(brw, shProg, prog, (gl_shader_stage) stage);
+
       _mesa_reference_program(ctx, &prog, NULL);
    }
 
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index ef2fd40..c4c77b2 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -25,6 +25,7 @@
 #include "brw_fs.h"
 #include "brw_cfg.h"
 #include "brw_vs.h"
+#include "brw_nir.h"
 #include "brw_vec4_live_variables.h"
 #include "brw_dead_control_flow.h"
 
@@ -1809,6 +1810,8 @@ brw_vs_emit(struct brw_context *brw,
    bool start_busy = false;
    double start_time = 0;
    const unsigned *assembly = NULL;
+   bool use_nir =
+      brw->ctx.Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].NirOptions != NULL;
 
    if (unlikely(brw->perf_debug)) {
       start_busy = (brw->batch.last_bo &&
@@ -1823,9 +1826,17 @@ brw_vs_emit(struct brw_context *brw,
    if (unlikely(INTEL_DEBUG & DEBUG_VS))
       brw_dump_ir("vertex", prog, &shader->base, &c->vp->program.Base);
 
-   if (brw->scalar_vs &&
-       (prog ||
-        brw->ctx.Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].NirOptions)) {
+   if (use_nir && !c->vp->program.Base.nir) {
+      /* Normally we generate NIR in LinkShader() or ProgramStringNotify(), but
+       * Mesa's fixed-function vertex program handling doesn't notify the driver
+       * at all.  Just do it here, at the last minute, even though it's lame.
+       */
+      assert(c->vp->program.Base.Id == 0 && prog == NULL);
+      c->vp->program.Base.nir =
+         brw_create_nir(brw, NULL, &c->vp->program.Base, MESA_SHADER_VERTEX);
+   }
+
+   if (brw->scalar_vs && (prog || use_nir)) {
       fs_visitor v(brw, mem_ctx, &c->key, prog_data, prog, &c->vp->program, 8);
       if (!v.run_vs()) {
          if (prog) {
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 6184028..5d726b4 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -2087,6 +2087,8 @@ struct gl_program
 
    struct prog_instruction *Instructions;
 
+   struct nir_shader *nir;
+
    GLbitfield64 InputsRead;     /**< Bitmask of which input regs are read */
    GLbitfield64 OutputsWritten; /**< Bitmask of which output regs are written */
    GLbitfield SystemValuesRead;   /**< Bitmask of SYSTEM_VALUE_x inputs used */
diff --git a/src/mesa/program/program.c b/src/mesa/program/program.c
index 3c214d5..4f28e2a 100644
--- a/src/mesa/program/program.c
+++ b/src/mesa/program/program.c
@@ -37,6 +37,7 @@
 #include "prog_cache.h"
 #include "prog_parameter.h"
 #include "prog_instruction.h"
+#include "util/ralloc.h"
 
 
 /**
@@ -380,6 +381,10 @@ _mesa_delete_program(struct gl_context *ctx, struct gl_program *prog)
       _mesa_free_parameter_list(prog->Parameters);
    }
 
+   if (prog->nir) {
+      ralloc_free(prog->nir);
+   }
+
    free(prog);
 }
 
-- 
2.3.5



More information about the mesa-dev mailing list