[Mesa-dev] [RFCv3 11/11] mesa/st: add support for NIR as possible driver IR

Sun Jan 31 12:16:19 PST 2016

From: Rob Clark <robclark at freedesktop.org>

Signed-off-by: Rob Clark <robclark at freedesktop.org>
---
 src/compiler/nir/nir.h                     |   2 +
 src/mesa/Makefile.sources                  |   1 +
 src/mesa/state_tracker/st_glsl_to_nir.cpp  | 407 +++++++++++++++++++++++++++++
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp |  40 ++-
 src/mesa/state_tracker/st_glsl_to_tgsi.h   |   5 +
 src/mesa/state_tracker/st_nir.h            |  23 ++
 src/mesa/state_tracker/st_program.c        | 171 ++++++++++--
 src/mesa/state_tracker/st_program.h        |   6 +
 8 files changed, 623 insertions(+), 32 deletions(-)
 create mode 100644 src/mesa/state_tracker/st_glsl_to_nir.cpp

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 29c8631..148fe68 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -348,6 +348,8 @@ typedef struct nir_variable {
 
 #define nir_foreach_variable(var, var_list) \
    foreach_list_typed(nir_variable, var, node, var_list)
+#define nir_foreach_variable_safe(var, var_list) \
+   foreach_list_typed_safe(nir_variable, var, node, var_list)
 
 typedef struct nir_register {
    struct exec_node node;
diff --git a/src/mesa/Makefile.sources b/src/mesa/Makefile.sources
index 500a5c0..b712bad 100644
--- a/src/mesa/Makefile.sources
+++ b/src/mesa/Makefile.sources
@@ -481,6 +481,7 @@ STATETRACKER_FILES = \
 	state_tracker/st_gen_mipmap.c \
 	state_tracker/st_gen_mipmap.h \
 	state_tracker/st_gl_api.h \
+	state_tracker/st_glsl_to_nir.cpp \
 	state_tracker/st_glsl_to_tgsi.cpp \
 	state_tracker/st_glsl_to_tgsi.h \
 	state_tracker/st_manager.c \
diff --git a/src/mesa/state_tracker/st_glsl_to_nir.cpp b/src/mesa/state_tracker/st_glsl_to_nir.cpp
new file mode 100644
index 0000000..8722102
--- /dev/null
+++ b/src/mesa/state_tracker/st_glsl_to_nir.cpp
@@ -0,0 +1,407 @@
+/*
+ * Copyright © 2015 Red Hat
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "st_nir.h"
+
+#include "pipe/p_defines.h"
+#include "pipe/p_screen.h"
+#include "pipe/p_context.h"
+
+#include "program/program.h"
+#include "program/prog_statevars.h"
+#include "program/prog_parameter.h"
+#include "program/ir_to_mesa.h"
+#include "main/mtypes.h"
+#include "main/errors.h"
+#include "main/shaderapi.h"
+#include "main/uniforms.h"
+
+#include "st_context.h"
+#include "st_program.h"
+
+#include "compiler/nir/nir.h"
+#include "compiler/nir/glsl_to_nir.h"
+#include "compiler/glsl_types.h"
+
+
+/* Depending on PIPE_CAP_TGSI_TEXCOORD (st->needs_texcoord_semantic) we
+ * may need to fix up varying slots so the glsl->nir path is aligned
+ * with the anything->tgsi->nir path.
+ */
+static void
+st_nir_fixup_varying_slots(struct st_context *st, struct exec_list *var_list)
+{
+   if (st->needs_texcoord_semantic)
+      return;
+
+   nir_foreach_variable(var, var_list) {
+      if (var->data.location >= VARYING_SLOT_VAR0) {
+         var->data.location += 9;
+      } else if ((var->data.location >= VARYING_SLOT_TEX0) &&
+               (var->data.location <= VARYING_SLOT_TEX7)) {
+         var->data.location += VARYING_SLOT_VAR0 - VARYING_SLOT_TEX0;
+      }
+   }
+}
+
+/* Input location assignment for VS inputs must be handled specially, so
+ * that it is aligned with st's vbo state.
+ * (This isn't the case for, e.g., FS inputs, which only need to agree
+ * on varying slot with the VS outputs.)
+ */
+static void
+st_nir_assign_vs_in_locations(struct gl_program *prog,
+                              struct exec_list *var_list, unsigned *size)
+{
+   unsigned attr, num_inputs = 0;
+   unsigned input_to_index[VERT_ATTRIB_MAX] = {0};
+
+   /* TODO de-duplicate w/ similar code in st_translate_vertex_program()? */
+   for (attr = 0; attr < VERT_ATTRIB_MAX; attr++) {
+      if ((prog->InputsRead & BITFIELD64_BIT(attr)) != 0) {
+         input_to_index[attr] = num_inputs;
+         num_inputs++;
+         if ((prog->DoubleInputsRead & BITFIELD64_BIT(attr)) != 0) {
+            /* add placeholder for second part of a double attribute */
+            num_inputs++;
+         }
+      }
+   }
+
+   *size = 0;
+   nir_foreach_variable(var, var_list) {
+      attr = var->data.location;
+      assert(attr < ARRAY_SIZE(input_to_index));
+      var->data.driver_location = input_to_index[attr];
+      (*size)++;
+   }
+}
+
+static void
+st_nir_assign_uniform_locations(struct gl_program *prog,
+                                struct exec_list *uniform_list, unsigned *size)
+{
+   int max = 0;
+   int shaderidx = 0;
+
+   nir_foreach_variable(uniform, uniform_list) {
+      int loc;
+
+      if (uniform->type->is_sampler()) {
+         loc = shaderidx++;
+         uniform->data.location = loc; /* this should match resulting sampler idx */
+      } else if (strncmp(uniform->name, "gl_", 3) == 0) {
+         const gl_state_index *const stateTokens = (gl_state_index *)uniform->state_slots[0].tokens;
+         /* This state reference has already been set up by ir_to_mesa, but we'll
+          * get the same index back here.
+          */
+         loc = _mesa_add_state_reference(prog->Parameters, stateTokens);
+      } else {
+         loc = _mesa_lookup_parameter_index(prog->Parameters, -1, uniform->name);
+      }
+
+      /* is there a better way to do this?  If we have something like:
+       *
+       *    struct S {
+       *           float f;
+       *           vec4 v;
+       *    };
+       *    uniform S color;
+       *
+       * Then what we get in prog->Parameters looks like:
+       *
+       *    0: Name=color.f, Type=6, DataType=1406, Size=1
+       *    1: Name=color.v, Type=6, DataType=8b52, Size=4
+       *
+       * So the name doesn't match up and _mesa_lookup_parameter_index()
+       * fails.  In this case, fall back to the first parameter named "color.*".
+       */
+      if (loc < 0) {
+         int namelen = strlen(uniform->name);
+         for (unsigned i = 0; i < prog->Parameters->NumParameters; i++) {
+            struct gl_program_parameter *p = &prog->Parameters->Parameters[i];
+            if ((strncmp(p->Name, uniform->name, namelen) == 0) &&
+                (p->Name[namelen] == '.')) {
+               loc = i;
+               break;
+            }
+         }
+      }
+
+      uniform->data.driver_location = loc;
+      /*
+       * UBOs and SSBOs have their own address spaces, so don't count them
+       * towards the number of global uniforms
+       */
+      if ((uniform->data.mode == nir_var_uniform || uniform->data.mode == nir_var_shader_storage) &&
+          uniform->interface_type != NULL)
+         continue;
+
+      max = MAX2(max, loc + glsl_type_size_vec4(uniform->type));
+   }
+   *size = max;
+}
+
+/* TODO probably get this from pipe_screen?
+ * NOTE we need this to not be on the stack, so it doesn't go away when
+ * st_glsl_to_nir() returns.  Alternatively, let the pipe driver supply it.
+ */
+static nir_shader_compiler_options nir_options;
+
+extern "C" {
+
+/* First half of the glsl->nir conversion.  This leaves things in a
+ * pre-nir_lower_io state, so that shader variants can more easily
+ * insert/replace variables, etc.
+ */
+nir_shader *
+st_glsl_to_nir(struct st_context *st, struct gl_program *prog,
+               struct gl_shader_program *shader_program,
+               gl_shader_stage stage)
+{
+   nir_shader *nir;
+
+   if (prog->nir)
+      return prog->nir;
+
+   // XXX get from pipe_screen?  Or just let pipe driver provide?
+   nir_options.lower_fpow = true;
+   nir_options.lower_fsat = true;
+   nir_options.lower_scmp = true;
+   nir_options.lower_flrp = true;
+   nir_options.lower_ffract = true;
+   nir_options.native_integers = true;
+
+   nir = glsl_to_nir(shader_program, stage, &nir_options);
+   prog->nir = nir;
+
+   nir_validate_shader(nir);
+
+   st_nir_lower_builtin(nir);
+   nir_validate_shader(nir);
+
+   nir_lower_global_vars_to_local(nir);
+   nir_validate_shader(nir);
+
+   nir_split_var_copies(nir);
+   nir_lower_var_copies(nir);
+   nir_validate_shader(nir);
+
+   /* fragment shaders may need the wpos y-transform lowered: */
+   if (stage == MESA_SHADER_FRAGMENT) {
+      static const gl_state_index wposTransformState[STATE_LENGTH] = {
+         STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM
+      };
+      nir_lower_wpos_ytransform_options wpos_options = {0};
+      struct pipe_screen *pscreen = st->pipe->screen;
+
+      memcpy(wpos_options.state_tokens, wposTransformState,
+             sizeof(wpos_options.state_tokens));
+      wpos_options.fs_coord_origin_upper_left =
+         pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT);
+      wpos_options.fs_coord_origin_lower_left =
+         pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
+      wpos_options.fs_coord_pixel_center_integer =
+         pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
+      wpos_options.fs_coord_pixel_center_half_integer =
+         pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER);
+
+      if (nir_lower_wpos_ytransform(nir, &wpos_options)) {
+         _mesa_add_state_reference(prog->Parameters, wposTransformState);
+      }
+   }
+
+   // XXX do we need anything else from brw_create_nir().. and what
+   // is best way to split up which things should be here vs driver?
+   // currently just trying to make the result here similar to what
+   // we get from tgsi_to_nir().. so lower_io, etc..
+
+   if (st->ctx->_Shader->Flags & GLSL_DUMP) {
+      _mesa_log("\n");
+      _mesa_log("NIR IR for linked %s program %d:\n",
+             _mesa_shader_stage_to_string(stage),
+             shader_program->Name);
+      nir_print_shader(nir, _mesa_get_log_file());
+      _mesa_log("\n\n");
+   }
+
+   return nir;
+}
+
+/* TODO any better helper somewhere to sort a list? */
+
+static void
+insert_sorted(struct exec_list *var_list, nir_variable *new_var)
+{
+   nir_foreach_variable(var, var_list) {
+      if (var->data.location > new_var->data.location) {
+         exec_node_insert_node_before(&var->node, &new_var->node);
+         return;
+      }
+   }
+   exec_list_push_tail(var_list, &new_var->node);
+}
+
+static void
+sort_varyings(struct exec_list *var_list)
+{
+   struct exec_list new_list;
+   exec_list_make_empty(&new_list);
+   nir_foreach_variable_safe(var, var_list) {
+      exec_node_remove(&var->node);
+      insert_sorted(&new_list, var);
+   }
+   exec_list_move_nodes_to(&new_list, var_list);
+}
+
+/* Second half of preparing nir from glsl, which happens after shader
+ * variant lowering.
+ */
+void
+st_finalize_nir(struct st_context *st, struct gl_program *prog, nir_shader *nir)
+{
+   if (nir->stage == MESA_SHADER_VERTEX) {
+      /* Needs special handling so drvloc matches the vbo state: */
+      st_nir_assign_vs_in_locations(prog, &nir->inputs, &nir->num_inputs);
+      sort_varyings(&nir->outputs);
+      nir_assign_var_locations(&nir->outputs,
+                               &nir->num_outputs,
+                               glsl_type_size_vec4);
+      st_nir_fixup_varying_slots(st, &nir->outputs);
+   } else if (nir->stage == MESA_SHADER_FRAGMENT) {
+      sort_varyings(&nir->inputs);
+      nir_assign_var_locations(&nir->inputs,
+                               &nir->num_inputs,
+                               glsl_type_size_vec4);
+      st_nir_fixup_varying_slots(st, &nir->inputs);
+      nir_assign_var_locations(&nir->outputs,
+                               &nir->num_outputs,
+                               glsl_type_size_vec4);
+   } else {
+      unreachable("invalid shader type for tgsi bypass\n");
+   }
+
+   st_nir_assign_uniform_locations(prog, &nir->uniforms, &nir->num_uniforms);
+
+   nir_lower_system_values(nir);
+   nir_lower_io(nir, nir_var_all, glsl_type_size_vec4);
+   nir_lower_samplers(nir, NULL);
+   nir_validate_shader(nir);
+}
+
+struct gl_program *
+st_nir_get_mesa_program(struct gl_context *ctx,
+                        struct gl_shader_program *shader_program,
+                        struct gl_shader *shader)
+{
+   struct gl_program *prog;
+   GLenum target = _mesa_shader_stage_to_program(shader->Stage);
+
+   validate_ir_tree(shader->ir);
+
+   prog = ctx->Driver.NewProgram(ctx, target, shader_program->Name);
+   if (!prog)
+      return NULL;
+
+   prog->Parameters = _mesa_new_parameter_list();
+
+   _mesa_copy_linked_program_data(shader->Stage, shader_program, prog);
+   _mesa_generate_parameters_list_for_uniforms(shader_program, shader,
+                                               prog->Parameters);
+
+   /* Make a pass over the IR to add state references for any built-in
+    * uniforms that are used.  This has to be done now (during linking).
+    * Code generation doesn't happen until the first time this shader is
+    * used for rendering.  Waiting until then to generate the parameters is
+    * too late.  At that point, the values for the built-in uniforms won't
+    * get sent to the shader.
+    */
+   foreach_in_list(ir_instruction, node, shader->ir) {
+      ir_variable *var = node->as_variable();
+
+      if ((var == NULL) || (var->data.mode != ir_var_uniform) ||
+          (strncmp(var->name, "gl_", 3) != 0))
+         continue;
+
+      const ir_state_slot *const slots = var->get_state_slots();
+      assert(slots != NULL);
+
+      for (unsigned int i = 0; i < var->get_num_state_slots(); i++) {
+         _mesa_add_state_reference(prog->Parameters,
+                                   (gl_state_index *) slots[i].tokens);
+      }
+   }
+
+   if (ctx->_Shader->Flags & GLSL_DUMP) {
+      _mesa_log("\n");
+      _mesa_log("GLSL IR for linked %s program %d:\n",
+             _mesa_shader_stage_to_string(shader->Stage),
+             shader_program->Name);
+      _mesa_print_ir(_mesa_get_log_file(), shader->ir, NULL);
+      _mesa_log("\n\n");
+   }
+
+   prog->Instructions = NULL;
+   prog->NumInstructions = 0;
+
+   do_set_program_inouts(shader->ir, prog, shader->Stage);
+
+   prog->SamplersUsed = shader->active_samplers;
+   prog->ShadowSamplers = shader->shadow_samplers;
+   _mesa_update_shader_textures_used(shader_program, prog);
+
+   _mesa_reference_program(ctx, &shader->Program, prog);
+
+   /* Avoid reallocation of the program parameter list, because the uniform
+    * storage is only associated with the original parameter list.
+    * This should be enough for Bitmap and DrawPixels constants.
+    */
+   _mesa_reserve_parameter_storage(prog->Parameters, 8);
+
+   /* This has to be done last.  Any operation that can cause
+    * prog->ParameterValues to get reallocated (e.g., anything that adds a
+    * program constant) has to happen before creating this linkage.
+    */
+   _mesa_associate_uniform_storage(ctx, shader_program, prog->Parameters);
+
+   struct st_vertex_program *stvp;
+   struct st_fragment_program *stfp;
+
+   switch (shader->Type) {
+   case GL_VERTEX_SHADER:
+      stvp = (struct st_vertex_program *)prog;
+      stvp->shader_program = shader_program;
+      break;
+   case GL_FRAGMENT_SHADER:
+      stfp = (struct st_fragment_program *)prog;
+      stfp->shader_program = shader_program;
+      break;
+   default:
+      assert(!"should not be reached");
+      return NULL;
+   }
+
+   return prog;
+}
+
+} /* extern "C" */
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index d6459e5..d1f3a81 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -51,6 +51,7 @@
 #include "util/u_memory.h"
 #include "st_program.h"
 #include "st_mesa_to_tgsi.h"
+#include "st_nir.h"
 
 
 #define PROGRAM_ANY_CONST ((1 << PROGRAM_STATE_VAR) |    \
@@ -5840,9 +5841,9 @@ out:
  * generating Mesa IR.
  */
 static struct gl_program *
-get_mesa_program(struct gl_context *ctx,
-                 struct gl_shader_program *shader_program,
-                 struct gl_shader *shader)
+get_mesa_program_tgsi(struct gl_context *ctx,
+                      struct gl_shader_program *shader_program,
+                      struct gl_shader *shader)
 {
    glsl_to_tgsi_visitor* v;
    struct gl_program *prog;
@@ -6041,6 +6042,29 @@ get_mesa_program(struct gl_context *ctx,
    return prog;
 }
 
+static struct gl_program *
+get_mesa_program(struct gl_context *ctx,
+                 struct gl_shader_program *shader_program,
+                 struct gl_shader *shader)
+{
+   struct pipe_screen *pscreen = ctx->st->pipe->screen;
+   unsigned ptarget = st_shader_stage_to_ptarget(shader->Stage);
+   enum pipe_shader_ir preferred_ir = (enum pipe_shader_ir)
+      pscreen->get_shader_param(pscreen, ptarget, PIPE_SHADER_CAP_PREFERRED_IR);
+   if (preferred_ir == PIPE_SHADER_IR_NIR) {
+      /* TODO only for GLSL VS/FS for now: */
+      switch (shader->Type) {
+      case GL_VERTEX_SHADER:
+      case GL_FRAGMENT_SHADER:
+         return st_nir_get_mesa_program(ctx, shader_program, shader);
+      default:
+         break;
+      }
+   }
+   return get_mesa_program_tgsi(ctx, shader_program, shader);
+}
+
+
 extern "C" {
 
 static void
@@ -6241,9 +6265,17 @@ st_translate_stream_output_info(glsl_to_tgsi_visitor *glsl_to_tgsi,
                                 const GLuint outputMapping[],
                                 struct pipe_stream_output_info *so)
 {
-   unsigned i;
    struct gl_transform_feedback_info *info =
       &glsl_to_tgsi->shader_program->LinkedTransformFeedback;
+   st_translate_stream_output_info2(info, outputMapping, so);
+}
+
+void
+st_translate_stream_output_info2(struct gl_transform_feedback_info *info,
+                                const GLuint outputMapping[],
+                                struct pipe_stream_output_info *so)
+{
+   unsigned i;
 
    for (i = 0; i < info->NumOutputs; i++) {
       so->output[i].register_index =
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.h b/src/mesa/state_tracker/st_glsl_to_tgsi.h
index 729295b..1986025 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.h
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.h
@@ -63,6 +63,11 @@ st_translate_stream_output_info(struct glsl_to_tgsi_visitor *glsl_to_tgsi,
                                 const GLuint outputMapping[],
                                 struct pipe_stream_output_info *so);
 
+void
+st_translate_stream_output_info2(struct gl_transform_feedback_info *info,
+                                const GLuint outputMapping[],
+                                struct pipe_stream_output_info *so);
+
 extern const unsigned _mesa_sysval_to_semantic[SYSTEM_VALUE_MAX];
 
 #ifdef __cplusplus
diff --git a/src/mesa/state_tracker/st_nir.h b/src/mesa/state_tracker/st_nir.h
index 1c07c4c..7d49b0d 100644
--- a/src/mesa/state_tracker/st_nir.h
+++ b/src/mesa/state_tracker/st_nir.h
@@ -23,6 +23,29 @@
 
 #pragma once
 
+#include "st_context.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 typedef struct nir_shader nir_shader;
 
 void st_nir_lower_builtin(nir_shader *shader);
+
+#include "compiler/shader_enums.h"
+
+nir_shader * st_glsl_to_nir(struct st_context *st, struct gl_program *prog,
+                            struct gl_shader_program *shader_program,
+                            gl_shader_stage stage);
+
+void st_finalize_nir(struct st_context *st, struct gl_program *prog, nir_shader *nir);
+
+struct gl_program *
+st_nir_get_mesa_program(struct gl_context *ctx,
+                        struct gl_shader_program *shader_program,
+                        struct gl_shader *shader);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c
index b395454..d8df767 100644
--- a/src/mesa/state_tracker/st_program.c
+++ b/src/mesa/state_tracker/st_program.c
@@ -38,6 +38,9 @@
 #include "program/prog_print.h"
 #include "program/programopt.h"
 
+#include "compiler/nir/nir.h"
+#include "nir/nir_emulate.h"
+
 #include "pipe/p_context.h"
 #include "pipe/p_defines.h"
 #include "pipe/p_shader_tokens.h"
@@ -53,6 +56,7 @@
 #include "st_context.h"
 #include "st_program.h"
 #include "st_mesa_to_tgsi.h"
+#include "st_nir.h"
 #include "cso_cache/cso_context.h"
 
 
@@ -69,10 +73,10 @@ delete_vp_variant(struct st_context *st, struct st_vp_variant *vpv)
       
    if (vpv->draw_shader)
       draw_delete_vertex_shader( st->draw, vpv->draw_shader );
-      
-   if (vpv->tgsi.tokens)
+
+   if (((vpv->tgsi.type == PIPE_SHADER_IR_TGSI)) && vpv->tgsi.tokens)
       ureg_free_tokens(vpv->tgsi.tokens);
-      
+
    free( vpv );
 }
 
@@ -95,7 +99,7 @@ st_release_vp_variants( struct st_context *st,
 
    stvp->variants = NULL;
 
-   if (stvp->tgsi.tokens) {
+   if ((stvp->tgsi.type == PIPE_SHADER_IR_TGSI) && stvp->tgsi.tokens) {
       tgsi_free_tokens(stvp->tgsi.tokens);
       stvp->tgsi.tokens = NULL;
    }
@@ -132,7 +136,7 @@ st_release_fp_variants(struct st_context *st, struct st_fragment_program *stfp)
 
    stfp->variants = NULL;
 
-   if (stfp->tgsi.tokens) {
+   if ((stfp->tgsi.type == PIPE_SHADER_IR_TGSI) && stfp->tgsi.tokens) {
       ureg_free_tokens(stfp->tgsi.tokens);
       stfp->tgsi.tokens = NULL;
    }
@@ -386,9 +390,25 @@ st_translate_vertex_program(struct st_context *st,
    output_semantic_name[num_outputs] = TGSI_SEMANTIC_EDGEFLAG;
    output_semantic_index[num_outputs] = 0;
 
-   if (!stvp->glsl_to_tgsi)
+   if (!stvp->glsl_to_tgsi && !stvp->shader_program)
       _mesa_remove_output_reads(&stvp->Base.Base, PROGRAM_OUTPUT);
 
+   if (stvp->shader_program) {
+      nir_shader *nir = st_glsl_to_nir(st, &stvp->Base.Base,
+                                       stvp->shader_program,
+                                       MESA_SHADER_VERTEX);
+
+      st_finalize_nir(st, &stvp->Base.Base, nir);
+
+      stvp->tgsi.type = PIPE_SHADER_IR_NIR;
+      stvp->tgsi.ir.nir = nir;
+
+      st_translate_stream_output_info2(&stvp->shader_program->LinkedTransformFeedback,
+                                       stvp->result_to_output,
+                                       &stvp->tgsi.stream_output);
+      return true;
+   }
+
    ureg = ureg_create_with_screen(TGSI_PROCESSOR_VERTEX, st->pipe->screen);
    if (ureg == NULL)
       return false;
@@ -459,6 +479,15 @@ st_translate_vertex_program(struct st_context *st,
    return stvp->tgsi.tokens != NULL;
 }
 
+static unsigned
+st_vp_variant_flags(const struct st_vp_variant_key *key)
+{
+   unsigned flags =
+      (key->clamp_color ? TGSI_EMU_CLAMP_COLOR_OUTPUTS : 0) |
+      (key->passthrough_edgeflags ? TGSI_EMU_PASSTHROUGH_EDGEFLAG : 0);
+   return flags;
+}
+
 static struct st_vp_variant *
 st_create_vp_variant(struct st_context *st,
                      struct st_vertex_program *stvp,
@@ -466,18 +495,29 @@ st_create_vp_variant(struct st_context *st,
 {
    struct st_vp_variant *vpv = CALLOC_STRUCT(st_vp_variant);
    struct pipe_context *pipe = st->pipe;
+   unsigned flags = st_vp_variant_flags(key);
 
    vpv->key = *key;
-   vpv->tgsi.tokens = tgsi_dup_tokens(stvp->tgsi.tokens);
    vpv->tgsi.stream_output = stvp->tgsi.stream_output;
    vpv->num_inputs = stvp->num_inputs;
 
+   if (stvp->tgsi.type == PIPE_SHADER_IR_NIR) {
+      vpv->tgsi.type = PIPE_SHADER_IR_NIR;
+      vpv->tgsi.ir.nir = nir_shader_clone(NULL, stvp->tgsi.ir.nir);
+      if (flags) {
+         nir_emulate(vpv->tgsi.ir.nir, flags);
+      }
+      vpv->driver_shader = pipe->create_vs_state(pipe, &vpv->tgsi);
+      /* driver takes ownership of IR: */
+      vpv->tgsi.ir.nir = NULL;
+      return vpv;
+   }
+
+   vpv->tgsi.tokens = tgsi_dup_tokens(stvp->tgsi.tokens);
+
    /* Emulate features. */
-   if (key->clamp_color || key->passthrough_edgeflags) {
+   if (flags) {
       const struct tgsi_token *tokens;
-      unsigned flags =
-         (key->clamp_color ? TGSI_EMU_CLAMP_COLOR_OUTPUTS : 0) |
-         (key->passthrough_edgeflags ? TGSI_EMU_PASSTHROUGH_EDGEFLAG : 0);
 
       tokens = tgsi_emulate(vpv->tgsi.tokens, flags);
 
@@ -581,7 +621,7 @@ st_translate_fragment_program(struct st_context *st,
 
    memset(inputSlotToAttr, ~0, sizeof(inputSlotToAttr));
 
-   if (!stfp->glsl_to_tgsi) {
+   if (!stfp->glsl_to_tgsi && !stfp->shader_program) {
       _mesa_remove_output_reads(&stfp->Base.Base, PROGRAM_OUTPUT);
       if (st->ctx->Const.GLSLFragCoordIsSysVal)
          _mesa_program_fragment_position_to_sysval(&stfp->Base.Base);
@@ -788,6 +828,17 @@ st_translate_fragment_program(struct st_context *st,
       }
    }
 
+   if (stfp->shader_program) {
+      nir_shader *nir = st_glsl_to_nir(st, &stfp->Base.Base,
+                                       stfp->shader_program,
+                                       MESA_SHADER_FRAGMENT);
+
+      stfp->tgsi.type = PIPE_SHADER_IR_NIR;
+      stfp->tgsi.ir.nir = nir;
+
+      return true;
+   }
+
    ureg = ureg_create_with_screen(TGSI_PROCESSOR_FRAGMENT, st->pipe->screen);
    if (ureg == NULL)
       return false;
@@ -868,6 +919,15 @@ st_translate_fragment_program(struct st_context *st,
    return stfp->tgsi.tokens != NULL;
 }
 
+static unsigned
+st_fp_variant_flags(const struct st_fp_variant_key *key)
+{
+   unsigned flags =
+      (key->clamp_color ? TGSI_EMU_CLAMP_COLOR_OUTPUTS : 0) |
+      (key->persample_shading ? TGSI_EMU_FORCE_PERSAMPLE_INTERP : 0);
+   return flags;
+}
+
 static struct st_fp_variant *
 st_create_fp_variant(struct st_context *st,
                      struct st_fragment_program *stfp,
@@ -876,20 +936,86 @@ st_create_fp_variant(struct st_context *st,
    struct pipe_context *pipe = st->pipe;
    struct st_fp_variant *variant = CALLOC_STRUCT(st_fp_variant);
    struct pipe_shader_state tgsi = {0};
+   unsigned flags = st_fp_variant_flags(key);
+   struct gl_program_parameter_list *params = stfp->Base.Base.Parameters;
+   static const gl_state_index texcoord_state[STATE_LENGTH] =
+      { STATE_INTERNAL, STATE_CURRENT_ATTRIB, VERT_ATTRIB_TEX0 };
+   static const gl_state_index scale_state[STATE_LENGTH] =
+      { STATE_INTERNAL, STATE_PT_SCALE };
+   static const gl_state_index bias_state[STATE_LENGTH] =
+      { STATE_INTERNAL, STATE_PT_BIAS };
 
    if (!variant)
       return NULL;
 
+   if (stfp->tgsi.type == PIPE_SHADER_IR_NIR) {
+      tgsi.type = PIPE_SHADER_IR_NIR;
+      tgsi.ir.nir = nir_shader_clone(NULL, stfp->tgsi.ir.nir);
+
+      if (flags)
+         nir_emulate(tgsi.ir.nir, flags);
+
+      assert(!(key->bitmap && key->drawpixels));
+
+      /* glBitmap */
+      if (key->bitmap) {
+         nir_lower_bitmap_options options = {0};
+
+         variant->bitmap_sampler = ffs(~stfp->Base.Base.SamplersUsed) - 1;
+         options.sampler = variant->bitmap_sampler;
+         options.swizzle_xxxx = (st->bitmap.tex_format == PIPE_FORMAT_L8_UNORM);
+
+         nir_lower_bitmap(tgsi.ir.nir, &options);
+      }
+
+      /* glDrawPixels (color only) */
+      if (key->drawpixels) {
+         nir_lower_drawpixels_options options = {0};
+         unsigned samplers_used = stfp->Base.Base.SamplersUsed;
+
+         /* Find the first unused slot. */
+         variant->drawpix_sampler = ffs(~samplers_used) - 1;
+         options.drawpix_sampler = variant->drawpix_sampler;
+         samplers_used |= (1 << variant->drawpix_sampler);
+
+         options.pixel_maps = key->pixelMaps;
+         if (key->pixelMaps) {
+            variant->pixelmap_sampler = ffs(~samplers_used) - 1;
+            options.pixelmap_sampler = variant->pixelmap_sampler;
+         }
+
+         options.scale_and_bias = key->scaleAndBias;
+         if (key->scaleAndBias) {
+            _mesa_add_state_reference(params, scale_state);
+            memcpy(options.scale_state_tokens, scale_state,
+                   sizeof(options.scale_state_tokens));
+            _mesa_add_state_reference(params, bias_state);
+            memcpy(options.bias_state_tokens, bias_state,
+                   sizeof(options.bias_state_tokens));
+         }
+
+         _mesa_add_state_reference(params, texcoord_state);
+         memcpy(options.texcoord_state_tokens, texcoord_state,
+                sizeof(options.texcoord_state_tokens));
+
+         nir_lower_drawpixels(tgsi.ir.nir, &options);
+      }
+
+      st_finalize_nir(st, &stfp->Base.Base, tgsi.ir.nir);
+
+      variant->driver_shader = pipe->create_fs_state(pipe, &tgsi);
+      variant->key = *key;
+
+      return variant;
+   }
+
    tgsi.tokens = stfp->tgsi.tokens;
 
    assert(!(key->bitmap && key->drawpixels));
 
    /* Emulate features. */
-   if (key->clamp_color || key->persample_shading) {
+   if (flags) {
       const struct tgsi_token *tokens;
-      unsigned flags =
-         (key->clamp_color ? TGSI_EMU_CLAMP_COLOR_OUTPUTS : 0) |
-         (key->persample_shading ? TGSI_EMU_FORCE_PERSAMPLE_INTERP : 0);
 
       tokens = tgsi_emulate(tgsi.tokens, flags);
 
@@ -923,7 +1049,6 @@ st_create_fp_variant(struct st_context *st,
    if (key->drawpixels) {
       const struct tgsi_token *tokens;
       unsigned scale_const = 0, bias_const = 0, texcoord_const = 0;
-      struct gl_program_parameter_list *params = stfp->Base.Base.Parameters;
 
       /* Find the first unused slot. */
       variant->drawpix_sampler = ffs(~stfp->Base.Base.SamplersUsed) - 1;
@@ -936,21 +1061,11 @@ st_create_fp_variant(struct st_context *st,
       }
 
       if (key->scaleAndBias) {
-         static const gl_state_index scale_state[STATE_LENGTH] =
-            { STATE_INTERNAL, STATE_PT_SCALE };
-         static const gl_state_index bias_state[STATE_LENGTH] =
-            { STATE_INTERNAL, STATE_PT_BIAS };
-
          scale_const = _mesa_add_state_reference(params, scale_state);
          bias_const = _mesa_add_state_reference(params, bias_state);
       }
 
-      {
-         static const gl_state_index state[STATE_LENGTH] =
-            { STATE_INTERNAL, STATE_CURRENT_ATTRIB, VERT_ATTRIB_TEX0 };
-
-         texcoord_const = _mesa_add_state_reference(params, state);
-      }
+      texcoord_const = _mesa_add_state_reference(params, texcoord_state);
 
       tokens = st_get_drawpix_shader(tgsi.tokens,
                                      st->needs_texcoord_semantic,
diff --git a/src/mesa/state_tracker/st_program.h b/src/mesa/state_tracker/st_program.h
index a745315..869d6a1 100644
--- a/src/mesa/state_tracker/st_program.h
+++ b/src/mesa/state_tracker/st_program.h
@@ -100,6 +100,9 @@ struct st_fragment_program
    struct pipe_shader_state tgsi;
    struct glsl_to_tgsi_visitor* glsl_to_tgsi;
 
+   /* used when bypassing glsl_to_tgsi: */
+   struct gl_shader_program *shader_program;
+
    struct st_fp_variant *variants;
 };
 
@@ -156,6 +159,9 @@ struct st_vertex_program
    struct pipe_shader_state tgsi;
    struct glsl_to_tgsi_visitor* glsl_to_tgsi;
 
+   /* used when bypassing glsl_to_tgsi: */
+   struct gl_shader_program *shader_program;
+
    /** maps a Mesa VERT_ATTRIB_x to a packed TGSI input index */
    /** maps a TGSI input index back to a Mesa VERT_ATTRIB_x */
    GLuint index_to_input[PIPE_MAX_SHADER_INPUTS];
-- 
2.5.0