[Mesa-dev] [RFCv2 06/13] mesa/st: add support for NIR as possible driver IR

Tue Nov 10 07:59:10 PST 2015

On Sun, Nov 8, 2015 at 3:12 PM, Rob Clark <robdclark at gmail.com> wrote:
> ---
>  src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 410 ++++++++++++++++++++++++++++-
>  src/mesa/state_tracker/st_glsl_to_tgsi.h   |   5 +
>  src/mesa/state_tracker/st_program.c        | 118 +++++++--
>  src/mesa/state_tracker/st_program.h        |   6 +
>  4 files changed, 520 insertions(+), 19 deletions(-)
>
> diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> index f481e89..fbc598e 100644
> --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> @@ -35,6 +35,9 @@
>  #include "glsl_parser_extras.h"
>  #include "ir_optimization.h"
>
> +#include "nir.h"
> +#include "glsl_to_nir.h"
> +
>  #include "main/errors.h"
>  #include "main/shaderobj.h"
>  #include "main/uniforms.h"
> @@ -5486,9 +5489,9 @@ out:
>   * generating Mesa IR.
>   */
>  static struct gl_program *
> -get_mesa_program(struct gl_context *ctx,
> -                 struct gl_shader_program *shader_program,
> -                 struct gl_shader *shader)
> +get_mesa_program_tgsi(struct gl_context *ctx,
> +                      struct gl_shader_program *shader_program,
> +                      struct gl_shader *shader)
>  {
>     glsl_to_tgsi_visitor* v;
>     struct gl_program *prog;
> @@ -5680,6 +5683,396 @@ get_mesa_program(struct gl_context *ctx,
>     return prog;
>  }
>
> +/* TODO dup'd from brw_vec4_vistor.cpp..  what should we do? */
> +static int
> +type_size_vec4(const struct glsl_type *type)
> +{
> +   unsigned int i;
> +   int size;
> +
> +   switch (type->base_type) {
> +   case GLSL_TYPE_UINT:
> +   case GLSL_TYPE_INT:
> +   case GLSL_TYPE_FLOAT:
> +   case GLSL_TYPE_BOOL:
> +      if (type->is_matrix()) {
> +        return type->matrix_columns;
> +      } else {
> +        /* Regardless of size of vector, it gets a vec4. This is bad
> +         * packing for things like floats, but otherwise arrays become a
> +         * mess.  Hopefully a later pass over the code can pack scalars
> +         * down if appropriate.
> +         */
> +        return 1;
> +      }
> +   case GLSL_TYPE_ARRAY:
> +      assert(type->length > 0);
> +      return type_size_vec4(type->fields.array) * type->length;
> +   case GLSL_TYPE_STRUCT:
> +      size = 0;
> +      for (i = 0; i < type->length; i++) {
> +        size += type_size_vec4(type->fields.structure[i].type);
> +      }
> +      return size;
> +   case GLSL_TYPE_SUBROUTINE:
> +      return 1;
> +
> +   case GLSL_TYPE_SAMPLER:
> +      /* Samplers take up no register space, since they're baked in at
> +       * link time.
> +       */
> +      return 0;
> +   case GLSL_TYPE_ATOMIC_UINT:
> +      return 0;
> +   case GLSL_TYPE_IMAGE:
> +//      return DIV_ROUND_UP(BRW_IMAGE_PARAM_SIZE, 4);
> +   case GLSL_TYPE_VOID:
> +   case GLSL_TYPE_DOUBLE:
> +   case GLSL_TYPE_ERROR:
> +   case GLSL_TYPE_INTERFACE:
> +      unreachable("not reached");
> +   }
> +
> +   return 0;
> +}
> +
> +/* Depending on PIPE_CAP_TGSI_TEXCOORD (st->needs_texcoord_semantic) we
> + * may need to fix up varying slots so the glsl->nir path is aligned
> + * with the anything->tgsi->nir path.
> + */
> +static void
> +st_nir_fixup_varying_slots(struct st_context *st, struct exec_list *var_list)
> +{
> +   if (st->needs_texcoord_semantic)
> +      return;
> +
> +   nir_foreach_variable(var, var_list) {
> +      if (var->data.location >= VARYING_SLOT_VAR0) {
> +         var->data.location += 9;
> +      } else if ((var->data.location >= VARYING_SLOT_TEX0) &&
> +               (var->data.location <= VARYING_SLOT_TEX7)) {
> +         var->data.location += VARYING_SLOT_VAR0 - VARYING_SLOT_TEX0;
> +      }
> +   }
> +}
> +
> +/* input location assignment for VS inputs must be handled specially, so
> + * that it is aligned w/ st's vbo state.
> + * (This isn't the case with, for ex, FS inputs, which only need to agree
> + * on varying-slot w/ the VS outputs)
> + */
> +static void
> +st_nir_assign_vs_in_locations(struct gl_program *prog,
> +                              struct exec_list *var_list, unsigned *size)
> +{
> +   unsigned attr, num_inputs = 0;
> +   unsigned input_to_index[VERT_ATTRIB_MAX] = {0};
> +
> +   /* TODO de-duplicate w/ similar code in st_translate_vertex_program()? */
> +   for (attr = 0; attr < VERT_ATTRIB_MAX; attr++) {
> +      if ((prog->InputsRead & BITFIELD64_BIT(attr)) != 0) {
> +         input_to_index[attr] = num_inputs;
> +         num_inputs++;
> +         if ((prog->DoubleInputsRead & BITFIELD64_BIT(attr)) != 0) {
> +            /* add placeholder for second part of a double attribute */
> +            num_inputs++;
> +         }
> +      }
> +   }
> +
> +   *size = 0;
> +   nir_foreach_variable(var, var_list) {
> +      attr = var->data.location;
> +      assert(attr < ARRAY_SIZE(input_to_index));
> +      var->data.driver_location = input_to_index[attr];
> +      (*size)++;
> +   }
> +}
> +
> +static void
> +st_nir_assign_uniform_locations(struct gl_program *prog,
> +                                struct exec_list *uniform_list, unsigned *size)
> +{
> +   int max = 0;
> +   int shaderidx = 0;
> +
> +   nir_foreach_variable(uniform, uniform_list) {
> +      int loc;
> +
> +      if (uniform->type->is_sampler()) {
> +         loc = shaderidx++;
> +         uniform->data.location = loc; /* this should match resulting sampler idx */
> +      } else if (strncmp(uniform->name, "gl_", 3) == 0) {
> +         const gl_state_index *const stateTokens = (gl_state_index *)uniform->state_slots[0].tokens;
> +         /* This state reference has already been setup by ir_to_mesa, but we'll
> +          * get the same index back here.
> +          */
> +         loc = _mesa_add_state_reference(prog->Parameters, stateTokens);
> +      } else {
> +         loc = _mesa_lookup_parameter_index(prog->Parameters, -1, uniform->name);
> +      }
> +
> +      /* is there a better way to do this?  If we have something like:
> +       *
> +       *    struct S {
> +       *           float f;
> +       *           vec4 v;
> +       *    };
> +       *    uniform S color;
> +       *
> +       * Then what we get in prog->Parameters looks like:
> +       *
> +       *    0: Name=color.f, Type=6, DataType=1406, Size=1
> +       *    1: Name=color.v, Type=6, DataType=8b52, Size=4
> +       *
> +       * So the name doesn't match up and _mesa_lookup_parameter_index()
> +       * fails.  In this case just find the first matching "color.*"..
> +       */
> +      if (loc < 0) {
> +         int namelen = strlen(uniform->name);
> +         for (unsigned i = 0; i < prog->Parameters->NumParameters; i++) {
> +            struct gl_program_parameter *p = &prog->Parameters->Parameters[i];
> +            if ((strncmp(p->Name, uniform->name, namelen) == 0) &&
> +                (p->Name[namelen] == '.')) {
> +               loc = i;
> +               break;
> +            }
> +         }
> +      }
> +
> +      uniform->data.driver_location = loc;
> +      if (!uniform->type->is_sampler())
> +         max = MAX2(max, loc + type_size_vec4(uniform->type));
> +   }
> +   *size = max;
> +}

Do you really need to do all this here? It seems to me that it would
be much less painful if both tgsi_to_nir and the st just used
variables and the location field for IO, and then left the call to
nir_lower_io() to the driver. This is the intention of the whole
driver_location thing -- it's supposed to be decided by the driver
(hence the name ;) ) and map 1:1, or at least relatively closely, to
the indices the driver actually wants. We recently refactored i965 to
basically do that, and it simplified things a lot, so it's been proven
that there's at least some utility to the idea. It would also
alleviate some of the problems you've been having with allocating
extra variables in NIR lowering passes inside your driver, and make
those lowering passes useful for i965 as well.

> +
> +static struct gl_program *
> +get_mesa_program_nir(struct gl_context *ctx,
> +                     struct gl_shader_program *shader_program,
> +                     struct gl_shader *shader)
> +{
> +   struct gl_program *prog;
> +   GLenum target = _mesa_shader_stage_to_program(shader->Stage);
> +
> +   validate_ir_tree(shader->ir);
> +
> +   prog = ctx->Driver.NewProgram(ctx, target, shader_program->Name);
> +   if (!prog)
> +      return NULL;
> +
> +   prog->Parameters = _mesa_new_parameter_list();
> +
> +   _mesa_copy_linked_program_data(shader->Stage, shader_program, prog);
> +   _mesa_generate_parameters_list_for_uniforms(shader_program, shader,
> +                                               prog->Parameters);
> +
> +//   /* Remove reads from output registers. */
> +//   lower_output_reads(shader->Stage, shader->ir);
> +
> +   /* Make a pass over the IR to add state references for any built-in
> +    * uniforms that are used.  This has to be done now (during linking).
> +    * Code generation doesn't happen until the first time this shader is
> +    * used for rendering.  Waiting until then to generate the parameters is
> +    * too late.  At that point, the values for the built-in uniforms won't
> +    * get sent to the shader.
> +    */
> +   foreach_in_list(ir_instruction, node, shader->ir) {
> +      ir_variable *var = node->as_variable();
> +
> +      if ((var == NULL) || (var->data.mode != ir_var_uniform) ||
> +          (strncmp(var->name, "gl_", 3) != 0))
> +         continue;
> +
> +      const ir_state_slot *const slots = var->get_state_slots();
> +      assert(slots != NULL);
> +
> +      for (unsigned int i = 0; i < var->get_num_state_slots(); i++) {
> +         _mesa_add_state_reference(prog->Parameters,
> +                                   (gl_state_index *) slots[i].tokens);
> +      }
> +   }
> +
> +   if (ctx->_Shader->Flags & GLSL_DUMP) {
> +      _mesa_log("\n");
> +      _mesa_log("GLSL IR for linked %s program %d:\n",
> +             _mesa_shader_stage_to_string(shader->Stage),
> +             shader_program->Name);
> +      _mesa_print_ir(_mesa_get_log_file(), shader->ir, NULL);
> +      _mesa_log("\n\n");
> +   }
> +
> +   prog->Instructions = NULL;
> +   prog->NumInstructions = 0;
> +
> +   do_set_program_inouts(shader->ir, prog, shader->Stage);
> +
> +   prog->SamplersUsed = shader->active_samplers;
> +   prog->ShadowSamplers = shader->shadow_samplers;
> +   _mesa_update_shader_textures_used(shader_program, prog);
> +
> +   _mesa_reference_program(ctx, &shader->Program, prog);
> +
> +
> +   /* This has to be done last.  Any operation the can cause
> +    * prog->ParameterValues to get reallocated (e.g., anything that adds a
> +    * program constant) has to happen before creating this linkage.
> +    */
> +   _mesa_associate_uniform_storage(ctx, shader_program, prog->Parameters);
> +
> +   struct st_vertex_program *stvp;
> +   struct st_fragment_program *stfp;
> +
> +   switch (shader->Type) {
> +   case GL_VERTEX_SHADER:
> +      stvp = (struct st_vertex_program *)prog;
> +      stvp->shader_program = shader_program;
> +      break;
> +   case GL_FRAGMENT_SHADER:
> +      stfp = (struct st_fragment_program *)prog;
> +      stfp->shader_program = shader_program;
> +      break;
> +   default:
> +      assert(!"should not be reached");
> +      return NULL;
> +   }
> +
> +   return prog;
> +}
> +
> +
> +/* TODO probably get this from pipe_screen?
> + * NOTE we need this to not be on the stack, so it doesn't go away when
> + * get_mesa_program_nir() returns.. or just allow pipe driver to supply..
> + */
> +static nir_shader_compiler_options nir_options;
> +
> +// XXX move this!
> +extern "C" {
> +nir_shader *
> +st_glsl_to_nir(struct st_context *st, struct gl_program *prog,
> +               struct gl_shader_program *shader_program,
> +               gl_shader_stage stage)
> +{
> +   nir_shader *nir;
> +
> +   if (prog->nir)
> +      return prog->nir;
> +
> +   // XXX get from pipe_screen?  Or just let pipe driver provide?
> +   nir_options.lower_fpow = true;
> +   nir_options.lower_fsat = true;
> +   nir_options.lower_scmp = true;
> +   nir_options.lower_flrp = true;
> +   nir_options.lower_ffract = true;
> +   nir_options.native_integers = true;
> +
> +   nir = glsl_to_nir(shader_program, stage, &nir_options);
> +   prog->nir = nir;
> +
> +   nir_validate_shader(nir);
> +
> +   nir_print_shader(nir, _mesa_get_log_file());
> +
> +   nir_lower_global_vars_to_local(nir);
> +   nir_validate_shader(nir);
> +
> +   nir_split_var_copies(nir);
> +   nir_lower_var_copies(nir);
> +   nir_validate_shader(nir);
> +
> +   /* fragment shaders may need : */
> +   if (stage == MESA_SHADER_FRAGMENT) {
> +      static const gl_state_index wposTransformState[STATE_LENGTH] = {
> +         STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM
> +      };
> +      nir_lower_wpos_ytransform_options wpos_options = {0};
> +      struct pipe_screen *pscreen = st->pipe->screen;
> +
> +      memcpy(wpos_options.state_tokens, wposTransformState,
> +             sizeof(wpos_options.state_tokens));
> +      wpos_options.fs_coord_origin_upper_left =
> +         pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT);
> +      wpos_options.fs_coord_origin_lower_left =
> +         pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
> +      wpos_options.fs_coord_pixel_center_integer =
> +         pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
> +      wpos_options.fs_coord_pixel_center_half_integer =
> +         pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER);
> +
> +      if (nir_lower_wpos_ytransform(nir, &wpos_options)) {
> +         _mesa_add_state_reference(prog->Parameters, wposTransformState);
> +      }
> +   }
> +
> +   if (stage == MESA_SHADER_VERTEX) {
> +      /* Needs special handling so drvloc matches the vbo state: */
> +      st_nir_assign_vs_in_locations(prog, &nir->inputs, &nir->num_inputs);
> +      nir_assign_var_locations(&nir->outputs,
> +                               &nir->num_outputs,
> +                               type_size_vec4);
> +      st_nir_fixup_varying_slots(st, &nir->outputs);
> +   } else if (stage == MESA_SHADER_FRAGMENT) {
> +      nir_assign_var_locations(&nir->inputs,
> +                               &nir->num_inputs,
> +                               type_size_vec4);
> +      st_nir_fixup_varying_slots(st, &nir->inputs);
> +      nir_assign_var_locations(&nir->outputs,
> +                               &nir->num_outputs,
> +                               type_size_vec4);
> +   } else {
> +      unreachable("invalid shader type for tgsi bypass\n");
> +   }
> +
> +   st_nir_assign_uniform_locations(prog, &nir->uniforms, &nir->num_uniforms);
> +
> +   nir_lower_system_values(nir);
> +   nir_lower_io(nir, nir_var_all, type_size_vec4);
> +   nir_lower_samplers(nir, NULL);
> +   nir_validate_shader(nir);
> +
> +   // XXX do we need anything else from brw_create_nir().. and what
> +   // is best way to split up which things should be here vs driver?
> +   // currently just trying to make the result here similar to what
> +   // we get from tgsi_to_nir().. so lower_io, etc..
> +
> +   // XXX new flag, probably?
> +   if (st->ctx->_Shader->Flags & GLSL_DUMP) {
> +      _mesa_log("\n");
> +      _mesa_log("NIR IR for linked %s program %d:\n",
> +             _mesa_shader_stage_to_string(stage),
> +             shader_program->Name);
> +      nir_print_shader(nir, _mesa_get_log_file());
> +      _mesa_log("\n\n");
> +   }
> +
> +   return nir;
> +}
> +}
> +
> +static struct gl_program *
> +get_mesa_program(struct gl_context *ctx,
> +                 struct gl_shader_program *shader_program,
> +                 struct gl_shader *shader)
> +{
> +   struct pipe_screen *pscreen = ctx->st->pipe->screen;
> +   unsigned ptarget = st_shader_stage_to_ptarget(shader->Stage);
> +   enum pipe_shader_ir preferred_ir = (enum pipe_shader_ir)
> +      pscreen->get_shader_param(pscreen, ptarget, PIPE_SHADER_CAP_PREFERRED_IR);
> +   if (preferred_ir == PIPE_SHADER_IR_NIR) {
> +      /* TODO only for GLSL VS/FS for now: */
> +      switch (shader->Type) {
> +      case GL_VERTEX_SHADER:
> +      case GL_FRAGMENT_SHADER:
> +         return get_mesa_program_nir(ctx, shader_program, shader);
> +      default:
> +         break;
> +      }
> +   }
> +   return get_mesa_program_tgsi(ctx, shader_program, shader);
> +}
> +
> +
>  extern "C" {
>
>  static void
> @@ -5880,9 +6273,18 @@ st_translate_stream_output_info(glsl_to_tgsi_visitor *glsl_to_tgsi,
>                                  const GLuint outputMapping[],
>                                  struct pipe_stream_output_info *so)
>  {
> -   unsigned i;
>     struct gl_transform_feedback_info *info =
>        &glsl_to_tgsi->shader_program->LinkedTransformFeedback;
> +   st_translate_stream_output_info2(info, outputMapping, so);
> +}
> +
> +/* TODO better name, and split out into own patch.. */
> +void
> +st_translate_stream_output_info2(struct gl_transform_feedback_info *info,
> +                                const GLuint outputMapping[],
> +                                struct pipe_stream_output_info *so)
> +{
> +   unsigned i;
>
>     for (i = 0; i < info->NumOutputs; i++) {
>        so->output[i].register_index =
> diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.h b/src/mesa/state_tracker/st_glsl_to_tgsi.h
> index 729295b..1986025 100644
> --- a/src/mesa/state_tracker/st_glsl_to_tgsi.h
> +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.h
> @@ -63,6 +63,11 @@ st_translate_stream_output_info(struct glsl_to_tgsi_visitor *glsl_to_tgsi,
>                                  const GLuint outputMapping[],
>                                  struct pipe_stream_output_info *so);
>
> +void
> +st_translate_stream_output_info2(struct gl_transform_feedback_info *info,
> +                                const GLuint outputMapping[],
> +                                struct pipe_stream_output_info *so);
> +
>  extern const unsigned _mesa_sysval_to_semantic[SYSTEM_VALUE_MAX];
>
>  #ifdef __cplusplus
> diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c
> index 75ccaf2..d2e4627 100644
> --- a/src/mesa/state_tracker/st_program.c
> +++ b/src/mesa/state_tracker/st_program.c
> @@ -38,6 +38,9 @@
>  #include "program/prog_print.h"
>  #include "program/programopt.h"
>
> +#include "nir.h"
> +#include "nir/nir_emulate.h"
> +
>  #include "pipe/p_context.h"
>  #include "pipe/p_defines.h"
>  #include "pipe/p_shader_tokens.h"
> @@ -70,8 +73,14 @@ delete_vp_variant(struct st_context *st, struct st_vp_variant *vpv)
>     if (vpv->draw_shader)
>        draw_delete_vertex_shader( st->draw, vpv->draw_shader );
>
> -   if (vpv->tgsi.tokens)
> -      ureg_free_tokens(vpv->tgsi.tokens);
> +   if (vpv->tgsi.ir == PIPE_SHADER_IR_NIR) {
> +/* TODO until nir_clone():
> +      ralloc_free(vpv->tgsi.nir);
> + */
> +   } else {
> +      if (vpv->tgsi.tokens)
> +         ureg_free_tokens(vpv->tgsi.tokens);
> +   }
>
>     free( vpv );
>  }
> @@ -95,7 +104,12 @@ st_release_vp_variants( struct st_context *st,
>
>     stvp->variants = NULL;
>
> -   if (stvp->tgsi.tokens) {
> +   if (stvp->tgsi.ir == PIPE_SHADER_IR_NIR) {
> +/*
> +      ralloc_free(stvp->tgsi.nir);
> +      stvp->tgsi.nir = NULL;
> + */
> +   } else if (stvp->tgsi.tokens) {
>        tgsi_free_tokens(stvp->tgsi.tokens);
>        stvp->tgsi.tokens = NULL;
>     }
> @@ -134,7 +148,12 @@ st_release_fp_variants(struct st_context *st, struct st_fragment_program *stfp)
>
>     stfp->variants = NULL;
>
> -   if (stfp->tgsi.tokens) {
> +   if (stfp->tgsi.ir == PIPE_SHADER_IR_NIR) {
> +/*
> +      ralloc_free(stfp->tgsi.nir);
> +      stfp->tgsi.nir = NULL;
> + */
> +   } else if (stfp->tgsi.tokens) {
>        ureg_free_tokens(stfp->tgsi.tokens);
>        stfp->tgsi.tokens = NULL;
>     }
> @@ -251,6 +270,11 @@ st_release_tep_variants(struct st_context *st, struct st_tesseval_program *sttep
>     }
>  }
>
> +// XXX
> +nir_shader *
> +st_glsl_to_nir(struct st_context *st, struct gl_program *prog,
> +               struct gl_shader_program *shader_program,
> +               gl_shader_stage stage);
>
>  /**
>   * Translate a vertex program.
> @@ -388,9 +412,23 @@ st_translate_vertex_program(struct st_context *st,
>     output_semantic_name[num_outputs] = TGSI_SEMANTIC_EDGEFLAG;
>     output_semantic_index[num_outputs] = 0;
>
> -   if (!stvp->glsl_to_tgsi)
> +   if (!stvp->glsl_to_tgsi && !stvp->shader_program)
>        _mesa_remove_output_reads(&stvp->Base.Base, PROGRAM_OUTPUT);
>
> +   if (stvp->shader_program) {
> +      nir_shader *nir = st_glsl_to_nir(st, &stvp->Base.Base,
> +                                       stvp->shader_program,
> +                                       MESA_SHADER_VERTEX);
> +
> +      stvp->tgsi.ir = PIPE_SHADER_IR_NIR;
> +      stvp->tgsi.nir = nir;
> +
> +      st_translate_stream_output_info2(&stvp->shader_program->LinkedTransformFeedback,
> +                                       stvp->result_to_output,
> +                                       &stvp->tgsi.stream_output);
> +      return true;
> +   }
> +
>     ureg = ureg_create_with_screen(TGSI_PROCESSOR_VERTEX, st->pipe->screen);
>     if (ureg == NULL)
>        return false;
> @@ -461,6 +499,15 @@ st_translate_vertex_program(struct st_context *st,
>     return stvp->tgsi.tokens != NULL;
>  }
>
> +static unsigned
> +st_vp_variant_flags(const struct st_vp_variant_key *key)
> +{
> +   unsigned flags =
> +      (key->clamp_color ? TGSI_EMU_CLAMP_COLOR_OUTPUTS : 0) |
> +      (key->passthrough_edgeflags ? TGSI_EMU_PASSTHROUGH_EDGEFLAG : 0);
> +   return flags;
> +}
> +
>  static struct st_vp_variant *
>  st_create_vp_variant(struct st_context *st,
>                       struct st_vertex_program *stvp,
> @@ -468,18 +515,28 @@ st_create_vp_variant(struct st_context *st,
>  {
>     struct st_vp_variant *vpv = CALLOC_STRUCT(st_vp_variant);
>     struct pipe_context *pipe = st->pipe;
> +   unsigned flags = st_vp_variant_flags(key);
>
>     vpv->key = *key;
> -   vpv->tgsi.tokens = tgsi_dup_tokens(stvp->tgsi.tokens);
>     vpv->tgsi.stream_output = stvp->tgsi.stream_output;
>     vpv->num_inputs = stvp->num_inputs;
>
> +   if (stvp->tgsi.ir == PIPE_SHADER_IR_NIR) {
> +      vpv->tgsi.ir = PIPE_SHADER_IR_NIR;
> +      vpv->tgsi.nir = nir_shader_ref(stvp->tgsi.nir);
> +      if (flags) {
> +         vpv->tgsi.nir = nir_shader_mutable(vpv->tgsi.nir);
> +         nir_emulate(vpv->tgsi.nir, flags);
> +      }
> +      vpv->driver_shader = pipe->create_vs_state(pipe, &vpv->tgsi);
> +      return vpv;
> +   }
> +
> +   vpv->tgsi.tokens = tgsi_dup_tokens(stvp->tgsi.tokens);
> +
>     /* Emulate features. */
> -   if (key->clamp_color || key->passthrough_edgeflags) {
> +   if (flags) {
>        const struct tgsi_token *tokens;
> -      unsigned flags =
> -         (key->clamp_color ? TGSI_EMU_CLAMP_COLOR_OUTPUTS : 0) |
> -         (key->passthrough_edgeflags ? TGSI_EMU_PASSTHROUGH_EDGEFLAG : 0);
>
>        tokens = tgsi_emulate(vpv->tgsi.tokens, flags);
>
> @@ -583,7 +640,7 @@ st_translate_fragment_program(struct st_context *st,
>
>     memset(inputSlotToAttr, ~0, sizeof(inputSlotToAttr));
>
> -   if (!stfp->glsl_to_tgsi)
> +   if (!stfp->glsl_to_tgsi && !stfp->shader_program)
>        _mesa_remove_output_reads(&stfp->Base.Base, PROGRAM_OUTPUT);
>
>     /*
> @@ -787,6 +844,17 @@ st_translate_fragment_program(struct st_context *st,
>        }
>     }
>
> +   if (stfp->shader_program) {
> +      nir_shader *nir = st_glsl_to_nir(st, &stfp->Base.Base,
> +                                       stfp->shader_program,
> +                                       MESA_SHADER_FRAGMENT);
> +
> +      stfp->tgsi.ir = PIPE_SHADER_IR_NIR;
> +      stfp->tgsi.nir = nir;
> +
> +      return true;
> +   }
> +
>     ureg = ureg_create_with_screen(TGSI_PROCESSOR_FRAGMENT, st->pipe->screen);
>     if (ureg == NULL)
>        return false;
> @@ -867,6 +935,15 @@ st_translate_fragment_program(struct st_context *st,
>     return stfp->tgsi.tokens != NULL;
>  }
>
> +static unsigned
> +st_fp_variant_flags(const struct st_fp_variant_key *key)
> +{
> +   unsigned flags =
> +      (key->clamp_color ? TGSI_EMU_CLAMP_COLOR_OUTPUTS : 0) |
> +      (key->persample_shading ? TGSI_EMU_FORCE_PERSAMPLE_INTERP : 0);
> +   return flags;
> +}
> +
>  static struct st_fp_variant *
>  st_create_fp_variant(struct st_context *st,
>                       struct st_fragment_program *stfp,
> @@ -875,20 +952,31 @@ st_create_fp_variant(struct st_context *st,
>     struct pipe_context *pipe = st->pipe;
>     struct st_fp_variant *variant = CALLOC_STRUCT(st_fp_variant);
>     struct pipe_shader_state tgsi = {0};
> +   unsigned flags = st_fp_variant_flags(key);
>
>     if (!variant)
>        return NULL;
>
> +   if (stfp->tgsi.ir == PIPE_SHADER_IR_NIR) {
> +      tgsi.ir = PIPE_SHADER_IR_NIR;
> +      tgsi.nir = nir_shader_ref(stfp->tgsi.nir);
> +      if (flags) {
> +         tgsi.nir = nir_shader_mutable(tgsi.nir);
> +         nir_emulate(tgsi.nir, flags);
> +      }
> +      variant->driver_shader = pipe->create_fs_state(pipe, &tgsi);
> +      variant->key = *key;
> +
> +      return variant;
> +   }
> +
>     tgsi.tokens = stfp->tgsi.tokens;
>
>     assert(!(key->bitmap && key->drawpixels));
>
>     /* Emulate features. */
> -   if (key->clamp_color || key->persample_shading) {
> +   if (flags) {
>        const struct tgsi_token *tokens;
> -      unsigned flags =
> -         (key->clamp_color ? TGSI_EMU_CLAMP_COLOR_OUTPUTS : 0) |
> -         (key->persample_shading ? TGSI_EMU_FORCE_PERSAMPLE_INTERP : 0);
>
>        tokens = tgsi_emulate(tgsi.tokens, flags);
>
> diff --git a/src/mesa/state_tracker/st_program.h b/src/mesa/state_tracker/st_program.h
> index d9b53ac..9d82b39 100644
> --- a/src/mesa/state_tracker/st_program.h
> +++ b/src/mesa/state_tracker/st_program.h
> @@ -101,6 +101,9 @@ struct st_fragment_program
>     struct pipe_shader_state tgsi;
>     struct glsl_to_tgsi_visitor* glsl_to_tgsi;
>
> +   /* used when bypassing glsl_to_tgsi: */
> +   struct gl_shader_program *shader_program;
> +
>     struct st_fp_variant *variants;
>  };
>
> @@ -157,6 +160,9 @@ struct st_vertex_program
>     struct pipe_shader_state tgsi;
>     struct glsl_to_tgsi_visitor* glsl_to_tgsi;
>
> +   /* used when bypassing glsl_to_tgsi: */
> +   struct gl_shader_program *shader_program;
> +
>     /** maps a Mesa VERT_ATTRIB_x to a packed TGSI input index */
>     /** maps a TGSI input index back to a Mesa VERT_ATTRIB_x */
>     GLuint index_to_input[PIPE_MAX_SHADER_INPUTS];
> --
> 2.5.0
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev