[Mesa-dev] [RFCv2 06/13] mesa/st: add support for NIR as possible driver IR

Tue Nov 10 08:39:58 PST 2015

On Tue, Nov 10, 2015 at 10:59 AM, Connor Abbott <cwabbott0 at gmail.com> wrote:
> On Sun, Nov 8, 2015 at 3:12 PM, Rob Clark <robdclark at gmail.com> wrote:
>> ---
>>  src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 410 ++++++++++++++++++++++++++++-
>>  src/mesa/state_tracker/st_glsl_to_tgsi.h   |   5 +
>>  src/mesa/state_tracker/st_program.c        | 118 +++++++--
>>  src/mesa/state_tracker/st_program.h        |   6 +
>>  4 files changed, 520 insertions(+), 19 deletions(-)
>>
>> diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
>> index f481e89..fbc598e 100644
>> --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
>> +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
>> @@ -35,6 +35,9 @@
>>  #include "glsl_parser_extras.h"
>>  #include "ir_optimization.h"
>>
>> +#include "nir.h"
>> +#include "glsl_to_nir.h"
>> +
>>  #include "main/errors.h"
>>  #include "main/shaderobj.h"
>>  #include "main/uniforms.h"
>> @@ -5486,9 +5489,9 @@ out:
>>   * generating Mesa IR.
>>   */
>>  static struct gl_program *
>> -get_mesa_program(struct gl_context *ctx,
>> -                 struct gl_shader_program *shader_program,
>> -                 struct gl_shader *shader)
>> +get_mesa_program_tgsi(struct gl_context *ctx,
>> +                      struct gl_shader_program *shader_program,
>> +                      struct gl_shader *shader)
>>  {
>>     glsl_to_tgsi_visitor* v;
>>     struct gl_program *prog;
>> @@ -5680,6 +5683,396 @@ get_mesa_program(struct gl_context *ctx,
>>     return prog;
>>  }
>>
>> +/* TODO dup'd from brw_vec4_visitor.cpp..  what should we do? */
>> +static int
>> +type_size_vec4(const struct glsl_type *type)
>> +{
>> +   unsigned int i;
>> +   int size;
>> +
>> +   switch (type->base_type) {
>> +   case GLSL_TYPE_UINT:
>> +   case GLSL_TYPE_INT:
>> +   case GLSL_TYPE_FLOAT:
>> +   case GLSL_TYPE_BOOL:
>> +      if (type->is_matrix()) {
>> +        return type->matrix_columns;
>> +      } else {
>> +        /* Regardless of size of vector, it gets a vec4. This is bad
>> +         * packing for things like floats, but otherwise arrays become a
>> +         * mess.  Hopefully a later pass over the code can pack scalars
>> +         * down if appropriate.
>> +         */
>> +        return 1;
>> +      }
>> +   case GLSL_TYPE_ARRAY:
>> +      assert(type->length > 0);
>> +      return type_size_vec4(type->fields.array) * type->length;
>> +   case GLSL_TYPE_STRUCT:
>> +      size = 0;
>> +      for (i = 0; i < type->length; i++) {
>> +        size += type_size_vec4(type->fields.structure[i].type);
>> +      }
>> +      return size;
>> +   case GLSL_TYPE_SUBROUTINE:
>> +      return 1;
>> +
>> +   case GLSL_TYPE_SAMPLER:
>> +      /* Samplers take up no register space, since they're baked in at
>> +       * link time.
>> +       */
>> +      return 0;
>> +   case GLSL_TYPE_ATOMIC_UINT:
>> +      return 0;
>> +   case GLSL_TYPE_IMAGE:
>> +//      return DIV_ROUND_UP(BRW_IMAGE_PARAM_SIZE, 4);
>> +   case GLSL_TYPE_VOID:
>> +   case GLSL_TYPE_DOUBLE:
>> +   case GLSL_TYPE_ERROR:
>> +   case GLSL_TYPE_INTERFACE:
>> +      unreachable("not reached");
>> +   }
>> +
>> +   return 0;
>> +}
>> +
>> +/* Depending on PIPE_CAP_TGSI_TEXCOORD (st->needs_texcoord_semantic) we
>> + * may need to fix up varying slots so the glsl->nir path is aligned
>> + * with the anything->tgsi->nir path.
>> + */
>> +static void
>> +st_nir_fixup_varying_slots(struct st_context *st, struct exec_list *var_list)
>> +{
>> +   if (st->needs_texcoord_semantic)
>> +      return;
>> +
>> +   nir_foreach_variable(var, var_list) {
>> +      if (var->data.location >= VARYING_SLOT_VAR0) {
>> +         var->data.location += 9;
>> +      } else if ((var->data.location >= VARYING_SLOT_TEX0) &&
>> +               (var->data.location <= VARYING_SLOT_TEX7)) {
>> +         var->data.location += VARYING_SLOT_VAR0 - VARYING_SLOT_TEX0;
>> +      }
>> +   }
>> +}
>> +
>> +/* input location assignment for VS inputs must be handled specially, so
>> + * that it is aligned w/ st's vbo state.
>> + * (This isn't the case with, for ex, FS inputs, which only need to agree
>> + * on varying-slot w/ the VS outputs)
>> + */
>> +static void
>> +st_nir_assign_vs_in_locations(struct gl_program *prog,
>> +                              struct exec_list *var_list, unsigned *size)
>> +{
>> +   unsigned attr, num_inputs = 0;
>> +   unsigned input_to_index[VERT_ATTRIB_MAX] = {0};
>> +
>> +   /* TODO de-duplicate w/ similar code in st_translate_vertex_program()? */
>> +   for (attr = 0; attr < VERT_ATTRIB_MAX; attr++) {
>> +      if ((prog->InputsRead & BITFIELD64_BIT(attr)) != 0) {
>> +         input_to_index[attr] = num_inputs;
>> +         num_inputs++;
>> +         if ((prog->DoubleInputsRead & BITFIELD64_BIT(attr)) != 0) {
>> +            /* add placeholder for second part of a double attribute */
>> +            num_inputs++;
>> +         }
>> +      }
>> +   }
>> +
>> +   *size = 0;
>> +   nir_foreach_variable(var, var_list) {
>> +      attr = var->data.location;
>> +      assert(attr < ARRAY_SIZE(input_to_index));
>> +      var->data.driver_location = input_to_index[attr];
>> +      (*size)++;
>> +   }
>> +}
>> +
>> +static void
>> +st_nir_assign_uniform_locations(struct gl_program *prog,
>> +                                struct exec_list *uniform_list, unsigned *size)
>> +{
>> +   int max = 0;
>> +   int shaderidx = 0;
>> +
>> +   nir_foreach_variable(uniform, uniform_list) {
>> +      int loc;
>> +
>> +      if (uniform->type->is_sampler()) {
>> +         loc = shaderidx++;
>> +         uniform->data.location = loc; /* this should match resulting sampler idx */
>> +      } else if (strncmp(uniform->name, "gl_", 3) == 0) {
>> +         const gl_state_index *const stateTokens = (gl_state_index *)uniform->state_slots[0].tokens;
>> +         /* This state reference has already been setup by ir_to_mesa, but we'll
>> +          * get the same index back here.
>> +          */
>> +         loc = _mesa_add_state_reference(prog->Parameters, stateTokens);
>> +      } else {
>> +         loc = _mesa_lookup_parameter_index(prog->Parameters, -1, uniform->name);
>> +      }
>> +
>> +      /* is there a better way to do this?  If we have something like:
>> +       *
>> +       *    struct S {
>> +       *           float f;
>> +       *           vec4 v;
>> +       *    };
>> +       *    uniform S color;
>> +       *
>> +       * Then what we get in prog->Parameters looks like:
>> +       *
>> +       *    0: Name=color.f, Type=6, DataType=1406, Size=1
>> +       *    1: Name=color.v, Type=6, DataType=8b52, Size=4
>> +       *
>> +       * So the name doesn't match up and _mesa_lookup_parameter_index()
>> +       * fails.  In this case just find the first matching "color.*"..
>> +       */
>> +      if (loc < 0) {
>> +         int namelen = strlen(uniform->name);
>> +         for (unsigned i = 0; i < prog->Parameters->NumParameters; i++) {
>> +            struct gl_program_parameter *p = &prog->Parameters->Parameters[i];
>> +            if ((strncmp(p->Name, uniform->name, namelen) == 0) &&
>> +                (p->Name[namelen] == '.')) {
>> +               loc = i;
>> +               break;
>> +            }
>> +         }
>> +      }
>> +
>> +      uniform->data.driver_location = loc;
>> +      if (!uniform->type->is_sampler())
>> +         max = MAX2(max, loc + type_size_vec4(uniform->type));
>> +   }
>> +   *size = max;
>> +}
>
>
> Do you really need to do all this here? It seems to me that it would
> be much less painful if both tgsi_to_nir and the st just used
> variables and the location field for IO, and then left calling
> nir_lower_io() to the driver. This is the intention of the whole
> driver_location thing -- it's supposed to be decided by the driver
> (hence the name ;) ) and map 1:1, or at least relatively closely, to
> the indices the driver actually wants. We recently refactored i965 to
> basically do that, and it simplified things a lot, so it's been proven
> that there's at least some utility to the idea. It would also
> alleviate some of the problems you've been having with allocating
> extra variables in NIR lowering passes inside your driver, and make
> those lowering passes useful for i965 as well.

Well, we need to agree with mesa/st about, for example, the layout of
uniform buffers, VBOs, etc., so I think that for gallium drivers,
assigning the driver_location should essentially be under the control
of mesa/st.

Maybe we could rewrite tgsi_to_nir to use variables everywhere
(although that seems somewhat artificial, since by the time we have
TGSI we no longer know what the original variables really were).  I
don't see that bringing any benefit.

BR,
-R

>> +
>> +static struct gl_program *
>> +get_mesa_program_nir(struct gl_context *ctx,
>> +                     struct gl_shader_program *shader_program,
>> +                     struct gl_shader *shader)
>> +{
>> +   struct gl_program *prog;
>> +   GLenum target = _mesa_shader_stage_to_program(shader->Stage);
>> +
>> +   validate_ir_tree(shader->ir);
>> +
>> +   prog = ctx->Driver.NewProgram(ctx, target, shader_program->Name);
>> +   if (!prog)
>> +      return NULL;
>> +
>> +   prog->Parameters = _mesa_new_parameter_list();
>> +
>> +   _mesa_copy_linked_program_data(shader->Stage, shader_program, prog);
>> +   _mesa_generate_parameters_list_for_uniforms(shader_program, shader,
>> +                                               prog->Parameters);
>> +
>> +//   /* Remove reads from output registers. */
>> +//   lower_output_reads(shader->Stage, shader->ir);
>> +
>> +   /* Make a pass over the IR to add state references for any built-in
>> +    * uniforms that are used.  This has to be done now (during linking).
>> +    * Code generation doesn't happen until the first time this shader is
>> +    * used for rendering.  Waiting until then to generate the parameters is
>> +    * too late.  At that point, the values for the built-in uniforms won't
>> +    * get sent to the shader.
>> +    */
>> +   foreach_in_list(ir_instruction, node, shader->ir) {
>> +      ir_variable *var = node->as_variable();
>> +
>> +      if ((var == NULL) || (var->data.mode != ir_var_uniform) ||
>> +          (strncmp(var->name, "gl_", 3) != 0))
>> +         continue;
>> +
>> +      const ir_state_slot *const slots = var->get_state_slots();
>> +      assert(slots != NULL);
>> +
>> +      for (unsigned int i = 0; i < var->get_num_state_slots(); i++) {
>> +         _mesa_add_state_reference(prog->Parameters,
>> +                                   (gl_state_index *) slots[i].tokens);
>> +      }
>> +   }
>> +
>> +   if (ctx->_Shader->Flags & GLSL_DUMP) {
>> +      _mesa_log("\n");
>> +      _mesa_log("GLSL IR for linked %s program %d:\n",
>> +             _mesa_shader_stage_to_string(shader->Stage),
>> +             shader_program->Name);
>> +      _mesa_print_ir(_mesa_get_log_file(), shader->ir, NULL);
>> +      _mesa_log("\n\n");
>> +   }
>> +
>> +   prog->Instructions = NULL;
>> +   prog->NumInstructions = 0;
>> +
>> +   do_set_program_inouts(shader->ir, prog, shader->Stage);
>> +
>> +   prog->SamplersUsed = shader->active_samplers;
>> +   prog->ShadowSamplers = shader->shadow_samplers;
>> +   _mesa_update_shader_textures_used(shader_program, prog);
>> +
>> +   _mesa_reference_program(ctx, &shader->Program, prog);
>> +
>> +
>> +   /* This has to be done last.  Any operation the can cause
>> +    * prog->ParameterValues to get reallocated (e.g., anything that adds a
>> +    * program constant) has to happen before creating this linkage.
>> +    */
>> +   _mesa_associate_uniform_storage(ctx, shader_program, prog->Parameters);
>> +
>> +   struct st_vertex_program *stvp;
>> +   struct st_fragment_program *stfp;
>> +
>> +   switch (shader->Type) {
>> +   case GL_VERTEX_SHADER:
>> +      stvp = (struct st_vertex_program *)prog;
>> +      stvp->shader_program = shader_program;
>> +      break;
>> +   case GL_FRAGMENT_SHADER:
>> +      stfp = (struct st_fragment_program *)prog;
>> +      stfp->shader_program = shader_program;
>> +      break;
>> +   default:
>> +      assert(!"should not be reached");
>> +      return NULL;
>> +   }
>> +
>> +   return prog;
>> +}
>> +
>> +
>> +/* TODO probably get this from pipe_screen?
>> + * NOTE we need this to not be on the stack, so it doesn't go away when
>> + * get_mesa_program_nir() returns.. or just allow pipe driver to supply..
>> + */
>> +static nir_shader_compiler_options nir_options;
>> +
>> +// XXX move this!
>> +extern "C" {
>> +nir_shader *
>> +st_glsl_to_nir(struct st_context *st, struct gl_program *prog,
>> +               struct gl_shader_program *shader_program,
>> +               gl_shader_stage stage)
>> +{
>> +   nir_shader *nir;
>> +
>> +   if (prog->nir)
>> +      return prog->nir;
>> +
>> +   // XXX get from pipe_screen?  Or just let pipe driver provide?
>> +   nir_options.lower_fpow = true;
>> +   nir_options.lower_fsat = true;
>> +   nir_options.lower_scmp = true;
>> +   nir_options.lower_flrp = true;
>> +   nir_options.lower_ffract = true;
>> +   nir_options.native_integers = true;
>> +
>> +   nir = glsl_to_nir(shader_program, stage, &nir_options);
>> +   prog->nir = nir;
>> +
>> +   nir_validate_shader(nir);
>> +
>> +   nir_print_shader(nir, _mesa_get_log_file());
>> +
>> +   nir_lower_global_vars_to_local(nir);
>> +   nir_validate_shader(nir);
>> +
>> +   nir_split_var_copies(nir);
>> +   nir_lower_var_copies(nir);
>> +   nir_validate_shader(nir);
>> +
>> +   /* fragment shaders may need : */
>> +   if (stage == MESA_SHADER_FRAGMENT) {
>> +      static const gl_state_index wposTransformState[STATE_LENGTH] = {
>> +         STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM
>> +      };
>> +      nir_lower_wpos_ytransform_options wpos_options = {0};
>> +      struct pipe_screen *pscreen = st->pipe->screen;
>> +
>> +      memcpy(wpos_options.state_tokens, wposTransformState,
>> +             sizeof(wpos_options.state_tokens));
>> +      wpos_options.fs_coord_origin_upper_left =
>> +         pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT);
>> +      wpos_options.fs_coord_origin_lower_left =
>> +         pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
>> +      wpos_options.fs_coord_pixel_center_integer =
>> +         pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
>> +      wpos_options.fs_coord_pixel_center_half_integer =
>> +         pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER);
>> +
>> +      if (nir_lower_wpos_ytransform(nir, &wpos_options)) {
>> +         _mesa_add_state_reference(prog->Parameters, wposTransformState);
>> +      }
>> +   }
>> +
>> +   if (stage == MESA_SHADER_VERTEX) {
>> +      /* Needs special handling so drvloc matches the vbo state: */
>> +      st_nir_assign_vs_in_locations(prog, &nir->inputs, &nir->num_inputs);
>> +      nir_assign_var_locations(&nir->outputs,
>> +                               &nir->num_outputs,
>> +                               type_size_vec4);
>> +      st_nir_fixup_varying_slots(st, &nir->outputs);
>> +   } else if (stage == MESA_SHADER_FRAGMENT) {
>> +      nir_assign_var_locations(&nir->inputs,
>> +                               &nir->num_inputs,
>> +                               type_size_vec4);
>> +      st_nir_fixup_varying_slots(st, &nir->inputs);
>> +      nir_assign_var_locations(&nir->outputs,
>> +                               &nir->num_outputs,
>> +                               type_size_vec4);
>> +   } else {
>> +      unreachable("invalid shader type for tgsi bypass\n");
>> +   }
>> +
>> +   st_nir_assign_uniform_locations(prog, &nir->uniforms, &nir->num_uniforms);
>> +
>> +   nir_lower_system_values(nir);
>> +   nir_lower_io(nir, nir_var_all, type_size_vec4);
>> +   nir_lower_samplers(nir, NULL);
>> +   nir_validate_shader(nir);
>> +
>> +   // XXX do we need anything else from brw_create_nir().. and what
>> +   // is best way to split up which things should be here vs driver?
>> +   // currently just trying to make the result here similar to what
>> +   // we get from tgsi_to_nir().. so lower_io, etc..
>> +
>> +   // XXX new flag, probably?
>> +   if (st->ctx->_Shader->Flags & GLSL_DUMP) {
>> +      _mesa_log("\n");
>> +      _mesa_log("NIR IR for linked %s program %d:\n",
>> +             _mesa_shader_stage_to_string(stage),
>> +             shader_program->Name);
>> +      nir_print_shader(nir, _mesa_get_log_file());
>> +      _mesa_log("\n\n");
>> +   }
>> +
>> +   return nir;
>> +}
>> +}
>> +
>> +static struct gl_program *
>> +get_mesa_program(struct gl_context *ctx,
>> +                 struct gl_shader_program *shader_program,
>> +                 struct gl_shader *shader)
>> +{
>> +   struct pipe_screen *pscreen = ctx->st->pipe->screen;
>> +   unsigned ptarget = st_shader_stage_to_ptarget(shader->Stage);
>> +   enum pipe_shader_ir preferred_ir = (enum pipe_shader_ir)
>> +      pscreen->get_shader_param(pscreen, ptarget, PIPE_SHADER_CAP_PREFERRED_IR);
>> +   if (preferred_ir == PIPE_SHADER_IR_NIR) {
>> +      /* TODO only for GLSL VS/FS for now: */
>> +      switch (shader->Type) {
>> +      case GL_VERTEX_SHADER:
>> +      case GL_FRAGMENT_SHADER:
>> +         return get_mesa_program_nir(ctx, shader_program, shader);
>> +      default:
>> +         break;
>> +      }
>> +   }
>> +   return get_mesa_program_tgsi(ctx, shader_program, shader);
>> +}
>> +
>> +
>>  extern "C" {
>>
>>  static void
>> @@ -5880,9 +6273,18 @@ st_translate_stream_output_info(glsl_to_tgsi_visitor *glsl_to_tgsi,
>>                                  const GLuint outputMapping[],
>>                                  struct pipe_stream_output_info *so)
>>  {
>> -   unsigned i;
>>     struct gl_transform_feedback_info *info =
>>        &glsl_to_tgsi->shader_program->LinkedTransformFeedback;
>> +   st_translate_stream_output_info2(info, outputMapping, so);
>> +}
>> +
>> +/* TODO better name, and split out into own patch.. */
>> +void
>> +st_translate_stream_output_info2(struct gl_transform_feedback_info *info,
>> +                                const GLuint outputMapping[],
>> +                                struct pipe_stream_output_info *so)
>> +{
>> +   unsigned i;
>>
>>     for (i = 0; i < info->NumOutputs; i++) {
>>        so->output[i].register_index =
>> diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.h b/src/mesa/state_tracker/st_glsl_to_tgsi.h
>> index 729295b..1986025 100644
>> --- a/src/mesa/state_tracker/st_glsl_to_tgsi.h
>> +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.h
>> @@ -63,6 +63,11 @@ st_translate_stream_output_info(struct glsl_to_tgsi_visitor *glsl_to_tgsi,
>>                                  const GLuint outputMapping[],
>>                                  struct pipe_stream_output_info *so);
>>
>> +void
>> +st_translate_stream_output_info2(struct gl_transform_feedback_info *info,
>> +                                const GLuint outputMapping[],
>> +                                struct pipe_stream_output_info *so);
>> +
>>  extern const unsigned _mesa_sysval_to_semantic[SYSTEM_VALUE_MAX];
>>
>>  #ifdef __cplusplus
>> diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c
>> index 75ccaf2..d2e4627 100644
>> --- a/src/mesa/state_tracker/st_program.c
>> +++ b/src/mesa/state_tracker/st_program.c
>> @@ -38,6 +38,9 @@
>>  #include "program/prog_print.h"
>>  #include "program/programopt.h"
>>
>> +#include "nir.h"
>> +#include "nir/nir_emulate.h"
>> +
>>  #include "pipe/p_context.h"
>>  #include "pipe/p_defines.h"
>>  #include "pipe/p_shader_tokens.h"
>> @@ -70,8 +73,14 @@ delete_vp_variant(struct st_context *st, struct st_vp_variant *vpv)
>>     if (vpv->draw_shader)
>>        draw_delete_vertex_shader( st->draw, vpv->draw_shader );
>>
>> -   if (vpv->tgsi.tokens)
>> -      ureg_free_tokens(vpv->tgsi.tokens);
>> +   if (vpv->tgsi.ir == PIPE_SHADER_IR_NIR) {
>> +/* TODO until nir_clone():
>> +      ralloc_free(vpv->tgsi.nir);
>> + */
>> +   } else {
>> +      if (vpv->tgsi.tokens)
>> +         ureg_free_tokens(vpv->tgsi.tokens);
>> +   }
>>
>>     free( vpv );
>>  }
>> @@ -95,7 +104,12 @@ st_release_vp_variants( struct st_context *st,
>>
>>     stvp->variants = NULL;
>>
>> -   if (stvp->tgsi.tokens) {
>> +   if (stvp->tgsi.ir == PIPE_SHADER_IR_NIR) {
>> +/*
>> +      ralloc_free(stvp->tgsi.nir);
>> +      stvp->tgsi.nir = NULL;
>> + */
>> +   } else if (stvp->tgsi.tokens) {
>>        tgsi_free_tokens(stvp->tgsi.tokens);
>>        stvp->tgsi.tokens = NULL;
>>     }
>> @@ -134,7 +148,12 @@ st_release_fp_variants(struct st_context *st, struct st_fragment_program *stfp)
>>
>>     stfp->variants = NULL;
>>
>> -   if (stfp->tgsi.tokens) {
>> +   if (stfp->tgsi.ir == PIPE_SHADER_IR_NIR) {
>> +/*
>> +      ralloc_free(stfp->tgsi.nir);
>> +      stfp->tgsi.nir = NULL;
>> + */
>> +   } else if (stfp->tgsi.tokens) {
>>        ureg_free_tokens(stfp->tgsi.tokens);
>>        stfp->tgsi.tokens = NULL;
>>     }
>> @@ -251,6 +270,11 @@ st_release_tep_variants(struct st_context *st, struct st_tesseval_program *sttep
>>     }
>>  }
>>
>> +// XXX
>> +nir_shader *
>> +st_glsl_to_nir(struct st_context *st, struct gl_program *prog,
>> +               struct gl_shader_program *shader_program,
>> +               gl_shader_stage stage);
>>
>>  /**
>>   * Translate a vertex program.
>> @@ -388,9 +412,23 @@ st_translate_vertex_program(struct st_context *st,
>>     output_semantic_name[num_outputs] = TGSI_SEMANTIC_EDGEFLAG;
>>     output_semantic_index[num_outputs] = 0;
>>
>> -   if (!stvp->glsl_to_tgsi)
>> +   if (!stvp->glsl_to_tgsi && !stvp->shader_program)
>>        _mesa_remove_output_reads(&stvp->Base.Base, PROGRAM_OUTPUT);
>>
>> +   if (stvp->shader_program) {
>> +      nir_shader *nir = st_glsl_to_nir(st, &stvp->Base.Base,
>> +                                       stvp->shader_program,
>> +                                       MESA_SHADER_VERTEX);
>> +
>> +      stvp->tgsi.ir = PIPE_SHADER_IR_NIR;
>> +      stvp->tgsi.nir = nir;
>> +
>> +      st_translate_stream_output_info2(&stvp->shader_program->LinkedTransformFeedback,
>> +                                       stvp->result_to_output,
>> +                                       &stvp->tgsi.stream_output);
>> +      return true;
>> +   }
>> +
>>     ureg = ureg_create_with_screen(TGSI_PROCESSOR_VERTEX, st->pipe->screen);
>>     if (ureg == NULL)
>>        return false;
>> @@ -461,6 +499,15 @@ st_translate_vertex_program(struct st_context *st,
>>     return stvp->tgsi.tokens != NULL;
>>  }
>>
>> +static unsigned
>> +st_vp_variant_flags(const struct st_vp_variant_key *key)
>> +{
>> +   unsigned flags =
>> +      (key->clamp_color ? TGSI_EMU_CLAMP_COLOR_OUTPUTS : 0) |
>> +      (key->passthrough_edgeflags ? TGSI_EMU_PASSTHROUGH_EDGEFLAG : 0);
>> +   return flags;
>> +}
>> +
>>  static struct st_vp_variant *
>>  st_create_vp_variant(struct st_context *st,
>>                       struct st_vertex_program *stvp,
>> @@ -468,18 +515,28 @@ st_create_vp_variant(struct st_context *st,
>>  {
>>     struct st_vp_variant *vpv = CALLOC_STRUCT(st_vp_variant);
>>     struct pipe_context *pipe = st->pipe;
>> +   unsigned flags = st_vp_variant_flags(key);
>>
>>     vpv->key = *key;
>> -   vpv->tgsi.tokens = tgsi_dup_tokens(stvp->tgsi.tokens);
>>     vpv->tgsi.stream_output = stvp->tgsi.stream_output;
>>     vpv->num_inputs = stvp->num_inputs;
>>
>> +   if (stvp->tgsi.ir == PIPE_SHADER_IR_NIR) {
>> +      vpv->tgsi.ir = PIPE_SHADER_IR_NIR;
>> +      vpv->tgsi.nir = nir_shader_ref(stvp->tgsi.nir);
>> +      if (flags) {
>> +         vpv->tgsi.nir = nir_shader_mutable(vpv->tgsi.nir);
>> +         nir_emulate(vpv->tgsi.nir, flags);
>> +      }
>> +      vpv->driver_shader = pipe->create_vs_state(pipe, &vpv->tgsi);
>> +      return vpv;
>> +   }
>> +
>> +   vpv->tgsi.tokens = tgsi_dup_tokens(stvp->tgsi.tokens);
>> +
>>     /* Emulate features. */
>> -   if (key->clamp_color || key->passthrough_edgeflags) {
>> +   if (flags) {
>>        const struct tgsi_token *tokens;
>> -      unsigned flags =
>> -         (key->clamp_color ? TGSI_EMU_CLAMP_COLOR_OUTPUTS : 0) |
>> -         (key->passthrough_edgeflags ? TGSI_EMU_PASSTHROUGH_EDGEFLAG : 0);
>>
>>        tokens = tgsi_emulate(vpv->tgsi.tokens, flags);
>>
>> @@ -583,7 +640,7 @@ st_translate_fragment_program(struct st_context *st,
>>
>>     memset(inputSlotToAttr, ~0, sizeof(inputSlotToAttr));
>>
>> -   if (!stfp->glsl_to_tgsi)
>> +   if (!stfp->glsl_to_tgsi && !stfp->shader_program)
>>        _mesa_remove_output_reads(&stfp->Base.Base, PROGRAM_OUTPUT);
>>
>>     /*
>> @@ -787,6 +844,17 @@ st_translate_fragment_program(struct st_context *st,
>>        }
>>     }
>>
>> +   if (stfp->shader_program) {
>> +      nir_shader *nir = st_glsl_to_nir(st, &stfp->Base.Base,
>> +                                       stfp->shader_program,
>> +                                       MESA_SHADER_FRAGMENT);
>> +
>> +      stfp->tgsi.ir = PIPE_SHADER_IR_NIR;
>> +      stfp->tgsi.nir = nir;
>> +
>> +      return true;
>> +   }
>> +
>>     ureg = ureg_create_with_screen(TGSI_PROCESSOR_FRAGMENT, st->pipe->screen);
>>     if (ureg == NULL)
>>        return false;
>> @@ -867,6 +935,15 @@ st_translate_fragment_program(struct st_context *st,
>>     return stfp->tgsi.tokens != NULL;
>>  }
>>
>> +static unsigned
>> +st_fp_variant_flags(const struct st_fp_variant_key *key)
>> +{
>> +   unsigned flags =
>> +      (key->clamp_color ? TGSI_EMU_CLAMP_COLOR_OUTPUTS : 0) |
>> +      (key->persample_shading ? TGSI_EMU_FORCE_PERSAMPLE_INTERP : 0);
>> +   return flags;
>> +}
>> +
>>  static struct st_fp_variant *
>>  st_create_fp_variant(struct st_context *st,
>>                       struct st_fragment_program *stfp,
>> @@ -875,20 +952,31 @@ st_create_fp_variant(struct st_context *st,
>>     struct pipe_context *pipe = st->pipe;
>>     struct st_fp_variant *variant = CALLOC_STRUCT(st_fp_variant);
>>     struct pipe_shader_state tgsi = {0};
>> +   unsigned flags = st_fp_variant_flags(key);
>>
>>     if (!variant)
>>        return NULL;
>>
>> +   if (stfp->tgsi.ir == PIPE_SHADER_IR_NIR) {
>> +      tgsi.ir = PIPE_SHADER_IR_NIR;
>> +      tgsi.nir = nir_shader_ref(stfp->tgsi.nir);
>> +      if (flags) {
>> +         tgsi.nir = nir_shader_mutable(tgsi.nir);
>> +         nir_emulate(tgsi.nir, flags);
>> +      }
>> +      variant->driver_shader = pipe->create_fs_state(pipe, &tgsi);
>> +      variant->key = *key;
>> +
>> +      return variant;
>> +   }
>> +
>>     tgsi.tokens = stfp->tgsi.tokens;
>>
>>     assert(!(key->bitmap && key->drawpixels));
>>
>>     /* Emulate features. */
>> -   if (key->clamp_color || key->persample_shading) {
>> +   if (flags) {
>>        const struct tgsi_token *tokens;
>> -      unsigned flags =
>> -         (key->clamp_color ? TGSI_EMU_CLAMP_COLOR_OUTPUTS : 0) |
>> -         (key->persample_shading ? TGSI_EMU_FORCE_PERSAMPLE_INTERP : 0);
>>
>>        tokens = tgsi_emulate(tgsi.tokens, flags);
>>
>> diff --git a/src/mesa/state_tracker/st_program.h b/src/mesa/state_tracker/st_program.h
>> index d9b53ac..9d82b39 100644
>> --- a/src/mesa/state_tracker/st_program.h
>> +++ b/src/mesa/state_tracker/st_program.h
>> @@ -101,6 +101,9 @@ struct st_fragment_program
>>     struct pipe_shader_state tgsi;
>>     struct glsl_to_tgsi_visitor* glsl_to_tgsi;
>>
>> +   /* used when bypassing glsl_to_tgsi: */
>> +   struct gl_shader_program *shader_program;
>> +
>>     struct st_fp_variant *variants;
>>  };
>>
>> @@ -157,6 +160,9 @@ struct st_vertex_program
>>     struct pipe_shader_state tgsi;
>>     struct glsl_to_tgsi_visitor* glsl_to_tgsi;
>>
>> +   /* used when bypassing glsl_to_tgsi: */
>> +   struct gl_shader_program *shader_program;
>> +
>>     /** maps a Mesa VERT_ATTRIB_x to a packed TGSI input index */
>>     /** maps a TGSI input index back to a Mesa VERT_ATTRIB_x */
>>     GLuint index_to_input[PIPE_MAX_SHADER_INPUTS];
>> --
>> 2.5.0
>>
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev at lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/mesa-dev