[Mesa-dev] [PATCH 2/6] nir: Implement a Mesa IR -> NIR translator.

Tue Mar 24 15:14:15 PDT 2015

On Mon, Mar 23, 2015 at 8:37 PM, Kenneth Graunke <kenneth at whitecape.org> wrote:
> Shamelessly ripped off from Eric Anholt's tgsi_to_nir pass.
>
> Not compiled on SCons, like the rest of NIR.
>
> Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
> ---
>  src/mesa/Makefile.am                |    2 +
>  src/mesa/Makefile.sources           |    5 +
>  src/mesa/program/prog_instruction.h |    2 +
>  src/mesa/program/prog_to_nir.c      | 1189 +++++++++++++++++++++++++++++++++++
>  src/mesa/program/prog_to_nir.h      |   37 ++
>  5 files changed, 1235 insertions(+)
>  create mode 100644 src/mesa/program/prog_to_nir.c
>  create mode 100644 src/mesa/program/prog_to_nir.h
>
> diff --git a/src/mesa/Makefile.am b/src/mesa/Makefile.am
> index 3dab8f0..60114e4 100644
> --- a/src/mesa/Makefile.am
> +++ b/src/mesa/Makefile.am
> @@ -174,6 +174,7 @@ endif
>  libmesa_la_SOURCES = \
>         $(MESA_FILES) \
>         $(PROGRAM_FILES) \
> +       $(PROGRAM_NIR_FILES) \
>         $(MESA_ASM_FILES_FOR_ARCH)
>
>  libmesa_la_LIBADD = \
> @@ -183,6 +184,7 @@ libmesa_la_LIBADD = \
>  libmesagallium_la_SOURCES = \
>         $(MESA_GALLIUM_FILES) \
>         $(PROGRAM_FILES) \
> +       $(PROGRAM_NIR_FILES) \
>         $(MESA_ASM_FILES_FOR_ARCH)
>
>  libmesagallium_la_LIBADD = \
> diff --git a/src/mesa/Makefile.sources b/src/mesa/Makefile.sources
> index 217be9a..cc166ce 100644
> --- a/src/mesa/Makefile.sources
> +++ b/src/mesa/Makefile.sources
> @@ -520,6 +520,10 @@ PROGRAM_FILES = \
>         program/symbol_table.c \
>         program/symbol_table.h
>
> +PROGRAM_NIR_FILES = \
> +       program/prog_to_nir.c \
> +       program/prog_to_nir.h
> +
>  ASM_C_FILES =  \
>         x86/common_x86.c \
>         x86/x86_xform.c \
> @@ -608,6 +612,7 @@ INCLUDE_DIRS = \
>         -I$(top_srcdir)/src \
>         -I$(top_srcdir)/src/glsl \
>         -I$(top_builddir)/src/glsl \
> +       -I$(top_builddir)/src/glsl/nir \
>         -I$(top_srcdir)/src/glsl/glcpp \
>         -I$(top_srcdir)/src/mesa \
>         -I$(top_builddir)/src/mesa \
> diff --git a/src/mesa/program/prog_instruction.h b/src/mesa/program/prog_instruction.h
> index ab3acbc..96da198 100644
> --- a/src/mesa/program/prog_instruction.h
> +++ b/src/mesa/program/prog_instruction.h
> @@ -59,6 +59,8 @@
>  #define SWIZZLE_NOOP           MAKE_SWIZZLE4(0,1,2,3)
>  #define GET_SWZ(swz, idx)      (((swz) >> ((idx)*3)) & 0x7)
>  #define GET_BIT(msk, idx)      (((msk) >> (idx)) & 0x1)
> +/** Determine if swz contains SWIZZLE_ZERO/ONE/NIL for any components. */
> +#define HAS_EXTENDED_SWIZZLE(swz) (swz & 0x924)
>
>  #define SWIZZLE_XYZW MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W)
>  #define SWIZZLE_XXXX MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)
> diff --git a/src/mesa/program/prog_to_nir.c b/src/mesa/program/prog_to_nir.c
> new file mode 100644
> index 0000000..7093346
> --- /dev/null
> +++ b/src/mesa/program/prog_to_nir.c
> @@ -0,0 +1,1189 @@
> +/*
> + * Copyright © 2015 Intel Corporation
> + * Copyright © 2014-2015 Broadcom
> + * Copyright (C) 2014 Rob Clark <robclark at freedesktop.org>
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + */
> +
> +#include "nir/nir.h"
> +#include "nir/nir_builder.h"
> +#include "glsl/list.h"
> +#include "main/imports.h"
> +#include "util/ralloc.h"
> +
> +#include "prog_to_nir.h"
> +#include "prog_instruction.h"
> +#include "prog_parameter.h"
> +#include "prog_print.h"
> +
> +#define SWIZ(X, Y, Z, W) (unsigned[4]){ \
> +      SWIZZLE_##X,                      \
> +      SWIZZLE_##Y,                      \
> +      SWIZZLE_##Z,                      \
> +      SWIZZLE_##W,                      \
> +   }
> +
> +struct ptn_compile {
> +   struct gl_program *prog;
> +   struct nir_builder build;
> +   struct nir_shader *s;
> +   nir_function_impl *impl;
> +   struct exec_list *cf_node_list;
> +   bool error;
> +
> +   nir_variable *input_vars[VARYING_SLOT_MAX];
> +   nir_variable *output_vars[VARYING_SLOT_MAX];
> +   nir_register **output_regs;
> +   nir_register **temp_regs;
> +
> +   nir_register *addr_reg;
> +};
> +
> +static nir_ssa_def *
> +ptn_imm_vec4(struct ptn_compile *c, float x, float y, float z, float w)
> +{
> +   nir_load_const_instr *load_const;
> +
> +   load_const = nir_load_const_instr_create(c->s, 4);
> +   if (!load_const) {
> +      c->error = true;
> +      return NULL;
> +   }
> +
> +   load_const->value.f[0] = x;
> +   load_const->value.f[1] = y;
> +   load_const->value.f[2] = z;
> +   load_const->value.f[3] = w;
> +
> +   nir_instr_insert_after_cf_list(c->cf_node_list, &load_const->instr);
> +
> +   return &load_const->def;
> +}
> +
> +static nir_ssa_def *
> +ptn_imm_f(struct ptn_compile *c, float f)
> +{
> +   return ptn_imm_vec4(c, f, f, f, f);
> +}
> +
> +static nir_ssa_def *
> +ptn_imm_d(struct ptn_compile *c, int x)
> +{
> +   nir_load_const_instr *load_const;
> +
> +   load_const = nir_load_const_instr_create(c->s, 1);
> +   if (!load_const) {
> +      c->error = true;
> +      return NULL;
> +   }
> +
> +   load_const->value.i[0] = x;
> +
> +   nir_instr_insert_after_cf_list(c->cf_node_list, &load_const->instr);
> +
> +   return &load_const->def;
> +}

Could we move these immediate-constant helpers to nir_builder.h?
Presumably we'd also need to clean up the naming a little so that it
is less Mesa IR specific and easier to extend consistently later --
for example, ptn_imm_d only creates a single component, but ptn_imm_f
splats a single value into all 4 channels. How about something like
nir_imm_vec4, nir_imm_vec4_splat, and nir_imm_int? If needed, we can
add things like nir_imm_bool, nir_imm_vec3_splat, etc. later and the
naming should stay consistent.

> +
> +static nir_ssa_def *
> +ptn_fmov_alu(struct ptn_compile *c, nir_alu_src src)
> +{
> +   nir_alu_instr *mov =
> +      nir_alu_instr_create(c->s, nir_op_fmov);
> +   nir_ssa_dest_init(&mov->instr, &mov->dest.dest, 4, NULL);
> +   mov->dest.write_mask = 0xf;
> +   mov->src[0] = src;
> +   nir_instr_insert_after_cf_list(c->cf_node_list, &mov->instr);
> +
> +   return &mov->dest.dest.ssa;
> +}
> +
> +static nir_ssa_def *
> +ptn_swizzle_full(struct ptn_compile *c, nir_ssa_def *src, unsigned swiz[4])
> +{
> +   nir_alu_src alu_src;
> +   memset(&alu_src, 0, sizeof(alu_src));
> +   alu_src.src = nir_src_for_ssa(src);
> +   for (int i = 0; i < 4; i++)
> +      alu_src.swizzle[i] = swiz[i];
> +
> +   return ptn_fmov_alu(c, alu_src);
> +}
> +
> +#define ptn_swizzle(c, src, x, y, z, w) \
> +   ptn_swizzle_full(c, src, SWIZ(x, y, z, w))
> +#define ptn_channel(c, src, swiz) \
> +   ptn_swizzle_full(c, src, SWIZ(swiz, swiz, swiz, swiz))

Can we also move this swizzle stuff to nir_builder.h? TGSI -> NIR
already has similar code.

> +
> +static nir_ssa_def *
> +ptn_src_for_dest(struct ptn_compile *c, nir_alu_dest *dest)
> +{
> +   nir_alu_src src;
> +   memset(&src, 0, sizeof(src));
> +
> +   if (dest->dest.is_ssa)
> +      src.src = nir_src_for_ssa(&dest->dest.ssa);
> +   else {
> +      assert(!dest->dest.reg.indirect);
> +      src.src = nir_src_for_reg(dest->dest.reg.reg);
> +      src.src.reg.base_offset = dest->dest.reg.base_offset;
> +   }
> +
> +   for (int i = 0; i < 4; i++)
> +      src.swizzle[i] = i;
> +
> +   return ptn_fmov_alu(c, src);
> +}
> +
> +static nir_alu_dest
> +ptn_get_dest(struct ptn_compile *c, const struct prog_dst_register *prog_dst)
> +{
> +   nir_alu_dest dest;
> +
> +   memset(&dest, 0, sizeof(dest));
> +
> +   switch (prog_dst->File) {
> +   case PROGRAM_TEMPORARY:
> +      dest.dest.reg.reg = c->temp_regs[prog_dst->Index];
> +      break;
> +   case PROGRAM_OUTPUT:
> +      dest.dest.reg.reg = c->output_regs[prog_dst->Index];
> +      break;
> +   case PROGRAM_ADDRESS:
> +      assert(prog_dst->Index == 0);
> +      dest.dest.reg.reg = c->addr_reg;
> +      break;
> +   case PROGRAM_UNDEFINED:
> +      break;
> +   }
> +
> +   dest.write_mask = prog_dst->WriteMask;
> +   dest.saturate = false;
> +
> +   assert(!prog_dst->RelAddr);
> +
> +   return dest;
> +}
> +
> +/**
> + * Multiply the contents of the ADDR register by 4 to convert from the number
> + * of vec4s to the number of floating point components.
> + */
> +static nir_ssa_def *
> +ptn_addr_reg_value(struct ptn_compile *c)
> +{
> +   struct nir_builder *b = &c->build;
> +   nir_alu_src src;
> +   memset(&src, 0, sizeof(src));
> +   src.src = nir_src_for_reg(c->addr_reg);
> +
> +   return nir_imul(b, ptn_fmov_alu(c, src), ptn_imm_d(c, 4));
> +}
> +
> +static nir_ssa_def *
> +ptn_get_src(struct ptn_compile *c, const struct prog_src_register *prog_src)
> +{
> +   struct nir_builder *b = &c->build;
> +   nir_alu_src src;
> +
> +   memset(&src, 0, sizeof(src));
> +
> +   switch (prog_src->File) {
> +   case PROGRAM_UNDEFINED:
> +      return ptn_imm_f(c, 0.0);
> +   case PROGRAM_TEMPORARY:
> +      assert(!prog_src->RelAddr && prog_src->Index >= 0);
> +      src.src.reg.reg = c->temp_regs[prog_src->Index];
> +      break;
> +   case PROGRAM_INPUT: {
> +      /* ARB_vertex_program doesn't allow relative addressing on vertex
> +       * attributes; ARB_fragment_program has no relative addressing at all.
> +       */
> +      assert(!prog_src->RelAddr);
> +
> +      assert(prog_src->Index >= 0 && prog_src->Index < VARYING_SLOT_MAX);
> +
> +      nir_intrinsic_instr *load =
> +         nir_intrinsic_instr_create(c->s, nir_intrinsic_load_var);
> +      load->num_components = 4;
> +      load->variables[0] =
> +         nir_deref_var_create(c->s, c->input_vars[prog_src->Index]);
> +
> +      nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL);
> +      nir_instr_insert_after_cf_list(c->cf_node_list, &load->instr);
> +
> +      src.src = nir_src_for_ssa(&load->dest.ssa);
> +      break;
> +   }
> +   case PROGRAM_STATE_VAR:
> +   case PROGRAM_CONSTANT: {
> +      /* We actually want to look at the type in the Parameters list for this,
> +       * because it lets us upload constant builtin uniforms as actual
> +       * constants.
> +       */
> +      struct gl_program_parameter_list *plist = c->prog->Parameters;
> +      gl_register_file file = prog_src->RelAddr ? prog_src->File :
> +         plist->Parameters[prog_src->Index].Type;
> +
> +      switch (file) {
> +      case PROGRAM_CONSTANT:
> +         if ((c->prog->IndirectRegisterFiles & (1 << PROGRAM_CONSTANT)) == 0) {
> +            float *v = (float *) plist->ParameterValues[prog_src->Index];
> +            src.src = nir_src_for_ssa(ptn_imm_vec4(c, v[0], v[1], v[2], v[3]));
> +            break;
> +         }
> +         /* FALLTHROUGH */
> +      case PROGRAM_STATE_VAR: {
> +         nir_intrinsic_op load_op =
> +            prog_src->RelAddr ? nir_intrinsic_load_uniform_indirect :
> +                                nir_intrinsic_load_uniform;
> +         nir_intrinsic_instr *load = nir_intrinsic_instr_create(c->s, load_op);
> +         nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL);
> +         load->num_components = 4;
> +
> +         /* Multiply src->Index by 4 to scale from # of vec4s to components. */
> +         load->const_index[0] = 4 * prog_src->Index;
> +         load->const_index[1] = 1;
> +
> +         if (prog_src->RelAddr) {
> +            nir_ssa_def *reladdr = ptn_addr_reg_value(c);
> +            if (prog_src->Index < 0) {
> +               /* This is a negative offset which should be added to the address
> +                * register's value.
> +                */
> +               reladdr = nir_iadd(b, reladdr, ptn_imm_d(c, load->const_index[0]));
> +               load->const_index[0] = 0;
> +            }
> +            load->src[0] = nir_src_for_ssa(reladdr);
> +         }
> +
> +         nir_instr_insert_after_cf_list(c->cf_node_list, &load->instr);
> +
> +         src.src = nir_src_for_ssa(&load->dest.ssa);
> +         break;
> +      }
> +      default:
> +         fprintf(stderr, "bad uniform src register file: %s (%d)\n",
> +                 _mesa_register_file_name(file), file);
> +         abort();
> +      }
> +      break;
> +   }
> +   default:
> +      fprintf(stderr, "unknown src register file: %s (%d)\n",
> +              _mesa_register_file_name(prog_src->File), prog_src->File);
> +      abort();
> +   }
> +
> +   nir_ssa_def *def;
> +   if (!HAS_EXTENDED_SWIZZLE(prog_src->Swizzle)) {
> +      for (int i = 0; i < 4; i++)
> +         src.swizzle[i] = GET_SWZ(prog_src->Swizzle, i);
> +
> +      def = ptn_fmov_alu(c, src);
> +   } else {
> +      nir_ssa_def *chans[4];
> +      for (int i = 0; i < 4; i++) {
> +         int swizzle = GET_SWZ(prog_src->Swizzle, i);
> +         if (swizzle == SWIZZLE_ZERO) {
> +            chans[i] = ptn_imm_f(c, 0.0);
> +         } else if (swizzle == SWIZZLE_ONE) {
> +            chans[i] = ptn_imm_f(c, 1.0);
> +         } else {
> +            assert(swizzle != SWIZZLE_NIL);
> +            nir_alu_instr *mov = nir_alu_instr_create(c->s, nir_op_fmov);
> +            nir_ssa_dest_init(&mov->instr, &mov->dest.dest, 1, NULL);
> +            mov->dest.write_mask = 0x1;
> +            mov->src[0] = src;
> +            mov->src[0].swizzle[0] = swizzle;
> +            nir_instr_insert_after_cf_list(c->cf_node_list, &mov->instr);
> +
> +            chans[i] = &mov->dest.dest.ssa;
> +         }
> +      }
> +      def = nir_vec4(b, chans[0], chans[1], chans[2], chans[3]);
> +   }
> +
> +   if (prog_src->Abs)
> +      def = nir_fabs(b, def);
> +
> +   if (prog_src->Negate)
> +      def = nir_fneg(b, def);
> +
> +   return def;
> +}
> +
> +static void
> +ptn_alu(struct ptn_compile *c, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
> +{
> +   unsigned num_srcs = nir_op_infos[op].num_inputs;
> +   nir_alu_instr *instr = nir_alu_instr_create(c->s, op);
> +   unsigned i;
> +
> +   for (i = 0; i < num_srcs; i++)
> +      instr->src[i].src = nir_src_for_ssa(src[i]);
> +
> +   instr->dest = dest;
> +   nir_instr_insert_after_cf_list(c->cf_node_list, &instr->instr);
> +}
> +
> +static void
> +ptn_move_dest_masked(struct ptn_compile *c, nir_alu_dest dest,
> +                     nir_ssa_def *def, unsigned write_mask)
> +{
> +   if (!(dest.write_mask & write_mask))
> +      return;
> +
> +   nir_alu_instr *mov = nir_alu_instr_create(c->s, nir_op_fmov);
> +   if (!mov) {
> +      c->error = true;
> +      return;
> +   }
> +   mov->dest = dest;
> +   mov->dest.write_mask &= write_mask;
> +   mov->src[0].src = nir_src_for_ssa(def);
> +   nir_instr_insert_after_cf_list(c->cf_node_list, &mov->instr);
> +}
> +
> +static void
> +ptn_move_dest(struct ptn_compile *c, nir_alu_dest dest, nir_ssa_def *def)
> +{
> +   ptn_move_dest_masked(c, dest, def, WRITEMASK_XYZW);
> +}
> +
> +static void
> +ptn_arl(struct ptn_compile *c, nir_alu_dest dest, nir_ssa_def **src)
> +{
> +   struct nir_builder *b = &c->build;
> +   ptn_move_dest(c, dest, nir_f2i(b, nir_ffloor(b, src[0])));
> +}
> +
> +/* EXP - Approximate Exponential Base 2
> + *  dst.x = 2^{\lfloor src.x\rfloor}
> + *  dst.y = src.x - \lfloor src.x\rfloor
> + *  dst.z = 2^{src.x}
> + *  dst.w = 1.0
> + */
> +static void
> +ptn_exp(struct ptn_compile *c, nir_alu_dest dest, nir_ssa_def **src)
> +{
> +   struct nir_builder *b = &c->build;
> +   nir_ssa_def *srcx = ptn_channel(c, src[0], X);
> +
> +   ptn_move_dest_masked(c, dest, nir_fexp2(b, nir_ffloor(b, srcx)),
> +                        WRITEMASK_X);
> +   ptn_move_dest_masked(c, dest, nir_fsub(b, srcx, nir_ffloor(b, srcx)),
> +                        WRITEMASK_Y);
> +   ptn_move_dest_masked(c, dest, nir_fexp2(b, srcx), WRITEMASK_Z);
> +   ptn_move_dest_masked(c, dest, ptn_imm_f(c, 1.0), WRITEMASK_W);
> +}
> +
> +/* LOG - Approximate Logarithm Base 2
> + *  dst.x = \lfloor\log_2{|src.x|}\rfloor
> + *  dst.y = |src.x| * 2^{-\lfloor\log_2{|src.x|}\rfloor}
> + *  dst.z = \log_2{|src.x|}
> + *  dst.w = 1.0
> + */
> +static void
> +ptn_log(struct ptn_compile *c, nir_alu_dest dest, nir_ssa_def **src)
> +{
> +   struct nir_builder *b = &c->build;
> +   nir_ssa_def *abs_srcx = nir_fabs(b, ptn_channel(c, src[0], X));
> +   nir_ssa_def *log2 = nir_flog2(b, abs_srcx);
> +   nir_ssa_def *floor_log2 = nir_ffloor(b, log2);
> +
> +   ptn_move_dest_masked(c, dest, floor_log2, WRITEMASK_X);
> +   ptn_move_dest_masked(c, dest,
> +                        nir_fmul(b, abs_srcx,
> +                                 nir_fexp2(b, nir_fneg(b, floor_log2))),
> +                        WRITEMASK_Y);
> +   ptn_move_dest_masked(c, dest, log2, WRITEMASK_Z);
> +   ptn_move_dest_masked(c, dest, ptn_imm_f(c, 1.0), WRITEMASK_W);
> +}
> +
> +/* DST - Distance Vector
> + *   dst.x = 1.0
> + *   dst.y = src0.y \times src1.y
> + *   dst.z = src0.z
> + *   dst.w = src1.w
> + */
> +static void
> +ptn_dst(struct ptn_compile *c, nir_alu_dest dest, nir_ssa_def **src)
> +{
> +   struct nir_builder *b = &c->build;
> +
> +   ptn_move_dest_masked(c, dest, ptn_imm_f(c, 1.0), WRITEMASK_X);
> +   ptn_move_dest_masked(c, dest, nir_fmul(b, src[0], src[1]), WRITEMASK_Y);
> +   ptn_move_dest_masked(c, dest, nir_fmov(b, src[0]), WRITEMASK_Z);
> +   ptn_move_dest_masked(c, dest, nir_fmov(b, src[1]), WRITEMASK_W);
> +}
> +
> +/* LIT - Light Coefficients
> + *  dst.x = 1.0
> + *  dst.y = max(src.x, 0.0)
> + *  dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0)} : 0
> + *  dst.w = 1.0
> + */
> +static void
> +ptn_lit(struct ptn_compile *c, nir_alu_dest dest, nir_ssa_def **src)
> +{
> +   struct nir_builder *b = &c->build;
> +
> +   ptn_move_dest_masked(c, dest, ptn_imm_f(c, 1.0), WRITEMASK_XW);
> +
> +   ptn_move_dest_masked(c, dest, nir_fmax(b, ptn_channel(c, src[0], X),
> +                                          ptn_imm_f(c, 0.0)), WRITEMASK_Y);
> +
> +   if (dest.write_mask & WRITEMASK_Z) {
> +      nir_ssa_def *src0_y = ptn_channel(c, src[0], Y);
> +      nir_ssa_def *wclamp = nir_fmax(b, nir_fmin(b, ptn_channel(c, src[0], W),
> +                                                 ptn_imm_f(c, 128.0)),
> +                                     ptn_imm_f(c, -128.0));
> +      nir_ssa_def *pow = nir_fpow(b, nir_fmax(b, src0_y, ptn_imm_f(c, 0.0)),
> +                                  wclamp);
> +
> +      nir_ssa_def *z;
> +      if (c->s->options->native_integers) {
> +         z = nir_bcsel(b,
> +                       nir_fge(b, ptn_imm_f(c, 0.0), ptn_channel(c, src[0], X)),
> +                       ptn_imm_f(c, 0.0),
> +                       pow);
> +      } else {
> +         z = nir_fcsel(b,
> +                       nir_sge(b, ptn_imm_f(c, 0.0), ptn_channel(c, src[0], X)),
> +                       ptn_imm_f(c, 0.0),
> +                       pow);
> +      }
> +
> +      ptn_move_dest_masked(c, dest, z, WRITEMASK_Z);
> +   }
> +}
> +
> +/* SCS - Sine Cosine
> + *   dst.x = \cos{src.x}
> + *   dst.y = \sin{src.x}
> + *   dst.z = 0.0
> + *   dst.w = 1.0
> + */
> +static void
> +ptn_scs(struct ptn_compile *c, nir_alu_dest dest, nir_ssa_def **src)
> +{
> +   struct nir_builder *b = &c->build;
> +
> +   ptn_move_dest_masked(c, dest, nir_fcos(b, ptn_channel(c, src[0], X)),
> +                        WRITEMASK_X);
> +   ptn_move_dest_masked(c, dest, nir_fsin(b, ptn_channel(c, src[0], X)),
> +                        WRITEMASK_Y);
> +   ptn_move_dest_masked(c, dest, ptn_imm_f(c, 0.0), WRITEMASK_Z);
> +   ptn_move_dest_masked(c, dest, ptn_imm_f(c, 1.0), WRITEMASK_W);
> +}
> +
> +/**
> + * Emit SLT.  For platforms with integers, prefer b2f(flt(...)).
> + */
> +static void
> +ptn_slt(struct ptn_compile *c, nir_alu_dest dest, nir_ssa_def **src)
> +{
> +   struct nir_builder *b = &c->build;
> +   if (c->s->options->native_integers) {
> +      ptn_move_dest(c, dest, nir_b2f(b, nir_flt(b, src[0], src[1])));
> +   } else {
> +      ptn_move_dest(c, dest, nir_slt(b, src[0], src[1]));
> +   }
> +}
> +
> +/**
> + * Emit SGE.  For platforms with integers, prefer b2f(fge(...)).
> + */
> +static void
> +ptn_sge(struct ptn_compile *c, nir_alu_dest dest, nir_ssa_def **src)
> +{
> +   struct nir_builder *b = &c->build;
> +   if (c->s->options->native_integers) {
> +      ptn_move_dest(c, dest, nir_b2f(b, nir_fge(b, src[0], src[1])));
> +   } else {
> +      ptn_move_dest(c, dest, nir_sge(b, src[0], src[1]));
> +   }
> +}
> +
> +static void
> +ptn_sle(struct ptn_compile *c, nir_alu_dest dest, nir_ssa_def **src)
> +{
> +   nir_ssa_def *commuted[] = { src[1], src[0] };
> +   ptn_sge(c, dest, commuted);
> +}
> +
> +static void
> +ptn_sgt(struct ptn_compile *c, nir_alu_dest dest, nir_ssa_def **src)
> +{
> +   nir_ssa_def *commuted[] = { src[1], src[0] };
> +   ptn_slt(c, dest, commuted);
> +}
> +
> +/**
> + * Emit SEQ.  For platforms with integers, prefer b2f(feq(...)).
> + */
> +static void
> +ptn_seq(struct ptn_compile *c, nir_alu_dest dest, nir_ssa_def **src)
> +{
> +   struct nir_builder *b = &c->build;
> +   if (c->s->options->native_integers) {
> +      ptn_move_dest(c, dest, nir_b2f(b, nir_feq(b, src[0], src[1])));
> +   } else {
> +      ptn_move_dest(c, dest, nir_seq(b, src[0], src[1]));
> +   }
> +}
> +
> +/**
> + * Emit SNE.  For platforms with integers, prefer b2f(fne(...)).
> + */
> +static void
> +ptn_sne(struct ptn_compile *c, nir_alu_dest dest, nir_ssa_def **src)
> +{
> +   struct nir_builder *b = &c->build;
> +   if (c->s->options->native_integers) {
> +      ptn_move_dest(c, dest, nir_b2f(b, nir_fne(b, src[0], src[1])));
> +   } else {
> +      ptn_move_dest(c, dest, nir_sne(b, src[0], src[1]));
> +   }
> +}
> +
> +static void
> +ptn_xpd(struct ptn_compile *c, nir_alu_dest dest, nir_ssa_def **src)
> +{
> +   struct nir_builder *b = &c->build;
> +   ptn_move_dest_masked(c, dest,
> +                        nir_fsub(b,
> +                                 nir_fmul(b,
> +                                          ptn_swizzle(c, src[0], Y, Z, X, X),
> +                                          ptn_swizzle(c, src[1], Z, X, Y, X)),
> +                                 nir_fmul(b,
> +                                          ptn_swizzle(c, src[1], Y, Z, X, X),
> +                                          ptn_swizzle(c, src[0], Z, X, Y, X))),
> +                        WRITEMASK_XYZ);
> +   ptn_move_dest_masked(c, dest, ptn_imm_f(c, 1.0), WRITEMASK_W);
> +}
> +
> +static void
> +ptn_dp2(struct ptn_compile *c, nir_alu_dest dest, nir_ssa_def **src)
> +{
> +   struct nir_builder *b = &c->build;
> +   ptn_move_dest(c, dest, ptn_channel(c, nir_fdot2(b, src[0], src[1]), X));
> +}
> +
> +static void
> +ptn_dp3(struct ptn_compile *c, nir_alu_dest dest, nir_ssa_def **src)
> +{
> +   struct nir_builder *b = &c->build;
> +   ptn_move_dest(c, dest, ptn_channel(c, nir_fdot3(b, src[0], src[1]), X));
> +}
> +
> +static void
> +ptn_dp4(struct ptn_compile *c, nir_alu_dest dest, nir_ssa_def **src)
> +{
> +   struct nir_builder *b = &c->build;
> +   ptn_move_dest(c, dest, ptn_channel(c, nir_fdot4(b, src[0], src[1]), X));
> +}
> +
> +static void
> +ptn_dph(struct ptn_compile *c, nir_alu_dest dest, nir_ssa_def **src)
> +{
> +   struct nir_builder *b = &c->build;
> +   nir_ssa_def *dp3 = ptn_channel(c, nir_fdot3(b, src[0], src[1]), X);
> +   ptn_move_dest(c, dest, nir_fadd(b, dp3, ptn_channel(c, src[1], W)));
> +}
> +
> +static void
> +ptn_cmp(struct ptn_compile *c, nir_alu_dest dest, nir_ssa_def **src)
> +{
> +   struct nir_builder *b = &c->build;
> +
> +   if (c->s->options->native_integers) {
> +      ptn_move_dest(c, dest, nir_bcsel(b,
> +                                       nir_flt(b, src[0], ptn_imm_f(c, 0.0)),
> +                                       src[1], src[2]));
> +   } else {
> +      ptn_move_dest(c, dest, nir_fcsel(b,
> +                                       nir_slt(b, src[0], ptn_imm_f(c, 0.0)),
> +                                       src[1], src[2]));
> +   }
> +}
> +
> +static void
> +ptn_lrp(struct ptn_compile *c, nir_alu_dest dest, nir_ssa_def **src)
> +{
> +   struct nir_builder *b = &c->build;
> +   ptn_move_dest(c, dest, nir_flrp(b, src[2], src[1], src[0]));
> +}
> +
> +static void
> +ptn_kil(struct ptn_compile *c, nir_alu_dest dest, nir_ssa_def **src)
> +{
> +   struct nir_builder *b = &c->build;
> +
> +   nir_ssa_def *cmp = c->s->options->native_integers ?
> +      nir_bany4(b, nir_flt(b, src[0], ptn_imm_f(c, 0.0))) :
> +      nir_fany4(b, nir_slt(b, src[0], ptn_imm_f(c, 0.0)));
> +
> +   nir_intrinsic_instr *discard =
> +      nir_intrinsic_instr_create(c->s, nir_intrinsic_discard_if);
> +   discard->src[0] = nir_src_for_ssa(cmp);
> +   nir_instr_insert_after_cf_list(c->cf_node_list, &discard->instr);
> +}
> +
> +static void
> +ptn_tex(struct ptn_compile *c, nir_alu_dest dest, nir_ssa_def **src,
> +        struct prog_instruction *prog_inst)
> +{
> +   nir_tex_instr *instr;
> +   nir_texop op;
> +   unsigned num_srcs;
> +
> +   switch (prog_inst->Opcode) {
> +   case OPCODE_TEX:
> +      op = nir_texop_tex;
> +      num_srcs = 1;
> +      break;
> +   case OPCODE_TXB:
> +      op = nir_texop_txb;
> +      num_srcs = 2;
> +      break;
> +   case OPCODE_TXD:
> +      op = nir_texop_txd;
> +      num_srcs = 3;
> +      break;
> +   case OPCODE_TXL:
> +      op = nir_texop_txl;
> +      num_srcs = 2;
> +      break;
> +   case OPCODE_TXP:
> +      op = nir_texop_tex;
> +      num_srcs = 2;
> +      break;
> +   case OPCODE_TXP_NV:
> +      assert(!"not handled");
> +      op = nir_texop_tex;
> +      num_srcs = 2;
> +      break;
> +   default:
> +      fprintf(stderr, "unknown tex op %d\n", prog_inst->Opcode);
> +      abort();
> +   }
> +
> +   if (prog_inst->TexShadow)
> +      num_srcs++;
> +
> +   instr = nir_tex_instr_create(c->s, num_srcs);
> +   instr->op = op;
> +   instr->dest_type = nir_type_float;
> +   instr->is_shadow = prog_inst->TexShadow;
> +   instr->sampler_index = prog_inst->TexSrcUnit;
> +
> +   switch (prog_inst->TexSrcTarget) {
> +   case TEXTURE_1D_INDEX:
> +      instr->sampler_dim = GLSL_SAMPLER_DIM_1D;
> +      break;
> +   case TEXTURE_2D_INDEX:
> +      instr->sampler_dim = GLSL_SAMPLER_DIM_2D;
> +      break;
> +   case TEXTURE_3D_INDEX:
> +      instr->sampler_dim = GLSL_SAMPLER_DIM_3D;
> +      break;
> +   case TEXTURE_CUBE_INDEX:
> +      instr->sampler_dim = GLSL_SAMPLER_DIM_CUBE;
> +      break;
> +   case TEXTURE_RECT_INDEX:
> +      instr->sampler_dim = GLSL_SAMPLER_DIM_RECT;
> +      break;
> +   default:
> +      fprintf(stderr, "Unknown texture target %d\n", prog_inst->TexSrcTarget);
> +      abort();
> +   }
> +
> +   switch (instr->sampler_dim) {
> +   case GLSL_SAMPLER_DIM_1D:
> +   case GLSL_SAMPLER_DIM_BUF:
> +      instr->coord_components = 1;
> +      break;
> +   case GLSL_SAMPLER_DIM_2D:
> +   case GLSL_SAMPLER_DIM_RECT:
> +   case GLSL_SAMPLER_DIM_EXTERNAL:
> +   case GLSL_SAMPLER_DIM_MS:
> +      instr->coord_components = 2;
> +      break;
> +   case GLSL_SAMPLER_DIM_3D:
> +   case GLSL_SAMPLER_DIM_CUBE:
> +      instr->coord_components = 3;
> +      break;
> +   }
> +
> +   unsigned src_number = 0;
> +
> +   instr->src[src_number].src =
> +      nir_src_for_ssa(ptn_swizzle(c, src[0], X, Y, Z, W));
> +   instr->src[src_number].src_type = nir_tex_src_coord;
> +   src_number++;
> +
> +   if (prog_inst->Opcode == OPCODE_TXP) {
> +      instr->src[src_number].src = nir_src_for_ssa(ptn_channel(c, src[0], W));
> +      instr->src[src_number].src_type = nir_tex_src_projector;
> +      src_number++;
> +   }
> +
> +   if (prog_inst->Opcode == OPCODE_TXB) {
> +      instr->src[src_number].src = nir_src_for_ssa(ptn_channel(c, src[0], W));
> +      instr->src[src_number].src_type = nir_tex_src_bias;
> +      src_number++;
> +   }
> +
> +   if (prog_inst->Opcode == OPCODE_TXL) {
> +      instr->src[src_number].src = nir_src_for_ssa(ptn_channel(c, src[0], W));
> +      instr->src[src_number].src_type = nir_tex_src_lod;
> +      src_number++;
> +   }
> +
> +   if (instr->is_shadow) {
> +      if (instr->coord_components < 3)
> +         instr->src[src_number].src = nir_src_for_ssa(ptn_channel(c, src[0], Z));
> +      else
> +         instr->src[src_number].src = nir_src_for_ssa(ptn_channel(c, src[0], W));
> +
> +      instr->src[src_number].src_type = nir_tex_src_comparitor;
> +      src_number++;
> +   }
> +
> +   assert(src_number == num_srcs);
> +
> +   nir_ssa_dest_init(&instr->instr, &instr->dest, 4, NULL);
> +   nir_instr_insert_after_cf_list(c->cf_node_list, &instr->instr);
> +
> +   /* Resolve the writemask on the texture op. */
> +   ptn_move_dest(c, dest, &instr->dest.ssa);
> +}
> +
> +static const nir_op op_trans[MAX_OPCODE] = {
> +   [OPCODE_NOP] = 0,
> +   [OPCODE_ABS] = nir_op_fabs,
> +   [OPCODE_ADD] = nir_op_fadd,
> +   [OPCODE_ARL] = 0,
> +   [OPCODE_CMP] = 0,
> +   [OPCODE_COS] = nir_op_fcos,
> +   [OPCODE_DDX] = nir_op_fddx,
> +   [OPCODE_DDY] = nir_op_fddy,
> +   [OPCODE_DP2] = 0,
> +   [OPCODE_DP3] = 0,
> +   [OPCODE_DP4] = 0,
> +   [OPCODE_DPH] = 0,
> +   [OPCODE_DST] = 0,
> +   [OPCODE_END] = 0,
> +   [OPCODE_EX2] = nir_op_fexp2,
> +   [OPCODE_EXP] = nir_op_fexp,
> +   [OPCODE_FLR] = nir_op_ffloor,
> +   [OPCODE_FRC] = nir_op_ffract,
> +   [OPCODE_LG2] = nir_op_flog2,
> +   [OPCODE_LIT] = 0,
> +   [OPCODE_LOG] = 0,
> +   [OPCODE_LRP] = 0,
> +   [OPCODE_MAD] = nir_op_ffma,
> +   [OPCODE_MAX] = nir_op_fmax,
> +   [OPCODE_MIN] = nir_op_fmin,
> +   [OPCODE_MOV] = nir_op_fmov,
> +   [OPCODE_MUL] = nir_op_fmul,
> +   [OPCODE_POW] = nir_op_fpow,
> +   [OPCODE_RCP] = nir_op_frcp,
> +
> +   [OPCODE_RSQ] = nir_op_frsq,
> +   [OPCODE_SCS] = 0,
> +   [OPCODE_SEQ] = 0,
> +   [OPCODE_SGE] = 0,
> +   [OPCODE_SGT] = 0,
> +   [OPCODE_SIN] = nir_op_fsin,
> +   [OPCODE_SLE] = 0,
> +   [OPCODE_SLT] = 0,
> +   [OPCODE_SNE] = 0,
> +   [OPCODE_SSG] = nir_op_fsign,
> +   [OPCODE_SUB] = nir_op_fsub,
> +   [OPCODE_SWZ] = 0,
> +   [OPCODE_TEX] = 0,
> +   [OPCODE_TRUNC] = nir_op_ftrunc,
> +   [OPCODE_TXB] = 0,
> +   [OPCODE_TXD] = 0,
> +   [OPCODE_TXL] = 0,
> +   [OPCODE_TXP] = 0,
> +   [OPCODE_TXP_NV] = 0,
> +   [OPCODE_XPD] = 0,
> +};
> +
> +/**
> + * Translates a single Mesa IR instruction into NIR.
> + *
> + * OPCODE_END emits nothing.  For every other opcode, all three source
> + * operands and the destination are fetched first; if c->error is set at
> + * that point, nothing is emitted.  Opcodes with a dedicated case are
> + * lowered there, and everything else goes through the op_trans[] table.
> + * A requested saturate is applied last, as an fsat of the destination.
> + */
> +static void
> +ptn_emit_instruction(struct ptn_compile *c, struct prog_instruction *prog_inst)
> +{
> +   const unsigned op = prog_inst->Opcode;
> +   if (op == OPCODE_END)
> +      return;
> +
> +   struct nir_builder *b = &c->build;
> +
> +   nir_ssa_def *src[3];
> +   for (unsigned s = 0; s < 3; s++)
> +      src[s] = ptn_get_src(c, &prog_inst->SrcReg[s]);
> +
> +   nir_alu_dest dest = ptn_get_dest(c, &prog_inst->DstReg);
> +   if (c->error)
> +      return;
> +
> +   switch (op) {
> +   /* Scalar opcodes: only the X channel of each operand is used. */
> +   case OPCODE_RSQ: {
> +      nir_ssa_def *x = ptn_channel(c, src[0], X);
> +      ptn_move_dest(c, dest, nir_frsq(b, x));
> +      break;
> +   }
> +   case OPCODE_RCP: {
> +      nir_ssa_def *x = ptn_channel(c, src[0], X);
> +      ptn_move_dest(c, dest, nir_frcp(b, x));
> +      break;
> +   }
> +   case OPCODE_EX2: {
> +      nir_ssa_def *x = ptn_channel(c, src[0], X);
> +      ptn_move_dest(c, dest, nir_fexp2(b, x));
> +      break;
> +   }
> +   case OPCODE_LG2: {
> +      nir_ssa_def *x = ptn_channel(c, src[0], X);
> +      ptn_move_dest(c, dest, nir_flog2(b, x));
> +      break;
> +   }
> +   case OPCODE_POW: {
> +      nir_ssa_def *base = ptn_channel(c, src[0], X);
> +      nir_ssa_def *exponent = ptn_channel(c, src[1], X);
> +      ptn_move_dest(c, dest, nir_fpow(b, base, exponent));
> +      break;
> +   }
> +   case OPCODE_COS: {
> +      nir_ssa_def *x = ptn_channel(c, src[0], X);
> +      ptn_move_dest(c, dest, nir_fcos(b, x));
> +      break;
> +   }
> +   case OPCODE_SIN: {
> +      nir_ssa_def *x = ptn_channel(c, src[0], X);
> +      ptn_move_dest(c, dest, nir_fsin(b, x));
> +      break;
> +   }
> +
> +   /* Opcodes lowered by a dedicated helper. */
> +   case OPCODE_ARL:
> +      ptn_arl(c, dest, src);
> +      break;
> +   case OPCODE_EXP:
> +      ptn_exp(c, dest, src);
> +      break;
> +   case OPCODE_LOG:
> +      ptn_log(c, dest, src);
> +      break;
> +   case OPCODE_LRP:
> +      ptn_lrp(c, dest, src);
> +      break;
> +   case OPCODE_DST:
> +      ptn_dst(c, dest, src);
> +      break;
> +   case OPCODE_LIT:
> +      ptn_lit(c, dest, src);
> +      break;
> +   case OPCODE_XPD:
> +      ptn_xpd(c, dest, src);
> +      break;
> +   case OPCODE_DP2:
> +      ptn_dp2(c, dest, src);
> +      break;
> +   case OPCODE_DP3:
> +      ptn_dp3(c, dest, src);
> +      break;
> +   case OPCODE_DP4:
> +      ptn_dp4(c, dest, src);
> +      break;
> +   case OPCODE_DPH:
> +      ptn_dph(c, dest, src);
> +      break;
> +   case OPCODE_KIL:
> +      ptn_kil(c, dest, src);
> +      break;
> +   case OPCODE_CMP:
> +      ptn_cmp(c, dest, src);
> +      break;
> +   case OPCODE_SCS:
> +      ptn_scs(c, dest, src);
> +      break;
> +   case OPCODE_SLT:
> +      ptn_slt(c, dest, src);
> +      break;
> +   case OPCODE_SGT:
> +      ptn_sgt(c, dest, src);
> +      break;
> +   case OPCODE_SLE:
> +      ptn_sle(c, dest, src);
> +      break;
> +   case OPCODE_SGE:
> +      ptn_sge(c, dest, src);
> +      break;
> +   case OPCODE_SEQ:
> +      ptn_seq(c, dest, src);
> +      break;
> +   case OPCODE_SNE:
> +      ptn_sne(c, dest, src);
> +      break;
> +
> +   /* Every texturing opcode shares one helper. */
> +   case OPCODE_TEX:
> +   case OPCODE_TXB:
> +   case OPCODE_TXD:
> +   case OPCODE_TXL:
> +   case OPCODE_TXP:
> +   case OPCODE_TXP_NV:
> +      ptn_tex(c, dest, src, prog_inst);
> +      break;
> +
> +   case OPCODE_SWZ:
> +      /* Extended swizzles were already handled in ptn_get_src(). */
> +      ptn_alu(c, nir_op_fmov, dest, src);
> +      break;
> +
> +   case OPCODE_NOP:
> +      break;
> +
> +   default:
> +      /* A zero table entry means "no direct translation", except for MOV,
> +       * which is accepted explicitly even if its entry compares equal to 0.
> +       */
> +      if (op_trans[op] == 0 && op != OPCODE_MOV) {
> +         fprintf(stderr, "unknown opcode: %s\n", _mesa_opcode_string(op));
> +         abort();
> +      }
> +      ptn_alu(c, op_trans[op], dest, src);
> +      break;
> +   }
> +
> +   if (!prog_inst->SaturateMode)
> +      return;
> +
> +   /* Clamp the value just written by reading the destination back. */
> +   assert(prog_inst->SaturateMode == SATURATE_ZERO_ONE);
> +   assert(!dest.dest.is_ssa);
> +   ptn_move_dest(c, dest, nir_fsat(b, ptn_src_for_dest(c, &dest)));
> +}
> +
> +/**
> + * Emits one store_var intrinsic per output variable at the end of the
> + * shader, writing the register kept for that output into the variable.
> + *
> + * The stores are not generated incrementally as the PROGRAM_OUTPUT values
> + * are written, because there's no output load intrinsic, which means
> + * writemasks couldn't be handled.
> + */
> +static void
> +ptn_add_output_stores(struct ptn_compile *c)
> +{
> +   foreach_list_typed(nir_variable, var, node, &c->s->outputs) {
> +      const int slot = var->data.location;
> +      nir_intrinsic_instr *store_instr =
> +         nir_intrinsic_instr_create(c->s, nir_intrinsic_store_var);
> +
> +      store_instr->num_components = 4;
> +      store_instr->variables[0] =
> +         nir_deref_var_create(c->s, c->output_vars[slot]);
> +      store_instr->src[0].reg.reg = c->output_regs[slot];
> +
> +      nir_instr_insert_after_cf_list(c->cf_node_list, &store_instr->instr);
> +   }
> +}
> +
> +/**
> + * Creates the NIR variables and registers the translated program refers to:
> + * one input variable per bit of InputsRead, one output variable plus a
> + * backing register per bit of OutputsWritten, one register per temporary,
> + * and the address register.  Also records the uniform count.
> + *
> + * If a register cannot be created, c->error is set and the function returns
> + * early.
> + */
> +static void
> +setup_registers_and_variables(struct ptn_compile *c)
> +{
> +   struct nir_builder *b = &c->build;
> +
> +   /* Create input variables.
> +    *
> +    * InputsRead is tested with 64-bit masks below, so it has to be scanned
> +    * with the 64-bit find-last-set; _mesa_fls() takes a 32-bit value and
> +    * would miss any slot at bit 32 or above.  The result is one past the
> +    * highest set bit, hence the strict upper bound.
> +    */
> +   const int num_inputs = _mesa_flsll(c->prog->InputsRead);
> +   for (int i = 0; i < num_inputs; i++) {
> +      if (!(c->prog->InputsRead & BITFIELD64_BIT(i)))
> +         continue;
> +      nir_variable *var = rzalloc(c->s, nir_variable);
> +      var->type = glsl_vec4_type();
> +      var->data.read_only = true;
> +      var->data.mode = nir_var_shader_in;
> +      var->name = ralloc_asprintf(var, "in_%d", i);
> +      var->data.location = i;
> +      var->data.index = 0;
> +
> +      if (c->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
> +         struct gl_fragment_program *fp =
> +            (struct gl_fragment_program *) c->prog;
> +
> +         var->data.interpolation = fp->InterpQualifier[i];
> +
> +         if (i == VARYING_SLOT_POS) {
> +            var->data.origin_upper_left = fp->OriginUpperLeft;
> +            var->data.pixel_center_integer = fp->PixelCenterInteger;
> +         } else if (i == VARYING_SLOT_FOGC) {
> +            /* fogcoord is defined as <f, 0.0, 0.0, 1.0>.  Make the actual
> +             * input variable a float, and create a local containing the
> +             * full vec4 value.
> +             */
> +            var->type = glsl_float_type();
> +
> +            nir_intrinsic_instr *load_x =
> +               nir_intrinsic_instr_create(c->s, nir_intrinsic_load_var);
> +            load_x->num_components = 1;
> +            load_x->variables[0] = nir_deref_var_create(c->s, var);
> +            nir_ssa_dest_init(&load_x->instr, &load_x->dest, 1, NULL);
> +            nir_instr_insert_after_cf_list(c->cf_node_list, &load_x->instr);
> +
> +            nir_ssa_def *f001 = nir_vec4(b, &load_x->dest.ssa, ptn_imm_f(c, 0),
> +                                         ptn_imm_f(c, 0), ptn_imm_f(c, 1.0));
> +
> +            nir_variable *fullvar = rzalloc(c->s, nir_variable);
> +            fullvar->type = glsl_vec4_type();
> +            fullvar->data.mode = nir_var_local;
> +            /* Own the name through ralloc like every other variable name in
> +             * this function, rather than pointing at a string literal.
> +             */
> +            fullvar->name = ralloc_strdup(fullvar, "fogcoord_tmp");
> +            exec_list_push_tail(&c->impl->locals, &fullvar->node);
> +
> +            nir_intrinsic_instr *store =
> +               nir_intrinsic_instr_create(c->s, nir_intrinsic_store_var);
> +            store->num_components = 4;
> +            store->variables[0] = nir_deref_var_create(c->s, fullvar);
> +            store->src[0] = nir_src_for_ssa(f001);
> +            nir_instr_insert_after_cf_list(c->cf_node_list, &store->instr);
> +
> +            /* Insert the real input into the list so the driver has real
> +             * inputs, but set c->input_vars[i] to the temporary so we use
> +             * the splatted value.
> +             */
> +            exec_list_push_tail(&c->s->inputs, &var->node);
> +            c->input_vars[i] = fullvar;
> +            continue;
> +         }
> +      }
> +
> +      exec_list_push_tail(&c->s->inputs, &var->node);
> +      c->input_vars[i] = var;
> +   }
> +
> +   /* Create output registers and variables.  OutputsWritten is 64 bits wide
> +    * as well, so size the register array from the 64-bit scan.
> +    */
> +   const int max_outputs = _mesa_flsll(c->prog->OutputsWritten);
> +   c->output_regs = rzalloc_array(c, nir_register *, max_outputs);
> +
> +   for (int i = 0; i < max_outputs; i++) {
> +      if (!(c->prog->OutputsWritten & BITFIELD64_BIT(i)))
> +         continue;
> +
> +      /* Since we can't load from outputs in the IR, we make temporaries
> +       * for the outputs and emit stores to the real outputs at the end of
> +       * the shader.
> +       */
> +      nir_register *out_reg = nir_local_reg_create(c->impl);
> +      if (!out_reg) {
> +         c->error = true;
> +         return;
> +      }
> +      out_reg->num_components = 4;
> +
> +      nir_variable *var = rzalloc(c->s, nir_variable);
> +      var->type = glsl_vec4_type();
> +      var->data.mode = nir_var_shader_out;
> +      var->name = ralloc_asprintf(var, "out_%d", i);
> +
> +      var->data.location = i;
> +      var->data.index = 0;
> +
> +      c->output_regs[i] = out_reg;
> +
> +      exec_list_push_tail(&c->s->outputs, &var->node);
> +      c->output_vars[i] = var;
> +   }
> +
> +   /* Create temporary registers. */
> +   c->temp_regs = rzalloc_array(c, nir_register *, c->prog->NumTemporaries);
> +
> +   nir_register *reg;
> +   for (int i = 0; i < c->prog->NumTemporaries; i++) {
> +      reg = nir_local_reg_create(c->impl);
> +      if (!reg) {
> +         c->error = true;
> +         return;
> +      }
> +      reg->num_components = 4;
> +      c->temp_regs[i] = reg;
> +   }
> +
> +   /* Create the address register (for ARB_vertex_program). */
> +   reg = nir_local_reg_create(c->impl);
> +   if (!reg) {
> +      c->error = true;
> +      return;
> +   }
> +   reg->num_components = 1;
> +   c->addr_reg = reg;
> +
> +   /* Set the number of uniforms */
> +   c->s->num_uniforms = 4 * c->prog->Parameters->NumParameters;
> +}
> +
> +/**
> + * Translates a Mesa IR program into a newly created NIR shader.
> + *
> + * The shader is created with a NULL ralloc context.  The temporary
> + * translation context is always freed before returning.
> + *
> + * Returns the new shader, or NULL if the translation context or the shader
> + * could not be allocated or if translation set c->error; in the latter
> + * case the partially built shader is freed.
> + */
> +struct nir_shader *
> +prog_to_nir(struct gl_program *prog,
> +            const nir_shader_compiler_options *options)
> +{
> +   struct ptn_compile *c;
> +   struct nir_shader *s;
> +
> +   c = rzalloc(NULL, struct ptn_compile);
> +   if (!c)
> +      return NULL;
> +   s = nir_shader_create(NULL, options);
> +   if (!s)
> +      goto out;
> +   c->s = s;
> +   c->prog = prog;
> +
> +   nir_function *func = nir_function_create(s, "main");
> +   nir_function_overload *overload = nir_function_overload_create(func);
> +   c->impl = nir_function_impl_create(overload);
> +   c->cf_node_list = &c->impl->body;
> +
> +   c->build.shader = c->s;
> +   c->build.impl = c->impl;
> +   c->build.cf_node_list = c->cf_node_list;
> +
> +   /* Don't translate instructions against a half-initialized context: the
> +    * register arrays may be incomplete if setup bailed out early.
> +    */
> +   setup_registers_and_variables(c);
> +   if (unlikely(c->error))
> +      goto out;
> +
> +   for (unsigned int i = 0; i < prog->NumInstructions; i++) {
> +      ptn_emit_instruction(c, &prog->Instructions[i]);
> +
> +      /* The shader is discarded on error, so skip the output stores too. */
> +      if (unlikely(c->error))
> +         goto out;
> +   }
> +
> +   ptn_add_output_stores(c);
> +
> +out:
> +   /* Shared exit path: s is NULL if shader creation failed, and is dropped
> +    * here if translation reported an error.
> +    */
> +   if (c->error) {
> +      ralloc_free(s);
> +      s = NULL;
> +   }
> +   ralloc_free(c);
> +   return s;
> +}
> diff --git a/src/mesa/program/prog_to_nir.h b/src/mesa/program/prog_to_nir.h
> new file mode 100644
> index 0000000..3c9b664
> --- /dev/null
> +++ b/src/mesa/program/prog_to_nir.h
> @@ -0,0 +1,37 @@
> +/*
> + * Copyright © 2015 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + */
> +
> +#pragma once
> +#ifndef PROG_TO_NIR_H
> +#define PROG_TO_NIR_H
> +#ifdef __cplusplus
> +extern "C" {
> +#endif
> +
> +/**
> + * Translates a Mesa IR program into a newly created NIR shader.
> + *
> + * `options` is passed on to nir_shader_create().  Returns the new shader,
> + * or NULL on failure.
> + */
> +struct nir_shader *prog_to_nir(struct gl_program *prog,
> +                               const nir_shader_compiler_options *options);
> +
> +#ifdef __cplusplus
> +}
> +#endif
> +#endif
> --
> 2.3.3
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev