Mesa (master): nvc0: hook up to new shader code generator

Christoph Bumiller chrisbmr at kemper.freedesktop.org
Wed Sep 14 14:22:39 UTC 2011


Module: Mesa
Branch: master
Commit: 3afabfb929cf24a783c10c99bf0d86245e70a94a
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=3afabfb929cf24a783c10c99bf0d86245e70a94a

Author: Christoph Bumiller <e0425955 at student.tuwien.ac.at>
Date:   Tue Sep 13 23:10:35 2011 +0200

nvc0: hook up to new shader code generator

Also includes loading of shared shader library code (used for f64
and integer division) and setting up the immediate array buffer
which is appended to the code.

---

 src/gallium/drivers/nv50/nv50_context.h      |    1 +
 src/gallium/drivers/nv50/nv50_program.c      |   13 +
 src/gallium/drivers/nvc0/Makefile.sources    |    6 -
 src/gallium/drivers/nvc0/nvc0_context.c      |    3 +
 src/gallium/drivers/nvc0/nvc0_context.h      |    3 +
 src/gallium/drivers/nvc0/nvc0_program.c      |  973 ++++++++++++--------------
 src/gallium/drivers/nvc0/nvc0_program.h      |   66 +--
 src/gallium/drivers/nvc0/nvc0_screen.c       |    7 +-
 src/gallium/drivers/nvc0/nvc0_screen.h       |    1 +
 src/gallium/drivers/nvc0/nvc0_shader_state.c |   47 +-
 10 files changed, 508 insertions(+), 612 deletions(-)

diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h
index 77dbc82..ecffbbf 100644
--- a/src/gallium/drivers/nv50/nv50_context.h
+++ b/src/gallium/drivers/nv50/nv50_context.h
@@ -153,6 +153,7 @@ extern struct draw_stage *nv50_draw_render_stage(struct nv50_context *);
 
 /* nv50_program.c */
 boolean nv50_program_translate(struct nv50_program *);
+boolean nv50_program_translate_new(struct nv50_program *);
 void nv50_program_destroy(struct nv50_context *, struct nv50_program *);
 
 /* nv50_query.c */
diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index 4def93d..b61400b 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -29,6 +29,8 @@
 #include "tgsi/tgsi_util.h"
 #include "tgsi/tgsi_dump.h"
 
+#include "codegen/nv50_ir_driver.h"
+
 static INLINE unsigned
 bitcount4(const uint32_t val)
 {
@@ -625,6 +627,17 @@ nv50_prog_scan(struct nv50_translation_info *ti)
    return ret;
 }
 
+/* Temporary, need a reference to nv50_ir_generate_code in libnv50 or
+ * it "gets disappeared" and cannot be used in libnvc0 ...
+ */
+boolean
+nv50_program_translate_new(struct nv50_program *p)
+{
+   struct nv50_ir_prog_info info;
+
+   return nv50_ir_generate_code(&info);
+}
+
 boolean
 nv50_program_translate(struct nv50_program *p)
 {
diff --git a/src/gallium/drivers/nvc0/Makefile.sources b/src/gallium/drivers/nvc0/Makefile.sources
index 9b1fb97..95f796f 100644
--- a/src/gallium/drivers/nvc0/Makefile.sources
+++ b/src/gallium/drivers/nvc0/Makefile.sources
@@ -13,12 +13,6 @@ C_SOURCES := \
 	nvc0_vbo.c \
 	nvc0_program.c \
 	nvc0_shader_state.c \
-	nvc0_pc.c \
-	nvc0_pc_print.c \
-	nvc0_pc_emit.c \
-	nvc0_tgsi_to_nc.c \
-	nvc0_pc_optimize.c \
-	nvc0_pc_regalloc.c \
 	nvc0_push.c \
 	nvc0_push2.c \
 	nvc0_query.c
diff --git a/src/gallium/drivers/nvc0/nvc0_context.c b/src/gallium/drivers/nvc0/nvc0_context.c
index 360afbb..8fa1675 100644
--- a/src/gallium/drivers/nvc0/nvc0_context.c
+++ b/src/gallium/drivers/nvc0/nvc0_context.c
@@ -152,6 +152,9 @@ nvc0_create(struct pipe_screen *pscreen, void *priv)
 
    nouveau_context_init_vdec(&nvc0->base);
 
+   /* shader builtin library is per-screen, but we need a context for m2mf */
+   nvc0_program_library_upload(nvc0);
+
    return pipe;
 }
 
diff --git a/src/gallium/drivers/nvc0/nvc0_context.h b/src/gallium/drivers/nvc0/nvc0_context.h
index 353a541..c11d1c3 100644
--- a/src/gallium/drivers/nvc0/nvc0_context.h
+++ b/src/gallium/drivers/nvc0/nvc0_context.h
@@ -79,6 +79,7 @@ struct nvc0_context {
       uint8_t num_textures[5];
       uint8_t num_samplers[5];
       uint8_t tls_required; /* bitmask of shader types using l[] */
+      uint8_t c14_bound; /* whether immediate array constbuf is bound */
       uint16_t scissor;
       uint32_t uniform_buffer_bound[5];
    } state;
@@ -161,7 +162,9 @@ extern struct draw_stage *nvc0_draw_render_stage(struct nvc0_context *);
 
 /* nvc0_program.c */
 boolean nvc0_program_translate(struct nvc0_program *);
+boolean nvc0_program_upload_code(struct nvc0_context *, struct nvc0_program *);
 void nvc0_program_destroy(struct nvc0_context *, struct nvc0_program *);
+void nvc0_program_library_upload(struct nvc0_context *);
 
 /* nvc0_query.c */
 void nvc0_init_query_functions(struct nvc0_context *);
diff --git a/src/gallium/drivers/nvc0/nvc0_program.c b/src/gallium/drivers/nvc0/nvc0_program.c
index bcee027..eaad080 100644
--- a/src/gallium/drivers/nvc0/nvc0_program.c
+++ b/src/gallium/drivers/nvc0/nvc0_program.c
@@ -20,479 +20,343 @@
  * SOFTWARE.
  */
 
-#include "pipe/p_shader_tokens.h"
 #include "pipe/p_defines.h"
 
-#include "tgsi/tgsi_parse.h"
-#include "tgsi/tgsi_util.h"
-#include "tgsi/tgsi_dump.h"
-
 #include "nvc0_context.h"
-#include "nvc0_pc.h"
-
-static unsigned
-nvc0_tgsi_src_mask(const struct tgsi_full_instruction *inst, int c)
-{
-   unsigned mask = inst->Dst[0].Register.WriteMask;
-
-   switch (inst->Instruction.Opcode) {
-   case TGSI_OPCODE_COS:
-   case TGSI_OPCODE_SIN:
-      return (mask & 0x8) | ((mask & 0x7) ? 0x1 : 0x0);
-   case TGSI_OPCODE_DP3:
-      return 0x7;
-   case TGSI_OPCODE_DP4:
-   case TGSI_OPCODE_DPH:
-   case TGSI_OPCODE_KIL: /* WriteMask ignored */
-      return 0xf;
-   case TGSI_OPCODE_DST:
-      return mask & (c ? 0xa : 0x6);
-   case TGSI_OPCODE_EX2:
-   case TGSI_OPCODE_EXP:
-   case TGSI_OPCODE_LG2:
-   case TGSI_OPCODE_LOG:
-   case TGSI_OPCODE_POW:
-   case TGSI_OPCODE_RCP:
-   case TGSI_OPCODE_RSQ:
-   case TGSI_OPCODE_SCS:
-      return 0x1;
-   case TGSI_OPCODE_IF:
-      return 0x1;
-   case TGSI_OPCODE_LIT:
-      return 0xb;
-   case TGSI_OPCODE_TEX:
-   case TGSI_OPCODE_TXB:
-   case TGSI_OPCODE_TXL:
-   case TGSI_OPCODE_TXP:
-   {
-      const struct tgsi_instruction_texture *tex;
-
-      assert(inst->Instruction.Texture);
-      tex = &inst->Texture;
-
-      mask = 0x7;
-      if (inst->Instruction.Opcode != TGSI_OPCODE_TEX &&
-          inst->Instruction.Opcode != TGSI_OPCODE_TXD)
-         mask |= 0x8; /* bias, lod or proj */
-
-      switch (tex->Texture) {
-      case TGSI_TEXTURE_1D:
-         mask &= 0x9;
-         break;
-      case TGSI_TEXTURE_SHADOW1D:
-         mask &= 0x5;
-         break;
-      case TGSI_TEXTURE_2D:
-         mask &= 0xb;
-         break;
-      default:
-         break;
-      }
-   }
-  	   return mask;
-   case TGSI_OPCODE_XPD:
-   {
-      unsigned x = 0;
-      if (mask & 1) x |= 0x6;
-      if (mask & 2) x |= 0x5;
-      if (mask & 4) x |= 0x3;
-      return x;
-   }
-   default:
-      break;
-   }
 
-   return mask;
-}
+#include "nv50/codegen/nv50_ir_driver.h"
 
+/* If only they told us the actual semantic instead of just GENERIC ... */
 static void
-nvc0_indirect_inputs(struct nvc0_translation_info *ti, int id)
+nvc0_mesa_varying_hack(struct nv50_ir_varying *var)
 {
-   int i, c;
-
-   for (i = 0; i < PIPE_MAX_SHADER_INPUTS; ++i)
-      for (c = 0; c < 4; ++c)
-         ti->input_access[i][c] = id;
+   unsigned c;
 
-   ti->indirect_inputs = TRUE;
-}
-
-static void
-nvc0_indirect_outputs(struct nvc0_translation_info *ti, int id)
-{
-   int i, c;
+   if (var->sn != TGSI_SEMANTIC_GENERIC)
+      return;
 
-   for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; ++i)
+   if (var->si <= 7) /* gl_TexCoord */
       for (c = 0; c < 4; ++c)
-         ti->output_access[i][c] = id;
-
-   ti->indirect_outputs = TRUE;
+         var->slot[c] = (0x300 + var->si * 0x10 + c * 0x4) / 4;
+   else
+   if (var->si == 9) /* gl_PointCoord */
+      for (c = 0; c < 4; ++c)
+         var->slot[c] = (0x2e0 + c * 0x4) / 4;
+   else
+      for (c = 0; c < 4; ++c) /* move down user varyings (first has index 8) */
+         var->slot[c] -= 0x80 / 4;
 }
 
-static INLINE unsigned
-nvc0_system_value_location(unsigned sn, unsigned si, boolean *is_input)
+static uint32_t
+nvc0_shader_input_address(unsigned sn, unsigned si, unsigned ubase)
 {
-   /* NOTE: locations 0xfxx indicate special regs */
    switch (sn) {
-      /*
-   case TGSI_SEMANTIC_VERTEXID:
-      *is_input = TRUE;
-      return 0x2fc;
-      */
-   case TGSI_SEMANTIC_PRIMID:
-      *is_input = TRUE;
-      return 0x60;
-      /*
-   case TGSI_SEMANTIC_LAYER_INDEX:
-      return 0x64;
-   case TGSI_SEMANTIC_VIEWPORT_INDEX:
-      return 0x68;
-      */
-   case TGSI_SEMANTIC_INSTANCEID:
-      *is_input = TRUE;
-      return 0x2f8;
-   case TGSI_SEMANTIC_FACE:
-      *is_input = TRUE;
-      return 0x3fc;
-      /*
-   case TGSI_SEMANTIC_INVOCATIONID:
-      return 0xf11;
-      */
+/* case TGSI_SEMANTIC_TESSFACTOR:   return 0x000 + si * 0x4; */
+   case TGSI_SEMANTIC_PRIMID:       return 0x060;
+   case TGSI_SEMANTIC_PSIZE:        return 0x06c;
+   case TGSI_SEMANTIC_POSITION:     return 0x070;
+   case TGSI_SEMANTIC_GENERIC:      return ubase + si * 0x10;
+   case TGSI_SEMANTIC_FOG:          return 0x270;
+   case TGSI_SEMANTIC_COLOR:        return 0x280 + si * 0x10;
+   case TGSI_SEMANTIC_BCOLOR:       return 0x2a0 + si * 0x10;
+/* case TGSI_SEMANTIC_CLIP:         return 0x2c0 + si * 0x10; */
+/* case TGSI_SEMANTIC_POINTCOORD:   return 0x2e0; */
+/* case TGSI_SEMANTIC_TESSCOORD:    return ~0; */ /* 0x2f0, but special load */
+   case TGSI_SEMANTIC_INSTANCEID:   return 0x2f8;
+/* case TGSI_SEMANTIC_VERTEXID:     return 0x2fc; */
+/* case TGSI_SEMANTIC_TEXCOORD:     return 0x300 + si * 0x10; */
+   case TGSI_SEMANTIC_FACE:         return 0x3fc;
+/* case TGSI_SEMANTIC_INVOCATIONID: return ~0; */
    default:
-      assert(0);
-      return 0x000;
+      assert(!"invalid TGSI input semantic");
+      return ~0;
    }
 }
 
-static INLINE unsigned
-nvc0_varying_location(unsigned sn, unsigned si)
+static uint32_t
+nvc0_shader_output_address(unsigned sn, unsigned si, unsigned ubase)
 {
    switch (sn) {
-   case TGSI_SEMANTIC_POSITION:
-      return 0x70;
-   case TGSI_SEMANTIC_COLOR:
-      return 0x280 + (si * 16); /* are these hard-wired ? */
-   case TGSI_SEMANTIC_BCOLOR:
-      return 0x2a0 + (si * 16);
-   case TGSI_SEMANTIC_FOG:
-      return 0x270;
-   case TGSI_SEMANTIC_PSIZE:
-      return 0x6c;
-      /*
-   case TGSI_SEMANTIC_PNTC:
-      return 0x2e0;
-      */
-   case TGSI_SEMANTIC_GENERIC:
-      /* We'd really like to distinguish between TEXCOORD and GENERIC here,
-       * since only 0x300 to 0x37c can be replaced by sprite coordinates.
-       * Also, gl_PointCoord should be a system value and must be assigned to
-       * address 0x2e0. For now, let's cheat:
-       */
-      assert(si < 31);
-      if (si <= 7)
-         return 0x300 + si * 16;
-      if (si == 9)
-         return 0x2e0;
-      return 0x80 + ((si - 8) * 16);
-   case TGSI_SEMANTIC_NORMAL:
-      return 0x360;
-   case TGSI_SEMANTIC_PRIMID:
-      return 0x40;
-   case TGSI_SEMANTIC_FACE:
-      return 0x3fc;
-   case TGSI_SEMANTIC_EDGEFLAG: /* doesn't exist, set value like for an sreg */
-      return 0xf00;
-      /*
-   case TGSI_SEMANTIC_CLIP_DISTANCE:
-      return 0x2c0 + (si * 4);
-      */
+/* case TGSI_SEMANTIC_TESSFACTOR:    return 0x000 + si * 0x4; */
+   case TGSI_SEMANTIC_PRIMID:        return 0x040;
+/* case TGSI_SEMANTIC_LAYER:         return 0x064; */
+/* case TGSI_SEMANTIC_VIEWPORTINDEX: return 0x068; */
+   case TGSI_SEMANTIC_PSIZE:         return 0x06c;
+   case TGSI_SEMANTIC_POSITION:      return 0x070;
+   case TGSI_SEMANTIC_GENERIC:       return ubase + si * 0x10;
+   case TGSI_SEMANTIC_FOG:           return 0x270;
+   case TGSI_SEMANTIC_COLOR:         return 0x280 + si * 0x10;
+   case TGSI_SEMANTIC_BCOLOR:        return 0x2a0 + si * 0x10;
+/* case TGSI_SEMANTIC_CLIP:          return 0x2c0 + si * 0x10; */
+/* case TGSI_SEMANTIC_TEXCOORD:      return 0x300 + si * 0x10; */
+   case TGSI_SEMANTIC_EDGEFLAG:      return ~0;
    default:
-      assert(0);
-      return 0x000;
+      assert(!"invalid TGSI output semantic");
+      return ~0;
    }
 }
 
-static INLINE unsigned
-nvc0_interp_mode(const struct tgsi_full_declaration *decl)
+static int
+nvc0_vp_assign_input_slots(struct nv50_ir_prog_info *info)
 {
-   unsigned mode;
-
-   if (decl->Declaration.Interpolate == TGSI_INTERPOLATE_CONSTANT)
-      mode = NVC0_INTERP_FLAT;
-   else
-   if (decl->Declaration.Interpolate == TGSI_INTERPOLATE_PERSPECTIVE)
-      mode = NVC0_INTERP_PERSPECTIVE;
-   else
-   if (decl->Declaration.Semantic && decl->Semantic.Name == TGSI_SEMANTIC_COLOR)
-      mode = NVC0_INTERP_PERSPECTIVE;
-   else
-      mode = NVC0_INTERP_LINEAR;
+   unsigned i, c;
 
-   if (decl->Declaration.Centroid)
-      mode |= NVC0_INTERP_CENTROID;
+   for (i = 0; i < info->numInputs; ++i)
+      for (c = 0; c < 4; ++c)
+         info->in[i].slot[c] = (0x80 + i * 0x10 + c * 0x4) / 4;
 
-   return mode;
+   return 0;
 }
 
-static void
-prog_immediate(struct nvc0_translation_info *ti,
-               const struct tgsi_full_immediate *imm)
+static int
+nvc0_sp_assign_input_slots(struct nv50_ir_prog_info *info)
 {
-   int c;
-   unsigned n = ti->immd32_nr++;
+   unsigned ubase = MAX2(0x80, 0x20 + info->numPatchConstants * 0x10);
+   unsigned offset;
+   unsigned i, c;
 
-   assert(ti->immd32_nr <= ti->scan.immediate_count);
+   for (i = 0; i < info->numInputs; ++i) {
+      offset = nvc0_shader_input_address(info->in[i].sn,
+                                         info->in[i].si, ubase);
+      if (info->in[i].patch && offset >= 0x20)
+         offset = 0x20 + info->in[i].si * 0x10;
 
-   for (c = 0; c < 4; ++c)
-      ti->immd32[n * 4 + c] = imm->u[c].Uint;
+      for (c = 0; c < 4; ++c)
+         info->in[i].slot[c] = (offset + c * 0x4) / 4;
 
-   ti->immd32_ty[n] = imm->Immediate.DataType;
+      nvc0_mesa_varying_hack(&info->in[i]);
+   }
+
+   return 0;
 }
 
-static boolean
-prog_decl(struct nvc0_translation_info *ti,
-          const struct tgsi_full_declaration *decl)
+static int
+nvc0_fp_assign_output_slots(struct nv50_ir_prog_info *info)
 {
+   unsigned last = info->prop.fp.numColourResults * 4;
    unsigned i, c;
-   unsigned sn = TGSI_SEMANTIC_GENERIC;
-   unsigned si = 0;
-   const unsigned first = decl->Range.First;
-   const unsigned last = decl->Range.Last;
-
-   if (decl->Declaration.Semantic) {
-      sn = decl->Semantic.Name;
-      si = decl->Semantic.Index;
-   }
-   
-   switch (decl->Declaration.File) {
-   case TGSI_FILE_INPUT:
-      for (i = first; i <= last; ++i) {
-         if (ti->prog->type == PIPE_SHADER_VERTEX) {
-            for (c = 0; c < 4; ++c)
-               ti->input_loc[i][c] = 0x80 + i * 16 + c * 4;
-         } else {
-            for (c = 0; c < 4; ++c)
-               ti->input_loc[i][c] = nvc0_varying_location(sn, si) + c * 4;
-            /* for sprite coordinates: */
-            ti->prog->fp.in_pos[i] = ti->input_loc[i][0] / 4;
-         }
-         if (ti->prog->type == PIPE_SHADER_FRAGMENT)
-            ti->interp_mode[i] = nvc0_interp_mode(decl);
-      }
-      break;
-   case TGSI_FILE_OUTPUT:
-      for (i = first; i <= last; ++i, ++si) {
-         if (ti->prog->type == PIPE_SHADER_FRAGMENT) {
-            si = i;
-            if (i == ti->fp_depth_output) {
-               ti->output_loc[i][2] = (ti->scan.num_outputs - 1) * 4;
-            } else {
-               if (i > ti->fp_depth_output)
-                  si -= 1;
-               for (c = 0; c < 4; ++c)
-                  ti->output_loc[i][c] = si * 4 + c;
-            }
-         } else {
-            if (sn == TGSI_SEMANTIC_EDGEFLAG)
-               ti->edgeflag_out = i;
-            for (c = 0; c < 4; ++c)
-               ti->output_loc[i][c] = nvc0_varying_location(sn, si) + c * 4;
-            /* for TFB_VARYING_LOCS: */
-            ti->prog->vp.out_pos[i] = ti->output_loc[i][0] / 4;
-         }
-      }
-      break;
-   case TGSI_FILE_SYSTEM_VALUE:
-      i = first;
-      ti->sysval_loc[i] = nvc0_system_value_location(sn, si, &ti->sysval_in[i]);
-      assert(first == last);
-      break;
-   case TGSI_FILE_TEMPORARY:
-      ti->temp128_nr = MAX2(ti->temp128_nr, last + 1);
-      break;
-   case TGSI_FILE_NULL:
-   case TGSI_FILE_CONSTANT:
-   case TGSI_FILE_SAMPLER:
-   case TGSI_FILE_ADDRESS:
-   case TGSI_FILE_IMMEDIATE:
-   case TGSI_FILE_PREDICATE:
-      break;
-   default:
-      NOUVEAU_ERR("unhandled TGSI_FILE %d\n", decl->Declaration.File);
-      return FALSE;
-   }
-   return TRUE;
-}
 
-static void
-prog_inst(struct nvc0_translation_info *ti,
-          const struct tgsi_full_instruction *inst, int id)
-{
-   const struct tgsi_dst_register *dst;
-   const struct tgsi_src_register *src;
-   int s, c, k;
-   unsigned mask;
-
-   if (inst->Instruction.Opcode == TGSI_OPCODE_BGNSUB) {
-      ti->subr[ti->num_subrs].first_insn = id - 1;
-      ti->subr[ti->num_subrs].id = ti->num_subrs + 1; /* id 0 is main program */
-      ++ti->num_subrs;
-   }
+   for (i = 0; i < info->numOutputs; ++i)
+      if (info->out[i].sn == TGSI_SEMANTIC_COLOR)
+         for (c = 0; c < 4; ++c)
+            info->out[i].slot[c] = info->out[i].si * 4 + c;
 
-   if (inst->Dst[0].Register.File == TGSI_FILE_OUTPUT) {
-      dst = &inst->Dst[0].Register;
+   if (info->io.sampleMask < PIPE_MAX_SHADER_OUTPUTS)
+      info->out[info->io.sampleMask].slot[0] = last++;
 
-      for (c = 0; c < 4; ++c) {
-         if (dst->Indirect)
-            nvc0_indirect_outputs(ti, id);
-         if (!(dst->WriteMask & (1 << c)))
-            continue;
-         ti->output_access[dst->Index][c] = id;
-      }
+   if (info->io.fragDepth < PIPE_MAX_SHADER_OUTPUTS)
+      info->out[info->io.fragDepth].slot[2] = last;
 
-      if (inst->Instruction.Opcode == TGSI_OPCODE_MOV &&
-          inst->Src[0].Register.File == TGSI_FILE_INPUT &&
-          dst->Index == ti->edgeflag_out)
-         ti->prog->vp.edgeflag = inst->Src[0].Register.Index;
-   } else
-   if (inst->Dst[0].Register.File == TGSI_FILE_TEMPORARY) {
-      if (inst->Dst[0].Register.Indirect)
-         ti->require_stores = TRUE;
-   }
+   return 0;
+}
 
-   for (s = 0; s < inst->Instruction.NumSrcRegs; ++s) {
-      src = &inst->Src[s].Register;
-      if (src->File == TGSI_FILE_TEMPORARY)
-         if (inst->Src[s].Register.Indirect)
-            ti->require_stores = TRUE;
-      if (src->File != TGSI_FILE_INPUT)
-         continue;
-      mask = nvc0_tgsi_src_mask(inst, s);
+static int
+nvc0_sp_assign_output_slots(struct nv50_ir_prog_info *info)
+{
+   unsigned ubase = MAX2(0x80, 0x20 + info->numPatchConstants * 0x10);
+   unsigned offset;
+   unsigned i, c;
 
-      if (inst->Src[s].Register.Indirect)
-         nvc0_indirect_inputs(ti, id);
+   for (i = 0; i < info->numOutputs; ++i) {
+      offset = nvc0_shader_output_address(info->out[i].sn,
+                                          info->out[i].si, ubase);
+      if (info->out[i].patch && offset >= 0x20)
+         offset = 0x20 + info->out[i].si * 0x10;
 
-      for (c = 0; c < 4; ++c) {
-         if (!(mask & (1 << c)))
-            continue;
-         k = tgsi_util_get_full_src_register_swizzle(&inst->Src[s], c);
-         if (k <= TGSI_SWIZZLE_W)
-            ti->input_access[src->Index][k] = id;
-      }
+      for (c = 0; c < 4; ++c)
+         info->out[i].slot[c] = (offset + c * 0x4) / 4;
+
+      nvc0_mesa_varying_hack(&info->out[i]);
    }
+
+   return 0;
 }
 
-/* Probably should introduce something like struct tgsi_function_declaration
- * instead of trying to guess inputs/outputs.
- */
-static void
-prog_subroutine_inst(struct nvc0_subroutine *subr,
-                     const struct tgsi_full_instruction *inst)
+static int
+nvc0_program_assign_varying_slots(struct nv50_ir_prog_info *info)
 {
-   const struct tgsi_dst_register *dst;
-   const struct tgsi_src_register *src;
-   int s, c, k;
-   unsigned mask;
-
-   for (s = 0; s < inst->Instruction.NumSrcRegs; ++s) {
-      src = &inst->Src[s].Register;
-      if (src->File != TGSI_FILE_TEMPORARY)
-         continue;
-      mask = nvc0_tgsi_src_mask(inst, s);
+   int ret;
 
-      for (c = 0; c < 4; ++c) {
-         k = tgsi_util_get_full_src_register_swizzle(&inst->Src[s], c);
+   if (info->type == PIPE_SHADER_VERTEX)
+      ret = nvc0_vp_assign_input_slots(info);
+   else
+      ret = nvc0_sp_assign_input_slots(info);
+   if (ret)
+      return ret;
 
-         if ((mask & (1 << c)) && k < TGSI_SWIZZLE_W)
-            if (!(subr->retv[src->Index / 32][k] & (1 << (src->Index % 32))))
-               subr->argv[src->Index / 32][k] |= 1 << (src->Index % 32);
-      }
-   }
+   if (info->type == PIPE_SHADER_FRAGMENT)
+      ret = nvc0_fp_assign_output_slots(info);
+   else
+      ret = nvc0_sp_assign_output_slots(info);
+   return ret;
+}
 
-   if (inst->Dst[0].Register.File == TGSI_FILE_TEMPORARY) {
-      dst = &inst->Dst[0].Register;
+static INLINE void
+nvc0_vtgp_hdr_update_oread(struct nvc0_program *vp, uint8_t slot)
+{
+   uint8_t min = (vp->hdr[4] >> 12) & 0xff;
+   uint8_t max = (vp->hdr[4] >> 24);
 
-      for (c = 0; c < 4; ++c)
-         if (dst->WriteMask & (1 << c))
-            subr->retv[dst->Index / 32][c] |= 1 << (dst->Index % 32);
-   }
+   min = MIN2(min, slot);
+   max = MAX2(max, slot);
+
+   vp->hdr[4] = (max << 24) | (min << 12);
 }
 
+/* Common part of header generation for VP, TCP, TEP and GP. */
 static int
-nvc0_vp_gp_gen_header(struct nvc0_program *vp, struct nvc0_translation_info *ti)
+nvc0_vtgp_gen_header(struct nvc0_program *vp, struct nv50_ir_prog_info *info)
 {
-   int i, c;
-   unsigned a;
+   unsigned i, c, a;
 
-   for (a = 0x80/4, i = 0; i <= ti->scan.file_max[TGSI_FILE_INPUT]; ++i) {
-      for (c = 0; c < 4; ++c, ++a)
-         if (ti->input_access[i][c])
-            vp->hdr[5 + a / 32] |= 1 << (a % 32); /* VP_ATTR_EN */
+   for (i = 0; i < info->numInputs; ++i) {
+      if (info->in[i].patch)
+         continue;
+      for (c = 0; c < 4; ++c) {
+         a = info->in[i].slot[c];
+         if (info->in[i].mask & (1 << c))
+            vp->hdr[5 + a / 32] |= 1 << (a % 32);
+      }
    }
 
-   for (i = 0; i <= ti->scan.file_max[TGSI_FILE_OUTPUT]; ++i) {
-      a = (ti->output_loc[i][0] - 0x40) / 4;
-      if (ti->output_loc[i][0] >= 0xf00)
+   for (i = 0; i < info->numOutputs; ++i) {
+      if (info->out[i].patch)
          continue;
-      for (c = 0; c < 4; ++c, ++a) {
-         if (!ti->output_access[i][c])
+      for (c = 0; c < 4; ++c) {
+         if (!(info->out[i].mask & (1 << c)))
             continue;
-         vp->hdr[13 + a / 32] |= 1 << (a % 32); /* VP_EXPORT_EN */
+         assert(info->out[i].slot[c] >= 0x40 / 4);
+         a = info->out[i].slot[c] - 0x40 / 4;
+         vp->hdr[13 + a / 32] |= 1 << (a % 32);
+         if (info->out[i].oread)
+            nvc0_vtgp_hdr_update_oread(vp, info->out[i].slot[c]);
       }
    }
 
-   for (i = 0; i < TGSI_SEMANTIC_COUNT; ++i) {
-      a = ti->sysval_loc[i] / 4;
-      if (a > 0 && a < (0xf00 / 4))
-         vp->hdr[(ti->sysval_in[i] ? 5 : 13) + a / 32] |= 1 << (a % 32);
+   for (i = 0; i < info->numSysVals; ++i) {
+      switch (info->sv[i].sn) {
+      case TGSI_SEMANTIC_PRIMID:
+         vp->hdr[5] |= 1 << 24;
+         break;
+      case TGSI_SEMANTIC_INSTANCEID:
+         vp->hdr[10] |= 1 << 30;
+         break;
+         /*
+      case TGSI_SEMANTIC_VERTEXID:
+         vp->hdr[10] |= 1 << 31;
+         break;
+         */
+      default:
+         break;
+      }
    }
 
    return 0;
 }
 
 static int
-nvc0_vp_gen_header(struct nvc0_program *vp, struct nvc0_translation_info *ti)
+nvc0_vp_gen_header(struct nvc0_program *vp, struct nv50_ir_prog_info *info)
 {
-   vp->hdr[0] = 0x20461;
+   vp->hdr[0] = 0x20061 | (1 << 10);
    vp->hdr[4] = 0xff000;
 
-   vp->hdr[18] = (1 << vp->vp.num_ucps) - 1;
+   vp->hdr[18] = (1 << info->io.clipDistanceCount) - 1;
+
+   return nvc0_vtgp_gen_header(vp, info);
+}
+
+#if defined(PIPE_SHADER_HULL) || defined(PIPE_SHADER_DOMAIN)
+static void
+nvc0_tp_get_tess_mode(struct nvc0_program *tp, struct nv50_ir_prog_info *info)
+{
+   switch (info->prop.tp.domain) {
+   case PIPE_PRIM_LINES:
+      tp->tp.tess_mode = NVC0_3D_TESS_MODE_PRIM_ISOLINES;
+      break;
+   case PIPE_PRIM_TRIANGLES:
+      tp->tp.tess_mode = NVC0_3D_TESS_MODE_PRIM_TRIANGLES;
+      if (info->prop.tp.winding > 0)
+         tp->tp.tess_mode |= NVC0_3D_TESS_MODE_CW;
+      break;
+   case PIPE_PRIM_QUADS:
+      tp->tp.tess_mode = NVC0_3D_TESS_MODE_PRIM_QUADS;
+      break;
+   default:
+      tp->tp.tess_mode = ~0;
+      return;
+   }
+   if (info->prop.tp.outputPrim != PIPE_PRIM_POINTS)
+      tp->tp.tess_mode |= NVC0_3D_TESS_MODE_CONNECTED;
 
-   return nvc0_vp_gp_gen_header(vp, ti);
+   switch (info->prop.tp.partitioning) {
+   case PIPE_TESS_PART_INTEGER:
+   case PIPE_TESS_PART_POW2:
+      tp->tp.tess_mode |= NVC0_3D_TESS_MODE_SPACING_EQUAL;
+      break;
+   case PIPE_TESS_PART_FRACT_ODD:
+      tp->tp.tess_mode |= NVC0_3D_TESS_MODE_SPACING_FRACTIONAL_ODD;
+      break;
+   case PIPE_TESS_PART_FRACT_EVEN:
+      tp->tp.tess_mode |= NVC0_3D_TESS_MODE_SPACING_FRACTIONAL_EVEN;
+      break;
+   default:
+      assert(!"invalid tessellator partitioning");
+      break;
+   }
 }
+#endif
 
+#ifdef PIPE_SHADER_HULL
 static int
-nvc0_gp_gen_header(struct nvc0_program *gp, struct nvc0_translation_info *ti)
+nvc0_tcp_gen_header(struct nvc0_program *tcp, struct nv50_ir_prog_info *info)
 {
-   unsigned invocations = 1;
-   unsigned max_output_verts, output_prim;
-   unsigned i;
+   unsigned opcs = 6; /* output patch constants (at least the TessFactors) */
 
-   gp->hdr[0] = 0x21061;
+   tcp->tp.input_patch_size = info->prop.tp.inputPatchSize;
 
-   for (i = 0; i < ti->scan.num_properties; ++i) {
-      switch (ti->scan.properties[i].name) {
-      case TGSI_PROPERTY_GS_OUTPUT_PRIM:
-         output_prim = ti->scan.properties[i].data[0];
-         break;
-      case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES:
-         max_output_verts = ti->scan.properties[i].data[0];
-         assert(max_output_verts < 512);
-         break;
-         /*
-      case TGSI_PROPERTY_GS_INVOCATIONS:
-         invocations = ti->scan.properties[i].data[0];
-         assert(invocations <= 32);
-         break;
-         */
-      default:
-         break;
-      }
-   }
+   if (info->numPatchConstants)
+      opcs = 8 + info->numPatchConstants * 4;
+
+   tcp->hdr[0] = 0x20061 | (2 << 10);
+
+   tcp->hdr[1] = opcs << 24;
+   tcp->hdr[2] = info->prop.tp.outputPatchSize << 24;
+
+   tcp->hdr[4] = 0xff000; /* initial min/max parallel output read address */
+
+   nvc0_vtgp_gen_header(tcp, info);
 
-   gp->hdr[2] = MIN2(invocations, 32) << 24;
+   nvc0_tp_get_tess_mode(tcp, info);
 
-   switch (output_prim) {
+   return 0;
+}
+#endif
+
+#ifdef PIPE_SHADER_DOMAIN
+static int
+nvc0_tep_gen_header(struct nvc0_program *tep, struct nv50_ir_prog_info *info)
+{
+   tep->hdr[0] = 0x20061 | (3 << 10);
+   tep->hdr[4] = 0xff000;
+
+   nvc0_vtgp_gen_header(tep, info);
+
+   nvc0_tp_get_tess_mode(tep, info);
+
+   tep->hdr[18] |= 0x3 << 12; /* ? */
+
+   return 0;
+}
+#endif
+
+static int
+nvc0_gp_gen_header(struct nvc0_program *gp, struct nv50_ir_prog_info *info)
+{
+   gp->hdr[0] = 0x20061 | (4 << 10);
+
+   gp->hdr[2] = MIN2(info->prop.gp.instanceCount, 32) << 24;
+
+   switch (info->prop.gp.outputPrim) {
    case PIPE_PRIM_POINTS:
       gp->hdr[3] = 0x01000000;
       gp->hdr[0] |= 0xf0000000;
@@ -510,206 +374,263 @@ nvc0_gp_gen_header(struct nvc0_program *gp, struct nvc0_translation_info *ti)
       break;
    }
 
-   gp->hdr[4] = max_output_verts & 0x1ff;
+   gp->hdr[4] = info->prop.gp.maxVertices & 0x1ff;
+
+   return nvc0_vtgp_gen_header(gp, info);
+}
+
+#define NVC0_INTERP_FLAT          (1 << 0)
+#define NVC0_INTERP_PERSPECTIVE   (2 << 0)
+#define NVC0_INTERP_LINEAR        (3 << 0)
+#define NVC0_INTERP_CENTROID      (1 << 2)
 
-   return nvc0_vp_gp_gen_header(gp, ti);
+static uint8_t
+nvc0_hdr_interp_mode(const struct nv50_ir_varying *var)
+{
+   if (var->linear)
+      return NVC0_INTERP_LINEAR;
+   if (var->flat)
+      return NVC0_INTERP_FLAT;
+   return NVC0_INTERP_PERSPECTIVE;
 }
 
 static int
-nvc0_fp_gen_header(struct nvc0_program *fp, struct nvc0_translation_info *ti)
+nvc0_fp_gen_header(struct nvc0_program *fp, struct nv50_ir_prog_info *info)
 {
-   int i, c;
-   unsigned a, m;
-   
-   fp->hdr[0] = 0x21462;
+   unsigned i, c, a, m;
+
+   fp->hdr[0] = 0x20062 | (5 << 10);
    fp->hdr[5] = 0x80000000; /* getting a trap if FRAG_COORD_UMASK.w = 0 */
 
-   if (ti->scan.uses_kill)
+   if (info->prop.fp.usesDiscard)
       fp->hdr[0] |= 0x8000;
-   if (ti->scan.writes_z) {
+   if (info->prop.fp.numColourResults > 1)
+      fp->hdr[0] |= 0x4000;
+   if (info->io.sampleMask < PIPE_MAX_SHADER_OUTPUTS)
+      fp->hdr[19] |= 0x1;
+   if (info->prop.fp.writesDepth) {
       fp->hdr[19] |= 0x2;
-      if (ti->scan.num_outputs > 2)
-         fp->hdr[0] |= 0x4000; /* FP_MULTIPLE_COLOR_OUTPUTS */
-   } else {
-   if (ti->scan.num_outputs > 1)
-      fp->hdr[0] |= 0x4000; /* FP_MULTIPLE_COLOR_OUTPUTS */
+      fp->flags[0] = 0x11; /* deactivate ZCULL */
    }
 
-   for (i = 0; i <= ti->scan.file_max[TGSI_FILE_INPUT]; ++i) {
-      m = ti->interp_mode[i] & 3;
+   for (i = 0; i < info->numInputs; ++i) {
+      m = nvc0_hdr_interp_mode(&info->in[i]);
       for (c = 0; c < 4; ++c) {
-         if (!ti->input_access[i][c])
+         if (!(info->in[i].mask & (1 << c)))
             continue;
-         a = ti->input_loc[i][c] / 2;
-         if (ti->input_loc[i][c] >= 0x2c0)
-            a -= 32;
-         if (ti->input_loc[i][0] == 0x70)
-            fp->hdr[5] |= 1 << (28 + c); /* FRAG_COORD_UMASK */
-         else
-         if (ti->input_loc[i][0] == 0x2e0)
-            fp->hdr[14] |= 1 << (24 + c); /* POINT_COORD */
-         else
+         if (info->in[i].slot[0] == (0x070 / 4)) {
+            fp->hdr[5] |= 1 << (28 + c);
+         } else
+         if (info->in[i].slot[0] == (0x2e0 / 4)) {
+            if (c <= 1)
+               fp->hdr[14] |= 1 << (24 + c);
+         } else {
+            if (info->in[i].slot[c] < (0x040 / 4) ||
+                info->in[i].slot[c] > (0x380 / 4))
+               continue;
+            a = info->in[i].slot[c] * 2;
+            if (info->in[i].slot[0] >= (0x2c0 / 4))
+               a -= 32;
             fp->hdr[4 + a / 32] |= m << (a % 32);
+         }
       }
    }
 
-   for (i = 0; i <= ti->scan.file_max[TGSI_FILE_OUTPUT]; ++i) {
-      if (i != ti->fp_depth_output)
-         fp->hdr[18] |= 0xf << ti->output_loc[i][0];
+   for (i = 0; i < info->numOutputs; ++i) {
+      if (info->out[i].sn == TGSI_SEMANTIC_COLOR)
+         fp->hdr[18] |= info->out[i].mask << info->out[i].slot[0];
    }
 
-   for (i = 0; i < TGSI_SEMANTIC_COUNT; ++i) {
-      a = ti->sysval_loc[i] / 2;
-      if ((a > 0) && (a < 0xf00 / 2))
-         fp->hdr[4 + a / 32] |= NVC0_INTERP_FLAT << (a % 32);
-   }
+   fp->fp.early_z = info->prop.fp.earlyFragTests;
+   if (fp->fp.early_z == FALSE && fp->code_size >= 0x400)
+      fp->fp.early_z = !(info->prop.fp.writesDepth ||
+                         info->prop.fp.usesDiscard ||
+                         (info->io.globalAccess & 2));
 
    return 0;
 }
 
-static boolean
-nvc0_prog_scan(struct nvc0_translation_info *ti)
+#ifdef DEBUG
+static void
+nvc0_program_dump(struct nvc0_program *prog)
 {
-   struct nvc0_program *prog = ti->prog;
-   struct tgsi_parse_context parse;
-   int ret;
-   unsigned i;
+   unsigned pos;
 
-#if NV50_DEBUG & NV50_DEBUG_SHADER
-   tgsi_dump(prog->pipe.tokens, 0);
+   for (pos = 0; pos < sizeof(prog->hdr) / sizeof(prog->hdr[0]); ++pos)
+      debug_printf("HDR[%02lx] = 0x%08x\n",
+                   pos * sizeof(prog->hdr[0]), prog->hdr[pos]);
+
+   debug_printf("shader binary code (0x%x bytes):", prog->code_size);
+   for (pos = 0; pos < prog->code_size / 4; ++pos) {
+      if ((pos % 8) == 0)
+         debug_printf("\n");
+      debug_printf("%08x ", prog->code[pos]);
+   }
+   debug_printf("\n");
+}
 #endif
 
-   tgsi_scan_shader(prog->pipe.tokens, &ti->scan);
+boolean
+nvc0_program_translate(struct nvc0_program *prog)
+{
+   struct nv50_ir_prog_info *info;
+   int ret;
 
-   if (ti->prog->type == PIPE_SHADER_FRAGMENT) {
-      ti->fp_depth_output = 255;
-      for (i = 0; i < ti->scan.num_outputs; ++i)
-         if (ti->scan.output_semantic_name[i] == TGSI_SEMANTIC_POSITION)
-            ti->fp_depth_output = i;
-   }
+   info = CALLOC_STRUCT(nv50_ir_prog_info);
+   if (!info)
+      return FALSE;
 
-   ti->subr =
-      CALLOC(ti->scan.opcode_count[TGSI_OPCODE_BGNSUB], sizeof(ti->subr[0]));
+   info->type = prog->type;
+   info->target = 0xc0;
+   info->bin.sourceRep = NV50_PROGRAM_IR_TGSI;
+   info->bin.source = (void *)prog->pipe.tokens;
 
-   ti->immd32 = (uint32_t *)MALLOC(ti->scan.immediate_count * 16);
-   ti->immd32_ty = (ubyte *)MALLOC(ti->scan.immediate_count * sizeof(ubyte));
+   info->io.clipDistanceCount = prog->vp.num_ucps;
 
-   ti->insns = MALLOC(ti->scan.num_instructions * sizeof(ti->insns[0]));
+   info->assignSlots = nvc0_program_assign_varying_slots;
 
-   tgsi_parse_init(&parse, prog->pipe.tokens);
-   while (!tgsi_parse_end_of_tokens(&parse)) {
-      tgsi_parse_token(&parse);
+#ifdef DEBUG
+   info->optLevel = debug_get_num_option("NV50_PROG_OPTIMIZE", 3);
+   info->dbgFlags = debug_get_num_option("NV50_PROG_DEBUG", 0);
+#else
+   info->optLevel = 3;
+#endif
 
-      switch (parse.FullToken.Token.Type) {
-      case TGSI_TOKEN_TYPE_IMMEDIATE:
-         prog_immediate(ti, &parse.FullToken.FullImmediate);
-         break;
-      case TGSI_TOKEN_TYPE_DECLARATION:
-         prog_decl(ti, &parse.FullToken.FullDeclaration);
-         break;
-      case TGSI_TOKEN_TYPE_INSTRUCTION:
-         ti->insns[ti->num_insns] = parse.FullToken.FullInstruction;
-         prog_inst(ti, &parse.FullToken.FullInstruction, ++ti->num_insns);
-         break;
-      default:
-         break;
-      }
+   ret = nv50_ir_generate_code(info);
+   if (ret) {
+      NOUVEAU_ERR("shader translation failed: %i\n", ret);
+      goto out;
    }
 
-   for (i = 0; i < ti->num_subrs; ++i) {
-      unsigned pc = ti->subr[i].id;
-      while (ti->insns[pc].Instruction.Opcode != TGSI_OPCODE_ENDSUB)
-         prog_subroutine_inst(&ti->subr[i], &ti->insns[pc++]);
-   }
+   prog->code = info->bin.code;
+   prog->code_size = info->bin.codeSize;
+   prog->immd_data = info->immd.buf;
+   prog->immd_size = info->immd.bufSize;
+   prog->relocs = info->bin.relocData;
+   prog->max_gpr = MAX2(4, (info->bin.maxGPR + 1));
+
+   prog->vp.edgeflag = PIPE_MAX_ATTRIBS;
 
    switch (prog->type) {
    case PIPE_SHADER_VERTEX:
-      ti->input_file = NV_FILE_MEM_A;
-      ti->output_file = NV_FILE_MEM_V;
-      ret = nvc0_vp_gen_header(prog, ti);
+      ret = nvc0_vp_gen_header(prog, info);
       break;
-      /*
-   case PIPE_SHADER_TESSELLATION_CONTROL:
-      ret = nvc0_tcp_gen_header(ti);
+#ifdef PIPE_SHADER_HULL
+   case PIPE_SHADER_HULL:
+      ret = nvc0_tcp_gen_header(prog, info);
       break;
-   case PIPE_SHADER_TESSELLATION_EVALUATION:
-      ret = nvc0_tep_gen_header(ti);
+#endif
+#ifdef PIPE_SHADER_DOMAIN
+   case PIPE_SHADER_DOMAIN:
+      ret = nvc0_tep_gen_header(prog, info);
       break;
+#endif
    case PIPE_SHADER_GEOMETRY:
-      ret = nvc0_gp_gen_header(ti);
+      ret = nvc0_gp_gen_header(prog, info);
       break;
-      */
    case PIPE_SHADER_FRAGMENT:
-      ti->input_file = NV_FILE_MEM_V;
-      ti->output_file = NV_FILE_GPR;
-
-      if (ti->scan.writes_z)
-         prog->flags[0] = 0x11; /* ? */
-      else
-      if (!ti->scan.uses_kill && !ti->global_stores)
-         prog->fp.early_z = 1;
-
-      ret = nvc0_fp_gen_header(prog, ti);
+      ret = nvc0_fp_gen_header(prog, info);
       break;
    default:
-      assert(!"unsupported program type");
       ret = -1;
+      NOUVEAU_ERR("unknown program type: %u\n", prog->type);
       break;
    }
+   if (ret)
+      goto out;
 
-   if (ti->require_stores) {
+   if (info->bin.tlsSpace) {
+      assert(info->bin.tlsSpace < (1 << 24));
       prog->hdr[0] |= 1 << 26;
-      prog->hdr[1] |= ti->temp128_nr * 16; /* l[] size */
+      prog->hdr[1] |= info->bin.tlsSpace; /* l[] size */
    }
+   if (info->io.globalAccess)
+      prog->hdr[0] |= 1 << 16;
 
-   assert(!ret);
-   return ret;
+out:
+   FREE(info);
+   return !ret;
 }
 
 boolean
-nvc0_program_translate(struct nvc0_program *prog)
+nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog)
 {
-   struct nvc0_translation_info *ti;
+   struct nvc0_screen *screen = nvc0->screen;
    int ret;
+   uint32_t size = prog->code_size + NVC0_SHADER_HEADER_SIZE;
+   uint32_t lib_pos = screen->lib_code->start;
+   uint32_t code_pos;
+
+   /* c[] bindings need to be aligned to 0x100, but we could use relocations
+    * to save space. */
+   if (prog->immd_size) {
+      prog->immd_base = size;
+      size = align(size, 0x40);
+      size += prog->immd_size + 0xc0; /* add 0xc0 for align 0x40 -> 0x100 */
+   }
+   size = align(size, 0x40); /* required by SP_START_ID */
 
-   ti = CALLOC_STRUCT(nvc0_translation_info);
-   ti->prog = prog;
+   ret = nouveau_resource_alloc(screen->text_heap, size, prog, &prog->res);
+   if (ret) {
+      NOUVEAU_ERR("out of code space\n");
+      return FALSE;
+   }
+   prog->code_base = prog->res->start;
+   prog->immd_base = align(prog->res->start + prog->immd_base, 0x100);
+   assert((prog->immd_size == 0) || (prog->immd_base + prog->immd_size <
+                                     prog->res->start + prog->res->size));
 
-   ti->edgeflag_out = PIPE_MAX_SHADER_OUTPUTS;
+   code_pos = prog->code_base + NVC0_SHADER_HEADER_SIZE;
 
-   prog->vp.edgeflag = PIPE_MAX_ATTRIBS;
+   if (prog->relocs)
+      nv50_ir_relocate_code(prog->relocs, prog->code, code_pos, lib_pos, 0);
 
-   if (prog->type == PIPE_SHADER_VERTEX && prog->vp.num_ucps)
-      ti->append_ucp = TRUE;
+#ifdef DEBUG
+   if (debug_get_bool_option("NV50_PROG_DEBUG", FALSE))
+      nvc0_program_dump(prog);
+#endif
 
-   ret = nvc0_prog_scan(ti);
-   if (ret) {
-      NOUVEAU_ERR("unsupported shader program\n");
-      goto out;
-   }
+   nvc0_m2mf_push_linear(&nvc0->base, screen->text, prog->code_base,
+                         NOUVEAU_BO_VRAM, NVC0_SHADER_HEADER_SIZE, prog->hdr);
+   nvc0_m2mf_push_linear(&nvc0->base, screen->text,
+                         prog->code_base + NVC0_SHADER_HEADER_SIZE,
+                         NOUVEAU_BO_VRAM, prog->code_size, prog->code);
+   if (prog->immd_size)
+      nvc0_m2mf_push_linear(&nvc0->base,
+                            screen->text, prog->immd_base, NOUVEAU_BO_VRAM,
+                            prog->immd_size, prog->immd_data);
 
-   ret = nvc0_generate_code(ti);
-   if (ret)
-      NOUVEAU_ERR("shader translation failed\n");
+   BEGIN_RING(screen->base.channel, RING_3D(MEM_BARRIER), 1);
+   OUT_RING  (screen->base.channel, 0x1111);
 
-#if NV50_DEBUG & NV50_DEBUG_SHADER
-   unsigned i;
-   for (i = 0; i < sizeof(prog->hdr) / sizeof(prog->hdr[0]); ++i)
-      debug_printf("HDR[%02lx] = 0x%08x\n",
-                   i * sizeof(prog->hdr[0]), prog->hdr[i]);
-#endif
+   return TRUE;
+}
 
-out:
-   if (ti->immd32)
-      FREE(ti->immd32);
-   if (ti->immd32_ty)
-      FREE(ti->immd32_ty);
-   if (ti->insns)
-      FREE(ti->insns);
-   if (ti->subr)
-      FREE(ti->subr);
-   FREE(ti);
-   return ret ? FALSE : TRUE;
+/* Upload code for builtin functions like integer division emulation. */
+void
+nvc0_program_library_upload(struct nvc0_context *nvc0)
+{
+   struct nvc0_screen *screen = nvc0->screen;
+   int ret;
+   uint32_t size;
+   const uint32_t *code;
+
+   if (screen->lib_code)
+      return;
+
+   nv50_ir_get_target_library(screen->base.device->chipset, &code, &size);
+   if (!size)
+      return;
+
+   ret = nouveau_resource_alloc(screen->text_heap, align(size, 0x100), NULL,
+                                &screen->lib_code);
+   if (ret)
+      return;
+
+   nvc0_m2mf_push_linear(&nvc0->base,
+                         screen->text, screen->lib_code->start, NOUVEAU_BO_VRAM,
+                         size, code);
+   /* no need for a memory barrier, will be emitted with first program */
 }
 
 void
@@ -720,6 +641,8 @@ nvc0_program_destroy(struct nvc0_context *nvc0, struct nvc0_program *prog)
 
    if (prog->code)
       FREE(prog->code);
+   if (prog->immd_data)
+      FREE(prog->immd_data);
    if (prog->relocs)
       FREE(prog->relocs);
 
diff --git a/src/gallium/drivers/nvc0/nvc0_program.h b/src/gallium/drivers/nvc0/nvc0_program.h
index f6fea29..239890b 100644
--- a/src/gallium/drivers/nvc0/nvc0_program.h
+++ b/src/gallium/drivers/nvc0/nvc0_program.h
@@ -3,9 +3,8 @@
 #define __NVC0_PROGRAM_H__
 
 #include "pipe/p_state.h"
-#include "tgsi/tgsi_scan.h"
 
-#define NVC0_CAP_MAX_PROGRAM_TEMPS 64
+#define NVC0_CAP_MAX_PROGRAM_TEMPS 128
 
 #define NVC0_SHADER_HEADER_SIZE (20 * 4)
 
@@ -14,15 +13,17 @@ struct nvc0_program {
 
    ubyte type;
    boolean translated;
-   ubyte max_gpr;
+   uint8_t max_gpr;
 
    uint32_t *code;
+   uint32_t *immd_data;
    unsigned code_base;
    unsigned code_size;
-   unsigned parm_size;
-
-   uint32_t hdr[20]; /* TODO: move this into code to save space */
+   unsigned immd_base;
+   unsigned immd_size; /* size of immediate array data */
+   unsigned parm_size; /* size of non-bindable uniforms (c0[]) */
 
+   uint32_t hdr[20];
    uint32_t flags[2];
 
    struct {
@@ -34,59 +35,14 @@ struct nvc0_program {
       uint8_t early_z;
       uint8_t in_pos[PIPE_MAX_SHADER_INPUTS];
    } fp;
+   struct {
+      uint32_t tess_mode; /* ~0 if defined by the other stage */
+      uint32_t input_patch_size;
+   } tp;
 
    void *relocs;
-   unsigned num_relocs;
 
    struct nouveau_resource *res;
 };
 
-/* first 2 bits are written into the program header, for each input */
-#define NVC0_INTERP_FLAT          (1 << 0)
-#define NVC0_INTERP_PERSPECTIVE   (2 << 0)
-#define NVC0_INTERP_LINEAR        (3 << 0)
-#define NVC0_INTERP_CENTROID      (1 << 2)
-
-/* analyze TGSI and see which TEMP[] are used as subroutine inputs/outputs */
-struct nvc0_subroutine {
-   unsigned id;
-   unsigned first_insn;
-   uint32_t argv[NVC0_CAP_MAX_PROGRAM_TEMPS][4];
-   uint32_t retv[NVC0_CAP_MAX_PROGRAM_TEMPS][4];
-};
-
-struct nvc0_translation_info {
-   struct nvc0_program *prog;
-   struct tgsi_full_instruction *insns;
-   unsigned num_insns;
-   ubyte input_file;
-   ubyte output_file;
-   ubyte fp_depth_output;
-   uint16_t input_loc[PIPE_MAX_SHADER_INPUTS][4];
-   uint16_t output_loc[PIPE_MAX_SHADER_OUTPUTS][4];
-   uint16_t sysval_loc[TGSI_SEMANTIC_COUNT];
-   boolean sysval_in[TGSI_SEMANTIC_COUNT];
-   int input_access[PIPE_MAX_SHADER_INPUTS][4];
-   int output_access[PIPE_MAX_SHADER_OUTPUTS][4];
-   ubyte interp_mode[PIPE_MAX_SHADER_INPUTS];
-   boolean indirect_inputs;
-   boolean indirect_outputs;
-   boolean require_stores;
-   boolean global_stores;
-   uint32_t *immd32;
-   ubyte *immd32_ty;
-   unsigned immd32_nr;
-   unsigned temp128_nr;
-   ubyte edgeflag_out;
-   struct nvc0_subroutine *subr;
-   unsigned num_subrs;
-   boolean append_ucp;
-   struct tgsi_shader_info scan;
-};
-
-int nvc0_generate_code(struct nvc0_translation_info *);
-
-void nvc0_relocate_program(struct nvc0_program *,
-                           uint32_t code_base, uint32_t data_base);
-
 #endif
diff --git a/src/gallium/drivers/nvc0/nvc0_screen.c b/src/gallium/drivers/nvc0/nvc0_screen.c
index a8bd092..596a1ef 100644
--- a/src/gallium/drivers/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nvc0/nvc0_screen.c
@@ -155,7 +155,7 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
    case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
       return 16384;
    case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
-      return 4;
+      return 16;
    case PIPE_SHADER_CAP_MAX_INPUTS:
       if (shader == PIPE_SHADER_VERTEX)
          return 32;
@@ -179,9 +179,9 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
    case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
       return 1;
    case PIPE_SHADER_CAP_SUBROUTINES:
-      return 0; /* please inline, or provide function declarations */
+      return 1; /* but inlining everything, we need function declarations */
    case PIPE_SHADER_CAP_INTEGERS:
-      return 0;
+      return 1;
    default:
       NOUVEAU_ERR("unknown PIPE_SHADER_CAP %d\n", param);
       return 0;
@@ -225,6 +225,7 @@ nvc0_screen_destroy(struct pipe_screen *pscreen)
    nouveau_bo_ref(NULL, &screen->fence.bo);
    nouveau_bo_ref(NULL, &screen->vfetch_cache);
 
+   nouveau_resource_destroy(&screen->lib_code);
    nouveau_resource_destroy(&screen->text_heap);
 
    if (screen->tic.entries)
diff --git a/src/gallium/drivers/nvc0/nvc0_screen.h b/src/gallium/drivers/nvc0/nvc0_screen.h
index a3133b2..6780e32 100644
--- a/src/gallium/drivers/nvc0/nvc0_screen.h
+++ b/src/gallium/drivers/nvc0/nvc0_screen.h
@@ -34,6 +34,7 @@ struct nvc0_screen {
    uint64_t tls_size;
 
    struct nouveau_resource *text_heap;
+   struct nouveau_resource *lib_code; /* allocated from text_heap */
 
    struct {
       struct nouveau_bo *bo[NVC0_SCRATCH_NR_BUFFERS];
diff --git a/src/gallium/drivers/nvc0/nvc0_shader_state.c b/src/gallium/drivers/nvc0/nvc0_shader_state.c
index 287160e..0a55812 100644
--- a/src/gallium/drivers/nvc0/nvc0_shader_state.c
+++ b/src/gallium/drivers/nvc0/nvc0_shader_state.c
@@ -31,18 +31,37 @@ static INLINE void
 nvc0_program_update_context_state(struct nvc0_context *nvc0,
                                   struct nvc0_program *prog, int stage)
 {
+   struct nouveau_channel *chan = nvc0->screen->base.channel;
+
    if (prog->hdr[1])
       nvc0->state.tls_required |= 1 << stage;
    else
       nvc0->state.tls_required &= ~(1 << stage);
+
+   if (prog->immd_size) {
+      const unsigned rl = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD;
+
+      BEGIN_RING(chan, RING_3D(CB_SIZE), 3);
+      /* NOTE: may overlap code of a different shader */
+      OUT_RING  (chan, align(prog->immd_size, 0x100));
+      OUT_RELOCh(chan, nvc0->screen->text, prog->immd_base, rl);
+      OUT_RELOCl(chan, nvc0->screen->text, prog->immd_base, rl);
+      BEGIN_RING(chan, RING_3D(CB_BIND(stage)), 1);
+      OUT_RING  (chan, (14 << 4) | 1);
+
+      nvc0->state.c14_bound |= 1 << stage;
+   } else
+   if (nvc0->state.c14_bound & (1 << stage)) {
+      BEGIN_RING(chan, RING_3D(CB_BIND(stage)), 1);
+      OUT_RING  (chan, (14 << 4) | 0);
+
+      nvc0->state.c14_bound &= ~(1 << stage);
+   }
 }
 
-static boolean
+static INLINE boolean
 nvc0_program_validate(struct nvc0_context *nvc0, struct nvc0_program *prog)
 {
-   int ret;
-   unsigned size;
-
    if (prog->translated)
       return TRUE;
 
@@ -50,25 +69,7 @@ nvc0_program_validate(struct nvc0_context *nvc0, struct nvc0_program *prog)
    if (!prog->translated)
       return FALSE;
 
-   size = align(prog->code_size + NVC0_SHADER_HEADER_SIZE, 0x100);
-
-   ret = nouveau_resource_alloc(nvc0->screen->text_heap, size, prog,
-                                &prog->res);
-   if (ret)
-      return FALSE;
-
-   prog->code_base = prog->res->start;
-
-   nvc0_m2mf_push_linear(&nvc0->base, nvc0->screen->text, prog->code_base,
-                         NOUVEAU_BO_VRAM, NVC0_SHADER_HEADER_SIZE, prog->hdr);
-   nvc0_m2mf_push_linear(&nvc0->base, nvc0->screen->text,
-                         prog->code_base + NVC0_SHADER_HEADER_SIZE,
-                         NOUVEAU_BO_VRAM, prog->code_size, prog->code);
-
-   BEGIN_RING(nvc0->screen->base.channel, RING_3D(MEM_BARRIER), 1);
-   OUT_RING  (nvc0->screen->base.channel, 0x1111);
-
-   return TRUE;
+   return nvc0_program_upload_code(nvc0, prog);
 }
 
 void




More information about the mesa-commit mailing list