mesa: Branch 'master' - 3 commits

Thu Feb 8 23:37:52 UTC 2007

src/mesa/drivers/dri/r200/r200_context.h     |   12 
 src/mesa/drivers/dri/r200/r200_maos.h        |    2 
 src/mesa/drivers/dri/r200/r200_maos_arrays.c |  400 ++++++---------------------
 src/mesa/drivers/dri/r200/r200_tcl.c         |   42 +-
 src/mesa/drivers/dri/r200/r200_vertprog.c    |   17 -
 src/mesa/drivers/dri/r300/r300_vertexprog.c  |   35 +-
 src/mesa/main/imports.h                      |    8 
 src/mesa/shader/arbprogparse.c               |    2 
 src/mesa/shader/program.c                    |   30 ++
 src/mesa/shader/program.h                    |    2 
 src/mesa/tnl/t_vp_build.c                    |   61 ++--
 11 files changed, 226 insertions(+), 385 deletions(-)

New commits:
diff-tree 421ce180f52ff55b866066fabd861a51dd6d2b26 (from 54dac2c84310536cce962101de29546d3eb80175)
Author: Roland Scheidegger <sroland at tungstengraphics.com>
Date:   Fri Feb 9 00:36:53 2007 +0100

    r200: simplify / unify input map handling for vp and fftnl
    
    Use the same input map handling for fftnl and vertex programs. It doesn't
    enable any new functionality (should make it easy to support per-vertex
    materials though), but the code is much cleaner.

diff --git a/src/mesa/drivers/dri/r200/r200_context.h b/src/mesa/drivers/dri/r200/r200_context.h
index 26a43d2..e840a50 100644
--- a/src/mesa/drivers/dri/r200/r200_context.h
+++ b/src/mesa/drivers/dri/r200/r200_context.h
@@ -107,8 +107,7 @@ struct r200_vertex_program {
         VERTEX_SHADER_INSTRUCTION instr[R200_VSF_MAX_INST + 6];
         int pos_end;
         int inputs[VERT_ATTRIB_MAX];
-        int rev_inputs[16];
-        int gen_inputs_mapped;
+        GLubyte inputmap_rev[16];
         int native;
         int fogpidx;
         int fogmode;
@@ -733,14 +732,7 @@ struct r200_tcl_info {
    GLuint *Elts;
 
    struct r200_dma_region indexed_verts;
-   struct r200_dma_region weight;
-   struct r200_dma_region obj;
-   struct r200_dma_region rgba;
-   struct r200_dma_region spec;
-   struct r200_dma_region fog;
-   struct r200_dma_region tex[R200_MAX_TEXTURE_UNITS];
-   struct r200_dma_region norm;
-   struct r200_dma_region generic[16];
+   struct r200_dma_region vertex_data[15];
 };
 
 
diff --git a/src/mesa/drivers/dri/r200/r200_maos.h b/src/mesa/drivers/dri/r200/r200_maos.h
index b9e4d3c..4998f67 100644
--- a/src/mesa/drivers/dri/r200/r200_maos.h
+++ b/src/mesa/drivers/dri/r200/r200_maos.h
@@ -38,7 +38,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DE
 
 #include "r200_context.h"
 
-extern void r200EmitArrays( GLcontext *ctx, GLuint inputs );
+extern void r200EmitArrays( GLcontext *ctx, GLubyte *vimap_rev );
 extern void r200ReleaseArrays( GLcontext *ctx, GLuint newinputs );
 
 #endif
diff --git a/src/mesa/drivers/dri/r200/r200_maos_arrays.c b/src/mesa/drivers/dri/r200/r200_maos_arrays.c
index db5ac6f..6a6c30a 100644
--- a/src/mesa/drivers/dri/r200/r200_maos_arrays.c
+++ b/src/mesa/drivers/dri/r200/r200_maos_arrays.c
@@ -376,7 +376,7 @@ static void emit_vector( GLcontext *ctx,
 /* Emit any changed arrays to new GART memory, re-emit a packet to
  * update the arrays.  
  */
-void r200EmitArrays( GLcontext *ctx, GLuint inputs )
+void r200EmitArrays( GLcontext *ctx, GLubyte *vimap_rev )
 {
    r200ContextPtr rmesa = R200_CONTEXT( ctx );
    struct vertex_buffer *VB = &TNL_CONTEXT( ctx )->vb;
@@ -384,284 +384,109 @@ void r200EmitArrays( GLcontext *ctx, GLu
    GLuint nr = 0;
    GLuint vfmt0 = 0, vfmt1 = 0;
    GLuint count = VB->Count;
-   GLuint i;
-   GLuint generic_in_mapped = 0;
-   struct r200_vertex_program *vp = NULL;
+   GLuint i, emitsize;
 
-   /* this looks way more complicated than necessary... */
-   if (ctx->VertexProgram._Enabled) {
-      vp = rmesa->curr_vp_hw;
-      generic_in_mapped = vp->gen_inputs_mapped;
-   }
-
-   if (inputs & VERT_BIT_POS) {
-      if (!rmesa->tcl.obj.buf) 
-	 emit_vector( ctx, 
-		      &rmesa->tcl.obj, 
-		      (char *)VB->ObjPtr->data,
-		      VB->ObjPtr->size,
-		      VB->ObjPtr->stride,
-		      count);
-
-      switch( VB->ObjPtr->size ) {
-      case 4: vfmt0 |= R200_VTX_W0;
-      case 3: vfmt0 |= R200_VTX_Z0;
-      case 2: 
-      default:
-	 break;
-      }
-      component[nr++] = &rmesa->tcl.obj;
-   }
-   else if (generic_in_mapped & (1 << 0)) {
-      int geninput = vp->rev_inputs[0] - VERT_ATTRIB_GENERIC0;
-      if (!rmesa->tcl.generic[geninput].buf) {
-         emit_vector( ctx,
-		      &(rmesa->tcl.generic[geninput]),
-		      (char *)VB->AttribPtr[geninput + VERT_ATTRIB_GENERIC0]->data,
-		      4,
-		      VB->AttribPtr[geninput + VERT_ATTRIB_GENERIC0]->stride,
-		      count );
-      }
-      component[nr++] = &rmesa->tcl.generic[geninput];
-      vfmt0 |= R200_VTX_W0 | R200_VTX_Z0;
-   }
-
-   if (inputs & VERT_BIT_WEIGHT) {
-      if (!rmesa->tcl.weight.buf)
-	 emit_vector( ctx, 
-		      &rmesa->tcl.weight, 
-		      (char *)VB->AttribPtr[VERT_ATTRIB_WEIGHT]->data,
-		      VB->AttribPtr[VERT_ATTRIB_WEIGHT]->size,
-		      VB->AttribPtr[VERT_ATTRIB_WEIGHT]->stride,
-		      count);
-
-      assert(VB->AttribPtr[VERT_ATTRIB_WEIGHT]->size <= 4);
-      vfmt0 |= VB->AttribPtr[VERT_ATTRIB_WEIGHT]->size << R200_VTX_WEIGHT_COUNT_SHIFT;
-      component[nr++] = &rmesa->tcl.weight;
-   }
-
-   if (inputs & VERT_BIT_NORMAL) {
-      if (!rmesa->tcl.norm.buf)
-	 emit_vector( ctx, 
-		      &(rmesa->tcl.norm), 
-		      (char *)VB->NormalPtr->data,
-		      3,
-		      VB->NormalPtr->stride,
-		      count);
-
-      vfmt0 |= R200_VTX_N0;
-      component[nr++] = &rmesa->tcl.norm;
-   }
-
-   if (inputs & VERT_BIT_FOG) {
-      if (!rmesa->tcl.fog.buf) {
-	 if (ctx->VertexProgram._Enabled)
-	    emit_vector( ctx, 
-			 &(rmesa->tcl.fog), 
-			 (char *)VB->FogCoordPtr->data,
+   for ( i = 0; i < 15; i++ ) {
+      GLubyte attrib = vimap_rev[i];
+      if (attrib != 255) {
+	 switch (i) {
+	 case 0:
+	    emitsize = (VB->AttribPtr[attrib]->size);
+	    switch (emitsize) {
+	    case 4:
+	       vfmt0 |= R200_VTX_W0;
+	       /* fallthrough */
+	    case 3:
+	       vfmt0 |= R200_VTX_Z0;
+	       break;
+	    case 2:
+	       break;
+	    default: assert(0);
+	    }
+	    break;
+	 case 1:
+	    assert(attrib == VERT_ATTRIB_WEIGHT);
+	    emitsize = (VB->AttribPtr[attrib]->size);
+	    vfmt0 |= emitsize << R200_VTX_WEIGHT_COUNT_SHIFT;
+	    break;
+	 case 2:
+	    assert(attrib == VERT_ATTRIB_NORMAL);
+	    emitsize = 3;
+	    vfmt0 |= R200_VTX_N0;
+	    break;
+	 case 3:
+	    /* special handling to fix up fog. Will get us into trouble with vbos...*/
+	    assert(attrib == VERT_ATTRIB_FOG);
+	    if (!rmesa->tcl.vertex_data[i].buf) {
+	       if (ctx->VertexProgram._Enabled)
+		  emit_vector( ctx,
+			 &(rmesa->tcl.vertex_data[attrib]),
+			 (char *)VB->AttribPtr[attrib]->data,
 			 1,
-			 VB->FogCoordPtr->stride,
+			 VB->AttribPtr[attrib]->stride,
 			 count);
-	 else
-	    emit_vecfog( ctx, 
-			 &(rmesa->tcl.fog), 
-			 (char *)VB->FogCoordPtr->data,
-			 VB->FogCoordPtr->stride,
+	       else
+		  emit_vecfog( ctx,
+			 &(rmesa->tcl.vertex_data[attrib]),
+			 (char *)VB->AttribPtr[attrib]->data,
+			 VB->AttribPtr[attrib]->stride,
 			 count);
-      }
-
-      vfmt0 |= R200_VTX_DISCRETE_FOG;
-      component[nr++] = &rmesa->tcl.fog;
-   }
- 
-   if (inputs & VERT_BIT_COLOR0) {
-      int emitsize;
-
-      if (VB->ColorPtr[0]->size == 4 &&
-	  (VB->ColorPtr[0]->stride != 0 ||
-	   VB->ColorPtr[0]->data[0][3] != 1.0)) { 
-	 vfmt0 |= R200_VTX_FP_RGBA << R200_VTX_COLOR_0_SHIFT; 
-	 emitsize = 4;
-      }
-      else { 
-	 vfmt0 |= R200_VTX_FP_RGB << R200_VTX_COLOR_0_SHIFT; 
-	 emitsize = 3;
-      }
-
-      if (!rmesa->tcl.rgba.buf)
-	 emit_vector( ctx, 
-		      &(rmesa->tcl.rgba), 
-		      (char *)VB->ColorPtr[0]->data,
-		      emitsize,
-		      VB->ColorPtr[0]->stride,
-		      count);
-
-      component[nr++] = &rmesa->tcl.rgba;
-   }
-/*	vfmt0 |= R200_VTX_PK_RGBA << R200_VTX_COLOR_0_SHIFT;
-	emit_ubyte_rgba( ctx, &rmesa->tcl.rgba, 
-		(char *)VB->ColorPtr[0]->data, 4,
-		      VB->ColorPtr[0]->stride, count);*/
-   else if (generic_in_mapped & (1 << 2)) {
-      int geninput = vp->rev_inputs[2] - VERT_ATTRIB_GENERIC0;
-      if (!rmesa->tcl.generic[geninput].buf) {
-         emit_vector( ctx,
-		      &(rmesa->tcl.generic[geninput]),
-		      (char *)VB->AttribPtr[geninput + VERT_ATTRIB_GENERIC0]->data,
-		      4,
-		      VB->AttribPtr[geninput + VERT_ATTRIB_GENERIC0]->stride,
-		      count );
-      }
-      component[nr++] = &rmesa->tcl.generic[geninput];
-      vfmt0 |= R200_VTX_FP_RGBA << R200_VTX_COLOR_0_SHIFT;
-   }
-
-
-   if (inputs & VERT_BIT_COLOR1) {
-      if (!rmesa->tcl.spec.buf) {
-	 emit_vector( ctx, 
-		      &rmesa->tcl.spec, 
-		      (char *)VB->SecondaryColorPtr[0]->data,
-		      3,
-		      VB->SecondaryColorPtr[0]->stride,
-		      count);
-      }
-
-      /* How does this work?
-       */
-      vfmt0 |= R200_VTX_FP_RGB << R200_VTX_COLOR_1_SHIFT; 
-      component[nr++] = &rmesa->tcl.spec;
-   }
-   else if (generic_in_mapped & (1 << 3)) {
-      int geninput = vp->rev_inputs[3] - VERT_ATTRIB_GENERIC0;
-      if (!rmesa->tcl.generic[geninput].buf) {
-         emit_vector( ctx,
-		      &(rmesa->tcl.generic[geninput]),
-		      (char *)VB->AttribPtr[geninput + VERT_ATTRIB_GENERIC0]->data,
-		      4,
-		      VB->AttribPtr[geninput + VERT_ATTRIB_GENERIC0]->stride,
-		      count );
-      }
-      component[nr++] = &rmesa->tcl.generic[geninput];
-      vfmt0 |= R200_VTX_FP_RGBA << R200_VTX_COLOR_1_SHIFT;
-   }
-
-   if (generic_in_mapped & (1 << 4)) {
-      int geninput = vp->rev_inputs[4] - VERT_ATTRIB_GENERIC0;
-      if (!rmesa->tcl.generic[geninput].buf) {
-         emit_vector( ctx,
-		      &(rmesa->tcl.generic[geninput]),
-		      (char *)VB->AttribPtr[geninput + VERT_ATTRIB_GENERIC0]->data,
-		      4,
-		      VB->AttribPtr[geninput + VERT_ATTRIB_GENERIC0]->stride,
-		      count );
-      }
-      component[nr++] = &rmesa->tcl.generic[geninput];
-      vfmt0 |= R200_VTX_FP_RGBA << R200_VTX_COLOR_2_SHIFT;
-   }
-
-   if (generic_in_mapped & (1 << 5)) {
-      int geninput = vp->rev_inputs[5] - VERT_ATTRIB_GENERIC0;
-      if (!rmesa->tcl.generic[geninput].buf) {
-         emit_vector( ctx,
-		      &(rmesa->tcl.generic[geninput]),
-		      (char *)VB->AttribPtr[geninput + VERT_ATTRIB_GENERIC0]->data,
-		      4,
-		      VB->AttribPtr[geninput + VERT_ATTRIB_GENERIC0]->stride,
-		      count );
-      }
-      component[nr++] = &rmesa->tcl.generic[geninput];
-      vfmt0 |= R200_VTX_FP_RGBA << R200_VTX_COLOR_3_SHIFT;
-   }
-
-   for ( i = 0 ; i < 6 ; i++ ) {
-      if (inputs & (VERT_BIT_TEX0 << i)) {
-	 if (!rmesa->tcl.tex[i].buf)
-	     emit_vector( ctx, 
-			  &(rmesa->tcl.tex[i]),
-			  (char *)VB->TexCoordPtr[i]->data,
-			  VB->TexCoordPtr[i]->size,
-			  VB->TexCoordPtr[i]->stride,
-			  count );
-
-	 vfmt1 |= VB->TexCoordPtr[i]->size << (i * 3);
-	 component[nr++] = &rmesa->tcl.tex[i];
-      }
-      else if (generic_in_mapped & (1 << (i + 6))) {
-	 int geninput = vp->rev_inputs[i + 6] - VERT_ATTRIB_GENERIC0;
-	 if (!rmesa->tcl.generic[geninput].buf) {
-            emit_vector( ctx,
-			 &(rmesa->tcl.generic[geninput]),
-			(char *)VB->AttribPtr[geninput + VERT_ATTRIB_GENERIC0]->data,
-			4,
-			VB->AttribPtr[geninput + VERT_ATTRIB_GENERIC0]->stride,
-			count );
-	 }
-	 component[nr++] = &rmesa->tcl.generic[geninput];
-	 vfmt1 |= 4 << (R200_VTX_TEX0_COMP_CNT_SHIFT + (i * 3));
-      }
-   }
-
-   if (generic_in_mapped & (1 << 13)) {
-      int geninput = vp->rev_inputs[13] - VERT_ATTRIB_GENERIC0;
-      if (!rmesa->tcl.generic[geninput].buf) {
-         emit_vector( ctx,
-		      &(rmesa->tcl.generic[geninput]),
-		      (char *)VB->AttribPtr[geninput + VERT_ATTRIB_GENERIC0]->data,
-		      4,
-		      VB->AttribPtr[geninput + VERT_ATTRIB_GENERIC0]->stride,
-		      count );
-      }
-      component[nr++] = &rmesa->tcl.generic[geninput];
-      vfmt0 |= R200_VTX_XY1 | R200_VTX_Z1 | R200_VTX_W1;
-   }
-
-/* doesn't work. Wrong order with mixed generic & conventional! */
-/*
-   if (ctx->VertexProgram._Enabled) {
-      int *vp_inputs = rmesa->curr_vp_hw->inputs;
-      for ( i = VERT_ATTRIB_GENERIC0; i < VERT_ATTRIB_MAX; i++ ) {
-	 if (inputs & (1 << i)) {
-	    int geninput = i - VERT_ATTRIB_GENERIC0;
-	    if (!rmesa->tcl.generic[geninput].buf) {
-	       emit_vector( ctx,
-			  &(rmesa->tcl.generic[geninput]),
-			  (char *)VB->AttribPtr[i]->data,
-			  4,
-			  VB->AttribPtr[i]->stride,
-			  count );
 	    }
-	    component[nr++] = &rmesa->tcl.generic[geninput];
-	    switch (vp_inputs[i]) {
-	    case 0:
-	       vfmt0 |=  R200_VTX_W0 | R200_VTX_Z0;
-	       break;
+	    vfmt0 |= R200_VTX_DISCRETE_FOG;
+	    goto after_emit;
+	    break;
+	 case 4:
+	 case 5:
+	 case 6:
+	 case 7:
+	    if (VB->AttribPtr[attrib]->size == 4 &&
+	       (VB->AttribPtr[attrib]->stride != 0 ||
+		VB->AttribPtr[attrib]->data[0][3] != 1.0)) emitsize = 4;
+	    else emitsize = 3;
+	    if (emitsize == 4)
+	       vfmt0 |= R200_VTX_FP_RGBA << (R200_VTX_COLOR_0_SHIFT + (i - 4) * 2);
+	    else {
+	       vfmt0 |= R200_VTX_FP_RGB << (R200_VTX_COLOR_0_SHIFT + (i - 4) * 2);
+	    }
+	    break;
+	 case 8:
+	 case 9:
+	 case 10:
+	 case 11:
+	 case 12:
+	 case 13:
+	    emitsize = VB->AttribPtr[attrib]->size;
+	    vfmt1 |= emitsize << (R200_VTX_TEX0_COMP_CNT_SHIFT + (i - 8) * 3);
+	    break;
+	 case 14:
+	    emitsize = VB->AttribPtr[attrib]->size >= 2 ? VB->AttribPtr[attrib]->size : 2;
+	    switch (emitsize) {
 	    case 2:
+	       vfmt0 |= R200_VTX_XY1;
+	       /* fallthrough */
 	    case 3:
+	       vfmt0 |= R200_VTX_Z1;
+	       /* fallthrough */
 	    case 4:
-	    case 5:
-	       vfmt0 |= R200_VTX_FP_RGBA << (R200_VTX_COLOR_0_SHIFT + (vp_inputs[i] - 2) * 2);
-	       break;
-	    case 6:
-	    case 7:
-	    case 8:
-	    case 9:
-	    case 10:
-	    case 11:
-	       vfmt1 |= 4 << (R200_VTX_TEX0_COMP_CNT_SHIFT + (vp_inputs[i] - 6) * 3);
-	       break;
-	    case 13:
-	       vfmt0 |= R200_VTX_XY1 | R200_VTX_Z1 | R200_VTX_W1;
-	       break;
-	    case 1:
-	    case 12:
-	    default:
-	       assert(0);
+	       vfmt0 |= R200_VTX_W1;
+	    break;
 	    }
+	 default:
+	    assert(0);
 	 }
+	 if (!rmesa->tcl.vertex_data[i].buf) {
+	    emit_vector( ctx,
+			 &(rmesa->tcl.vertex_data[i]),
+			 (char *)VB->AttribPtr[attrib]->data,
+			 emitsize,
+			 VB->AttribPtr[attrib]->stride,
+			 count );
+	 }
+after_emit:
+	 assert(nr < 12);
+	 component[nr++] = &rmesa->tcl.vertex_data[i];
       }
    }
-*/
 
    if (vfmt0 != rmesa->hw.vtx.cmd[VTX_VTXFMT_0] ||
        vfmt1 != rmesa->hw.vtx.cmd[VTX_VTXFMT_1]) {
@@ -676,42 +501,13 @@ void r200EmitArrays( GLcontext *ctx, GLu
 
 void r200ReleaseArrays( GLcontext *ctx, GLuint newinputs )
 {
-   GLuint unit;
    r200ContextPtr rmesa = R200_CONTEXT( ctx );
 
-/*    if (R200_DEBUG & DEBUG_VERTS)  */
-/*       _tnl_print_vert_flags( __FUNCTION__, newinputs ); */
-
-   if (newinputs & VERT_BIT_POS) 
-     r200ReleaseDmaRegion( rmesa, &rmesa->tcl.obj, __FUNCTION__ );
-
-   if (newinputs & VERT_BIT_WEIGHT) 
-     r200ReleaseDmaRegion( rmesa, &rmesa->tcl.weight, __FUNCTION__ );
-
-   if (newinputs & VERT_BIT_NORMAL) 
-      r200ReleaseDmaRegion( rmesa, &rmesa->tcl.norm, __FUNCTION__ );
-
-   if (newinputs & VERT_BIT_FOG) 
-      r200ReleaseDmaRegion( rmesa, &rmesa->tcl.fog, __FUNCTION__ );
-
-   if (newinputs & VERT_BIT_COLOR0) 
-      r200ReleaseDmaRegion( rmesa, &rmesa->tcl.rgba, __FUNCTION__ );
-
-   if (newinputs & VERT_BIT_COLOR1) 
-      r200ReleaseDmaRegion( rmesa, &rmesa->tcl.spec, __FUNCTION__ );
-
-   for (unit = 0 ; unit < ctx->Const.MaxTextureUnits; unit++) {
-      if (newinputs & VERT_BIT_TEX(unit))
-	 r200ReleaseDmaRegion( rmesa, &rmesa->tcl.tex[unit], __FUNCTION__ );
+   /* only do it for changed inputs ? */
+   int i;
+   for (i = 0; i < 15; i++) {
+      if (newinputs & (1 << i))
+	 r200ReleaseDmaRegion( rmesa,
+	    &rmesa->tcl.vertex_data[i], __FUNCTION__ );
    }
-
-   if (ctx->VertexProgram._Enabled) {
-      int i;
-      for (i = VERT_ATTRIB_GENERIC0; i < VERT_ATTRIB_MAX; i++) {
-	 if (newinputs & (1 << i))
-	    r200ReleaseDmaRegion( rmesa,
-	       &rmesa->tcl.generic[i - VERT_ATTRIB_GENERIC0], __FUNCTION__ );
-      }
-   }
-
 }
diff --git a/src/mesa/drivers/dri/r200/r200_tcl.c b/src/mesa/drivers/dri/r200/r200_tcl.c
index 62c335a..e0c32b2 100644
--- a/src/mesa/drivers/dri/r200/r200_tcl.c
+++ b/src/mesa/drivers/dri/r200/r200_tcl.c
@@ -384,8 +384,14 @@ static GLboolean r200_run_tcl_render( GL
    r200ContextPtr rmesa = R200_CONTEXT(ctx);
    TNLcontext *tnl = TNL_CONTEXT(ctx);
    struct vertex_buffer *VB = &tnl->vb;
-   GLuint inputs = 0;
    GLuint i;
+   GLubyte *vimap_rev;
+/* use hw fixed order for simplicity, pos 0, weight 1, normal 2, fog 3, 
+   color0 - color3 4-7, texcoord0 - texcoord5 8-13, pos 1 14. Must not use
+   more than 12 of those at the same time. */
+   GLubyte map_rev_fixed[15] = {255, 255, 255, 255, 255, 255, 255, 255,
+			    255, 255, 255, 255, 255, 255, 255};
+
 
    /* TODO: separate this from the swtnl pipeline 
     */
@@ -404,30 +410,40 @@ static GLboolean r200_run_tcl_render( GL
       r200ValidateState( ctx );
 
    if (!ctx->VertexProgram._Enabled) {
-      inputs = VERT_BIT_POS | VERT_BIT_COLOR0;
    /* NOTE: inputs != tnl->render_inputs - these are the untransformed
     * inputs.
     */
+      map_rev_fixed[0] = VERT_ATTRIB_POS;
+      /* technically there is no reason we always need VA_COLOR0. In theory
+         could disable it depending on lighting, color materials, texturing... */
+      map_rev_fixed[4] = VERT_ATTRIB_COLOR0;
+
       if (ctx->Light.Enabled) {
-	 inputs |= VERT_BIT_NORMAL;
+	 map_rev_fixed[2] = VERT_ATTRIB_NORMAL;
       }
 
+      /* this also enables VA_COLOR1 when using separate specular
+         lighting model, which is unnecessary.
+         FIXME: OTOH, we're missing the case where an ATI_fragment_shader accesses
+         the secondary color (if lighting is disabled). The chip seems
+         misconfigured for that though elsewhere (tcl output, might lock up) */
       if (ctx->_TriangleCaps & DD_SEPARATE_SPECULAR) {
-	 inputs |= VERT_BIT_COLOR1;
+	 map_rev_fixed[5] = VERT_ATTRIB_COLOR1;
       }
 
       if ( (ctx->Fog.FogCoordinateSource == GL_FOG_COORD) && ctx->Fog.Enabled ) {
-	 inputs |= VERT_BIT_FOG;
+	 map_rev_fixed[3] = VERT_ATTRIB_FOG;
       }
 
       for (i = 0 ; i < ctx->Const.MaxTextureUnits; i++) {
 	 if (ctx->Texture.Unit[i]._ReallyEnabled) {
 	    if (rmesa->TexGenNeedNormals[i]) {
-	       inputs |= VERT_BIT_NORMAL;
+	       map_rev_fixed[2] = VERT_ATTRIB_NORMAL;
 	    }
-	    inputs |= VERT_BIT_TEX(i);
+	    map_rev_fixed[8 + i] = VERT_ATTRIB_TEX0 + i;
 	 }
       }
+      vimap_rev = &map_rev_fixed[0];
    }
    else {
       /* vtx_tcl_output_vtxfmt_0/1 need to match configuration of "fragment
@@ -437,14 +453,8 @@ static GLboolean r200_run_tcl_render( GL
 	 We only need to change compsel. */
       GLuint out_compsel = 0;
       GLuint vp_out = rmesa->curr_vp_hw->mesa_program.Base.OutputsWritten;
-#if 0
-      /* can't handle other inputs, generic attribs etc. currently - should never arrive here */
-      assert ((rmesa->curr_vp_hw->mesa_program.Base.InputsRead &
-	 ~(VERT_BIT_POS | VERT_BIT_NORMAL | VERT_BIT_COLOR0 | VERT_BIT_COLOR1 |
-	  VERT_BIT_FOG | VERT_BIT_TEX0 | VERT_BIT_TEX1 | VERT_BIT_TEX2 |
-	  VERT_BIT_TEX3 | VERT_BIT_TEX4 | VERT_BIT_TEX5)) == 0);
-#endif
-      inputs |= rmesa->curr_vp_hw->mesa_program.Base.InputsRead;
+
+      vimap_rev = &rmesa->curr_vp_hw->inputmap_rev[0];
       assert(vp_out & (1 << VERT_RESULT_HPOS));
       out_compsel = R200_OUTPUT_XYZW;
       if (vp_out & (1 << VERT_RESULT_COL0)) {
@@ -473,7 +483,7 @@ static GLboolean r200_run_tcl_render( GL
    /* Do the actual work:
     */
    r200ReleaseArrays( ctx, ~0 /* stage->changed_inputs */ );
-   r200EmitArrays( ctx, inputs );
+   r200EmitArrays( ctx, vimap_rev );
 
    rmesa->tcl.Elts = VB->Elts;
 
diff --git a/src/mesa/drivers/dri/r200/r200_vertprog.c b/src/mesa/drivers/dri/r200/r200_vertprog.c
index 713e2f9..4960d48 100644
--- a/src/mesa/drivers/dri/r200/r200_vertprog.c
+++ b/src/mesa/drivers/dri/r200/r200_vertprog.c
@@ -405,7 +405,6 @@ static GLboolean r200_translate_vertex_p
    int dofogfix = 0;
    int fog_temp_i = 0;
    int free_inputs;
-   int free_inputs_conv;
    int array_count = 0;
 
    vp->native = GL_FALSE;
@@ -477,6 +476,8 @@ static GLboolean r200_translate_vertex_p
 
    for(i = 0; i < VERT_ATTRIB_MAX; i++)
       vp->inputs[i] = -1;
+   for(i = 0; i < 15; i++)
+      vp->inputmap_rev[i] = 255;
    free_inputs = 0x2ffd;
 
 /* fglrx uses fixed inputs as follows for conventional attribs.
@@ -499,38 +500,45 @@ static GLboolean r200_translate_vertex_p
 /* may look different when using idx buf / input_route instead of se_vtx_fmt? */
    if (mesa_vp->Base.InputsRead & VERT_BIT_POS) {
       vp->inputs[VERT_ATTRIB_POS] = 0;
+      vp->inputmap_rev[0] = VERT_ATTRIB_POS;
       free_inputs &= ~(1 << 0);
       array_count++;
    }
    if (mesa_vp->Base.InputsRead & VERT_BIT_WEIGHT) {
       vp->inputs[VERT_ATTRIB_WEIGHT] = 12;
+      vp->inputmap_rev[1] = VERT_ATTRIB_WEIGHT;
       array_count++;
    }
    if (mesa_vp->Base.InputsRead & VERT_BIT_NORMAL) {
       vp->inputs[VERT_ATTRIB_NORMAL] = 1;
+      vp->inputmap_rev[2] = VERT_ATTRIB_NORMAL;
       array_count++;
    }
    if (mesa_vp->Base.InputsRead & VERT_BIT_COLOR0) {
       vp->inputs[VERT_ATTRIB_COLOR0] = 2;
+      vp->inputmap_rev[4] = VERT_ATTRIB_COLOR0;
       free_inputs &= ~(1 << 2);
       array_count++;
    }
    if (mesa_vp->Base.InputsRead & VERT_BIT_COLOR1) {
       vp->inputs[VERT_ATTRIB_COLOR1] = 3;
+      vp->inputmap_rev[5] = VERT_ATTRIB_COLOR1;
       free_inputs &= ~(1 << 3);
       array_count++;
    }
    if (mesa_vp->Base.InputsRead & VERT_BIT_FOG) {
       vp->inputs[VERT_ATTRIB_FOG] = 15; array_count++;
+      vp->inputmap_rev[3] = VERT_ATTRIB_FOG;
+      array_count++;
    }
    for (i = VERT_ATTRIB_TEX0; i <= VERT_ATTRIB_TEX5; i++) {
       if (mesa_vp->Base.InputsRead & (1 << i)) {
 	 vp->inputs[i] = i - VERT_ATTRIB_TEX0 + 6;
+	 vp->inputmap_rev[8 + i - VERT_ATTRIB_TEX0] = i;
 	 free_inputs &= ~(1 << (i - VERT_ATTRIB_TEX0 + 6));
 	 array_count++;
       }
    }
-   free_inputs_conv = free_inputs;
    /* using VERT_ATTRIB_TEX6/7 would be illegal */
    /* completely ignore aliasing? */
    for (i = VERT_ATTRIB_GENERIC0; i < VERT_ATTRIB_MAX; i++) {
@@ -549,13 +557,14 @@ static GLboolean r200_translate_vertex_p
 	    if (free_inputs & (1 << j)) {
 	       free_inputs &= ~(1 << j);
 	       vp->inputs[i] = j;
-	       vp->rev_inputs[j] = i;
+	       if (j == 0) vp->inputmap_rev[j] = i; /* mapped to pos */
+	       else if (j < 12) vp->inputmap_rev[j + 2] = i; /* mapped to col/tex */
+	       else vp->inputmap_rev[j + 1] = i; /* mapped to pos1 */
 	       break;
 	    }
 	 }
       }
    }
-   vp->gen_inputs_mapped = free_inputs ^ free_inputs_conv;
 
    if (!(mesa_vp->Base.OutputsWritten & (1 << VERT_RESULT_HPOS))) {
       if (R200_DEBUG & DEBUG_FALLBACKS) {
diff-tree 54dac2c84310536cce962101de29546d3eb80175 (from 6cf892eeb6edd69d4ba77d4ececa21a09ba317c4)
Author: Roland Scheidegger <sroland at tungstengraphics.com>
Date:   Fri Feb 9 00:36:40 2007 +0100

    optimize generated vertex programs a bit
    
    Use new internal state to avoid per-vertex normalization of static spot
    direction vector. Use internal state for simpler per-vertex fog computations
    (MAD instead of SUB/MUL for linear fog, EX2 instead of POW for EXP/EXP2 fog).
    Simplify point size calc (2 MADs instead of MOV, MUL, MUL, DP3), and while
    there fix it up (RSQ instead of RCP). All untested...

diff --git a/src/mesa/main/imports.h b/src/mesa/main/imports.h
index d9885db..0633b3b 100644
--- a/src/mesa/main/imports.h
+++ b/src/mesa/main/imports.h
@@ -138,6 +138,14 @@ typedef union { GLfloat f; GLint i; } fi
 #define M_E (2.7182818284590452354)
 #endif
 
+#ifndef ONE_DIV_LN2
+#define ONE_DIV_LN2 (1.442695040888963456)
+#endif
+
+#ifndef ONE_DIV_SQRT_LN2
+#define ONE_DIV_SQRT_LN2 (1.201122408786449815)
+#endif
+
 #ifndef FLT_MAX_EXP
 #define FLT_MAX_EXP 128
 #endif
diff --git a/src/mesa/shader/arbprogparse.c b/src/mesa/shader/arbprogparse.c
index 72d4909..b9ff08d 100644
--- a/src/mesa/shader/arbprogparse.c
+++ b/src/mesa/shader/arbprogparse.c
@@ -4101,7 +4101,7 @@ _mesa_parse_arb_vertex_program(GLcontext
    program->Base.Parameters = ap.Base.Parameters; 
 
 #if DEBUG_VP
-   _mesa_printf("____________Vertex program %u __________\n", program->Base.ID);
+   _mesa_printf("____________Vertex program %u __________\n", program->Base.Id);
    _mesa_print_program(&program->Base);
 #endif
 }
diff --git a/src/mesa/shader/program.c b/src/mesa/shader/program.c
index d301f19..7e6cd26 100644
--- a/src/mesa/shader/program.c
+++ b/src/mesa/shader/program.c
@@ -996,6 +996,30 @@ _mesa_fetch_state(GLcontext *ctx, const 
 	       }
                break;
 	    }
+	    case STATE_FOG_PARAMS_OPTIMIZED:
+	       /* this makes it possible to use simpler per-vertex fog calcs. POW
+		  (for EXP/EXP2 fog) might be more expensive than EX2 on some hw,
+		  plus it needs another constant (e) anyway. Linear fog can now be
+		  done with a single MAD.
+		  linear: fogcoord * -1/(end-start) + end/(end-start)
+		  exp: 2^-(density/ln(2) * fogcoord)
+		  exp2: 2^-((density/sqrt(ln(2)) * fogcoord)^2) */
+	       value[0] = -1.0F / (ctx->Fog.End - ctx->Fog.Start);
+	       value[1] = ctx->Fog.End / (ctx->Fog.End - ctx->Fog.Start);
+	       value[2] = ctx->Fog.Density * ONE_DIV_LN2;
+	       value[3] = ctx->Fog.Density * ONE_DIV_SQRT_LN2;
+	       break;
+	    case STATE_SPOT_DIR_NORMALIZED: {
+	       /* here, state[2] is the light number */
+	       /* pre-normalize spot dir */
+	       const GLuint ln = (GLuint) state[2];
+	       value[0] = ctx->Light.Light[ln].EyeDirection[0];
+	       value[1] = ctx->Light.Light[ln].EyeDirection[1];
+	       value[2] = ctx->Light.Light[ln].EyeDirection[2];
+	       NORMALIZE_3FV(value);
+	       value[3] = ctx->Light.Light[ln]._CosCutoff;
+	       break;
+	    }
 	    default:
 	       /* unknown state indexes are silently ignored
 	       *  should be handled by the driver.
@@ -1075,6 +1099,10 @@ make_state_flags(const GLint state[])
 	 return _NEW_MODELVIEW;
       case STATE_TEXRECT_SCALE:
 	 return _NEW_TEXTURE;
+      case STATE_FOG_PARAMS_OPTIMIZED:
+	 return _NEW_FOG;
+      case STATE_SPOT_DIR_NORMALIZED:
+	 return _NEW_LIGHT;
       default:
          /* unknown state indexes are silently ignored and
          *  no flag set, since it is handled by the driver.
@@ -1232,6 +1260,8 @@ append_token(char *dst, enum state_index
    case STATE_INTERNAL:
    case STATE_NORMAL_SCALE:
    case STATE_POSITION_NORMALIZED:
+   case STATE_FOG_PARAMS_OPTIMIZED:
+   case STATE_SPOT_DIR_NORMALIZED:
       append(dst, "(internal)");
       break;
    default:
diff --git a/src/mesa/shader/program.h b/src/mesa/shader/program.h
index af06c03..a0bde07 100644
--- a/src/mesa/shader/program.h
+++ b/src/mesa/shader/program.h
@@ -190,6 +190,8 @@ enum state_index {
    STATE_NORMAL_SCALE,
    STATE_TEXRECT_SCALE,
    STATE_POSITION_NORMALIZED,   /* normalized light position */
+   STATE_FOG_PARAMS_OPTIMIZED,  /* for faster fog calc */
+   STATE_SPOT_DIR_NORMALIZED,   /* pre-normalized spot dir */
    STATE_INTERNAL_DRIVER	/* first available state index for drivers (must be last) */
 };
 
diff --git a/src/mesa/tnl/t_vp_build.c b/src/mesa/tnl/t_vp_build.c
index 805d05a..0b6f506 100644
--- a/src/mesa/tnl/t_vp_build.c
+++ b/src/mesa/tnl/t_vp_build.c
@@ -806,14 +806,13 @@ static struct ureg calculate_light_atten
    /* Calculate spot attenuation:
     */
    if (!p->state->unit[i].light_spotcutoff_is_180) {
-      struct ureg spot_dir = register_param3(p, STATE_LIGHT, i,
-					     STATE_SPOT_DIRECTION);
+      struct ureg spot_dir_norm = register_param3(p, STATE_INTERNAL,
+						  STATE_SPOT_DIR_NORMALIZED, i);
       struct ureg spot = get_temp(p);
       struct ureg slt = get_temp(p);
-	       
-      emit_normalize_vec3( p, spot, spot_dir ); /* XXX: precompute! */
-      emit_op2(p, OPCODE_DP3, spot, 0, negate(VPpli), spot);
-      emit_op2(p, OPCODE_SLT, slt, 0, swizzle1(spot_dir,W), spot);
+
+      emit_op2(p, OPCODE_DP3, spot, 0, negate(VPpli), spot_dir_norm);
+      emit_op2(p, OPCODE_SLT, slt, 0, swizzle1(spot_dir_norm,W), spot);
       emit_op2(p, OPCODE_POW, spot, 0, spot, swizzle1(attenuation, W));
       emit_op2(p, OPCODE_MUL, att, 0, slt, spot);
 
@@ -1103,29 +1102,26 @@ static void build_fog( struct tnl_progra
    }
 
    if (p->state->tnl_do_vertex_fog) {
-      struct ureg params = register_param1(p, STATE_FOG_PARAMS);
+      struct ureg params = register_param1(p, STATE_FOG_PARAMS_OPTIMIZED);
       struct ureg tmp = get_temp(p);
 
       switch (p->state->fog_mode) {
       case FOG_LINEAR: {
 	 struct ureg id = get_identity_param(p);
-	 emit_op2(p, OPCODE_SUB, tmp, 0, swizzle1(params,Z), input); 
-	 emit_op2(p, OPCODE_MUL, tmp, 0, tmp, swizzle1(params,W)); 
+	 emit_op3(p, OPCODE_MAD, tmp, 0, input, swizzle1(params,X), swizzle1(params,Y));
 	 emit_op2(p, OPCODE_MAX, tmp, 0, tmp, swizzle1(id,X)); /* saturate */
 	 emit_op2(p, OPCODE_MIN, fog, WRITEMASK_X, tmp, swizzle1(id,W));
 	 break;
       }
       case FOG_EXP:
 	 emit_op1(p, OPCODE_ABS, tmp, 0, input); 
-	 emit_op2(p, OPCODE_MUL, tmp, 0, tmp, swizzle1(params,X)); 
-	 emit_op2(p, OPCODE_POW, fog, WRITEMASK_X, 
-		  register_const1f(p, M_E), negate(tmp)); 
+	 emit_op2(p, OPCODE_MUL, tmp, 0, tmp, swizzle1(params,Z));
+	 emit_op1(p, OPCODE_EX2, fog, WRITEMASK_X, negate(tmp));
 	 break;
       case FOG_EXP2:
-	 emit_op2(p, OPCODE_MUL, tmp, 0, input, swizzle1(params,X)); 
+	 emit_op2(p, OPCODE_MUL, tmp, 0, input, swizzle1(params,W));
 	 emit_op2(p, OPCODE_MUL, tmp, 0, tmp, tmp); 
-	 emit_op2(p, OPCODE_POW, fog, WRITEMASK_X, 
-		  register_const1f(p, M_E), negate(tmp)); 
+	 emit_op1(p, OPCODE_EX2, fog, WRITEMASK_X, negate(tmp));
 	 break;
       }
       
@@ -1317,8 +1313,6 @@ static void build_texture_transform( str
 }
 
 
-/* Seems like it could be tighter:
- */
 static void build_pointsize( struct tnl_program *p )
 {
    struct ureg eye = get_eye_position(p);
@@ -1327,20 +1321,25 @@ static void build_pointsize( struct tnl_
    struct ureg out = register_output(p, VERT_RESULT_PSIZ);
    struct ureg ut = get_temp(p);
 
-   /* 1, -Z, Z * Z, 1 */      
-   emit_op1(p, OPCODE_MOV, ut, 0, swizzle1(get_identity_param(p), W));
-   emit_op2(p, OPCODE_MUL, ut, WRITEMASK_YZ, ut, negate(swizzle1(eye, Z)));
-   emit_op2(p, OPCODE_MUL, ut, WRITEMASK_Z, ut, negate(swizzle1(eye, Z)));
-
-
-   /* p1 +  p2 * dist + p3 * dist * dist, 0 */
-   emit_op2(p, OPCODE_DP3, ut, 0, ut, state_attenuation);
-
-   /* 1 / factor */
-   emit_op1(p, OPCODE_RCP, ut, 0, ut ); 
-
-   /* out = pointSize / factor */
-   emit_op2(p, OPCODE_MUL, out, WRITEMASK_X, ut, state_size); 
+   /* p1 + dist * (p2 + dist * p3); */
+   emit_op3(p, OPCODE_MAD, ut, 0, negate(swizzle1(eye, Z)),
+		swizzle1(state_attenuation, Z), swizzle1(state_attenuation, Y));
+   emit_op3(p, OPCODE_MAD, ut, 0, negate(swizzle1(eye, Z)),
+		ut, swizzle1(state_attenuation, X));
+
+   /* 1 / sqrt(factor) */
+   emit_op1(p, OPCODE_RSQ, ut, 0, ut );
+
+#if 1
+   /* out = pointSize / sqrt(factor) */
+   emit_op2(p, OPCODE_MUL, out, WRITEMASK_X, ut, state_size);
+#else
+   /* not sure, might make sense to do clamping here,
+      but it's not done in t_vb_points either */
+   emit_op2(p, OPCODE_MUL, ut, 0, ut, state_size);
+   emit_op2(p, OPCODE_MAX, ut, 0, ut, swizzle1(state_size, Y));
+   emit_op2(p, OPCODE_MIN, out, WRITEMASK_X, ut, swizzle1(state_size, Z));
+#endif
 
    release_temp(p, ut);
 }
diff-tree 6cf892eeb6edd69d4ba77d4ececa21a09ba317c4 (from f46c19d965fb05a49d361aa251e37b5ef32dd839)
Author: Roland Scheidegger <sroland at tungstengraphics.com>
Date:   Thu Feb 8 23:54:28 2007 +0100

    fix issues in vp when using FRAG_BIT_WPOS in a fragment program (bug #9910)
    
    Redirect all VERT_RESULT_HPOS writes to a temp and use that for fixup.
    The viewport transformation still seems to take some shortcuts, and it
    still does not seem to work at all...

diff --git a/src/mesa/drivers/dri/r300/r300_vertexprog.c b/src/mesa/drivers/dri/r300/r300_vertexprog.c
index c08c987..2ff92e1 100644
--- a/src/mesa/drivers/dri/r300/r300_vertexprog.c
+++ b/src/mesa/drivers/dri/r300/r300_vertexprog.c
@@ -960,26 +960,23 @@ static void position_invariant(struct gl
 
 static void insert_wpos(struct r300_vertex_program *vp,
 		       struct gl_program *prog,
-		       GLint pos)
+		       GLuint temp_index)
 {
 
 	GLint tokens[6] = { STATE_INTERNAL, STATE_R300_WINDOW_DIMENSION, 0, 0, 0, 0 };
 	struct prog_instruction *vpi;
 	struct prog_instruction *vpi_insert;
-	GLuint temp_index;
 	GLuint window_index;
 	int i = 0;
 	
 	vpi = malloc((prog->NumInstructions + 5) * sizeof(struct prog_instruction));
-	memcpy(vpi, prog->Instructions, (pos+1) * sizeof(struct prog_instruction));
+	/* all but END */
+	memcpy(vpi, prog->Instructions, (prog->NumInstructions - 1) * sizeof(struct prog_instruction));
+	/* END */
+	memcpy(&vpi[prog->NumInstructions + 4], &prog->Instructions[prog->NumInstructions - 1],
+		sizeof(struct prog_instruction));
 	
-	vpi_insert = &vpi[pos];
-
-	/* make a copy before outputting VERT_RESULT_HPOS */
-	vpi_insert->DstReg.File = vpi_insert->SrcReg[2].File;
-	vpi_insert->DstReg.Index = temp_index = vpi_insert->SrcReg[2].Index;
-	
-	vpi_insert++;
+	vpi_insert = &vpi[prog->NumInstructions - 1];
 	memset(vpi_insert, 0, 5 * sizeof(struct prog_instruction));
 
 	vpi_insert[i].Opcode = OPCODE_MOV;
@@ -1062,8 +1059,6 @@ static void insert_wpos(struct r300_vert
 	vpi_insert[i].SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_ONE, SWIZZLE_ONE);
 	i++;
 
-	memcpy(&vpi_insert[i], &prog->Instructions[pos+1], (prog->NumInstructions-(pos+1)) * sizeof(struct prog_instruction));
-
 	free(prog->Instructions);
 
 	prog->Instructions = vpi;
@@ -1072,24 +1067,24 @@ static void insert_wpos(struct r300_vert
 	vpi = &prog->Instructions[prog->NumInstructions-1];
 
 	assert(vpi->Opcode == OPCODE_END);
-	/* we need position, don't we ? :) */
-	prog->InputsRead |= (1 << VERT_ATTRIB_POS);
 }
 
 static void pos_as_texcoord(struct r300_vertex_program *vp,
 			    struct gl_program *prog)
 {
 	struct prog_instruction *vpi;
-	int pos = 0;
-	
-	for(vpi = prog->Instructions; vpi->Opcode != OPCODE_END; vpi++, pos++){
+	GLuint tempregi = prog->NumTemporaries;
+	/* should do something else if no temps left... */
+	prog->NumTemporaries++;
+
+	for(vpi = prog->Instructions; vpi->Opcode != OPCODE_END; vpi++){
 		if( vpi->DstReg.File == PROGRAM_OUTPUT &&
 		    vpi->DstReg.Index == VERT_RESULT_HPOS ){
-			insert_wpos(vp, prog, pos);
-			break;
+			vpi->DstReg.File = PROGRAM_TEMPORARY;
+			vpi->DstReg.Index = tempregi;
 		}
 	}
-
+	insert_wpos(vp, prog, tempregi);
 }
 
 static struct r300_vertex_program *build_program(struct r300_vertex_program_key *wanted_key,