mesa: Branch 'index-swtnl-0.1' - 5 commits

Keith Whitwell keithw at kemper.freedesktop.org
Wed Mar 14 19:07:21 UTC 2007


 src/mesa/drivers/dri/i915tex/i830_vtbl.c         |    2 
 src/mesa/drivers/dri/i915tex/i915_context.h      |   12 
 src/mesa/drivers/dri/i915tex/i915_state.c        |    2 
 src/mesa/drivers/dri/i915tex/i915_vtbl.c         |  366 +++++++++++++++--------
 src/mesa/drivers/dri/i915tex/intel_batchbuffer.c |  128 ++++++--
 src/mesa/drivers/dri/i915tex/intel_batchbuffer.h |   91 ++++-
 src/mesa/drivers/dri/i915tex/intel_context.c     |    2 
 src/mesa/drivers/dri/i915tex/intel_tris.c        |   17 -
 8 files changed, 446 insertions(+), 174 deletions(-)

New commits:
diff-tree c5cf7073859dd91e3ff6a2a693eb347faf76dd49 (from b806180a2a024b4b7a4e3fe32ea33773160ef054)
Author: Keith Whitwell <keith at tungstengraphics.com>
Date:   Wed Mar 14 19:00:11 2007 +0000

    Turn off lots of debug.  Enable dynamic indirect state.

diff --git a/src/mesa/drivers/dri/i915tex/i915_vtbl.c b/src/mesa/drivers/dri/i915tex/i915_vtbl.c
index bb9c60f..49c0142 100644
--- a/src/mesa/drivers/dri/i915tex/i915_vtbl.c
+++ b/src/mesa/drivers/dri/i915tex/i915_vtbl.c
@@ -165,11 +165,6 @@ static GLuint emit_indirect(struct intel
    GLuint delta;
    GLuint segment;
 
-   if (!state) {
-      segment = 0; 
-      goto out;
-   }
-
    switch (flag) {
    case LI0_STATE_DYNAMIC_INDIRECT:
       segment = SEGMENT_DYNAMIC_INDIRECT;
@@ -180,11 +175,12 @@ static GLuint emit_indirect(struct intel
        * like.
        */
       delta = ((intel->batch->segment_finish_offset[segment] + size - 4) |
-	       DIS0_BUFFER_VALID |
-	 DIS0_BUFFER_RESET);
+	       DIS0_BUFFER_VALID | 
+	       DIS0_BUFFER_RESET);
+
 
       BEGIN_BATCH(2,0);
-      OUT_BATCH( _3DSTATE_LOAD_INDIRECT | flag | (1<<14));
+      OUT_BATCH( _3DSTATE_LOAD_INDIRECT | flag | (1<<14) | 0);
       OUT_RELOC( intel->batch->buffer, 
 		 DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_EXE,
 		 DRM_BO_MASK_MEM | DRM_BO_FLAG_EXE,
@@ -215,7 +211,6 @@ static GLuint emit_indirect(struct intel
       break;
    }
 
- out:
    { 
       GLuint offset = intel->batch->segment_finish_offset[segment];
       intel->batch->segment_finish_offset[segment] += size;
@@ -223,23 +218,11 @@ static GLuint emit_indirect(struct intel
       if (state != NULL)
 	 memcpy(intel->batch->map + offset, state, size);
 
-      _mesa_printf("returning offset 0x%x\n", offset);
       return offset;
    }
 }
 
 
-static void emit(struct intel_context *intel, 
-		 const GLuint *state, GLuint size )
-{
-   GLint i;
-
-   BEGIN_BATCH( size/4, 0 );
-   for (i = 0; i < size/4; i++)
-      OUT_BATCH( state[i] );
-   ADVANCE_BATCH( );
-}
-
 static void
 i915_emit_invarient_state(struct intel_context *intel)
 {
@@ -313,19 +296,9 @@ i915_emit_invarient_state(struct intel_c
 
       (_3DSTATE_BACKFACE_STENCIL_OPS | BFO_ENABLE_STENCIL_TWO_SIDE | 0)
    };
-
-   /* Do this once for initialization.  Not really needed if we do
-    * other indirect state later.
-    */
-#if 0
-   BEGIN_BATCH(2, 0);
-   OUT_BATCH(_3DSTATE_LOAD_INDIRECT | 0);
-   OUT_BATCH(0);
-   ADVANCE_BATCH();
-#endif
    
    emit_indirect( intel,
-		  LI0_STATE_STATIC_INDIRECT,
+ 		  LI0_STATE_STATIC_INDIRECT, 
 		  invarient_state,
 		  sizeof(invarient_state) );
 }
@@ -386,7 +359,7 @@ get_state_size(struct i915_hw_state *sta
 }
 
 #define OUT(x) do {				\
-  _mesa_printf("OUT(0x%08x)\n", x);		\
+  if (0) _mesa_printf("OUT(0x%08x)\n", x);		\
  *p++ = (x);					\
 } while(0)
 
@@ -424,13 +397,16 @@ i915_emit_state(struct intel_context *in
     * restart.
     */
    if (dirty & (I915_UPLOAD_INVARIENT | I915_UPLOAD_BUFFERS)) {
-      fprintf(stderr, "I915_UPLOAD_INVARIENT:\n");
+      if (INTEL_DEBUG & DEBUG_STATE)
+	 fprintf(stderr, "I915_UPLOAD_INVARIENT:\n");
+
       i915_emit_invarient_state(intel);
 
-      fprintf(stderr, "I915_UPLOAD_BUFFERS:\n");
+      if (INTEL_DEBUG & DEBUG_STATE)
+	 fprintf(stderr, "I915_UPLOAD_BUFFERS:\n");
 
-      /* This needs to go in dynamic indirect state, once that is
-       * working...
+      /* Does this go in dynamic indirect state, or static indirect
+       * state???
        */
       BEGIN_BATCH(3, 0);
       OUT_BATCH(state->Buffer[I915_DESTREG_CBUFADDR0]);
@@ -458,7 +434,7 @@ i915_emit_state(struct intel_context *in
       ADVANCE_BATCH();
 
 #if 0
-      /* What happens to scissor?
+      /* Where does scissor go?
        */
       OUT_BATCH(state->Buffer[I915_DESTREG_SENABLE]);
       OUT_BATCH(state->Buffer[I915_DESTREG_SR0]);
@@ -468,8 +444,11 @@ i915_emit_state(struct intel_context *in
    }
 
    if (dirty & I915_UPLOAD_CTX) {
-      fprintf(stderr, "I915_UPLOAD_CTX:\n");
+      if (INTEL_DEBUG & DEBUG_STATE)
+	 fprintf(stderr, "I915_UPLOAD_CTX:\n");
 
+      /* Immediate state: always goes in the batchbuffer.
+       */
       BEGIN_BATCH(5, 0);
       OUT_BATCH(state->Ctx[I915_CTXREG_LI]);
       OUT_BATCH(state->Ctx[I915_CTXREG_LIS2]);
@@ -477,20 +456,11 @@ i915_emit_state(struct intel_context *in
       OUT_BATCH(state->Ctx[I915_CTXREG_LIS5]);
       OUT_BATCH(state->Ctx[I915_CTXREG_LIS6]);
       ADVANCE_BATCH();
-
-#if 0
+      
       emit_indirect(intel, 
 		    LI0_STATE_DYNAMIC_INDIRECT,
 		    state->Ctx + I915_CTXREG_STATE4, 
 		    4 * sizeof(GLuint) );
-#else
-      BEGIN_BATCH(4, 0); 
-      OUT_BATCH(state->Ctx[I915_CTXREG_STATE4]); 
-      OUT_BATCH(state->Ctx[I915_CTXREG_IAB]); 
-      OUT_BATCH(state->Ctx[I915_CTXREG_BLENDCOLOR0]); 
-      OUT_BATCH(state->Ctx[I915_CTXREG_BLENDCOLOR1]); 
-      ADVANCE_BATCH();
-#endif
    }
 
 
@@ -506,11 +476,14 @@ i915_emit_state(struct intel_context *in
          if (dirty & I915_UPLOAD_TEX(i))
             nr++;
 
+      /* A bit of a nasty kludge so that we can setup the relocation
+       * information for the buffer address in the indirect state
+       * packet:
+       */
       offset = emit_indirect(intel, 
 			     LI0_STATE_MAP,
 			     NULL,
 			     (2 + nr * 3) * sizeof(GLuint) );
-
       
       p = (GLuint *)(intel->batch->map + offset);
       
@@ -540,7 +513,9 @@ i915_emit_state(struct intel_context *in
 
 
 
-      fprintf(stderr, "UPLOAD SAMPLERS:\n");
+      if (INTEL_DEBUG & DEBUG_STATE)
+	 fprintf(stderr, "UPLOAD SAMPLERS:\n");
+
       offset = emit_indirect(intel, 
 			     LI0_STATE_SAMPLER,
 			     NULL,
@@ -562,16 +537,13 @@ i915_emit_state(struct intel_context *in
    }
 
    if (dirty & I915_UPLOAD_PROGRAM) {
-      fprintf(stderr, "I915_UPLOAD_PROGRAM:\n");
+      if (INTEL_DEBUG & DEBUG_STATE)
+	 fprintf(stderr, "I915_UPLOAD_PROGRAM:\n");
 
       assert((state->Program[0] & 0x1ff) + 2 == state->ProgramSize);
 
-#if 1
       emit_indirect(intel, LI0_STATE_PROGRAM,
 		    state->Program, state->ProgramSize * sizeof(GLuint));
-#else
-      emit(intel, state->Program, state->ProgramSize * sizeof(GLuint));
-#endif
 
       if (INTEL_DEBUG & DEBUG_STATE)
          i915_disassemble_program(state->Program, state->ProgramSize);
@@ -579,13 +551,11 @@ i915_emit_state(struct intel_context *in
 
 
    if (dirty & I915_UPLOAD_CONSTANTS) {
-      fprintf(stderr, "I915_UPLOAD_CONSTANTS:\n");
-#if 1
+      if (INTEL_DEBUG & DEBUG_STATE)
+	 fprintf(stderr, "I915_UPLOAD_CONSTANTS:\n");
+
       emit_indirect(intel, LI0_STATE_CONSTANTS,
 		    state->Constant, state->ConstantSize * sizeof(GLuint));
-#else
-      emit(intel, state->Constant, state->ConstantSize * sizeof(GLuint));
-#endif
    }
 
 
diff --git a/src/mesa/drivers/dri/i915tex/intel_batchbuffer.c b/src/mesa/drivers/dri/i915tex/intel_batchbuffer.c
index acac170..dd6e416 100644
--- a/src/mesa/drivers/dri/i915tex/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/i915tex/intel_batchbuffer.c
@@ -235,11 +235,13 @@ do_flush_locked(struct intel_batchbuffer
       struct buffer_reloc *r = &batch->reloc[i];
 
       ptr[r->offset / 4] = driBOOffset(r->buf) + r->delta;
-      _mesa_printf("reloc offset %x value 0x%x + 0x%x\n",
-		   r->offset, driBOOffset(r->buf), r->delta);
+      
+      if (INTEL_DEBUG & DEBUG_BATCH) 
+	 _mesa_printf("reloc offset %x value 0x%x + 0x%x\n",
+		      r->offset, driBOOffset(r->buf), r->delta);
    }
 
-/*    if (INTEL_DEBUG & DEBUG_BATCH) */
+    if (INTEL_DEBUG & DEBUG_BATCH) 
       intel_dump_batchbuffer(batch, ptr);
 
    driBOUnmap(batch->buffer);
diff --git a/src/mesa/drivers/dri/i915tex/intel_batchbuffer.h b/src/mesa/drivers/dri/i915tex/intel_batchbuffer.h
index 6bbbf33..7bf705e 100644
--- a/src/mesa/drivers/dri/i915tex/intel_batchbuffer.h
+++ b/src/mesa/drivers/dri/i915tex/intel_batchbuffer.h
@@ -147,29 +147,29 @@ intel_batchbuffer_require_space(struct i
 #define BEGIN_BATCH_SEGMENT(seg, n, flags) do {				\
    assert(!intel->prim.flush);					\
    intel_batchbuffer_require_space(intel->batch, seg, (n)*4, flags);	\
-   _mesa_printf("BEGIN_BATCH(%d,%d,%d) in %s\n", seg, n, flags, __FUNCTION__); \
+   if (0) _mesa_printf("BEGIN_BATCH(%d,%d,%d) in %s\n", seg, n, flags, __FUNCTION__); \
 } while (0)
 
 #define OUT_BATCH_SEGMENT(seg, d) do {				\
-      _mesa_printf("OUT_BATCH(%d, 0x%08x)\n", seg, d);  		\
+      if (0) _mesa_printf("OUT_BATCH(%d, 0x%08x)\n", seg, d);  		\
       intel_batchbuffer_emit_dword(intel->batch, seg, d);	\
 } while (0)
 
 #define OUT_BATCH_F_SEGMENT(seg, fl) do {			\
    fi_type fi;					\
    fi.f = fl;					\
-   _mesa_printf("OUT_BATCH(%d, 0x%08x)\n", seg, fi.i);  \
+   if (0) _mesa_printf("OUT_BATCH(%d, 0x%08x)\n", seg, fi.i);  \
    intel_batchbuffer_emit_dword(intel->batch, seg, fi.i);	\
 } while (0)
 
 #define OUT_RELOC_SEGMENT(seg, buf,flags,mask,delta) do {				\
    assert((delta) >= 0);						\
-   _mesa_printf("OUT_RELOC( seg %d buf %p offset %x )\n", seg, buf, delta);		\
+   if (0) _mesa_printf("OUT_RELOC( seg %d buf %p offset %x )\n", seg, buf, delta);		\
    intel_batchbuffer_emit_reloc(intel->batch, seg, buf, flags, mask, delta);	\
 } while (0)
 
 #define ADVANCE_BATCH_SEGMENT(seg) do { \
-   _mesa_printf("ADVANCE_BATCH()\n");		\
+   if (0) _mesa_printf("ADVANCE_BATCH()\n");		\
 } while(0)
 
 
diff-tree b806180a2a024b4b7a4e3fe32ea33773160ef054 (from 8b4f4abc391dc8881b5c01b528a9794c4b04590a)
Author: Keith Whitwell <keith at tungstengraphics.com>
Date:   Wed Mar 14 18:58:55 2007 +0000

    Fix off-by-one in LOAD_IMMEDIATE packet size

diff --git a/src/mesa/drivers/dri/i915tex/i915_state.c b/src/mesa/drivers/dri/i915tex/i915_state.c
index 78ae4bd..1fafadc 100644
--- a/src/mesa/drivers/dri/i915tex/i915_state.c
+++ b/src/mesa/drivers/dri/i915tex/i915_state.c
@@ -859,7 +859,7 @@ i915_init_packets(struct i915_context *i
       i915->state.Ctx[I915_CTXREG_LI] = (_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
                                          I1_LOAD_S(2) |
                                          I1_LOAD_S(4) |
-                                         I1_LOAD_S(5) | I1_LOAD_S(6) | (4));
+                                         I1_LOAD_S(5) | I1_LOAD_S(6) | (3));
       i915->state.Ctx[I915_CTXREG_LIS2] = 0;
       i915->state.Ctx[I915_CTXREG_LIS4] = 0;
       i915->state.Ctx[I915_CTXREG_LIS5] = 0;
diff-tree 8b4f4abc391dc8881b5c01b528a9794c4b04590a (from 61c7591f24abe67ee4148d16ca30b9c6d5b6b4c6)
Author: Keith Whitwell <keith at tungstengraphics.com>
Date:   Wed Mar 14 15:40:11 2007 +0000

    Emit sampler and map indirect state.

diff --git a/src/mesa/drivers/dri/i915tex/i915_vtbl.c b/src/mesa/drivers/dri/i915tex/i915_vtbl.c
index e2f271a..bb9c60f 100644
--- a/src/mesa/drivers/dri/i915tex/i915_vtbl.c
+++ b/src/mesa/drivers/dri/i915tex/i915_vtbl.c
@@ -157,15 +157,18 @@ i915_check_vertex_size(struct intel_cont
    return sz == expected;
 }
 
-static GLuint *emit_indirect(struct intel_context *intel, 
-			     GLuint flag,
-			     const GLuint *state,
-			     GLuint size )
+static GLuint emit_indirect(struct intel_context *intel, 
+			    GLuint flag,
+			    const GLuint *state,
+			    GLuint size )
 {
-   GLint i;
    GLuint delta;
    GLuint segment;
-   GLuint *ptr;
+
+   if (!state) {
+      segment = 0; 
+      goto out;
+   }
 
    switch (flag) {
    case LI0_STATE_DYNAMIC_INDIRECT:
@@ -211,17 +214,23 @@ static GLuint *emit_indirect(struct inte
       
       break;
    }
-	      
-   /* Now emit the indirect state.  XXX: better not flush! 
-    */
-   BEGIN_BATCH_SEGMENT( segment, size/4, 0 );
-   for (i = 0; i < size/4; i++)
-      OUT_BATCH_SEGMENT( segment, state[i] );
-   ADVANCE_BATCH_SEGMENT( segment );
+
+ out:
+   { 
+      GLuint offset = intel->batch->segment_finish_offset[segment];
+      intel->batch->segment_finish_offset[segment] += size;
+      
+      if (state != NULL)
+	 memcpy(intel->batch->map + offset, state, size);
+
+      _mesa_printf("returning offset 0x%x\n", offset);
+      return offset;
+   }
 }
 
 
-static void emit(struct intel_context *intel, const GLuint *state, GLuint size )
+static void emit(struct intel_context *intel, 
+		 const GLuint *state, GLuint size )
 {
    GLint i;
 
@@ -308,15 +317,15 @@ i915_emit_invarient_state(struct intel_c
    /* Do this once for initialization.  Not really needed if we do
     * other indirect state later.
     */
+#if 0
    BEGIN_BATCH(2, 0);
    OUT_BATCH(_3DSTATE_LOAD_INDIRECT | 0);
    OUT_BATCH(0);
    ADVANCE_BATCH();
-
+#endif
    
-
    emit_indirect( intel,
- 		  LI0_STATE_STATIC_INDIRECT, 
+		  LI0_STATE_STATIC_INDIRECT,
 		  invarient_state,
 		  sizeof(invarient_state) );
 }
@@ -376,6 +385,10 @@ get_state_size(struct i915_hw_state *sta
    return sz;
 }
 
+#define OUT(x) do {				\
+  _mesa_printf("OUT(0x%08x)\n", x);		\
+ *p++ = (x);					\
+} while(0)
 
 /* Push the state into the sarea and/or texture memory.
  */
@@ -415,8 +428,9 @@ i915_emit_state(struct intel_context *in
       i915_emit_invarient_state(intel);
 
       fprintf(stderr, "I915_UPLOAD_BUFFERS:\n");
-      /* This state cannot be handled by the hardware binner.  There
-       * is no need to put it in an indirect buffer.
+
+      /* This needs to go in dynamic indirect state, once that is
+       * working...
        */
       BEGIN_BATCH(3, 0);
       OUT_BATCH(state->Buffer[I915_DESTREG_CBUFADDR0]);
@@ -444,7 +458,7 @@ i915_emit_state(struct intel_context *in
       ADVANCE_BATCH();
 
 #if 0
-      /* Scissoring not allowed - what to do about this? 
+      /* What happens to scissor?
        */
       OUT_BATCH(state->Buffer[I915_DESTREG_SENABLE]);
       OUT_BATCH(state->Buffer[I915_DESTREG_SR0]);
@@ -465,16 +479,17 @@ i915_emit_state(struct intel_context *in
       ADVANCE_BATCH();
 
 #if 0
-      emit_indirect(intel, LI0_STATE_DYNAMIC_INDIRECT,
+      emit_indirect(intel, 
+		    LI0_STATE_DYNAMIC_INDIRECT,
 		    state->Ctx + I915_CTXREG_STATE4, 
 		    4 * sizeof(GLuint) );
 #else
-       BEGIN_BATCH(4, 0); 
-       OUT_BATCH(state->Ctx[I915_CTXREG_STATE4]); 
-       OUT_BATCH(state->Ctx[I915_CTXREG_IAB]); 
-       OUT_BATCH(state->Ctx[I915_CTXREG_BLENDCOLOR0]); 
-       OUT_BATCH(state->Ctx[I915_CTXREG_BLENDCOLOR1]); 
-       ADVANCE_BATCH();
+      BEGIN_BATCH(4, 0); 
+      OUT_BATCH(state->Ctx[I915_CTXREG_STATE4]); 
+      OUT_BATCH(state->Ctx[I915_CTXREG_IAB]); 
+      OUT_BATCH(state->Ctx[I915_CTXREG_BLENDCOLOR0]); 
+      OUT_BATCH(state->Ctx[I915_CTXREG_BLENDCOLOR1]); 
+      ADVANCE_BATCH();
 #endif
    }
 
@@ -483,53 +498,67 @@ i915_emit_state(struct intel_context *in
     * avoid lockups on I915 hardware. 
     */
    if (dirty & I915_UPLOAD_TEX_ALL) {
-      assert(0);
-#if 0
-      GLuint buf[2 + I915_TEX_UNITS * 3];
-      int nr = 0;
+      GLuint offset;
+      GLuint *p;
+      int i, nr = 0;
 
       for (i = 0; i < I915_TEX_UNITS; i++)
          if (dirty & I915_UPLOAD_TEX(i))
             nr++;
 
-      fprintf(stderr, "UPLOAD MAPS:\n");
+      offset = emit_indirect(intel, 
+			     LI0_STATE_MAP,
+			     NULL,
+			     (2 + nr * 3) * sizeof(GLuint) );
+
+      
+      p = (GLuint *)(intel->batch->map + offset);
+      
+      OUT(_3DSTATE_MAP_STATE | (3 * nr));
+      OUT((dirty & I915_UPLOAD_TEX_ALL) >> I915_UPLOAD_TEX_0_SHIFT);
 
-      BEGIN_STATIC(2 + nr * 3, LI0_STATE_MAP);
-      OUT_STATIC(_3DSTATE_MAP_STATE | (3 * nr));
-      OUT_STATIC((dirty & I915_UPLOAD_TEX_ALL) >> I915_UPLOAD_TEX_0_SHIFT);
       for (i = 0; i < I915_TEX_UNITS; i++)
-         if (dirty & I915_UPLOAD_TEX(i)) {
+	 if (dirty & I915_UPLOAD_TEX(i)) {
+	    if (state->tex_buffer[i]) {	  
+	       intel_batchbuffer_set_reloc( intel->batch,
+					    ((GLubyte *)p) - intel->batch->map,
+					    state->tex_buffer[i],
+					    DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
+					    DRM_BO_MASK_MEM | DRM_BO_FLAG_READ,
+					    state->tex_offset[i]);
+	       OUT(0);		/* placeholder */
+	    }
+	    else {
+	       assert(i == 0);
+	       assert(state == &i915->meta);
+	       OUT(0);
+	    }
+
+	    OUT(state->Tex[i][I915_TEXREG_MS3]);
+	    OUT(state->Tex[i][I915_TEXREG_MS4]);
+	 }
 
-            if (state->tex_buffer[i]) {
-               OUT_STATIC_RELOC(state->tex_buffer[i],
-				  DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
-				  DRM_BO_MASK_MEM | DRM_BO_FLAG_READ,
-				  state->tex_offset[i]);
-            }
-            else {
-               assert(i == 0);
-               assert(state == &i915->meta);
-               OUT_STATIC(0);
-            }
-
-            OUT_STATIC(state->Tex[i][I915_TEXREG_MS3]);
-            OUT_STATIC(state->Tex[i][I915_TEXREG_MS4]);
-         }
-      ADVANCE_STATIC();
 
 
       fprintf(stderr, "UPLOAD SAMPLERS:\n");
-      BEGIN_STATIC(2 + nr * 3, LI0_STATE_SAMPLER);
-      OUT_STATIC(_3DSTATE_SAMPLER_STATE | (3 * nr));
-      OUT_STATIC((dirty & I915_UPLOAD_TEX_ALL) >> I915_UPLOAD_TEX_0_SHIFT);
-      for (i = 0; i < I915_TEX_UNITS; i++)
-         if (dirty & I915_UPLOAD_TEX(i)) {
-            OUT_STATIC(state->Tex[i][I915_TEXREG_SS2]);
-            OUT_STATIC(state->Tex[i][I915_TEXREG_SS3]);
-            OUT_STATIC(state->Tex[i][I915_TEXREG_SS4]);
-         }
-      ADVANCE_STATIC();
-#endif
+      offset = emit_indirect(intel, 
+			     LI0_STATE_SAMPLER,
+			     NULL,
+			     (2 + nr * 3) * sizeof(GLuint) );
+
+      
+      p = (GLuint *)(intel->batch->map + offset);
+
+
+      OUT(_3DSTATE_SAMPLER_STATE | (3 * nr));
+      OUT((dirty & I915_UPLOAD_TEX_ALL) >> I915_UPLOAD_TEX_0_SHIFT);
+      for (i = 0; i < I915_TEX_UNITS; i++) {
+	 if (dirty & I915_UPLOAD_TEX(i)) {
+	    OUT(state->Tex[i][I915_TEXREG_SS2]);
+	    OUT(state->Tex[i][I915_TEXREG_SS3]);
+	    OUT(state->Tex[i][I915_TEXREG_SS4]);
+	 }
+      }
    }
 
    if (dirty & I915_UPLOAD_PROGRAM) {
diff-tree 61c7591f24abe67ee4148d16ca30b9c6d5b6b4c6 (from 0b43da5227a85d2488df78d233b974ccf5f1afc5)
Author: Keith Whitwell <keith at tungstengraphics.com>
Date:   Wed Mar 14 15:39:11 2007 +0000

    Add intel_batchbuffer_set_reloc - add relocation at arbitrary offset

diff --git a/src/mesa/drivers/dri/i915tex/intel_batchbuffer.c b/src/mesa/drivers/dri/i915tex/intel_batchbuffer.c
index 5d64344..acac170 100644
--- a/src/mesa/drivers/dri/i915tex/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/i915tex/intel_batchbuffer.c
@@ -87,7 +87,8 @@ intel_dump_batchbuffer(struct intel_batc
 {
    GLuint *ptr = (GLuint *)map;
    GLuint count = batch->segment_finish_offset[0];
-   GLuint buf = driBOOffset(batch->buffer);
+   GLuint buf0 = driBOOffset(batch->buffer);
+   GLuint buf = buf0;;
 
    fprintf(stderr, "\n\n\nIMMEDIATE: (%d)\n", count / 4);
    dump( buf, ptr, count/4 );
@@ -95,6 +96,7 @@ intel_dump_batchbuffer(struct intel_batc
 
    count = batch->segment_finish_offset[1] - batch->segment_start_offset[1];
    ptr = (GLuint *)(map + batch->segment_start_offset[1]);
+   buf = buf0 + batch->segment_start_offset[1];
 
    fprintf(stderr, "\n\n\nDYNAMIC: (%d)\n", count / 4);
    dump( buf, ptr, count/4 );
@@ -102,6 +104,7 @@ intel_dump_batchbuffer(struct intel_batc
 
    count = batch->segment_finish_offset[2] - batch->segment_start_offset[2];
    ptr = (GLuint *)(map + batch->segment_start_offset[2]);
+   buf = buf0 + batch->segment_start_offset[2];
 
    fprintf(stderr, "\n\n\nOTHER INDIRECT: (%d)\n", count / 4);
    dump( buf, ptr, count/4 );
@@ -232,6 +235,8 @@ do_flush_locked(struct intel_batchbuffer
       struct buffer_reloc *r = &batch->reloc[i];
 
       ptr[r->offset / 4] = driBOOffset(r->buf) + r->delta;
+      _mesa_printf("reloc offset %x value 0x%x + 0x%x\n",
+		   r->offset, driBOOffset(r->buf), r->delta);
    }
 
 /*    if (INTEL_DEBUG & DEBUG_BATCH) */
@@ -358,12 +363,13 @@ intel_batchbuffer_finish(struct intel_ba
 /*  This is the only way buffers get added to the validate list.
  */
 GLboolean
-intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch,
-			     GLuint segment,
-                             struct _DriBufferObject *buffer,
-                             GLuint flags, GLuint mask, GLuint delta)
+intel_batchbuffer_set_reloc(struct intel_batchbuffer *batch,
+			    GLuint offset,
+			    struct _DriBufferObject *buffer,
+			    GLuint flags, GLuint mask, GLuint delta)
 {
    assert(batch->nr_relocs < MAX_RELOCS);
+   assert((offset & 3) == 0);
 
    if (buffer != batch->buffer)
       driBOAddListItem(&batch->list, buffer, flags, mask);
@@ -375,10 +381,24 @@ intel_batchbuffer_emit_reloc(struct inte
 	 driBOReference(buffer);
 
       r->buf = buffer;
-      r->offset = batch->segment_finish_offset[segment];
+      r->offset = offset;
       r->delta = delta;
    }
 
+   return GL_TRUE;
+}
+
+
+GLboolean
+intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch,
+			     GLuint segment,
+                             struct _DriBufferObject *buffer,
+                             GLuint flags, GLuint mask, GLuint delta)
+{
+   intel_batchbuffer_set_reloc( batch,
+				batch->segment_finish_offset[segment],
+				buffer, flags, mask, delta );
+
    batch->segment_finish_offset[segment] += 4;
    return GL_TRUE;
 }
diff --git a/src/mesa/drivers/dri/i915tex/intel_batchbuffer.h b/src/mesa/drivers/dri/i915tex/intel_batchbuffer.h
index 8b7f988..6bbbf33 100644
--- a/src/mesa/drivers/dri/i915tex/intel_batchbuffer.h
+++ b/src/mesa/drivers/dri/i915tex/intel_batchbuffer.h
@@ -84,6 +84,12 @@ void intel_batchbuffer_release_space(str
 				     GLuint segment,
                                      GLuint bytes);
 
+GLboolean
+intel_batchbuffer_set_reloc(struct intel_batchbuffer *batch,
+			    GLuint offset,
+			    struct _DriBufferObject *buffer,
+			    GLuint flags, GLuint mask, GLuint delta);
+
 GLboolean intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch,
 				       GLuint segment,
                                        struct _DriBufferObject *buffer,
diff-tree 0b43da5227a85d2488df78d233b974ccf5f1afc5 (from 682fdd9462485562e3534293a12ccebc41d3c3ed)
Author: Keith Whitwell <keith at tungstengraphics.com>
Date:   Wed Mar 14 14:49:18 2007 +0000

    Experiments with indirect state.
    
    Split the batchbuffer into 3 segments for the three different types
    of state (immediate, dynamic-indirect, other-indirect).  This is
    primarily so that relocations continue to work without too many changes.
    
    Most stuff broken, but trivial/tri.c works.  Dynamic indirect state not
    working.

diff --git a/src/mesa/drivers/dri/i915tex/i830_vtbl.c b/src/mesa/drivers/dri/i915tex/i830_vtbl.c
index dd0670d..509a369 100644
--- a/src/mesa/drivers/dri/i915tex/i830_vtbl.c
+++ b/src/mesa/drivers/dri/i915tex/i830_vtbl.c
@@ -426,7 +426,7 @@ i830_emit_state(struct intel_context *in
     * scheduling is allowed, rather than assume that it is whenever a
     * batchbuffer fills up.
     */
-   intel_batchbuffer_require_space(intel->batch, get_state_size(state), 0);
+   intel_batchbuffer_require_space(intel->batch, 0, get_state_size(state), 0);
 
    /* Do this here as we may have flushed the batchbuffer above,
     * causing more state to be dirty!
diff --git a/src/mesa/drivers/dri/i915tex/i915_context.h b/src/mesa/drivers/dri/i915tex/i915_context.h
index d2713e8..f594304 100644
--- a/src/mesa/drivers/dri/i915tex/i915_context.h
+++ b/src/mesa/drivers/dri/i915tex/i915_context.h
@@ -66,12 +66,12 @@
 #define I915_DESTREG_SR2 11
 #define I915_DEST_SETUP_SIZE 12
 
-#define I915_CTXREG_STATE4		0
-#define I915_CTXREG_LI	        	1
-#define I915_CTXREG_LIS2		        2
-#define I915_CTXREG_LIS4	        	3
-#define I915_CTXREG_LIS5	        	4
-#define I915_CTXREG_LIS6	         	5
+#define I915_CTXREG_LI	        	0
+#define I915_CTXREG_LIS2		1
+#define I915_CTXREG_LIS4	        2
+#define I915_CTXREG_LIS5	        3
+#define I915_CTXREG_LIS6	        4
+#define I915_CTXREG_STATE4		5
 #define I915_CTXREG_IAB   	 	6
 #define I915_CTXREG_BLENDCOLOR0		7
 #define I915_CTXREG_BLENDCOLOR1		8
diff --git a/src/mesa/drivers/dri/i915tex/i915_vtbl.c b/src/mesa/drivers/dri/i915tex/i915_vtbl.c
index 52db9a9..e2f271a 100644
--- a/src/mesa/drivers/dri/i915tex/i915_vtbl.c
+++ b/src/mesa/drivers/dri/i915tex/i915_vtbl.c
@@ -157,71 +157,171 @@ i915_check_vertex_size(struct intel_cont
    return sz == expected;
 }
 
+static GLuint *emit_indirect(struct intel_context *intel, 
+			     GLuint flag,
+			     const GLuint *state,
+			     GLuint size )
+{
+   GLint i;
+   GLuint delta;
+   GLuint segment;
+   GLuint *ptr;
+
+   switch (flag) {
+   case LI0_STATE_DYNAMIC_INDIRECT:
+      segment = SEGMENT_DYNAMIC_INDIRECT;
+
+      /* Dynamic indirect state is different - tell it the ending
+       * address, it will execute from either the previous end address
+       * or the beginning of the 4k page, depending on what it feels
+       * like.
+       */
+      delta = ((intel->batch->segment_finish_offset[segment] + size - 4) |
+	       DIS0_BUFFER_VALID |
+	 DIS0_BUFFER_RESET);
+
+      BEGIN_BATCH(2,0);
+      OUT_BATCH( _3DSTATE_LOAD_INDIRECT | flag | (1<<14));
+      OUT_RELOC( intel->batch->buffer, 
+		 DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_EXE,
+		 DRM_BO_MASK_MEM | DRM_BO_FLAG_EXE,
+		 delta );
+      ADVANCE_BATCH();
+      break;
 
-static void
-i915_emit_invarient_state(struct intel_context *intel)
-{
-   BATCH_LOCALS;
-
-   BEGIN_BATCH(200, 0);
-
-   OUT_BATCH(_3DSTATE_AA_CMD |
-             AA_LINE_ECAAR_WIDTH_ENABLE |
-             AA_LINE_ECAAR_WIDTH_1_0 |
-             AA_LINE_REGION_WIDTH_ENABLE | AA_LINE_REGION_WIDTH_1_0);
+   default:
+      segment = SEGMENT_OTHER_INDIRECT;
 
-   OUT_BATCH(_3DSTATE_DFLT_DIFFUSE_CMD);
-   OUT_BATCH(0);
+      /* Other state is more conventional: tell the hardware the start
+       * point and size.
+       */
+      delta = (intel->batch->segment_finish_offset[segment] |
+	       SIS0_FORCE_LOAD | /* XXX: fix me */
+	       SIS0_BUFFER_VALID);
+
+      BEGIN_BATCH(3,0);
+      OUT_BATCH( _3DSTATE_LOAD_INDIRECT | flag | (1<<14) | 1);
+      OUT_RELOC( intel->batch->buffer, 
+		 DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_EXE,
+		 DRM_BO_MASK_MEM | DRM_BO_FLAG_EXE,
+		 delta );
+      OUT_BATCH( (size/4)-1 );
+      ADVANCE_BATCH();
 
-   OUT_BATCH(_3DSTATE_DFLT_SPEC_CMD);
-   OUT_BATCH(0);
+      
+      break;
+   }
+	      
+   /* Now emit the indirect state.  XXX: better not flush! 
+    */
+   BEGIN_BATCH_SEGMENT( segment, size/4, 0 );
+   for (i = 0; i < size/4; i++)
+      OUT_BATCH_SEGMENT( segment, state[i] );
+   ADVANCE_BATCH_SEGMENT( segment );
+}
 
-   OUT_BATCH(_3DSTATE_DFLT_Z_CMD);
-   OUT_BATCH(0);
 
-   /* Don't support texture crossbar yet */
-   OUT_BATCH(_3DSTATE_COORD_SET_BINDINGS |
-             CSB_TCB(0, 0) |
-             CSB_TCB(1, 1) |
-             CSB_TCB(2, 2) |
-             CSB_TCB(3, 3) |
-             CSB_TCB(4, 4) | CSB_TCB(5, 5) | CSB_TCB(6, 6) | CSB_TCB(7, 7));
-
-   OUT_BATCH(_3DSTATE_RASTER_RULES_CMD |
-             ENABLE_POINT_RASTER_RULE |
-             OGL_POINT_RASTER_RULE |
-             ENABLE_LINE_STRIP_PROVOKE_VRTX |
-             ENABLE_TRI_FAN_PROVOKE_VRTX |
-             LINE_STRIP_PROVOKE_VRTX(1) |
-             TRI_FAN_PROVOKE_VRTX(2) | ENABLE_TEXKILL_3D_4D | TEXKILL_4D);
+static void emit(struct intel_context *intel, const GLuint *state, GLuint size )
+{
+   GLint i;
 
-   /* Need to initialize this to zero.
-    */
-   OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(3) | (1));
-   OUT_BATCH(0);
+   BEGIN_BATCH( size/4, 0 );
+   for (i = 0; i < size/4; i++)
+      OUT_BATCH( state[i] );
+   ADVANCE_BATCH( );
+}
 
-   /* XXX: Use this */
-   OUT_BATCH(_3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT);
+static void
+i915_emit_invarient_state(struct intel_context *intel)
+{
+   static GLuint invarient_state[] = {
 
-   OUT_BATCH(_3DSTATE_SCISSOR_RECT_0_CMD);
-   OUT_BATCH(0);
-   OUT_BATCH(0);
+      (_3DSTATE_AA_CMD |
+       AA_LINE_ECAAR_WIDTH_ENABLE |
+       AA_LINE_ECAAR_WIDTH_1_0 |
+       AA_LINE_REGION_WIDTH_ENABLE | 
+       AA_LINE_REGION_WIDTH_1_0),
+
+      /* Could use these to reduce the size of vertices when the incoming
+       * array is constant.
+       */
+      (_3DSTATE_DFLT_DIFFUSE_CMD),
+      (0),
+
+      (_3DSTATE_DFLT_SPEC_CMD),
+      (0),
+
+      (_3DSTATE_DFLT_Z_CMD),
+      (0),
+
+      /* We support texture crossbar via the fragment shader, rather than
+       * with this mechanism.
+       */
+      (_3DSTATE_COORD_SET_BINDINGS |
+       CSB_TCB(0, 0) |
+       CSB_TCB(1, 1) |
+       CSB_TCB(2, 2) |
+       CSB_TCB(3, 3) |
+       CSB_TCB(4, 4) |
+       CSB_TCB(5, 5) | 
+       CSB_TCB(6, 6) | 
+       CSB_TCB(7, 7)),
+
+      /* Setup OpenGL rasterization state:
+       */
+      (_3DSTATE_RASTER_RULES_CMD |
+       ENABLE_POINT_RASTER_RULE |
+       OGL_POINT_RASTER_RULE |
+       ENABLE_LINE_STRIP_PROVOKE_VRTX |
+       ENABLE_TRI_FAN_PROVOKE_VRTX |
+       LINE_STRIP_PROVOKE_VRTX(1) |
+       TRI_FAN_PROVOKE_VRTX(2) | 
+       ENABLE_TEXKILL_3D_4D | 
+       TEXKILL_4D),
+
+      /* Need to initialize this to zero.
+       */
+      (_3DSTATE_LOAD_STATE_IMMEDIATE_1 | 
+       I1_LOAD_S(3) | 
+       (1)),
+      (0),
+
+      (_3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT),
+      (_3DSTATE_SCISSOR_RECT_0_CMD),
+      (0),
+      (0),
+
+      /* Turn off stipple for now
+       */
+      _3DSTATE_STIPPLE,
+      0,
+
+      /* For private depth buffers but shared color buffers, eg
+       * front-buffer rendering with a private depthbuffer.  We don't do
+       * this.
+       */
+      (_3DSTATE_DEPTH_SUBRECT_DISABLE),
 
-   OUT_BATCH(_3DSTATE_DEPTH_SUBRECT_DISABLE);
+      (_3DSTATE_BACKFACE_STENCIL_OPS | BFO_ENABLE_STENCIL_TWO_SIDE | 0)
+   };
 
-   OUT_BATCH(_3DSTATE_LOAD_INDIRECT | 0);       /* disable indirect state */
+   /* Do this once for initialization.  Not really needed if we do
+    * other indirect state later.
+    */
+   BEGIN_BATCH(2, 0);
+   OUT_BATCH(_3DSTATE_LOAD_INDIRECT | 0);
    OUT_BATCH(0);
+   ADVANCE_BATCH();
 
+   
 
-   /* Don't support twosided stencil yet */
-   OUT_BATCH(_3DSTATE_BACKFACE_STENCIL_OPS | BFO_ENABLE_STENCIL_TWO_SIDE | 0);
-
-   ADVANCE_BATCH();
+   emit_indirect( intel,
+ 		  LI0_STATE_STATIC_INDIRECT, 
+		  invarient_state,
+		  sizeof(invarient_state) );
 }
 
 
-#define emit(intel, state, size )		     \
-   intel_batchbuffer_data(intel->batch, state, size, 0 )
 
 static GLuint
 get_dirty(struct i915_hw_state *state)
@@ -284,7 +384,6 @@ i915_emit_state(struct intel_context *in
 {
    struct i915_context *i915 = i915_context(&intel->ctx);
    struct i915_hw_state *state = i915->current;
-   int i;
    GLuint dirty;
    BATCH_LOCALS;
 
@@ -295,7 +394,8 @@ i915_emit_state(struct intel_context *in
     * scheduling is allowed, rather than assume that it is whenever a
     * batchbuffer fills up.
     */
-   intel_batchbuffer_require_space(intel->batch, get_state_size(state), 0);
+   intel_batchbuffer_require_space(intel->batch, 0,
+				   get_state_size(state), 0);
 
    /* Do this here as we may have flushed the batchbuffer above,
     * causing more state to be dirty!
@@ -305,122 +405,161 @@ i915_emit_state(struct intel_context *in
    if (INTEL_DEBUG & DEBUG_STATE)
       fprintf(stderr, "%s dirty: %x\n", __FUNCTION__, dirty);
 
-   if (dirty & I915_UPLOAD_INVARIENT) {
-      if (INTEL_DEBUG & DEBUG_STATE)
-         fprintf(stderr, "I915_UPLOAD_INVARIENT:\n");
+   /* This should not change during a scene for HWZ, correct?
+    *
+    * If it does change, we probably have to flush everything and
+    * restart.
+    */
+   if (dirty & (I915_UPLOAD_INVARIENT | I915_UPLOAD_BUFFERS)) {
+      fprintf(stderr, "I915_UPLOAD_INVARIENT:\n");
       i915_emit_invarient_state(intel);
-   }
 
-   if (dirty & I915_UPLOAD_CTX) {
-      if (INTEL_DEBUG & DEBUG_STATE)
-         fprintf(stderr, "I915_UPLOAD_CTX:\n");
-
-      emit(intel, state->Ctx, sizeof(state->Ctx));
-   }
-
-   if (dirty & I915_UPLOAD_BUFFERS) {
-      if (INTEL_DEBUG & DEBUG_STATE)
-         fprintf(stderr, "I915_UPLOAD_BUFFERS:\n");
-      BEGIN_BATCH(I915_DEST_SETUP_SIZE + 2, 0);
+      fprintf(stderr, "I915_UPLOAD_BUFFERS:\n");
+      /* This state cannot be handled by the hardware binner.  There
+       * is no need to put it in an indirect buffer.
+       */
+      BEGIN_BATCH(3, 0);
       OUT_BATCH(state->Buffer[I915_DESTREG_CBUFADDR0]);
       OUT_BATCH(state->Buffer[I915_DESTREG_CBUFADDR1]);
       OUT_RELOC(state->draw_region->buffer,
                 DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE,
                 DRM_BO_MASK_MEM | DRM_BO_FLAG_WRITE,
                 state->draw_region->draw_offset);
+      ADVANCE_BATCH();
 
       if (state->depth_region) {
+	 BEGIN_BATCH(3, 0);
          OUT_BATCH(state->Buffer[I915_DESTREG_DBUFADDR0]);
          OUT_BATCH(state->Buffer[I915_DESTREG_DBUFADDR1]);
          OUT_RELOC(state->depth_region->buffer,
                    DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE,
                    DRM_BO_MASK_MEM | DRM_BO_FLAG_WRITE,
                    state->depth_region->draw_offset);
+	 ADVANCE_BATCH();
       }
 
+      BEGIN_BATCH(2, 0);
       OUT_BATCH(state->Buffer[I915_DESTREG_DV0]);
       OUT_BATCH(state->Buffer[I915_DESTREG_DV1]);
+      ADVANCE_BATCH();
+
+#if 0
+      /* Scissoring not allowed - what to do about this? 
+       */
       OUT_BATCH(state->Buffer[I915_DESTREG_SENABLE]);
       OUT_BATCH(state->Buffer[I915_DESTREG_SR0]);
       OUT_BATCH(state->Buffer[I915_DESTREG_SR1]);
       OUT_BATCH(state->Buffer[I915_DESTREG_SR2]);
-      ADVANCE_BATCH();
+#endif
    }
 
-   if (dirty & I915_UPLOAD_STIPPLE) {
-      if (INTEL_DEBUG & DEBUG_STATE)
-         fprintf(stderr, "I915_UPLOAD_STIPPLE:\n");
-      emit(intel, state->Stipple, sizeof(state->Stipple));
-   }
+   if (dirty & I915_UPLOAD_CTX) {
+      fprintf(stderr, "I915_UPLOAD_CTX:\n");
 
-   if (dirty & I915_UPLOAD_FOG) {
-      if (INTEL_DEBUG & DEBUG_STATE)
-         fprintf(stderr, "I915_UPLOAD_FOG:\n");
-      emit(intel, state->Fog, sizeof(state->Fog));
+      BEGIN_BATCH(5, 0);
+      OUT_BATCH(state->Ctx[I915_CTXREG_LI]);
+      OUT_BATCH(state->Ctx[I915_CTXREG_LIS2]);
+      OUT_BATCH(state->Ctx[I915_CTXREG_LIS4]);
+      OUT_BATCH(state->Ctx[I915_CTXREG_LIS5]);
+      OUT_BATCH(state->Ctx[I915_CTXREG_LIS6]);
+      ADVANCE_BATCH();
+
+#if 0
+      emit_indirect(intel, LI0_STATE_DYNAMIC_INDIRECT,
+		    state->Ctx + I915_CTXREG_STATE4, 
+		    4 * sizeof(GLuint) );
+#else
+       BEGIN_BATCH(4, 0); 
+       OUT_BATCH(state->Ctx[I915_CTXREG_STATE4]); 
+       OUT_BATCH(state->Ctx[I915_CTXREG_IAB]); 
+       OUT_BATCH(state->Ctx[I915_CTXREG_BLENDCOLOR0]); 
+       OUT_BATCH(state->Ctx[I915_CTXREG_BLENDCOLOR1]); 
+       ADVANCE_BATCH();
+#endif
    }
 
+
    /* Combine all the dirty texture state into a single command to
     * avoid lockups on I915 hardware. 
     */
    if (dirty & I915_UPLOAD_TEX_ALL) {
+      assert(0);
+#if 0
+      GLuint buf[2 + I915_TEX_UNITS * 3];
       int nr = 0;
 
       for (i = 0; i < I915_TEX_UNITS; i++)
          if (dirty & I915_UPLOAD_TEX(i))
             nr++;
 
-      BEGIN_BATCH(2 + nr * 3, 0);
-      OUT_BATCH(_3DSTATE_MAP_STATE | (3 * nr));
-      OUT_BATCH((dirty & I915_UPLOAD_TEX_ALL) >> I915_UPLOAD_TEX_0_SHIFT);
+      fprintf(stderr, "UPLOAD MAPS:\n");
+
+      BEGIN_STATIC(2 + nr * 3, LI0_STATE_MAP);
+      OUT_STATIC(_3DSTATE_MAP_STATE | (3 * nr));
+      OUT_STATIC((dirty & I915_UPLOAD_TEX_ALL) >> I915_UPLOAD_TEX_0_SHIFT);
       for (i = 0; i < I915_TEX_UNITS; i++)
          if (dirty & I915_UPLOAD_TEX(i)) {
 
             if (state->tex_buffer[i]) {
-               OUT_RELOC(state->tex_buffer[i],
-                         DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
-                         DRM_BO_MASK_MEM | DRM_BO_FLAG_READ,
-                         state->tex_offset[i]);
+               OUT_STATIC_RELOC(state->tex_buffer[i],
+				  DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
+				  DRM_BO_MASK_MEM | DRM_BO_FLAG_READ,
+				  state->tex_offset[i]);
             }
             else {
                assert(i == 0);
                assert(state == &i915->meta);
-               OUT_BATCH(0);
+               OUT_STATIC(0);
             }
 
-            OUT_BATCH(state->Tex[i][I915_TEXREG_MS3]);
-            OUT_BATCH(state->Tex[i][I915_TEXREG_MS4]);
+            OUT_STATIC(state->Tex[i][I915_TEXREG_MS3]);
+            OUT_STATIC(state->Tex[i][I915_TEXREG_MS4]);
          }
-      ADVANCE_BATCH();
+      ADVANCE_STATIC();
+
 
-      BEGIN_BATCH(2 + nr * 3, 0);
-      OUT_BATCH(_3DSTATE_SAMPLER_STATE | (3 * nr));
-      OUT_BATCH((dirty & I915_UPLOAD_TEX_ALL) >> I915_UPLOAD_TEX_0_SHIFT);
+      fprintf(stderr, "UPLOAD SAMPLERS:\n");
+      BEGIN_STATIC(2 + nr * 3, LI0_STATE_SAMPLER);
+      OUT_STATIC(_3DSTATE_SAMPLER_STATE | (3 * nr));
+      OUT_STATIC((dirty & I915_UPLOAD_TEX_ALL) >> I915_UPLOAD_TEX_0_SHIFT);
       for (i = 0; i < I915_TEX_UNITS; i++)
          if (dirty & I915_UPLOAD_TEX(i)) {
-            OUT_BATCH(state->Tex[i][I915_TEXREG_SS2]);
-            OUT_BATCH(state->Tex[i][I915_TEXREG_SS3]);
-            OUT_BATCH(state->Tex[i][I915_TEXREG_SS4]);
+            OUT_STATIC(state->Tex[i][I915_TEXREG_SS2]);
+            OUT_STATIC(state->Tex[i][I915_TEXREG_SS3]);
+            OUT_STATIC(state->Tex[i][I915_TEXREG_SS4]);
          }
-      ADVANCE_BATCH();
-   }
-
-   if (dirty & I915_UPLOAD_CONSTANTS) {
-      if (INTEL_DEBUG & DEBUG_STATE)
-         fprintf(stderr, "I915_UPLOAD_CONSTANTS:\n");
-      emit(intel, state->Constant, state->ConstantSize * sizeof(GLuint));
+      ADVANCE_STATIC();
+#endif
    }
 
    if (dirty & I915_UPLOAD_PROGRAM) {
-      if (INTEL_DEBUG & DEBUG_STATE)
-         fprintf(stderr, "I915_UPLOAD_PROGRAM:\n");
+      fprintf(stderr, "I915_UPLOAD_PROGRAM:\n");
 
       assert((state->Program[0] & 0x1ff) + 2 == state->ProgramSize);
 
+#if 1
+      emit_indirect(intel, LI0_STATE_PROGRAM,
+		    state->Program, state->ProgramSize * sizeof(GLuint));
+#else
       emit(intel, state->Program, state->ProgramSize * sizeof(GLuint));
+#endif
+
       if (INTEL_DEBUG & DEBUG_STATE)
          i915_disassemble_program(state->Program, state->ProgramSize);
    }
 
+
+   if (dirty & I915_UPLOAD_CONSTANTS) {
+      fprintf(stderr, "I915_UPLOAD_CONSTANTS:\n");
+#if 1
+      emit_indirect(intel, LI0_STATE_CONSTANTS,
+		    state->Constant, state->ConstantSize * sizeof(GLuint));
+#else
+      emit(intel, state->Constant, state->ConstantSize * sizeof(GLuint));
+#endif
+   }
+
+
    state->emitted |= dirty;
 }
 
diff --git a/src/mesa/drivers/dri/i915tex/intel_batchbuffer.c b/src/mesa/drivers/dri/i915tex/intel_batchbuffer.c
index 309ecf9..5d64344 100644
--- a/src/mesa/drivers/dri/i915tex/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/i915tex/intel_batchbuffer.c
@@ -66,15 +66,45 @@
  * server automatically waits on its own dma to complete before
  * modifying cliprects ???
  */
-
-static void
-intel_dump_batchbuffer(GLuint offset, GLuint * ptr, GLuint count)
+static void dump(GLuint offset, GLuint *ptr, GLuint count)
 {
-   int i;
-   fprintf(stderr, "\n\n\nSTART BATCH (%d dwords):\n", count / 4);
-   for (i = 0; i < count / 4; i += 4)
+   GLuint i;
+
+#if 0
+   for (i = 0; i < count; i += 4)
       fprintf(stderr, "0x%x:\t0x%08x 0x%08x 0x%08x 0x%08x\n",
               offset + i * 4, ptr[i], ptr[i + 1], ptr[i + 2], ptr[i + 3]);
+#else
+   for (i = 0; i < count; i++)
+      fprintf(stderr, "0x%x:\t0x%08x\n",
+              offset + i * 4, ptr[i]);
+#endif
+}
+
+
+static void
+intel_dump_batchbuffer(struct intel_batchbuffer *batch, GLubyte *map)
+{
+   GLuint *ptr = (GLuint *)map;
+   GLuint count = batch->segment_finish_offset[0];
+   GLuint buf = driBOOffset(batch->buffer);
+
+   fprintf(stderr, "\n\n\nIMMEDIATE: (%d)\n", count / 4);
+   dump( buf, ptr, count/4 );
+   fprintf(stderr, "END BATCH\n\n\n");
+
+   count = batch->segment_finish_offset[1] - batch->segment_start_offset[1];
+   ptr = (GLuint *)(map + batch->segment_start_offset[1]);
+
+   fprintf(stderr, "\n\n\nDYNAMIC: (%d)\n", count / 4);
+   dump( buf, ptr, count/4 );
+   fprintf(stderr, "END BATCH\n\n\n");
+
+   count = batch->segment_finish_offset[2] - batch->segment_start_offset[2];
+   ptr = (GLuint *)(map + batch->segment_start_offset[2]);
+
+   fprintf(stderr, "\n\n\nOTHER INDIRECT: (%d)\n", count / 4);
+   dump( buf, ptr, count/4 );
    fprintf(stderr, "END BATCH\n\n\n");
 }
 
@@ -99,7 +129,8 @@ intel_batchbuffer_reset(struct intel_bat
 
    for (i = 0; i < batch->nr_relocs; i++) {
       struct buffer_reloc *r = &batch->reloc[i];
-      driBOUnReference(r->buf);
+      if (r->buf != batch->buffer)
+	 driBOUnReference(r->buf);
    }
 
    batch->list_count = 0;
@@ -118,7 +149,10 @@ intel_batchbuffer_reset(struct intel_bat
 
 
    batch->map = driBOMap(batch->buffer, DRM_BO_FLAG_WRITE, 0);
-   batch->ptr = batch->map;
+
+   batch->segment_finish_offset[0] = batch->segment_start_offset[0];
+   batch->segment_finish_offset[1] = batch->segment_start_offset[1];
+   batch->segment_finish_offset[2] = batch->segment_start_offset[2];
 }
 
 /*======================================================================
@@ -136,6 +170,19 @@ intel_batchbuffer_alloc(struct intel_con
                  DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_EXE, 0);
    batch->last_fence = NULL;
    driBOCreateList(20, &batch->list);
+
+   batch->segment_start_offset[0] = 0 * SEGMENT_SZ;
+   batch->segment_start_offset[1] = 1 * SEGMENT_SZ;
+   batch->segment_start_offset[2] = 2 * SEGMENT_SZ;
+
+   batch->segment_finish_offset[0] = 0 * SEGMENT_SZ;
+   batch->segment_finish_offset[1] = 1 * SEGMENT_SZ;
+   batch->segment_finish_offset[2] = 2 * SEGMENT_SZ;
+
+   batch->segment_max_offset[0] = 1 * SEGMENT_SZ - BATCH_RESERVED;
+   batch->segment_max_offset[1] = 2 * SEGMENT_SZ;
+   batch->segment_max_offset[2] = 3 * SEGMENT_SZ;
+
    intel_batchbuffer_reset(batch);
    return batch;
 }
@@ -187,8 +234,8 @@ do_flush_locked(struct intel_batchbuffer
       ptr[r->offset / 4] = driBOOffset(r->buf) + r->delta;
    }
 
-   if (INTEL_DEBUG & DEBUG_BATCH)
-      intel_dump_batchbuffer(0, ptr, used);
+/*    if (INTEL_DEBUG & DEBUG_BATCH) */
+      intel_dump_batchbuffer(batch, ptr);
 
    driBOUnmap(batch->buffer);
    batch->map = NULL;
@@ -252,8 +299,9 @@ struct _DriFenceObject *
 intel_batchbuffer_flush(struct intel_batchbuffer *batch)
 {
    struct intel_context *intel = batch->intel;
-   GLuint used = batch->ptr - batch->map;
+   GLuint used = batch->segment_finish_offset[0] - batch->segment_start_offset[0];
    GLboolean was_locked = intel->locked;
+   GLint *ptr = (GLint *)(batch->map + batch->segment_finish_offset[0]);
 
    if (used == 0)
       return batch->last_fence;
@@ -265,19 +313,18 @@ intel_batchbuffer_flush(struct intel_bat
     * performance drain that we would like to avoid.
     */
    if (used & 4) {
-      ((int *) batch->ptr)[0] = intel->vtbl.flush_cmd();
-      ((int *) batch->ptr)[1] = 0;
-      ((int *) batch->ptr)[2] = MI_BATCH_BUFFER_END;
+      ptr[0] = intel->vtbl.flush_cmd();
+      ptr[1] = 0;
+      ptr[2] = MI_BATCH_BUFFER_END;
       used += 12;
    }
    else {
-      ((int *) batch->ptr)[0] = intel->vtbl.flush_cmd();
-      ((int *) batch->ptr)[1] = MI_BATCH_BUFFER_END;
+      ptr[0] = intel->vtbl.flush_cmd();
+      ptr[1] = MI_BATCH_BUFFER_END;
       used += 8;
    }
 
    driBOUnmap(batch->buffer);
-   batch->ptr = NULL;
    batch->map = NULL;
 
    /* TODO: Just pass the relocation list and dma buffer up to the
@@ -312,33 +359,38 @@ intel_batchbuffer_finish(struct intel_ba
  */
 GLboolean
 intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch,
+			     GLuint segment,
                              struct _DriBufferObject *buffer,
                              GLuint flags, GLuint mask, GLuint delta)
 {
    assert(batch->nr_relocs < MAX_RELOCS);
 
-   driBOAddListItem(&batch->list, buffer, flags, mask);
+   if (buffer != batch->buffer)
+      driBOAddListItem(&batch->list, buffer, flags, mask);
 
    {
       struct buffer_reloc *r = &batch->reloc[batch->nr_relocs++];
-      driBOReference(buffer);
+
+      if (buffer != batch->buffer)
+	 driBOReference(buffer);
+
       r->buf = buffer;
-      r->offset = batch->ptr - batch->map;
+      r->offset = batch->segment_finish_offset[segment];
       r->delta = delta;
    }
 
-   batch->ptr += 4;
+   batch->segment_finish_offset[segment] += 4;
    return GL_TRUE;
 }
 
 
-
 void
 intel_batchbuffer_data(struct intel_batchbuffer *batch,
+		       GLuint segment,
                        const void *data, GLuint bytes, GLuint flags)
 {
    assert((bytes & 3) == 0);
-   intel_batchbuffer_require_space(batch, bytes, flags);
-   __memcpy(batch->ptr, data, bytes);
-   batch->ptr += bytes;
+   intel_batchbuffer_require_space(batch, segment, bytes, flags);
+   __memcpy(batch->map + batch->segment_finish_offset[segment], data, bytes);
+   batch->segment_finish_offset[segment] += bytes;
 }
diff --git a/src/mesa/drivers/dri/i915tex/intel_batchbuffer.h b/src/mesa/drivers/dri/i915tex/intel_batchbuffer.h
index 916bcae..8b7f988 100644
--- a/src/mesa/drivers/dri/i915tex/intel_batchbuffer.h
+++ b/src/mesa/drivers/dri/i915tex/intel_batchbuffer.h
@@ -9,7 +9,8 @@ struct intel_context;
 /* Must be able to hold at minimum VB->Size * 3 * 2 bytes for
  * intel_idx_render.c indices, which is currently about 20k.
  */
-#define BATCH_SZ (64*1024)
+#define BATCH_SZ (3*32*1024)
+#define SEGMENT_SZ (32*1024)
 #define BATCH_RESERVED 16
 
 #define MAX_RELOCS 400
@@ -24,6 +25,13 @@ struct buffer_reloc
    GLuint delta;                /* not needed? */
 };
 
+enum {
+   SEGMENT_IMMEDIATE = 0,
+   SEGMENT_DYNAMIC_INDIRECT = 1,
+   SEGMENT_OTHER_INDIRECT = 2,
+   NR_SEGMENTS = 3
+};
+
 struct intel_batchbuffer
 {
    struct bufmgr *bm;
@@ -36,11 +44,18 @@ struct intel_batchbuffer
    drmBOList list;
    GLuint list_count;
    GLubyte *map;
-   GLubyte *ptr;
 
    struct buffer_reloc reloc[MAX_RELOCS];
    GLuint nr_relocs;
    GLuint size;
+
+   /* Put all the different types of packets into one buffer for
+    * easier validation.  This will have to change, but for now it is
+    * enough to get started.
+    */
+   GLuint segment_start_offset[NR_SEGMENTS];
+   GLuint segment_finish_offset[NR_SEGMENTS];
+   GLuint segment_max_offset[NR_SEGMENTS];
 };
 
 struct intel_batchbuffer *intel_batchbuffer_alloc(struct intel_context
@@ -62,12 +77,15 @@ void intel_batchbuffer_reset(struct inte
  * intel_buffer_dword() calls.
  */
 void intel_batchbuffer_data(struct intel_batchbuffer *batch,
+			    GLuint segment,
                             const void *data, GLuint bytes, GLuint flags);
 
 void intel_batchbuffer_release_space(struct intel_batchbuffer *batch,
+				     GLuint segment,
                                      GLuint bytes);
 
 GLboolean intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch,
+				       GLuint segment,
                                        struct _DriBufferObject *buffer,
                                        GLuint flags,
                                        GLuint mask, GLuint offset);
@@ -78,27 +96,35 @@ GLboolean intel_batchbuffer_emit_reloc(s
  * work...
  */
 static INLINE GLuint
-intel_batchbuffer_space(struct intel_batchbuffer *batch)
+intel_batchbuffer_space(struct intel_batchbuffer *batch,
+			GLuint segment)
 {
-   return (batch->size - BATCH_RESERVED) - (batch->ptr - batch->map);
+   return (batch->segment_max_offset[segment] - 
+	   batch->segment_finish_offset[segment]);
 }
 
 
 static INLINE void
-intel_batchbuffer_emit_dword(struct intel_batchbuffer *batch, GLuint dword)
+intel_batchbuffer_emit_dword(struct intel_batchbuffer *batch, 
+			     GLuint segment,
+			     GLuint dword)
 {
    assert(batch->map);
-   assert(intel_batchbuffer_space(batch) >= 4);
-   *(GLuint *) (batch->ptr) = dword;
-   batch->ptr += 4;
+   assert(intel_batchbuffer_space(batch, segment) >= 4);
+   *(GLuint *) (batch->map + batch->segment_finish_offset[segment]) = dword;
+   batch->segment_finish_offset[segment] += 4;
 }
 
 static INLINE void
 intel_batchbuffer_require_space(struct intel_batchbuffer *batch,
+				GLuint segment,
                                 GLuint sz, GLuint flags)
 {
-   assert(sz < batch->size - 8);
-   if (intel_batchbuffer_space(batch) < sz ||
+   /* XXX:  need to figure out flushing, etc.
+    */
+   assert(sz < SEGMENT_SZ);
+
+   if (intel_batchbuffer_space(batch, segment) < sz ||
        (batch->flags != 0 && flags != 0 && batch->flags != flags))
       intel_batchbuffer_flush(batch);
 
@@ -109,19 +135,44 @@ intel_batchbuffer_require_space(struct i
  */
 #define BATCH_LOCALS
 
-#define BEGIN_BATCH(n, flags) do {				\
+
+/* Hack for indirect emit:
+ */
+#define BEGIN_BATCH_SEGMENT(seg, n, flags) do {				\
    assert(!intel->prim.flush);					\
-   intel_batchbuffer_require_space(intel->batch, (n)*4, flags);	\
+   intel_batchbuffer_require_space(intel->batch, seg, (n)*4, flags);	\
+   _mesa_printf("BEGIN_BATCH(%d,%d,%d) in %s\n", seg, n, flags, __FUNCTION__); \
 } while (0)
 
-#define OUT_BATCH(d)  intel_batchbuffer_emit_dword(intel->batch, d)
+#define OUT_BATCH_SEGMENT(seg, d) do {				\
+      _mesa_printf("OUT_BATCH(%d, 0x%08x)\n", seg, d);  		\
+      intel_batchbuffer_emit_dword(intel->batch, seg, d);	\
+} while (0)
+
+#define OUT_BATCH_F_SEGMENT(seg, fl) do {			\
+   fi_type fi;					\
+   fi.f = fl;					\
+   _mesa_printf("OUT_BATCH(%d, 0x%08x)\n", seg, fi.i);  \
+   intel_batchbuffer_emit_dword(intel->batch, seg, fi.i);	\
+} while (0)
 
-#define OUT_RELOC(buf,flags,mask,delta) do { 				\
-   assert((delta) >= 0);							\
-   intel_batchbuffer_emit_reloc(intel->batch, buf, flags, mask, delta);	\
+#define OUT_RELOC_SEGMENT(seg, buf,flags,mask,delta) do {				\
+   assert((delta) >= 0);						\
+   _mesa_printf("OUT_RELOC( seg %d buf %p offset %x )\n", seg, buf, delta);		\
+   intel_batchbuffer_emit_reloc(intel->batch, seg, buf, flags, mask, delta);	\
 } while (0)
 
-#define ADVANCE_BATCH() do { } while(0)
+#define ADVANCE_BATCH_SEGMENT(seg) do { \
+   _mesa_printf("ADVANCE_BATCH()\n");		\
+} while(0)
+
+
+#define BEGIN_BATCH(n, flags)           BEGIN_BATCH_SEGMENT(0, n, flags)
+#define OUT_BATCH(d)                    OUT_BATCH_SEGMENT(0, d)
+#define OUT_BATCH_F(fl)                 OUT_BATCH_F_SEGMENT(0, fl)
+#define OUT_RELOC(buf,flags,mask,delta) OUT_RELOC_SEGMENT(0,buf,flags,mask, delta)
+#define ADVANCE_BATCH()                 ADVANCE_BATCH_SEGMENT(0)
+
 
 
 #endif
diff --git a/src/mesa/drivers/dri/i915tex/intel_context.c b/src/mesa/drivers/dri/i915tex/intel_context.c
index 6786c5c..208e530 100644
--- a/src/mesa/drivers/dri/i915tex/intel_context.c
+++ b/src/mesa/drivers/dri/i915tex/intel_context.c
@@ -263,7 +263,7 @@ intelFlush(GLcontext * ctx)
 
    INTEL_FIREVERTICES(intel);
 
-   if (intel->batch->map != intel->batch->ptr)
+   if (intel->batch->segment_finish_offset[0] != 0)
       intel_batchbuffer_flush(intel->batch);
 
    /* XXX: Need to do an MI_FLUSH here.
diff --git a/src/mesa/drivers/dri/i915tex/intel_tris.c b/src/mesa/drivers/dri/i915tex/intel_tris.c
index 1ba49d8..aaf4d71 100644
--- a/src/mesa/drivers/dri/i915tex/intel_tris.c
+++ b/src/mesa/drivers/dri/i915tex/intel_tris.c
@@ -56,7 +56,7 @@ static void intelRasterPrimitive(GLconte
 static void
 intel_flush_inline_primitive(struct intel_context *intel)
 {
-   GLuint used = intel->batch->ptr - intel->prim.start_ptr;
+   GLuint used = intel->batch->segment_finish_offset[0];
 
    assert(intel->prim.primitive != ~0);
 
@@ -71,7 +71,7 @@ intel_flush_inline_primitive(struct inte
    goto finished;
 
  do_discard:
-   intel->batch->ptr -= used;
+   intel->batch->segment_finish_offset[0] -= used;
 
  finished:
    intel->prim.primitive = ~0;
@@ -95,7 +95,8 @@ intelStartInlinePrimitive(struct intel_c
     * be emitted to a batchbuffer missing the required full-state
     * preamble.
     */
-   if (intel_batchbuffer_space(intel->batch) < 100) {
+   if (intel_batchbuffer_space(intel->batch, 0) < 100) {
+      assert(0);		/* XXX: later! */
       intel_batchbuffer_flush(intel->batch);
       intel->vtbl.emit_state(intel);
    }
@@ -108,7 +109,7 @@ intelStartInlinePrimitive(struct intel_c
    BEGIN_BATCH(2, batch_flags);
    OUT_BATCH(0);
 
-   intel->prim.start_ptr = intel->batch->ptr;
+   intel->prim.start_ptr = intel->batch->map + intel->batch->segment_start_offset[0];
    intel->prim.primitive = prim;
    intel->prim.flush = intel_flush_inline_primitive;
 
@@ -138,15 +139,17 @@ intelExtendInlinePrimitive(struct intel_
 
    assert(intel->prim.flush == intel_flush_inline_primitive);
 
-   if (intel_batchbuffer_space(intel->batch) < sz)
+   if (intel_batchbuffer_space(intel->batch, 0) < sz) {
+      assert(0);		/* XXX: later */
       intelWrapInlinePrimitive(intel);
+   }
 
 /*    _mesa_printf("."); */
 
    intel->vtbl.assert_not_dirty(intel);
 
-   ptr = (GLuint *) intel->batch->ptr;
-   intel->batch->ptr += sz;
+   ptr = (GLuint *) (intel->batch->map + intel->batch->segment_finish_offset[0]);
+   intel->batch->segment_finish_offset[0] += sz;
 
    return ptr;
 }



More information about the mesa-commit mailing list