xf86-video-intel: Branch 'refs/remotes/origin/intel-batchbuffer' - 9 commits - src/i830_accel.c src/i830_dri.c src/i830_driver.c src/i830_exa.c src/i830.h src/i830_xaa.c src/i965_render.c src/intel_batchbuffer.h

Jesse Barnes jbarnes at kemper.freedesktop.org
Mon Oct 22 14:22:43 PDT 2007


 src/i830.h              |    4 
 src/i830_accel.c        |   29 -
 src/i830_dri.c          |   26 -
 src/i830_driver.c       |    3 
 src/i830_exa.c          |    2 
 src/i830_xaa.c          |  118 +++---
 src/i965_render.c       |  832 +++++++++++++++++++++++++++---------------------
 src/intel_batchbuffer.h |   10 
 8 files changed, 580 insertions(+), 444 deletions(-)

New commits:
commit cc25171dbdeeb4696bf0e014d6b811df45b73b21
Author: Dave Airlie <airlied at linux.ie>
Date:   Mon Oct 22 09:36:27 2007 +1000

    batchbuffer: don't wait for ring, do batchbuffer finish

diff --git a/src/i830_accel.c b/src/i830_accel.c
index 5cfd46e..46f969f 100644
--- a/src/i830_accel.c
+++ b/src/i830_accel.c
@@ -184,7 +184,7 @@ I830Sync(ScrnInfoPtr pScrn)
    if (pI830->entityPrivate && !pI830->entityPrivate->RingRunning) return;
 
    if (pI830->use_ttm_batch) {
-     intel_batchbuffer_flush(pI830->batch);
+     intel_batchbuffer_finish(pI830->batch);
    }
    else
    {
@@ -202,10 +202,10 @@ I830Sync(ScrnInfoPtr pScrn)
        OUT_RING(MI_NOOP);		/* pad to quadword */
        ADVANCE_LP_RING();
      }
-   }
-   I830WaitLpRing(pScrn, pI830->LpRing->mem->size - 8, 0);
+     I830WaitLpRing(pScrn, pI830->LpRing->mem->size - 8, 0);
 
-   pI830->LpRing->space = pI830->LpRing->mem->size - 8;
+     pI830->LpRing->space = pI830->LpRing->mem->size - 8;
+   }
    pI830->nextColorExpandBuf = 0;
 }
 
@@ -215,14 +215,17 @@ I830EmitFlush(ScrnInfoPtr pScrn)
    I830Ptr pI830 = I830PTR(pScrn);
    int flags = MI_WRITE_DIRTY_STATE | MI_INVALIDATE_MAP_CACHE;
 
-   if (IS_I965G(pI830))
-      flags = 0;
-
-   {
-       BEGIN_LP_RING(2);
-       OUT_RING(MI_FLUSH | flags);
-       OUT_RING(MI_NOOP);		/* pad to quadword */
-       ADVANCE_LP_RING();
+   if (pI830->use_ttm_batch)
+      intel_batchbuffer_flush(pI830->batch);
+   else {
+      if (IS_I965G(pI830))
+         flags = 0;
+       {
+           BEGIN_BATCH(2);
+           OUT_BATCH(MI_FLUSH | flags);
+           OUT_BATCH(MI_NOOP);		/* pad to quadword */
+           ADVANCE_BATCH();
+       }
    }
 }
 
commit 0711e7fb1278e7467a6ede56df631b56b099d9d6
Author: Dave Airlie <airlied at linux.ie>
Date:   Mon Oct 22 09:36:10 2007 +1000

    xaa: fix misplaced bracket

diff --git a/src/i830_xaa.c b/src/i830_xaa.c
index 4edea27..382dd53 100644
--- a/src/i830_xaa.c
+++ b/src/i830_xaa.c
@@ -326,7 +326,7 @@ I830SetupForSolidFill(ScrnInfoPtr pScrn, int color, int rop,
      */
     pI830->BR[13] |= (I830PatternROP[rop] << 16);
 #else
-    pI830->BR[13] |= ((XAAGetPatternROP(rop) << 16);
+    pI830->BR[13] |= (XAAGetPatternROP(rop) << 16);
 #endif
 
     pI830->BR[16] = color;
commit d1795a6bdeb2ebe0612b9e8809540277a8edaefd
Author: Dave Airlie <airlied at linux.ie>
Date:   Mon Oct 22 09:35:52 2007 +1000

    dri: attempt to use batchbuffer for refresh area

diff --git a/src/i830_dri.c b/src/i830_dri.c
index 2a071ff..2746e89 100644
--- a/src/i830_dri.c
+++ b/src/i830_dri.c
@@ -1065,16 +1065,16 @@ I830DRIDoRefreshArea (ScrnInfoPtr pScrn, int num, BoxPtr pbox, CARD32 dst)
    }
 
    for (i = 0 ; i < num ; i++, pbox++) {
-      BEGIN_LP_RING(8);
-      OUT_RING(cmd);
-      OUT_RING(br13);
-      OUT_RING((pbox->y1 << 16) | pbox->x1);
-      OUT_RING((pbox->y2 << 16) | pbox->x2);
-      OUT_RING(dst);
-      OUT_RING((pbox->y1 << 16) | pbox->x1);
-      OUT_RING(br13 & 0xffff);
-      OUT_RING(pI830->front_buffer->offset);
-      ADVANCE_LP_RING();
+      BEGIN_BATCH(8);
+      OUT_BATCH(cmd);
+      OUT_BATCH(br13);
+      OUT_BATCH((pbox->y1 << 16) | pbox->x1);
+      OUT_BATCH((pbox->y2 << 16) | pbox->x2);
+      OUT_BATCH(dst);
+      OUT_BATCH((pbox->y1 << 16) | pbox->x1);
+      OUT_BATCH(br13 & 0xffff);
+      OUT_BATCH(pI830->front_buffer->offset);
+      ADVANCE_BATCH();
    }
 }
 
@@ -1090,12 +1090,17 @@ I830DRIRefreshArea (ScrnInfoPtr pScrn, int num, BoxPtr pbox)
    if (!pSAREAPriv->pf_active && pSAREAPriv->pf_current_page == 0)
       return;
 
+   if (pI830->use_ttm_batch)
+       intel_batchbuffer_flush(pI830->batch);
+
    I830DRIDoRefreshArea(pScrn, num, pbox, pI830->back_buffer->offset);
 
    if (pI830->third_buffer) {
       I830DRIDoRefreshArea(pScrn, num, pbox, pI830->third_buffer->offset);
    }
 
+   if (pI830->use_ttm_batch)
+       intel_batchbuffer_finish(pI830->batch);
    DamageEmpty(pI830->pDamage);
 }
 #endif
commit 8b79a6fc7ebc0d2b1e9497b2dcd45e3ae1ffda93
Author: Dave Airlie <airlied at redhat.com>
Date:   Mon Oct 22 09:13:44 2007 +1100

    xaa: use batchbuffer code paths

diff --git a/src/i830_xaa.c b/src/i830_xaa.c
index fabac20..4edea27 100644
--- a/src/i830_xaa.c
+++ b/src/i830_xaa.c
@@ -352,22 +352,22 @@ I830SubsequentSolidFillRect(ScrnInfoPtr pScrn, int x, int y, int w, int h)
 	ErrorF("I830SubsequentFillRectSolid %d,%d %dx%d\n", x, y, w, h);
 
     {
-	BEGIN_LP_RING(6);
+	BEGIN_BATCH(6);
 
 	if (pScrn->bitsPerPixel == 32) {
-	    OUT_RING(COLOR_BLT_CMD | COLOR_BLT_WRITE_ALPHA |
+	    OUT_BATCH(COLOR_BLT_CMD | COLOR_BLT_WRITE_ALPHA |
 		     COLOR_BLT_WRITE_RGB);
 	} else {
-	    OUT_RING(COLOR_BLT_CMD);
+	    OUT_BATCH(COLOR_BLT_CMD);
 	}
-	OUT_RING(pI830->BR[13]);
-	OUT_RING((h << 16) | (w * pI830->cpp));
-	OUT_RING(pI830->bufferOffset + (y * pScrn->displayWidth + x) *
+	OUT_BATCH(pI830->BR[13]);
+	OUT_BATCH((h << 16) | (w * pI830->cpp));
+	OUT_BATCH(pI830->bufferOffset + (y * pScrn->displayWidth + x) *
 		 pI830->cpp);
-	OUT_RING(pI830->BR[16]);
-	OUT_RING(0);
+	OUT_BATCH(pI830->BR[16]);
+	OUT_BATCH(0);
 
-	ADVANCE_LP_RING();
+	ADVANCE_BATCH();
     }
 
     if (IS_I965G(pI830))
@@ -429,23 +429,23 @@ I830SubsequentScreenToScreenCopy(ScrnInfoPtr pScrn, int src_x1, int src_y1,
     dst_y2 = dst_y1 + h;
 
     {
-	BEGIN_LP_RING(8);
+	BEGIN_BATCH(8);
 
 	if (pScrn->bitsPerPixel == 32) {
-	    OUT_RING(XY_SRC_COPY_BLT_CMD | XY_SRC_COPY_BLT_WRITE_ALPHA |
+	    OUT_BATCH(XY_SRC_COPY_BLT_CMD | XY_SRC_COPY_BLT_WRITE_ALPHA |
 		     XY_SRC_COPY_BLT_WRITE_RGB | tiled << 15 | tiled << 11);
 	} else {
-	    OUT_RING(XY_SRC_COPY_BLT_CMD | tiled << 15 | tiled << 11);
+	    OUT_BATCH(XY_SRC_COPY_BLT_CMD | tiled << 15 | tiled << 11);
 	}
-	OUT_RING(pI830->BR[13]);
-	OUT_RING((dst_y1 << 16) | (dst_x1 & 0xffff));
-	OUT_RING((dst_y2 << 16) | (dst_x2 & 0xffff));
-	OUT_RING(pI830->bufferOffset);
-	OUT_RING((src_y1 << 16) | (src_x1 & 0xffff));
-	OUT_RING(pI830->BR[13] & 0xFFFF);
-	OUT_RING(pI830->bufferOffset);
-
-	ADVANCE_LP_RING();
+	OUT_BATCH(pI830->BR[13]);
+	OUT_BATCH((dst_y1 << 16) | (dst_x1 & 0xffff));
+	OUT_BATCH((dst_y2 << 16) | (dst_x2 & 0xffff));
+	OUT_BATCH(pI830->bufferOffset);
+	OUT_BATCH((src_y1 << 16) | (src_x1 & 0xffff));
+	OUT_BATCH(pI830->BR[13] & 0xFFFF);
+	OUT_BATCH(pI830->bufferOffset);
+
+	ADVANCE_BATCH();
     }
 
     if (IS_I965G(pI830))
@@ -506,28 +506,28 @@ I830SubsequentMono8x8PatternFillRect(ScrnInfoPtr pScrn, int pattx, int patty,
 	ErrorF("I830SubsequentMono8x8PatternFillRect\n");
 
     {
-	BEGIN_LP_RING(10);
+	BEGIN_BATCH(10);
 
 	if (pScrn->bitsPerPixel == 32) {
-	    OUT_RING(XY_MONO_PAT_BLT_CMD | XY_MONO_PAT_BLT_WRITE_ALPHA |
+	    OUT_BATCH(XY_MONO_PAT_BLT_CMD | XY_MONO_PAT_BLT_WRITE_ALPHA |
 		     XY_MONO_PAT_BLT_WRITE_RGB | tiled << 11 |
 		     ((patty << 8) & XY_MONO_PAT_VERT_SEED) |
 		     ((pattx << 12) & XY_MONO_PAT_HORT_SEED));
 	} else {
-	    OUT_RING(XY_MONO_PAT_BLT_CMD | tiled << 11 |
+	    OUT_BATCH(XY_MONO_PAT_BLT_CMD | tiled << 11 |
 		     ((patty << 8) & XY_MONO_PAT_VERT_SEED) |
 		     ((pattx << 12) & XY_MONO_PAT_HORT_SEED));
 	}
-	OUT_RING(pI830->BR[13]);
-	OUT_RING((y1 << 16) | x1);
-	OUT_RING((y2 << 16) | x2);
-	OUT_RING(pI830->bufferOffset);
-	OUT_RING(pI830->BR[18]);		/* bg */
-	OUT_RING(pI830->BR[19]);		/* fg */
-	OUT_RING(pI830->BR[16]);		/* pattern data */
-	OUT_RING(pI830->BR[17]);
-	OUT_RING(0);
-	ADVANCE_LP_RING();
+	OUT_BATCH(pI830->BR[13]);
+	OUT_BATCH((y1 << 16) | x1);
+	OUT_BATCH((y2 << 16) | x2);
+	OUT_BATCH(pI830->bufferOffset);
+	OUT_BATCH(pI830->BR[18]);		/* bg */
+	OUT_BATCH(pI830->BR[19]);		/* fg */
+	OUT_BATCH(pI830->BR[16]);		/* pattern data */
+	OUT_BATCH(pI830->BR[17]);
+	OUT_BATCH(0);
+	ADVANCE_BATCH();
     }
 
     if (IS_I965G(pI830))
@@ -630,23 +630,23 @@ I830SubsequentColorExpandScanline(ScrnInfoPtr pScrn, int bufno)
 	       bufno, pI830->BR[12]);
 
     {
-	BEGIN_LP_RING(8);
+	BEGIN_BATCH(8);
 
 	if (pScrn->bitsPerPixel == 32) {
-	    OUT_RING(XY_MONO_SRC_BLT_CMD | XY_MONO_SRC_BLT_WRITE_ALPHA |
+	    OUT_BATCH(XY_MONO_SRC_BLT_CMD | XY_MONO_SRC_BLT_WRITE_ALPHA |
 		     tiled << 11 | XY_MONO_SRC_BLT_WRITE_RGB);
 	} else {
-	    OUT_RING(XY_MONO_SRC_BLT_CMD | tiled << 11);
+	    OUT_BATCH(XY_MONO_SRC_BLT_CMD | tiled << 11);
 	}
-	OUT_RING(pI830->BR[13]);
-	OUT_RING(0);			/* x1 = 0, y1 = 0 */
-	OUT_RING(pI830->BR[11]);		/* x2 = w, y2 = 1 */
-	OUT_RING(pI830->BR[9]);		/* dst addr */
-	OUT_RING(pI830->BR[12]);		/* src addr */
-	OUT_RING(pI830->BR[18]);		/* bg */
-	OUT_RING(pI830->BR[19]);		/* fg */
-
-	ADVANCE_LP_RING();
+	OUT_BATCH(pI830->BR[13]);
+	OUT_BATCH(0);			/* x1 = 0, y1 = 0 */
+	OUT_BATCH(pI830->BR[11]);		/* x2 = w, y2 = 1 */
+	OUT_BATCH(pI830->BR[9]);		/* dst addr */
+	OUT_BATCH(pI830->BR[12]);		/* src addr */
+	OUT_BATCH(pI830->BR[18]);		/* bg */
+	OUT_BATCH(pI830->BR[19]);		/* fg */
+
+	ADVANCE_BATCH();
     }
 
     /* Advance to next scanline.
@@ -730,23 +730,23 @@ I830SubsequentImageWriteScanline(ScrnInfoPtr pScrn, int bufno)
 	       bufno, pI830->BR[12]);
 
     {
-	BEGIN_LP_RING(8);
+	BEGIN_BATCH(8);
 
 	if (pScrn->bitsPerPixel == 32) {
-	    OUT_RING(XY_SRC_COPY_BLT_CMD | XY_SRC_COPY_BLT_WRITE_ALPHA |
+	    OUT_BATCH(XY_SRC_COPY_BLT_CMD | XY_SRC_COPY_BLT_WRITE_ALPHA |
 		     tiled << 11 | XY_SRC_COPY_BLT_WRITE_RGB);
 	} else {
-	    OUT_RING(XY_SRC_COPY_BLT_CMD | tiled << 11);
+	    OUT_BATCH(XY_SRC_COPY_BLT_CMD | tiled << 11);
 	}
-	OUT_RING(pI830->BR[13]);
-	OUT_RING(0);				/* x1 = 0, y1 = 0 */
-	OUT_RING(pI830->BR[11]);		/* x2 = w, y2 = 1 */
-	OUT_RING(pI830->BR[9]);			/* dst addr */
-	OUT_RING(0);				/* source origin (0,0) */
-	OUT_RING(pI830->BR[11] & 0xffff);	/* source pitch */
-	OUT_RING(pI830->BR[12]);		/* src addr */
-
-	ADVANCE_LP_RING();
+	OUT_BATCH(pI830->BR[13]);
+	OUT_BATCH(0);				/* x1 = 0, y1 = 0 */
+	OUT_BATCH(pI830->BR[11]);		/* x2 = w, y2 = 1 */
+	OUT_BATCH(pI830->BR[9]);			/* dst addr */
+	OUT_BATCH(0);				/* source origin (0,0) */
+	OUT_BATCH(pI830->BR[11] & 0xffff);	/* source pitch */
+	OUT_BATCH(pI830->BR[12]);		/* src addr */
+
+	ADVANCE_BATCH();
     }
 
     /* Advance to next scanline.
commit 773eb6a4f0a50f013f937aaf687fa739d7b380ab
Author: Dave Airlie <airlied at redhat.com>
Date:   Mon Oct 22 09:13:31 2007 +1100

    batchbuffer: don't do a ring flush here

diff --git a/src/i830_accel.c b/src/i830_accel.c
index 1997605..5cfd46e 100644
--- a/src/i830_accel.c
+++ b/src/i830_accel.c
@@ -186,7 +186,7 @@ I830Sync(ScrnInfoPtr pScrn)
    if (pI830->use_ttm_batch) {
      intel_batchbuffer_flush(pI830->batch);
    }
-
+   else
    {
      if (IS_I965G(pI830))
        flags = 0;
commit d24452e7296054f5ab627fe27e7e2567f7fbae9e
Author: Dave Airlie <airlied at linux.ie>
Date:   Fri Oct 19 15:09:49 2007 +1000

    i965: oops, always forgetting the initialisers

diff --git a/src/i965_render.c b/src/i965_render.c
index b0c2842..1e423a2 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -723,9 +723,9 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
 {
     ScrnInfoPtr pScrn = xf86Screens[pSrcPicture->pDrawable->pScreen->myNum];
     I830Ptr pI830 = I830PTR(pScrn);
-    CARD32 src_pitch, src_tile_format, src_tiled;
+    CARD32 src_pitch, src_tile_format = 0, src_tiled = 0;
     CARD32 mask_pitch = 0, mask_tile_format = 0, mask_tiled = 0;
-    CARD32 dst_format, dst_pitch, dst_tile_format, dst_tiled;
+    CARD32 dst_format, dst_pitch, dst_tile_format = 0, dst_tiled = 0;
     Bool rotation_program = FALSE;
     struct brw_cc_unit_state *cc_state;
     int need_ps_kernel, need_sf_kernel;
commit 2012d3f1925a7cdb2b0fdc800ec0c4f577e156b7
Author: Dave Airlie <airlied at linux.ie>
Date:   Fri Oct 19 15:05:19 2007 +1000

    i965/exa: fixup code for relocations using new bufmgr

diff --git a/src/i965_render.c b/src/i965_render.c
index edf36a8..b0c2842 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -808,14 +808,15 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
     i965_get_dest_format(pDstPicture, &dst_format);
     dest_surf_state->ss0.surface_format = dst_format;
 
-#ifdef I830_USE_BB
-    i830_batchbuffer_emit_pixmap(pDst,
-				 DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE,
-				 DRM_BO_MASK_MEM | DRM_BO_FLAG_WRITE | DRM_BO_FLAG_CACHED,
-				 exa_buf->bo.handle, dest_surf_offset + 4, 0);
-#else
-    dest_surf_state->ss1.base_addr = intel_get_pixmap_offset(pDst);
-#endif
+    if (pI830->use_ttm_batch) {
+    	intel_batchbuffer_emit_pixmap(pDst,
+				     DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE,
+				     DRM_BO_MASK_MEM | DRM_BO_FLAG_WRITE | DRM_BO_FLAG_CACHED,
+				     pI830->exa965->buf, dest_surf_offset + 4, 0);
+    } else {
+        dest_surf_state->ss1.base_addr = intel_get_pixmap_offset(pDst);
+    }
+
     dest_surf_state->ss2.height = pDst->drawable.height - 1;
     dest_surf_state->ss2.width = pDst->drawable.width - 1;
     dest_surf_state->ss3.pitch = dst_pitch - 1;
@@ -826,14 +827,14 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
     src_surf_state = (void *)(start_base + src_surf_offset);
     src_surf_state->ss0.surface_format = i965_get_card_format(pSrcPicture);
 
-#ifdef I830_USE_BB
-    i830_batchbuffer_emit_pixmap(pSrc,
+    if (pI830->use_ttm_batch) {
+        intel_batchbuffer_emit_pixmap(pSrc,
 				 DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
 				 DRM_BO_MASK_MEM | DRM_BO_FLAG_READ | DRM_BO_FLAG_CACHED,
-				 exa_buf->bo.handle, src_surf_offset + 4, 0);
-#else
-    src_surf_state->ss1.base_addr = intel_get_pixmap_offset(pSrc);
-#endif
+				 pI830->exa965->buf, src_surf_offset + 4, 0);
+    } else {
+        src_surf_state->ss1.base_addr = intel_get_pixmap_offset(pSrc);
+    }
     src_surf_state->ss2.width = pSrc->drawable.width - 1;
     src_surf_state->ss2.height = pSrc->drawable.height - 1;
     src_surf_state->ss3.pitch = src_pitch - 1;
@@ -844,14 +845,14 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
     if (pMask) {
 	mask_surf_state = (void *)(start_base + mask_surf_offset);
    	mask_surf_state->ss0.surface_format = i965_get_card_format(pMaskPicture);
-#ifdef I830_USE_BB
-	i830_batchbuffer_emit_pixmap(pMask, 
+        if (pI830->use_ttm_batch) {
+	   intel_batchbuffer_emit_pixmap(pMask, 
 				     DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
 				     DRM_BO_MASK_MEM | DRM_BO_FLAG_READ | DRM_BO_FLAG_CACHED,
-				     exa_buf->bo.handle, mask_surf_offset + 4, 0);
-#else
-	mask_surf_state->ss1.base_addr = intel_get_pixmap_offset(pMask);
-#endif
+				     pI830->exa965->buf, mask_surf_offset + 4, 0);
+        } else {
+	    mask_surf_state->ss1.base_addr = intel_get_pixmap_offset(pMask);
+	}
    	mask_surf_state->ss2.width = pMask->drawable.width - 1;
    	mask_surf_state->ss2.height = pMask->drawable.height - 1;
    	mask_surf_state->ss3.pitch = mask_pitch - 1;
commit c7dea70a6d6d7579093d1e175f5b74b89c262f26
Author: Dave Airlie <airlied at linux.ie>
Date:   Fri Oct 19 14:58:54 2007 +1000

    i965: move state setup function

diff --git a/src/i830_dri.c b/src/i830_dri.c
index 070f0f5..2a071ff 100644
--- a/src/i830_dri.c
+++ b/src/i830_dri.c
@@ -533,8 +533,6 @@ I830InitBufMgr(ScreenPtr pScreen)
    pI830->batch = intel_batchbuffer_alloc(pScrn);
    pI830->use_ttm_batch = TRUE;
 
-   if (IS_I965G(pI830))
-     i965_init_exa_state(pScrn);
 }
 
 Bool
diff --git a/src/i830_driver.c b/src/i830_driver.c
index 00194cb..8b246ef 100644
--- a/src/i830_driver.c
+++ b/src/i830_driver.c
@@ -2787,6 +2787,9 @@ I830ScreenInit(int scrnIndex, ScreenPtr pScreen, int argc, char **argv)
     if (pScrn->virtualX > pScrn->displayWidth)
 	pScrn->displayWidth = pScrn->virtualX;
 
+   if (IS_I965G(pI830))
+     i965_init_exa_state(pScrn);
+
    DPRINTF(PFX, "assert( if(!fbScreenInit(pScreen, ...) )\n");
    if (!fbScreenInit(pScreen, pI830->FbBase + pScrn->fbOffset, 
                      pScrn->virtualX, pScrn->virtualY,
commit f58169e9221153045c25d8ee69d3598f80f2e7f5
Author: Dave Airlie <airlied at redhat.com>
Date:   Fri Oct 19 12:58:24 2007 +1100

    add initial 965 batch support

diff --git a/src/i830.h b/src/i830.h
index 795e12f..12b4e77 100644
--- a/src/i830.h
+++ b/src/i830.h
@@ -573,6 +573,7 @@ typedef struct _I830Rec {
    unsigned int quirk_flag;
 
    /* batchbuffer support */
+   struct i965_exastate_buffer *exa965;
    struct intel_batchbuffer *batch;
    dri_bufmgr *bufmgr;
    unsigned int maxBatchSize;
@@ -759,7 +760,8 @@ Bool i965_prepare_composite(int op, PicturePtr pSrc, PicturePtr pMask,
 			    PixmapPtr pMaskPixmap, PixmapPtr pDstPixmap);
 void i965_composite(PixmapPtr pDst, int srcX, int srcY,
 		    int maskX, int maskY, int dstX, int dstY, int w, int h);
-
+void i965_done_composite(PixmapPtr pDst);
+int i965_init_exa_state(ScrnInfoPtr pScrn);
 void
 i830_get_transformed_coordinates(int x, int y, PictTransformPtr transform,
 				 float *x_out, float *y_out);
diff --git a/src/i830_dri.c b/src/i830_dri.c
index 7cc2bb1..070f0f5 100644
--- a/src/i830_dri.c
+++ b/src/i830_dri.c
@@ -532,6 +532,9 @@ I830InitBufMgr(ScreenPtr pScreen)
 	return;
    pI830->batch = intel_batchbuffer_alloc(pScrn);
    pI830->use_ttm_batch = TRUE;
+
+   if (IS_I965G(pI830))
+     i965_init_exa_state(pScrn);
 }
 
 Bool
diff --git a/src/i830_exa.c b/src/i830_exa.c
index 86e523b..3106db9 100644
--- a/src/i830_exa.c
+++ b/src/i830_exa.c
@@ -626,7 +626,7 @@ I830EXAInit(ScreenPtr pScreen)
  	pI830->EXADriverPtr->CheckComposite = i965_check_composite;
  	pI830->EXADriverPtr->PrepareComposite = i965_prepare_composite;
  	pI830->EXADriverPtr->Composite = i965_composite;
- 	pI830->EXADriverPtr->DoneComposite = i830_done_composite;
+ 	pI830->EXADriverPtr->DoneComposite = i965_done_composite;
     }
 #if EXA_VERSION_MINOR >= 4
     if (pI830->use_ttm_batch) {
diff --git a/src/i965_render.c b/src/i965_render.c
index a9d3f23..edf36a8 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -254,26 +254,20 @@ static int urb_clip_start, urb_clip_size;
 static int urb_sf_start, urb_sf_size;
 static int urb_cs_start, urb_cs_size;
 
-static struct brw_surface_state *dest_surf_state, dest_surf_state_local;
-static struct brw_surface_state *src_surf_state, src_surf_state_local;
-static struct brw_surface_state *mask_surf_state, mask_surf_state_local;
-static struct brw_sampler_state *src_sampler_state, src_sampler_state_local;
-static struct brw_sampler_state *mask_sampler_state, mask_sampler_state_local;
+static struct brw_surface_state *dest_surf_state;
+static struct brw_surface_state *src_surf_state;
+static struct brw_surface_state *mask_surf_state;
+static struct brw_sampler_state *src_sampler_state;
+static struct brw_sampler_state *mask_sampler_state;
 static struct brw_sampler_default_color *default_color_state;
 
-static struct brw_vs_unit_state *vs_state, vs_state_local;
-static struct brw_sf_unit_state *sf_state, sf_state_local;
-static struct brw_wm_unit_state *wm_state, wm_state_local;
-static struct brw_cc_unit_state *cc_state, cc_state_local;
-static struct brw_cc_viewport *cc_viewport;
-
-static struct brw_instruction *sf_kernel;
-static struct brw_instruction *ps_kernel;
-static struct brw_instruction *sip_kernel;
+static struct brw_vs_unit_state *vs_state;
+static struct brw_sf_unit_state *sf_state;
+static struct brw_wm_unit_state *wm_state;
 
 static CARD32 *binding_table;
-static int binding_table_entries;
 
+/* these offsets will remain the same for all buffers post allocation */
 static int dest_surf_offset, src_surf_offset, mask_surf_offset;
 static int src_sampler_offset, mask_sampler_offset,vs_offset;
 static int sf_offset, wm_offset, cc_offset, vb_offset, cc_viewport_offset;
@@ -281,11 +275,9 @@ static int sf_kernel_offset, ps_kernel_offset, sip_kernel_offset;
 static int wm_scratch_offset;
 static int binding_table_offset;
 static int default_color_offset;
-static int next_offset, total_state_size;
-static char *state_base;
-static int state_base_offset;
+//static int next_offset, total_state_size;
 static float *vb;
-static int vb_size = (6 * 4) * 4 ; /* 6 DWORDS per vertex - and mask*/
+static int vb_max_size, vb_index;
 
 static CARD32 src_blend, dst_blend;
 
@@ -332,6 +324,20 @@ static const CARD32 sf_kernel_static_rotation[][4] = {
 #include "exa_sf_rotation_prog.h"
 };
 
+struct i965_kernels {
+    void *kernel;
+    int size;
+
+};
+
+static struct i965_kernels sf_kernels[] = { { sf_kernel_static, sizeof(sf_kernel_static) },
+					    { sf_kernel_static_mask, sizeof(sf_kernel_static_mask) },
+					    { sf_kernel_static_rotation, sizeof(sf_kernel_static_rotation) } };
+
+#define SF_KERNEL 0
+#define SF_KERNEL_MASK 1
+#define SF_KERNEL_ROTATION 2
+
 /* ps kernels */
 #define PS_KERNEL_NUM_GRF   32
 #define PS_MAX_THREADS	   32
@@ -356,6 +362,18 @@ static const CARD32 ps_kernel_static_rotation [][4] = {
 #include "exa_wm_rotation_prog.h"
 };
 
+static struct i965_kernels ps_kernels[] = { { ps_kernel_static_nomask, sizeof(ps_kernel_static_nomask) },
+					    { ps_kernel_static_maskca, sizeof(ps_kernel_static_maskca) },
+					    { ps_kernel_static_maskca_srcalpha, sizeof(ps_kernel_static_maskca_srcalpha) },
+					    { ps_kernel_static_masknoca, sizeof(ps_kernel_static_masknoca) },
+ 					    { ps_kernel_static_rotation, sizeof(ps_kernel_static_rotation) } };
+
+#define PS_KERNEL_NOMASK 0
+#define PS_KERNEL_MASKCA 1
+#define PS_KERNEL_MASKCA_SRCALPHA 2
+#define PS_KERNEL_MASKNOCA 3
+#define PS_KERNEL_ROTATION 4
+
 static CARD32 
 i965_get_card_format(PicturePtr pPict)
 {
@@ -385,69 +403,18 @@ i965_check_rotation_transform(PictTransformPtr t)
 	return FALSE;
 }
 
-Bool
-i965_prepare_composite(int op, PicturePtr pSrcPicture,
-		       PicturePtr pMaskPicture, PicturePtr pDstPicture,
-		       PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst)
+/* initialise the state offsets; these should not change at runtime */
+static void
+i965_init_state_offsets(ScrnInfoPtr pScrn, int total_size)
 {
-    ScrnInfoPtr pScrn = xf86Screens[pSrcPicture->pDrawable->pScreen->myNum];
-    I830Ptr pI830 = I830PTR(pScrn);
-    CARD32 src_offset, src_pitch, src_tile_format = 0, src_tiled = 0;
-    CARD32 mask_offset = 0, mask_pitch = 0, mask_tile_format = 0,
-	mask_tiled = 0;
-    CARD32 dst_format, dst_offset, dst_pitch, dst_tile_format = 0,
-	dst_tiled = 0;
-    Bool rotation_program = FALSE;
-
-    IntelEmitInvarientState(pScrn);
-    *pI830->last_3d = LAST_3D_RENDER;
-
-    src_offset = intel_get_pixmap_offset(pSrc);
-    src_pitch = intel_get_pixmap_pitch(pSrc);
-    if (i830_pixmap_tiled(pSrc)) {
-	src_tiled = 1;
-	src_tile_format = 0; /* Tiled X */
-    }
-    dst_offset = intel_get_pixmap_offset(pDst);
-    dst_pitch = intel_get_pixmap_pitch(pDst);
-    if (i830_pixmap_tiled(pDst)) {
-	dst_tiled = 1;
-	dst_tile_format = 0; /* Tiled X */
-    }
-    if (pMask) {
-	mask_offset = intel_get_pixmap_offset(pMask);
-	mask_pitch = intel_get_pixmap_pitch(pMask);
-	if (i830_pixmap_tiled(pMask)) {
-	    mask_tiled = 1;
-	    mask_tile_format = 0; /* Tiled X */
-	}
-    }
-    pI830->scale_units[0][0] = pSrc->drawable.width;
-    pI830->scale_units[0][1] = pSrc->drawable.height;
-
-    pI830->transform[0] = pSrcPicture->transform;
-
-    if (!pMask) {
-	pI830->transform[1] = NULL;
-	pI830->scale_units[1][0] = -1;
-	pI830->scale_units[1][1] = -1;
-	if (pI830->transform[0] && 
-		i965_check_rotation_transform(pI830->transform[0]))
-	    rotation_program = TRUE;
-    } else {
-	pI830->transform[1] = pMaskPicture->transform;
-	if (pI830->transform[1])
-	    I830FALLBACK("i965 mask transform not implemented!\n");
-	pI830->scale_units[1][0] = pMask->drawable.width;
-	pI830->scale_units[1][1] = pMask->drawable.height;
-    }
-
-    /* setup 3d pipeline state */
+    unsigned int next_offset = 0, total_state_size;
+    static int init;
+    int tmp;
 
-    binding_table_entries = 2; /* default no mask */
+    if (init)
+	return;
 
-    /* Set up our layout of state in framebuffer.  First the general state: */
-    next_offset = 0;
+    init = 1;
     vs_offset = ALIGN(next_offset, 64);
     next_offset = vs_offset + sizeof(*vs_state);
 
@@ -461,57 +428,43 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
     next_offset = wm_scratch_offset + 1024 * PS_MAX_THREADS;
 
     cc_offset = ALIGN(next_offset, 32);
-    next_offset = cc_offset + sizeof(*cc_state);
+    next_offset = cc_offset + sizeof(struct brw_cc_unit_state);
 
-    /* keep current sf_kernel, which will send one setup urb entry to
-     * PS kernel
-     */
     sf_kernel_offset = ALIGN(next_offset, 64);
-    if (pMask)
-	next_offset = sf_kernel_offset + sizeof (sf_kernel_static_mask);
-    else if (rotation_program)
-	next_offset = sf_kernel_offset + sizeof (sf_kernel_static_rotation);
-    else 
-	next_offset = sf_kernel_offset + sizeof (sf_kernel_static);
+    tmp = sizeof(sf_kernel_static_mask);
+    if (tmp < sizeof(sf_kernel_static_rotation))
+	tmp = sizeof(sf_kernel_static_rotation);
+    if (tmp < sizeof(sf_kernel_static))
+	tmp = sizeof(sf_kernel_static);
+
+    next_offset = sf_kernel_offset + tmp;
 
     ps_kernel_offset = ALIGN(next_offset, 64);
-    if (pMask) {
-	if (pMaskPicture->componentAlpha && 
-                PICT_FORMAT_RGB(pMaskPicture->format)) {
-            if (i965_blend_op[op].src_alpha) {
-                next_offset = ps_kernel_offset + 
-                    sizeof(ps_kernel_static_maskca_srcalpha);
-            } else {
-                next_offset = ps_kernel_offset + 
-                    sizeof(ps_kernel_static_maskca);
-            }
-        } else
-	    next_offset = ps_kernel_offset + 
-                          sizeof(ps_kernel_static_masknoca);
-    } else if (rotation_program) {
-   	next_offset = ps_kernel_offset + sizeof (ps_kernel_static_rotation);
-    } else {
-   	next_offset = ps_kernel_offset + sizeof (ps_kernel_static_nomask);
-    }
+    tmp = sizeof(ps_kernel_static_maskca_srcalpha);
+    if (tmp < sizeof(ps_kernel_static_maskca))
+	tmp = sizeof(ps_kernel_static_maskca);
+    if (tmp < sizeof(ps_kernel_static_masknoca))
+	tmp = sizeof(ps_kernel_static_masknoca);
+    if (tmp < sizeof(ps_kernel_static_rotation))
+	tmp = sizeof(ps_kernel_static_rotation);
+    if (tmp < sizeof(ps_kernel_static_nomask))
+	tmp = sizeof(ps_kernel_static_nomask);
+    
+    next_offset = ps_kernel_offset + tmp;
 
     sip_kernel_offset = ALIGN(next_offset, 64);
     next_offset = sip_kernel_offset + sizeof (sip_kernel_static);
 
     /* needed? */
     cc_viewport_offset = ALIGN(next_offset, 32);
-    next_offset = cc_viewport_offset + sizeof(*cc_viewport);
+    next_offset = cc_viewport_offset + sizeof(struct brw_cc_viewport);
 
     /* for texture sampler */
     src_sampler_offset = ALIGN(next_offset, 32);
     next_offset = src_sampler_offset + sizeof(*src_sampler_state);
-
-    if (pMask) {
-   	mask_sampler_offset = ALIGN(next_offset, 32);
-   	next_offset = mask_sampler_offset + sizeof(*mask_sampler_state);
-    }
-    /* Align VB to native size of elements, for safety */
-    vb_offset = ALIGN(next_offset, 32);
-    next_offset = vb_offset + vb_size;
+    
+    mask_sampler_offset = ALIGN(next_offset, 32);
+    next_offset = mask_sampler_offset + sizeof(*mask_sampler_state);
 
     /* And then the general state: */
     dest_surf_offset = ALIGN(next_offset, 32);
@@ -520,37 +473,22 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
     src_surf_offset = ALIGN(next_offset, 32);
     next_offset = src_surf_offset + sizeof(*src_surf_state);
 
-    if (pMask) {
-   	mask_surf_offset = ALIGN(next_offset, 32);
-   	next_offset = mask_surf_offset + sizeof(*mask_surf_state);
-	binding_table_entries = 3;
-    }
+    mask_surf_offset = ALIGN(next_offset, 32);
+    next_offset = mask_surf_offset + sizeof(*mask_surf_state);
 
     binding_table_offset = ALIGN(next_offset, 32);
-    next_offset = binding_table_offset + (binding_table_entries * 4);
+    next_offset = binding_table_offset + (4 * 4);
 
     default_color_offset = ALIGN(next_offset, 32);
     next_offset = default_color_offset + sizeof(*default_color_state);
 
     total_state_size = next_offset;
-    assert(total_state_size < pI830->exa_965_state->size);
-
-    state_base_offset = pI830->exa_965_state->offset;
-    state_base_offset = ALIGN(state_base_offset, 64);
-    state_base = (char *)(pI830->FbBase + state_base_offset);
-
-    sf_kernel = (void *)(state_base + sf_kernel_offset);
-    ps_kernel = (void *)(state_base + ps_kernel_offset);
-    sip_kernel = (void *)(state_base + sip_kernel_offset);
-
-    cc_viewport = (void *)(state_base + cc_viewport_offset);
-
-    binding_table = (void *)(state_base + binding_table_offset);
-
-    vb = (void *)(state_base + vb_offset);
 
-    default_color_state = (void*)(state_base + default_color_offset);
+    /* Align VB to native size of elements, for safety */
+    vb_offset = ALIGN(next_offset, 32);
+    vb_max_size = total_size - vb_offset;
 
+    ErrorF("%d available for vertex data\n", vb_max_size);
     /* Set up a default static partitioning of the URB, which is supposed to
      * allow anything we would want to do, at potentially lower performance.
      */
@@ -580,58 +518,45 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
     urb_cs_start = urb_sf_start + urb_sf_size;
     urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE;
 
-    /* Because we only have a single static buffer for our state currently,
-     * we have to sync before updating it every time.
-     */
-    i830WaitSync(pScrn);
+    //    assert(total_state_size < pI830->exa_965_state->size);
+}
 
-    memset (cc_viewport, 0, sizeof (*cc_viewport));
+static void
+i965_init_state_objects(ScrnInfoPtr pScrn, unsigned char *start_base)
+{
+    /* cc viewport */
+    struct brw_cc_viewport *cc_viewport;
+    struct brw_cc_unit_state *cc_state;
+    struct brw_surface_state *dest_surf_state, *src_surf_state, *mask_surf_state;
+
+    cc_viewport = (void *)(start_base + cc_viewport_offset);
     cc_viewport->min_depth = -1.e35;
     cc_viewport->max_depth = 1.e35;
 
-    /* Color calculator state */
-    cc_state = &cc_state_local;
-    memset(cc_state, 0, sizeof(*cc_state));
+    cc_state = (void *)(start_base + cc_offset);
     cc_state->cc0.stencil_enable = 0;   /* disable stencil */
     cc_state->cc2.depth_test = 0;       /* disable depth test */
     cc_state->cc2.logicop_enable = 0;   /* disable logic op */
     cc_state->cc3.ia_blend_enable = 1;  /* blend alpha just like colors */
     cc_state->cc3.blend_enable = 1;     /* enable color blend */
     cc_state->cc3.alpha_test = 0;       /* disable alpha test */
-    cc_state->cc4.cc_viewport_state_offset = (state_base_offset +
-					      cc_viewport_offset) >> 5;
+    cc_state->cc4.cc_viewport_state_offset = cc_viewport_offset >> 5;
     cc_state->cc5.dither_enable = 0;    /* disable dither */
     cc_state->cc5.logicop_func = 0xc;   /* COPY */
     cc_state->cc5.statistics_enable = 1;
     cc_state->cc5.ia_blend_function = BRW_BLENDFUNCTION_ADD;
-    i965_get_blend_cntl(op, pMaskPicture, pDstPicture->format,
-			&src_blend, &dst_blend);
-    /* XXX: alpha blend factor should be same as color, but check
-     * for CA case in future
-     */
-    cc_state->cc5.ia_src_blend_factor = src_blend;
-    cc_state->cc5.ia_dest_blend_factor = dst_blend;
     cc_state->cc6.blend_function = BRW_BLENDFUNCTION_ADD;
-    cc_state->cc6.src_blend_factor = src_blend;
-    cc_state->cc6.dest_blend_factor = dst_blend;
     cc_state->cc6.clamp_post_alpha_blend = 1;
     cc_state->cc6.clamp_pre_alpha_blend = 1;
     cc_state->cc6.clamp_range = 0;  /* clamp range [0,1] */
 
-    cc_state = (void *)(state_base + cc_offset);
-    memcpy (cc_state, &cc_state_local, sizeof (cc_state_local));
-
     /* Upload system kernel */
-    memcpy (sip_kernel, sip_kernel_static, sizeof (sip_kernel_static));
+    memcpy (start_base + sip_kernel_offset, sip_kernel_static, sizeof (sip_kernel_static));
 
-    /* Set up the state buffer for the destination surface */
-    dest_surf_state = &dest_surf_state_local;
-    memset(dest_surf_state, 0, sizeof(*dest_surf_state));
+    /* destination surface state */
+    dest_surf_state = (void *)(start_base + dest_surf_offset);
     dest_surf_state->ss0.surface_type = BRW_SURFACE_2D;
     dest_surf_state->ss0.data_return_format = BRW_SURFACERETURNFORMAT_FLOAT32;
-    i965_get_dest_format(pDstPicture, &dst_format);
-    dest_surf_state->ss0.surface_format = dst_format;
-
     dest_surf_state->ss0.writedisable_alpha = 0;
     dest_surf_state->ss0.writedisable_red = 0;
     dest_surf_state->ss0.writedisable_green = 0;
@@ -641,25 +566,12 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
     dest_surf_state->ss0.vert_line_stride_ofs = 0;
     dest_surf_state->ss0.mipmap_layout_mode = 0;
     dest_surf_state->ss0.render_cache_read_mode = 0;
-
-    dest_surf_state->ss1.base_addr = dst_offset;
-    dest_surf_state->ss2.height = pDst->drawable.height - 1;
-    dest_surf_state->ss2.width = pDst->drawable.width - 1;
     dest_surf_state->ss2.mip_count = 0;
     dest_surf_state->ss2.render_target_rotation = 0;
-    dest_surf_state->ss3.pitch = dst_pitch - 1;
-    dest_surf_state->ss3.tile_walk = dst_tile_format;
-    dest_surf_state->ss3.tiled_surface = dst_tiled;
 
-    dest_surf_state = (void *)(state_base + dest_surf_offset);
-    memcpy (dest_surf_state, &dest_surf_state_local, sizeof (dest_surf_state_local));
-
-    /* Set up the source surface state buffer */
-    src_surf_state = &src_surf_state_local;
-    memset(src_surf_state, 0, sizeof(*src_surf_state));
+    /* source surface state */
+    src_surf_state = (void *)(start_base + src_surf_offset);
     src_surf_state->ss0.surface_type = BRW_SURFACE_2D;
-    src_surf_state->ss0.surface_format = i965_get_card_format(pSrcPicture);
-
     src_surf_state->ss0.writedisable_alpha = 0;
     src_surf_state->ss0.writedisable_red = 0;
     src_surf_state->ss0.writedisable_green = 0;
@@ -669,60 +581,295 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
     src_surf_state->ss0.vert_line_stride_ofs = 0;
     src_surf_state->ss0.mipmap_layout_mode = 0;
     src_surf_state->ss0.render_cache_read_mode = 0;
+    src_surf_state->ss2.mip_count = 0;
+    src_surf_state->ss2.render_target_rotation = 0;
+
+    /* mask surface state */
+    mask_surf_state = (void *)(start_base + mask_surf_offset);
+    mask_surf_state->ss0.surface_type = BRW_SURFACE_2D;
+    mask_surf_state->ss0.writedisable_alpha = 0;
+    mask_surf_state->ss0.writedisable_red = 0;
+    mask_surf_state->ss0.writedisable_green = 0;
+    mask_surf_state->ss0.writedisable_blue = 0;
+    mask_surf_state->ss0.color_blend = 1;
+    mask_surf_state->ss0.vert_line_stride = 0;
+    mask_surf_state->ss0.vert_line_stride_ofs = 0;
+    mask_surf_state->ss0.mipmap_layout_mode = 0;
+    mask_surf_state->ss0.render_cache_read_mode = 0;
+    mask_surf_state->ss2.mip_count = 0;
+    mask_surf_state->ss2.render_target_rotation = 0;
+
+    /* default color state */
+    default_color_state = (void *)(start_base + default_color_offset);
+    default_color_state->color[0] = 0.0; /* R */
+    default_color_state->color[1] = 0.0; /* G */
+    default_color_state->color[2] = 0.0; /* B */
+    default_color_state->color[3] = 0.0; /* A */
+
+    /* src sampler state */
+    src_sampler_state = (void *)(start_base + src_sampler_offset);
+    src_sampler_state->ss0.lod_preclamp = 1; /* GL mode */
+    src_sampler_state->ss0.default_color_mode = 0; /* GL mode */
+    src_sampler_state->ss3.chroma_key_enable = 0; /* disable chromakey */
+
+    /* mask sampler state */
+    mask_sampler_state = (void *)(start_base + mask_sampler_offset);
+    mask_sampler_state->ss0.lod_preclamp = 1; /* GL mode */
+    mask_sampler_state->ss3.chroma_key_enable = 0; /* disable chromakey */
+
+    /* vertex shader state */
+    /* Set up the vertex shader to be disabled (passthrough) */
+    vs_state = (void *)(start_base + vs_offset);
+    vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES;
+    vs_state->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1;
+    vs_state->vs6.vs_enable = 0;
+    vs_state->vs6.vert_cache_disable = 1;
+
+    /* sf state */
+    sf_state = (void *)(start_base + sf_offset);
+    sf_state->thread0.kernel_start_pointer = sf_kernel_offset >> 6;
+    sf_state->thread0.grf_reg_count = BRW_GRF_BLOCKS(SF_KERNEL_NUM_GRF);
+    sf_state->sf1.single_program_flow = 1;
+    sf_state->sf1.binding_table_entry_count = 0;
+    sf_state->sf1.thread_priority = 0;
+    sf_state->sf1.floating_point_mode = 0; /* Mesa does this */
+    sf_state->sf1.illegal_op_exception_enable = 1;
+    sf_state->sf1.mask_stack_exception_enable = 1;
+    sf_state->sf1.sw_exception_enable = 1;
+    sf_state->thread2.per_thread_scratch_space = 0;
+    /* scratch space is not used in our kernel */
+    sf_state->thread2.scratch_space_base_pointer = 0;
+    sf_state->thread3.const_urb_entry_read_length = 0; /* no const URBs */
+    sf_state->thread3.const_urb_entry_read_offset = 0; /* no const URBs */
+    sf_state->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */
+    /* don't smash vertex header, read start from dw8 */
+    sf_state->thread3.urb_entry_read_offset = 1;
+    sf_state->thread3.dispatch_grf_start_reg = 3;
+    sf_state->thread4.max_threads = SF_MAX_THREADS - 1;
+    sf_state->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1;
+    sf_state->thread4.nr_urb_entries = URB_SF_ENTRIES;
+    sf_state->thread4.stats_enable = 1;
+    sf_state->sf5.viewport_transform = FALSE; /* skip viewport */
+    sf_state->sf6.cull_mode = BRW_CULLMODE_NONE;
+    sf_state->sf6.scissor = 0;
+    sf_state->sf7.trifan_pv = 2;
+    sf_state->sf6.dest_org_vbias = 0x8;
+    sf_state->sf6.dest_org_hbias = 0x8;
+
+    /* wm state */
+    wm_state = (void *)(start_base + wm_offset);
+    wm_state->thread0.kernel_start_pointer = ps_kernel_offset >> 6;
+    wm_state->thread0.grf_reg_count = BRW_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
+    wm_state->thread1.single_program_flow = 1;
+    wm_state->thread2.scratch_space_base_pointer = wm_scratch_offset>>10;
+    wm_state->thread2.per_thread_scratch_space = 0;
+    wm_state->thread3.const_urb_entry_read_length = 0;
+    wm_state->thread3.const_urb_entry_read_offset = 0;
+    /* Each pair of attributes (src/mask coords) is one URB entry */
+    wm_state->thread3.urb_entry_read_offset = 0;
+    /* wm kernel use urb from 3, see wm_program in compiler module */
+    wm_state->thread3.dispatch_grf_start_reg = 3; /* must match kernel */
+
+    wm_state->wm4.stats_enable = 1;  /* statistic */
+    wm_state->wm4.sampler_state_pointer = src_sampler_offset >> 5;
+    wm_state->wm4.sampler_count = 1; /* 1-4 samplers used */
+    wm_state->wm5.max_threads = PS_MAX_THREADS - 1;
+    wm_state->wm5.thread_dispatch_enable = 1;
+    /* just use 16-pixel dispatch (4 subspans), don't need to change kernel
+     * start point
+     */
+    wm_state->wm5.enable_16_pix = 1;
+    wm_state->wm5.enable_8_pix = 0;
+    wm_state->wm5.early_depth_test = 1;
+}
+
+static void /* Copies the SF kernel selected by need_sf_kernel to start_base + sf_kernel_offset. */
+i965_update_sf_kernel(ScrnInfoPtr pScrn, char *start_base, /* pScrn is not read in this body */
+		      int need_sf_kernel) /* index into sf_kernels[]; not range-checked here */
+{
+    memcpy(start_base + sf_kernel_offset, sf_kernels[need_sf_kernel].kernel, sf_kernels[need_sf_kernel].size); /* length comes from the table entry's .size */
+}
+
+static void /* Copies the PS kernel selected by need_ps_kernel to start_base + ps_kernel_offset. */
+i965_update_ps_kernel(ScrnInfoPtr pScrn, char *start_base, /* pScrn is not read in this body */
+		      int need_ps_kernel) /* index into ps_kernels[]; not range-checked here */
+{
+    memcpy(start_base + ps_kernel_offset, ps_kernels[need_ps_kernel].kernel, ps_kernels[need_ps_kernel].size); /* length comes from the table entry's .size */
+}
+
+void /* Drops any buffer held by state, allocates and maps a new one, then runs i965_init_state_objects() on the mapping. */
+i965_exastate_reset(struct i965_exastate_buffer *state)
+{
+    I830Ptr pI830 = I830PTR(state->pScrn); /* state->pScrn must already be set by the caller */
+
+    if (state->buf != NULL) { /* give up our reference to the previous buffer object, if any */
+	dri_bo_unreference(state->buf);
+	state->buf = NULL;
+    }
+
+    state->buf = dri_bo_alloc(pI830->bufmgr, "exa state buffer",
+			      EXASTATE_SZ, 4096, /* NOTE(review): 4096 is presumably the alignment -- confirm against dri_bo_alloc */
+			      DRM_BO_FLAG_MEM_TT);
+    dri_bo_map(state->buf, TRUE); /* NOTE(review): neither the alloc result nor the map result is checked before the dereference below */
+
+    state->map = state->buf->virtual; /* remember buf->virtual so callers can write state through state->map */
+    i965_init_state_objects(state->pScrn, state->map); /* writes the initial state objects into the new mapping */
+}
+
+Bool
+i965_prepare_composite(int op, PicturePtr pSrcPicture,
+		       PicturePtr pMaskPicture, PicturePtr pDstPicture,
+		       PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst)
+{
+    ScrnInfoPtr pScrn = xf86Screens[pSrcPicture->pDrawable->pScreen->myNum];
+    I830Ptr pI830 = I830PTR(pScrn);
+    CARD32 src_pitch, src_tile_format, src_tiled;
+    CARD32 mask_pitch = 0, mask_tile_format = 0, mask_tiled = 0;
+    CARD32 dst_format, dst_pitch, dst_tile_format, dst_tiled;
+    Bool rotation_program = FALSE;
+    struct brw_cc_unit_state *cc_state;
+    int need_ps_kernel, need_sf_kernel;
+    char *start_base;
+    void *map;
+
+    if (pI830->use_ttm_batch) {
+	i965_exastate_reset(pI830->exa965);
+	map = pI830->exa965->map;
+    }else{
+	map = pI830->exa_965_state->offset + pI830->FbBase;
+    }
 
-    src_surf_state->ss1.base_addr = src_offset;
+    start_base = map;
+
+    IntelEmitInvarientState(pScrn);
+    *pI830->last_3d = LAST_3D_RENDER;
+
+    src_pitch = intel_get_pixmap_pitch(pSrc);
+    if (i830_pixmap_tiled(pSrc)) {
+        src_tiled = 1;
+	src_tile_format = 0; /* Tiled X */
+    }
+
+    dst_pitch = intel_get_pixmap_pitch(pDst);
+    if (i830_pixmap_tiled(pDst)) {
+        dst_tiled = 1;
+	dst_tile_format = 0; /* Tiled X */
+    }
+
+    if (pMask) {
+	mask_pitch = intel_get_pixmap_pitch(pMask);
+	if (i830_pixmap_tiled(pMask)) {
+  	    mask_tiled = 1;
+	    mask_tile_format = 0;
+	}
+    }
+    pI830->scale_units[0][0] = pSrc->drawable.width;
+    pI830->scale_units[0][1] = pSrc->drawable.height;
+
+    pI830->transform[0] = pSrcPicture->transform;
+
+    if (!pMask) {
+	pI830->transform[1] = NULL;
+	pI830->scale_units[1][0] = -1;
+	pI830->scale_units[1][1] = -1;
+	if (pI830->transform[0] && 
+		i965_check_rotation_transform(pI830->transform[0]))
+	    rotation_program = TRUE;
+    } else {
+	pI830->transform[1] = pMaskPicture->transform;
+	if (pI830->transform[1])
+	    I830FALLBACK("i965 mask transform not implemented!\n");
+	pI830->scale_units[1][0] = pMask->drawable.width;
+	pI830->scale_units[1][1] = pMask->drawable.height;
+    }
+
+    /* setup 3d pipeline state */
+
+    /* Because we only have a single static buffer for our state currently,
+     * we have to sync before updating it every time.
+     */
+    vb = (void *)(start_base + vb_offset);
+    vb_index = 0;
+    /* Color calculator state */
+    cc_state = (void *)(start_base + cc_offset);
+    i965_get_blend_cntl(op, pMaskPicture, pDstPicture->format,
+			&src_blend, &dst_blend);
+    /* XXX: alpha blend factor should be same as color, but check
+     * for CA case in future
+     */
+    cc_state->cc5.ia_src_blend_factor = src_blend;
+    cc_state->cc5.ia_dest_blend_factor = dst_blend;
+    cc_state->cc6.src_blend_factor = src_blend;
+    cc_state->cc6.dest_blend_factor = dst_blend;
+
+
+    /* Set up the state buffer for the destination surface */
+    dest_surf_state = (void *)(start_base + dest_surf_offset);
+    i965_get_dest_format(pDstPicture, &dst_format);
+    dest_surf_state->ss0.surface_format = dst_format;
+
+#ifdef I830_USE_BB
+    i830_batchbuffer_emit_pixmap(pDst,
+				 DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE,
+				 DRM_BO_MASK_MEM | DRM_BO_FLAG_WRITE | DRM_BO_FLAG_CACHED,
+				 exa_buf->bo.handle, dest_surf_offset + 4, 0);
+#else
+    dest_surf_state->ss1.base_addr = intel_get_pixmap_offset(pDst);
+#endif
+    dest_surf_state->ss2.height = pDst->drawable.height - 1;
+    dest_surf_state->ss2.width = pDst->drawable.width - 1;
+    dest_surf_state->ss3.pitch = dst_pitch - 1;
+    dest_surf_state->ss3.tile_walk = dst_tile_format;
+    dest_surf_state->ss3.tiled_surface = dst_tiled;
+
+    /* Set up the source surface state buffer */
+    src_surf_state = (void *)(start_base + src_surf_offset);
+    src_surf_state->ss0.surface_format = i965_get_card_format(pSrcPicture);
+
+#ifdef I830_USE_BB
+    i830_batchbuffer_emit_pixmap(pSrc,
+				 DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
+				 DRM_BO_MASK_MEM | DRM_BO_FLAG_READ | DRM_BO_FLAG_CACHED,
+				 exa_buf->bo.handle, src_surf_offset + 4, 0);
+#else
+    src_surf_state->ss1.base_addr = intel_get_pixmap_offset(pSrc);
+#endif
     src_surf_state->ss2.width = pSrc->drawable.width - 1;
     src_surf_state->ss2.height = pSrc->drawable.height - 1;
-    src_surf_state->ss2.mip_count = 0;
-    src_surf_state->ss2.render_target_rotation = 0;
     src_surf_state->ss3.pitch = src_pitch - 1;
     src_surf_state->ss3.tile_walk = src_tile_format;
     src_surf_state->ss3.tiled_surface = src_tiled;
 
-    src_surf_state = (void *)(state_base + src_surf_offset);
-    memcpy (src_surf_state, &src_surf_state_local, sizeof (src_surf_state_local));
-
     /* setup mask surface */
     if (pMask) {
-	mask_surf_state = &mask_surf_state_local;
-   	memset(mask_surf_state, 0, sizeof(*mask_surf_state));
-	mask_surf_state->ss0.surface_type = BRW_SURFACE_2D;
-   	mask_surf_state->ss0.surface_format =
-	    i965_get_card_format(pMaskPicture);
-
-   	mask_surf_state->ss0.writedisable_alpha = 0;
-   	mask_surf_state->ss0.writedisable_red = 0;
-   	mask_surf_state->ss0.writedisable_green = 0;
-   	mask_surf_state->ss0.writedisable_blue = 0;
-   	mask_surf_state->ss0.color_blend = 1;
-   	mask_surf_state->ss0.vert_line_stride = 0;
-   	mask_surf_state->ss0.vert_line_stride_ofs = 0;
-   	mask_surf_state->ss0.mipmap_layout_mode = 0;
-   	mask_surf_state->ss0.render_cache_read_mode = 0;
-
-   	mask_surf_state->ss1.base_addr = mask_offset;
+	mask_surf_state = (void *)(start_base + mask_surf_offset);
+   	mask_surf_state->ss0.surface_format = i965_get_card_format(pMaskPicture);
+#ifdef I830_USE_BB
+	i830_batchbuffer_emit_pixmap(pMask, 
+				     DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ,
+				     DRM_BO_MASK_MEM | DRM_BO_FLAG_READ | DRM_BO_FLAG_CACHED,
+				     exa_buf->bo.handle, mask_surf_offset + 4, 0);
+#else
+	mask_surf_state->ss1.base_addr = intel_get_pixmap_offset(pMask);
+#endif
    	mask_surf_state->ss2.width = pMask->drawable.width - 1;
    	mask_surf_state->ss2.height = pMask->drawable.height - 1;
-   	mask_surf_state->ss2.mip_count = 0;
-   	mask_surf_state->ss2.render_target_rotation = 0;
    	mask_surf_state->ss3.pitch = mask_pitch - 1;
 	mask_surf_state->ss3.tile_walk = mask_tile_format;
 	mask_surf_state->ss3.tiled_surface = mask_tiled;
-
-	mask_surf_state = (void *)(state_base + mask_surf_offset);
-	memcpy (mask_surf_state, &mask_surf_state_local, sizeof (mask_surf_state_local));
     }
 
+    binding_table = (void *)(start_base + binding_table_offset);
     /* Set up a binding table for our surfaces.  Only the PS will use it */
-    binding_table[0] = state_base_offset + dest_surf_offset;
-    binding_table[1] = state_base_offset + src_surf_offset;
+    binding_table[0] = dest_surf_offset;
+    binding_table[1] = src_surf_offset;
     if (pMask)
-   	binding_table[2] = state_base_offset + mask_surf_offset;
+   	binding_table[2] = mask_surf_offset;
+    else
+	binding_table[2] = 0;
 
     /* PS kernel use this sampler */
-    src_sampler_state = &src_sampler_state_local;
-    memset(src_sampler_state, 0, sizeof(*src_sampler_state));
-    src_sampler_state->ss0.lod_preclamp = 1; /* GL mode */
+    src_sampler_state = (void *)(start_base + src_sampler_offset);
     switch(pSrcPicture->filter) {
     case PictFilterNearest:
    	src_sampler_state->ss0.min_filter = BRW_MAPFILTER_NEAREST;
@@ -736,34 +883,19 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
 	I830FALLBACK("Bad filter 0x%x\n", pSrcPicture->filter);
     }
 
-    memset(default_color_state, 0, sizeof(*default_color_state));
-    default_color_state->color[0] = 0.0; /* R */
-    default_color_state->color[1] = 0.0; /* G */
-    default_color_state->color[2] = 0.0; /* B */
-    default_color_state->color[3] = 0.0; /* A */
-
-    src_sampler_state->ss0.default_color_mode = 0; /* GL mode */
-
     if (!pSrcPicture->repeat) {
    	src_sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER;
    	src_sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER;
    	src_sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER;
-	src_sampler_state->ss2.default_color_pointer =
-	    (state_base_offset + default_color_offset) >> 5;
+	src_sampler_state->ss2.default_color_pointer = default_color_offset >> 5;
     } else {
    	src_sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_WRAP;
    	src_sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_WRAP;
    	src_sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_WRAP;
     }
-    src_sampler_state->ss3.chroma_key_enable = 0; /* disable chromakey */
-
-    src_sampler_state = (void *)(state_base + src_sampler_offset);
-    memcpy (src_sampler_state, &src_sampler_state_local, sizeof (src_sampler_state_local));
 
     if (pMask) {
-	mask_sampler_state = &mask_sampler_state_local;
-   	memset(mask_sampler_state, 0, sizeof(*mask_sampler_state));
-   	mask_sampler_state->ss0.lod_preclamp = 1; /* GL mode */
+	mask_sampler_state = (void *)(start_base + mask_sampler_offset);
    	switch(pMaskPicture->filter) {
    	case PictFilterNearest:
    	    mask_sampler_state->ss0.min_filter = BRW_MAPFILTER_NEAREST;
@@ -785,138 +917,54 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
    	    mask_sampler_state->ss1.t_wrap_mode =
 		BRW_TEXCOORDMODE_CLAMP_BORDER;
             mask_sampler_state->ss2.default_color_pointer =
-		(state_base_offset + default_color_offset)>>5;
+		default_color_offset >> 5;
    	} else {
    	    mask_sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_WRAP;
    	    mask_sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_WRAP;
    	    mask_sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_WRAP;
     	}
-   	mask_sampler_state->ss3.chroma_key_enable = 0; /* disable chromakey */
-
-	mask_sampler_state = (void *)(state_base + mask_sampler_offset);
-	memcpy (mask_sampler_state, &mask_sampler_state_local, sizeof (mask_sampler_state_local));
     }
 
-    /* Set up the vertex shader to be disabled (passthrough) */
-    vs_state = &vs_state_local;
-    memset(vs_state, 0, sizeof(*vs_state));
-    vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES;
-    vs_state->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1;
-    vs_state->vs6.vs_enable = 0;
-    vs_state->vs6.vert_cache_disable = 1;
-
-    vs_state = (void *)(state_base + vs_offset);
-    memcpy (vs_state, &vs_state_local, sizeof (vs_state_local));
 
     /* Set up the SF kernel to do coord interp: for each attribute,
      * calculate dA/dx and dA/dy.  Hand these interpolation coefficients
      * back to SF which then hands pixels off to WM.
      */
     if (pMask)
-	memcpy(sf_kernel, sf_kernel_static_mask,
-		sizeof (sf_kernel_static_mask));
+	need_sf_kernel = SF_KERNEL_MASK;
     else if (rotation_program)
-	memcpy(sf_kernel, sf_kernel_static_rotation, 
-		sizeof (sf_kernel_static_rotation));
+	need_sf_kernel = SF_KERNEL_ROTATION;
     else
-	memcpy(sf_kernel, sf_kernel_static, sizeof (sf_kernel_static));
-
-    sf_state = &sf_state_local;
-    memset(sf_state, 0, sizeof(*sf_state));
-    sf_state->thread0.kernel_start_pointer =
-	(state_base_offset + sf_kernel_offset) >> 6;
-    sf_state->thread0.grf_reg_count = BRW_GRF_BLOCKS(SF_KERNEL_NUM_GRF);
-    sf_state->sf1.single_program_flow = 1;
-    sf_state->sf1.binding_table_entry_count = 0;
-    sf_state->sf1.thread_priority = 0;
-    sf_state->sf1.floating_point_mode = 0; /* Mesa does this */
-    sf_state->sf1.illegal_op_exception_enable = 1;
-    sf_state->sf1.mask_stack_exception_enable = 1;
-    sf_state->sf1.sw_exception_enable = 1;
-    sf_state->thread2.per_thread_scratch_space = 0;
-    /* scratch space is not used in our kernel */
-    sf_state->thread2.scratch_space_base_pointer = 0;
-    sf_state->thread3.const_urb_entry_read_length = 0; /* no const URBs */
-    sf_state->thread3.const_urb_entry_read_offset = 0; /* no const URBs */
-    sf_state->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */
-    /* don't smash vertex header, read start from dw8 */
-    sf_state->thread3.urb_entry_read_offset = 1;
-    sf_state->thread3.dispatch_grf_start_reg = 3;
-    sf_state->thread4.max_threads = SF_MAX_THREADS - 1;
-    sf_state->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1;
-    sf_state->thread4.nr_urb_entries = URB_SF_ENTRIES;
-    sf_state->thread4.stats_enable = 1;
-    sf_state->sf5.viewport_transform = FALSE; /* skip viewport */
-    sf_state->sf6.cull_mode = BRW_CULLMODE_NONE;
-    sf_state->sf6.scissor = 0;
-    sf_state->sf7.trifan_pv = 2;
-    sf_state->sf6.dest_org_vbias = 0x8;
-    sf_state->sf6.dest_org_hbias = 0x8;
+	need_sf_kernel = SF_KERNEL;
+    
+    i965_update_sf_kernel(pScrn, start_base, need_sf_kernel);
 
-    sf_state = (void *)(state_base + sf_offset);
-    memcpy (sf_state, &sf_state_local, sizeof (sf_state_local));
-
-   /* Set up the PS kernel (dispatched by WM) */
+    /* Set up the PS kernel (dispatched by WM) */
     if (pMask) {
 	if (pMaskPicture->componentAlpha && 
-                PICT_FORMAT_RGB(pMaskPicture->format)) {
+	    PICT_FORMAT_RGB(pMaskPicture->format)) {
             if (i965_blend_op[op].src_alpha) 
-                memcpy(ps_kernel, ps_kernel_static_maskca_srcalpha,
-                        sizeof (ps_kernel_static_maskca_srcalpha));
+		need_ps_kernel = PS_KERNEL_MASKCA_SRCALPHA;
             else
-                memcpy(ps_kernel, ps_kernel_static_maskca,
-                        sizeof (ps_kernel_static_maskca));
+		need_ps_kernel = PS_KERNEL_MASKCA;
         } else
-   	    memcpy(ps_kernel, ps_kernel_static_masknoca,
-		   sizeof (ps_kernel_static_masknoca));
+	    need_ps_kernel = PS_KERNEL_MASKNOCA;
     } else if (rotation_program) {
-   	memcpy(ps_kernel, ps_kernel_static_rotation,
-	       sizeof (ps_kernel_static_rotation));
+	need_ps_kernel = PS_KERNEL_ROTATION;
     } else {
-   	memcpy(ps_kernel, ps_kernel_static_nomask,
-	       sizeof (ps_kernel_static_nomask));
+	need_ps_kernel = PS_KERNEL_NOMASK;
     }
 
-    wm_state = &wm_state_local;
-    memset(wm_state, 0, sizeof (*wm_state));
-    wm_state->thread0.kernel_start_pointer =
-	(state_base_offset + ps_kernel_offset) >> 6;
-    wm_state->thread0.grf_reg_count = BRW_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
-    wm_state->thread1.single_program_flow = 1;
-    if (!pMask)
+    i965_update_ps_kernel(pScrn, start_base, need_ps_kernel);
+    
+    wm_state = (void *)(start_base + wm_offset);
+    if (!pMask) {
 	wm_state->thread1.binding_table_entry_count = 2; /* 1 tex and fb */
-    else
+	wm_state->thread3.urb_entry_read_length = 1;
+    } else {
 	wm_state->thread1.binding_table_entry_count = 3; /* 2 tex and fb */
-
-    wm_state->thread2.scratch_space_base_pointer = (state_base_offset +
-						    wm_scratch_offset)>>10;
-    wm_state->thread2.per_thread_scratch_space = 0;
-    wm_state->thread3.const_urb_entry_read_length = 0;
-    wm_state->thread3.const_urb_entry_read_offset = 0;
-    /* Each pair of attributes (src/mask coords) is one URB entry */
-    if (pMask)
 	wm_state->thread3.urb_entry_read_length = 2;
-    else
-	wm_state->thread3.urb_entry_read_length = 1;
-    wm_state->thread3.urb_entry_read_offset = 0;
-    /* wm kernel use urb from 3, see wm_program in compiler module */
-    wm_state->thread3.dispatch_grf_start_reg = 3; /* must match kernel */
-
-    wm_state->wm4.stats_enable = 1;  /* statistic */
-    wm_state->wm4.sampler_state_pointer = (state_base_offset +
-					   src_sampler_offset) >> 5;
-    wm_state->wm4.sampler_count = 1; /* 1-4 samplers used */
-    wm_state->wm5.max_threads = PS_MAX_THREADS - 1;
-    wm_state->wm5.thread_dispatch_enable = 1;
-    /* just use 16-pixel dispatch (4 subspans), don't need to change kernel
-     * start point
-     */
-    wm_state->wm5.enable_16_pix = 1;
-    wm_state->wm5.enable_8_pix = 0;
-    wm_state->wm5.early_depth_test = 1;
-
-    wm_state = (void *)(state_base + wm_offset);
-    memcpy (wm_state, &wm_state_local, sizeof (wm_state_local));
+    }
 
     /* Begin the long sequence of commands needed to set up the 3D
      * rendering pipe
@@ -924,8 +972,8 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
     {
 	BEGIN_BATCH(2);
    	OUT_BATCH(MI_FLUSH |
-		 MI_STATE_INSTRUCTION_CACHE_FLUSH |
-		 BRW_MI_GLOBAL_SNAPSHOT_RESET);
+		  MI_STATE_INSTRUCTION_CACHE_FLUSH |
+		  BRW_MI_GLOBAL_SNAPSHOT_RESET);
 	OUT_BATCH(MI_NOOP);
 	ADVANCE_BATCH();
     }
@@ -943,8 +991,16 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
 	 * absolute.
 	 */
    	OUT_BATCH(BRW_STATE_BASE_ADDRESS | 4);
-   	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);  /* Generate state base address */
-   	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);  /* Surface state base address */
+
+	if (pI830->use_ttm_batch) {
+	    OUT_RELOC(pI830->exa965->buf, DRM_BO_FLAG_MEM_TT, BASE_ADDRESS_MODIFY);
+
+	    OUT_RELOC(pI830->exa965->buf, DRM_BO_FLAG_MEM_TT, BASE_ADDRESS_MODIFY);
+	} else {
+	    OUT_BATCH(pI830->exa_965_state->offset | BASE_ADDRESS_MODIFY);
+	    OUT_BATCH(pI830->exa_965_state->offset | BASE_ADDRESS_MODIFY);
+	}
+
    	OUT_BATCH(0 | BASE_ADDRESS_MODIFY);  /* media base addr, don't care */
 	/* general state max addr, disabled */
    	OUT_BATCH(0x10000000 | BASE_ADDRESS_MODIFY);
@@ -953,7 +1009,7 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
 
 	/* Set system instruction pointer */
    	OUT_BATCH(BRW_STATE_SIP | 0);
-   	OUT_BATCH(state_base_offset + sip_kernel_offset);
+   	OUT_BATCH(sip_kernel_offset);
 	OUT_BATCH(MI_NOOP);
 	ADVANCE_BATCH();
     }
@@ -975,7 +1031,7 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
    	OUT_BATCH(0); /* clip */
    	OUT_BATCH(0); /* sf */
 	/* Only the PS uses the binding table */
-   	OUT_BATCH(state_base_offset + binding_table_offset); /* ps */
+   	OUT_BATCH(binding_table_offset); /* ps */
 
 	/* The drawing rectangle clipping is always on.  Set it to values that
 	 * shouldn't do any clipping.
@@ -993,12 +1049,12 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
 
 	/* Set the pointers to the 3d pipeline state */
    	OUT_BATCH(BRW_3DSTATE_PIPELINED_POINTERS | 5);
-   	OUT_BATCH(state_base_offset + vs_offset);  /* 32 byte aligned */
+   	OUT_BATCH(vs_offset);  /* 32 byte aligned */
    	OUT_BATCH(BRW_GS_DISABLE);   /* disable GS, resulting in passthrough */
    	OUT_BATCH(BRW_CLIP_DISABLE); /* disable CLIP, resulting in passthrough */
-   	OUT_BATCH(state_base_offset + sf_offset);  /* 32 byte aligned */
-   	OUT_BATCH(state_base_offset + wm_offset);  /* 32 byte aligned */
-   	OUT_BATCH(state_base_offset + cc_offset);  /* 64 byte aligned */
+   	OUT_BATCH(sf_offset);  /* 32 byte aligned */
+   	OUT_BATCH(wm_offset);  /* 32 byte aligned */
+   	OUT_BATCH(cc_offset);  /* 64 byte aligned */
 
 	/* URB fence */
    	OUT_BATCH(BRW_URB_FENCE |
@@ -1022,14 +1078,22 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
     }
     {
         int nelem = pMask ? 3: 2;
+
    	BEGIN_BATCH(pMask?12:10);
 	/* Set up the pointer to our vertex buffer */
    	OUT_BATCH(BRW_3DSTATE_VERTEX_BUFFERS | 3);
    	OUT_BATCH((0 << VB0_BUFFER_INDEX_SHIFT) |
 	    	 VB0_VERTEXDATA |
 	    	 ((4 * 2 * nelem) << VB0_BUFFER_PITCH_SHIFT));
-   	OUT_BATCH(state_base_offset + vb_offset);
-        OUT_BATCH(3);
+
+	if (pI830->use_ttm_batch) {
+	    OUT_RELOC(pI830->exa965->buf, DRM_BO_FLAG_MEM_TT, vb_offset);
+
+	} else {
+	    OUT_BATCH(pI830->exa_965_state->offset + vb_offset);
+	}
+
+        OUT_BATCH((vb_max_size / sizeof(float))); // set max index
    	OUT_BATCH(0); // ignore for VERTEXDATA, but still there
 
 	/* Set up our vertex elements, sourced from the single vertex buffer.
@@ -1037,7 +1101,7 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
    	OUT_BATCH(BRW_3DSTATE_VERTEX_ELEMENTS | ((2 * nelem) - 1));
 	/* vertex coordinates */
    	OUT_BATCH((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
-	    	 VE0_VALID |
+		  VE0_VALID |
 	    	 (BRW_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
 	    	 (0 << VE0_OFFSET_SHIFT));
    	OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
@@ -1077,7 +1141,7 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
 #endif
     return TRUE;
 }
-
+		       
 void
 i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
 	       int dstX, int dstY, int w, int h)
@@ -1116,9 +1180,12 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
     /* Wait for any existing composite rectangles to land before we overwrite
      * the VB with the next one.
      */
-    i830WaitSync(pScrn);
+    if ((vb_index + 18) > (vb_max_size / sizeof(float))) {
+      ErrorF("vb index exceeded maximum bailing...");
+      return;
+    }
 
-    i = 0;
+    i = vb_index;
     /* rect (x2,y2) */
     vb[i++] = (float)(dstX + w);
     vb[i++] = (float)(dstY + h);
@@ -1157,18 +1224,31 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
 	       (0 << 9) |  /* CTG - indirect vertex count */
 	       4);
       OUT_BATCH(3);  /* vertex count per instance */
-      OUT_BATCH(0); /* start vertex offset */
-      OUT_BATCH(1); /* single instance */
+      OUT_BATCH(vb_index); /* start vertex offset */
+      OUT_BATCH(1); /* single instance - mbz in docs */
       OUT_BATCH(0); /* start instance location */
       OUT_BATCH(0); /* index buffer offset, ignored */
       ADVANCE_BATCH();
     }
+
+    vb_index = i;
+
 #ifdef I830DEBUG
     ErrorF("sync after 3dprimitive");
     I830Sync(pScrn);
 #endif
     /* we must be sure that the pipeline is flushed before next exa draw,
        because that will be new state, binding state and instructions*/
+    /* Mark sync so we can wait for it before setting up the VB on the next
+     * rectangle.
+     */
+}
+
+void i965_done_composite(PixmapPtr pDst)
+{
+    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
+    I830Ptr pI830 = I830PTR(pScrn);
+
     {
 	BEGIN_BATCH(4);
    	OUT_BATCH(BRW_PIPE_CONTROL |
@@ -1183,8 +1263,41 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
 	ADVANCE_BATCH();
     }
 
-    /* Mark sync so we can wait for it before setting up the VB on the next
-     * rectangle.
-     */
-    i830MarkSync(pScrn);
+    if (pI830->use_ttm_batch) {
+	dri_bo_unmap(pI830->exa965->buf);
+	intel_batchbuffer_flush(pI830->batch);
+    } else {
+	I830Sync(pScrn);
+    }
+
+}
+
+static struct i965_exastate_buffer * /* Allocates a zeroed exastate record for pScrn and gives it its first buffer; returns NULL if the record cannot be allocated. */
+i965_exastate_alloc(ScrnInfoPtr pScrn)
+{
+    struct i965_exastate_buffer *state = calloc(1, sizeof(*state)); /* calloc takes (count, size) */
+    if (state == NULL) /* out of memory: report failure instead of dereferencing NULL */
+	return NULL;
+    state->pScrn = pScrn; /* must be set before the reset, which reads state->pScrn */
+    i965_exastate_reset(state); /* allocates, maps and initialises the first state buffer */
+    return state;
+}
+
+int /* Calls i965_init_state_offsets() with EXASTATE_SZ, then prepares the state storage for the active path; always returns 0. */
+i965_init_exa_state(ScrnInfoPtr pScrn)
+{
+    I830Ptr pI830 = I830PTR(pScrn);
+
+    i965_init_state_offsets(pScrn, EXASTATE_SZ);
+
+    if (pI830->use_ttm_batch) {
+
+	/* TTM batch path: state is kept in an exastate record. NOTE(review): the returned pointer is stored without a check. */
+	pI830->exa965 = i965_exastate_alloc(pScrn);
+    } else {
+	void *map = pI830->FbBase + pI830->exa_965_state->offset; /* otherwise the state sits at exa_965_state->offset from FbBase */
+	i965_init_state_objects(pScrn, map);
+    }
+
+    return 0; /* NOTE(review): no failure is ever reported to the caller */
 }
diff --git a/src/intel_batchbuffer.h b/src/intel_batchbuffer.h
index 4c51c5b..6784bdc 100644
--- a/src/intel_batchbuffer.h
+++ b/src/intel_batchbuffer.h
@@ -6,6 +6,7 @@
 struct intel_context;
 
 #define BATCH_SZ 16384
+#define EXASTATE_SZ 48000
 #define BATCH_RESERVED 16
 
 struct intel_batchbuffer
@@ -22,6 +23,13 @@ struct intel_batchbuffer
    uint32_t size;
 };
 
+struct i965_exastate_buffer { /* bookkeeping for the i965 EXA state buffer used when use_ttm_batch is set */
+   dri_bo *buf; /* buffer object allocated by i965_exastate_reset() */
+   dri_fence *last_fence; /* NOTE(review): not referenced by any code shown in this patch */
+   ScrnInfoPtr pScrn; /* screen this record belongs to; set by i965_exastate_alloc() */
+   unsigned char *map; /* copy of buf->virtual, taken after dri_bo_map() */
+};
+
 struct intel_batchbuffer *intel_batchbuffer_alloc(ScrnInfoPtr pScrn);
 
 void intel_batchbuffer_free(struct intel_batchbuffer *batch);
@@ -109,7 +117,7 @@ extern Bool intel_batchbuffer_emit_pixmap(PixmapPtr pPixmap, unsigned int flags,
         OUT_BATCH(tmp.ui);                      \
 } while(0)
 
-#define OUT_RELOC(buf, flags, delta) do { 				\
+#define OUT_RELOC(buf, flags, delta) do {	\
    intel_batchbuffer_emit_reloc(pI830->batch, buf, flags, delta);	\
 } while (0)
 


More information about the xorg-commit mailing list