[Intel-gfx] [PATCH] i965: Add batch_flush_notify hook to create new vertex-buffer bo

Carl Worth cworth at cworth.org
Wed Dec 3 22:53:51 CET 2008


This avoids mapping a buffer object which is being referenced
by a batch that has already been flushed, (which is a terribly
expensive operation).

On my machine this brings the performance of x11perf -aa10text
from 85k back to 150k, (where it was before a recent kernel
upgrade). Also, before this patch, when I used my X server
actively performance would drop as low as 15k---hopefully that
bug is gone with this change.
---
 src/i830.h             |    7 +++++++
 src/i830_batchbuffer.c |    3 +++
 src/i830_driver.c      |    5 +++++
 src/i830_exa.c         |    4 ++--
 src/i965_render.c      |   39 +++++++++++++++++++++++++++++++++++++++
 5 files changed, 56 insertions(+), 2 deletions(-)

diff --git a/src/i830.h b/src/i830.h
index 14c371c..8e8c55f 100644
--- a/src/i830.h
+++ b/src/i830.h
@@ -531,6 +531,8 @@ typedef struct _I830Rec {
 #endif
    CloseScreenProcPtr CloseScreen;
 
+   void (*batch_flush_notify)(ScrnInfoPtr pScrn);
+
 #ifdef I830_USE_EXA
    ExaDriverPtr	EXADriverPtr;
 #endif
@@ -920,6 +922,11 @@ Bool i965_prepare_composite(int op, PicturePtr pSrc, PicturePtr pMask,
 			    PixmapPtr pMaskPixmap, PixmapPtr pDstPixmap);
 void i965_composite(PixmapPtr pDst, int srcX, int srcY,
 		    int maskX, int maskY, int dstX, int dstY, int w, int h);
+void
+i965_done_composite(PixmapPtr pDst);
+
+void
+i965_batch_flush_notify(ScrnInfoPtr pScrn);
 
 Bool
 i830_get_transformed_coordinates(int x, int y, PictTransformPtr transform,
diff --git a/src/i830_batchbuffer.c b/src/i830_batchbuffer.c
index a770616..072ac75 100644
--- a/src/i830_batchbuffer.c
+++ b/src/i830_batchbuffer.c
@@ -201,4 +201,7 @@ intel_batch_flush(ScrnInfoPtr pScrn, Bool flushed)
      */
     if (pI830->memory_manager != NULL)
 	pI830->need_mi_flush = TRUE;
+
+    if (pI830->batch_flush_notify)
+	pI830->batch_flush_notify (pScrn);
 }
diff --git a/src/i830_driver.c b/src/i830_driver.c
index 1407a22..544efd2 100644
--- a/src/i830_driver.c
+++ b/src/i830_driver.c
@@ -3358,6 +3358,11 @@ I830ScreenInit(int scrnIndex, ScreenPtr pScreen, int argc, char **argv)
       }
    }
 
+   if (IS_I965G(pI830))
+       pI830->batch_flush_notify = i965_batch_flush_notify;
+   else
+       pI830->batch_flush_notify = NULL;
+
    miInitializeBackingStore(pScreen);
    xf86SetBackingStore(pScreen);
    xf86SetSilkenMouse(pScreen);
diff --git a/src/i830_exa.c b/src/i830_exa.c
index 9fcda51..ded1fbd 100644
--- a/src/i830_exa.c
+++ b/src/i830_exa.c
@@ -348,7 +348,7 @@ I830EXADoneCopy(PixmapPtr pDstPixmap)
 /**
  * Do any cleanup from the Composite operation.
  *
- * This is shared between i830 through i965.
+ * This is shared between i830 through i945.
  */
 void
 i830_done_composite(PixmapPtr pDst)
@@ -686,7 +686,7 @@ I830EXAInit(ScreenPtr pScreen)
  	pI830->EXADriverPtr->CheckComposite = i965_check_composite;
  	pI830->EXADriverPtr->PrepareComposite = i965_prepare_composite;
  	pI830->EXADriverPtr->Composite = i965_composite;
- 	pI830->EXADriverPtr->DoneComposite = i830_done_composite;
+ 	pI830->EXADriverPtr->DoneComposite = i965_done_composite;
     }
 #if EXA_VERSION_MINOR >= 2
     if (!pI830->use_drm_mode)
diff --git a/src/i965_render.c b/src/i965_render.c
index 9863697..008a68c 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -494,6 +494,7 @@ typedef struct _gen4_static_state {
 typedef float gen4_vertex_buffer[VERTEX_BUFFER_SIZE];
 
 typedef struct gen4_composite_op {
+    Bool	active;
     int		op;
     PicturePtr	source_picture;
     PicturePtr	mask_picture;
@@ -1380,6 +1381,7 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
     struct gen4_render_state *render_state= pI830->gen4_render_state;
     gen4_composite_op *composite_op = &render_state->composite_op;
 
+    composite_op->active = TRUE;
     composite_op->op = op;
     composite_op->source_picture = pSrcPicture;
     composite_op->mask_picture = pMaskPicture;
@@ -1570,6 +1572,43 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY,
 #endif
 }
 
+void
+i965_done_composite(PixmapPtr pDst)
+{
+    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
+    I830Ptr pI830 = I830PTR(pScrn);
+    struct gen4_render_state *render_state = pI830->gen4_render_state;
+
+    render_state->composite_op.active = FALSE;
+
+#if ALWAYS_SYNC || ALWAYS_FLUSH
+#if ALWAYS_FLUSH
+    intel_batch_flush(pScrn, FALSE);
+#endif
+#if ALWAYS_SYNC
+    I830Sync(pScrn);
+#endif
+#endif
+}
+
+void
+i965_batch_flush_notify(ScrnInfoPtr pScrn)
+{
+    I830Ptr pI830 = I830PTR(pScrn);
+    struct gen4_render_state *render_state = pI830->gen4_render_state;
+
+    /* Once a batch is emitted, we never want to map again any buffer
+     * object being referenced by that batch, (which would be very
+     * expensive). */
+    dri_bo_unreference (render_state->vertex_buffer_bo);
+    render_state->vertex_buffer_bo = NULL;
+
+    if (render_state->composite_op.active)
+	render_state->vertex_buffer_bo = dri_bo_alloc (pI830->bufmgr, "vb",
+						       sizeof (gen4_vertex_buffer),
+						       4096);
+}
+
 /**
  * Called at EnterVT so we can set up our offsets into the state buffer.
  */
-- 
1.5.6.5




More information about the Intel-gfx mailing list