xf86-video-intel: 4 commits - src/i830_batchbuffer.c src/i830_driver.c src/i830.h src/i830_uxa.c uxa/uxa-accel.c
Chris Wilson
ickle at kemper.freedesktop.org
Thu Apr 15 14:18:44 PDT 2010
src/i830.h | 13 +++-
src/i830_batchbuffer.c | 12 ++++
src/i830_driver.c | 1
src/i830_uxa.c | 135 +++++++++++++++++++++++++++++++------------------
uxa/uxa-accel.c | 51 +++++++++++++++---
5 files changed, 154 insertions(+), 58 deletions(-)
New commits:
commit c374c94e41d6e7d677334171e3255778d77cbe18
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Wed Mar 31 11:50:27 2010 +0100
uxa: Reuse in-flight bo
When we need to allocate a new bo for use as a gpu target, first check
if we can reuse a pixmap that has already been relocated into the
aperture as a temporary target, for instance a glyph mask or a clip mask.
Before:
backend test min(s) median(s) stddev.
xlib firefox-planet-gnome 50.568 50.873 0.30%
xcb firefox-planet-gnome 49.686 53.003 3.92%
xlib evolution 40.115 40.131 0.86%
xcb evolution 28.241 28.285 0.18%
After:
backend test min(s) median(s) stddev.
xlib firefox-planet-gnome 47.759 48.233 0.80%
xcb firefox-planet-gnome 48.611 48.657 0.87%
xlib evolution 38.954 38.991 0.05%
xcb evolution 26.561 26.654 0.19%
And even more dramatic improvements when using a font size larger than
the maximum size of the glyph cache:
xcb firefox-36-20090611: 1.79x speedup
xlib firefox-36-20090611: 1.74x speedup
xcb firefox-36-20090609: 1.62x speedup
xlib firefox-36-20090609: 1.59x speedup
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/i830.h b/src/i830.h
index 43c5887..88949f6 100644
--- a/src/i830.h
+++ b/src/i830.h
@@ -136,14 +136,22 @@ list_is_empty(struct list *head)
#define list_first_entry(ptr, type, member) \
list_entry((ptr)->next, type, member)
+#define list_foreach(pos, head) \
+ for (pos = (head)->next; pos != (head); pos = pos->next)
+
+#define list_foreach_entry(pos, type, head, member) \
+ for (pos = list_entry((head)->next, type, member);\
+ &pos->member != (head); \
+ pos = list_entry(pos->member.next, type, member))
+
struct intel_pixmap {
dri_bo *bo;
- uint32_t tiling;
+ uint32_t tiling, stride;
uint32_t flush_write_domain;
uint32_t flush_read_domains;
uint32_t batch_write_domain;
uint32_t batch_read_domains;
- struct list flush, batch;
+ struct list flush, batch, in_flight;
};
struct intel_pixmap *i830_get_pixmap_intel(PixmapPtr pixmap);
@@ -252,6 +260,7 @@ typedef struct intel_screen_private {
int batch_atomic_limit;
struct list batch_pixmaps;
struct list flush_pixmaps;
+ struct list in_flight;
/* For Xvideo */
Bool use_drmmode_overlay;
diff --git a/src/i830_batchbuffer.c b/src/i830_batchbuffer.c
index a02e980..492472e 100644
--- a/src/i830_batchbuffer.c
+++ b/src/i830_batchbuffer.c
@@ -195,6 +195,18 @@ void intel_batch_submit(ScrnInfoPtr scrn)
list_del(&entry->flush);
}
+ while (!list_is_empty(&intel->in_flight)) {
+ struct intel_pixmap *entry;
+
+ entry = list_first_entry(&intel->in_flight,
+ struct intel_pixmap,
+ in_flight);
+
+ dri_bo_unreference(entry->bo);
+ list_del(&entry->in_flight);
+ xfree(entry);
+ }
+
/* Save a ref to the last batch emitted, which we use for syncing
* in debug code.
*/
diff --git a/src/i830_driver.c b/src/i830_driver.c
index d0ce552..db8af06 100644
--- a/src/i830_driver.c
+++ b/src/i830_driver.c
@@ -1054,6 +1054,7 @@ void i830_init_bufmgr(ScrnInfoPtr scrn)
list_init(&intel->batch_pixmaps);
list_init(&intel->flush_pixmaps);
+ list_init(&intel->in_flight);
}
Bool i830_crtc_on(xf86CrtcPtr crtc)
diff --git a/src/i830_uxa.c b/src/i830_uxa.c
index f3f0f65..984069e 100644
--- a/src/i830_uxa.c
+++ b/src/i830_uxa.c
@@ -127,7 +127,7 @@ i830_uxa_pixmap_compute_size(PixmapPtr pixmap,
{
ScrnInfoPtr scrn = xf86Screens[pixmap->drawable.pScreen->myNum];
intel_screen_private *intel = intel_get_screen_private(scrn);
- int pitch_align;
+ int pitch, pitch_align;
int size;
if (*tiling != I915_TILING_NONE) {
@@ -151,6 +151,9 @@ i830_uxa_pixmap_compute_size(PixmapPtr pixmap,
}
}
+ pitch = (w * pixmap->drawable.bitsPerPixel + 7) / 8;
+ if (pitch <= 256)
+ *tiling = I915_TILING_NONE;
repeat:
if (*tiling == I915_TILING_NONE) {
pitch_align = intel->accel_pixmap_pitch_alignment;
@@ -158,8 +161,7 @@ i830_uxa_pixmap_compute_size(PixmapPtr pixmap,
pitch_align = 512;
}
- *stride = ROUND_TO((w * pixmap->drawable.bitsPerPixel + 7) / 8,
- pitch_align);
+ *stride = ROUND_TO(pitch, pitch_align);
if (*tiling == I915_TILING_NONE) {
/* Round the height up so that the GPU's access to a 2x2 aligned
@@ -548,17 +550,19 @@ dri_bo *i830_get_pixmap_bo(PixmapPtr pixmap)
void i830_set_pixmap_bo(PixmapPtr pixmap, dri_bo * bo)
{
+ ScrnInfoPtr scrn = xf86Screens[pixmap->drawable.pScreen->myNum];
+ intel_screen_private *intel = intel_get_screen_private(scrn);
struct intel_pixmap *priv;
priv = i830_get_pixmap_intel(pixmap);
if (priv != NULL) {
- dri_bo_unreference(priv->bo);
-
- priv->flush_read_domains = priv->flush_write_domain = 0;
- priv->batch_read_domains = priv->batch_write_domain = 0;
- list_del(&priv->batch);
- list_del(&priv->flush);
+ if (list_is_empty(&priv->batch)) {
+ dri_bo_unreference(priv->bo);
+ } else {
+ list_add(&priv->in_flight, &intel->in_flight);
+ priv = NULL;
+ }
}
if (bo != NULL) {
@@ -576,6 +580,7 @@ void i830_set_pixmap_bo(PixmapPtr pixmap, dri_bo * bo)
dri_bo_reference(bo);
priv->bo = bo;
+ priv->stride = i830_pixmap_pitch(pixmap);
ret = drm_intel_bo_get_tiling(bo,
&priv->tiling,
@@ -883,35 +888,26 @@ i830_uxa_create_pixmap(ScreenPtr screen, int w, int h, int depth,
if (w && h) {
struct intel_pixmap *priv;
- unsigned int size;
+ unsigned int size, tiling;
int stride;
- priv = xcalloc(1, sizeof (struct intel_pixmap));
- if (priv == NULL) {
- fbDestroyPixmap(pixmap);
- return NullPixmap;
- }
-
/* Always attempt to tile, compute_size() will remove the
* tiling for pixmaps that are either too large or too small
* to be effectively tiled.
*/
- priv->tiling = I915_TILING_X;
+ tiling = I915_TILING_X;
if (usage == INTEL_CREATE_PIXMAP_TILING_Y)
- priv->tiling = I915_TILING_Y;
+ tiling = I915_TILING_Y;
if (usage == UXA_CREATE_PIXMAP_FOR_MAP)
- priv->tiling = I915_TILING_NONE;
-
- if (priv->tiling != I915_TILING_NONE) {
- if (w < 256)
- priv->tiling = I915_TILING_NONE;
- if (h < 8)
- priv->tiling = I915_TILING_NONE;
- if (h < 32 && priv->tiling == I915_TILING_Y)
- priv->tiling = I915_TILING_X;
+ tiling = I915_TILING_NONE;
+
+ if (tiling != I915_TILING_NONE) {
+ if (h <= 4)
+ tiling = I915_TILING_NONE;
+ if (h <= 16 && tiling == I915_TILING_Y)
+ tiling = I915_TILING_X;
}
- size = i830_uxa_pixmap_compute_size(pixmap, w, h,
- &priv->tiling, &stride);
+ size = i830_uxa_pixmap_compute_size(pixmap, w, h, &tiling, &stride);
/* Fail very large allocations on 32-bit systems. Large BOs will
* tend to hit SW fallbacks frequently, and also will tend to fail
@@ -923,7 +919,37 @@ i830_uxa_create_pixmap(ScreenPtr screen, int w, int h, int depth,
*/
if (sizeof(unsigned long) == 4 &&
size > (unsigned int)(1024 * 1024 * 1024)) {
- xfree(priv);
+ fbDestroyPixmap(pixmap);
+ return NullPixmap;
+ }
+
+ /* Perform a premilinary search for an in-flight bo */
+ if (usage != UXA_CREATE_PIXMAP_FOR_MAP) {
+ int aligned_h;
+
+ if (tiling == I915_TILING_X)
+ aligned_h = ALIGN(h, 8);
+ else if (tiling == I915_TILING_Y)
+ aligned_h = ALIGN(h, 32);
+ else
+ aligned_h = ALIGN(h, 2);
+
+ list_foreach_entry(priv, struct intel_pixmap,
+ &intel->in_flight,
+ in_flight) {
+ if (priv->tiling == tiling &&
+ priv->stride >= stride &&
+ priv->bo->size >= priv->stride * aligned_h) {
+ list_del(&priv->in_flight);
+ screen->ModifyPixmapHeader(pixmap, w, h, 0, 0, priv->stride, NULL);
+ i830_uxa_set_pixmap_intel(pixmap, priv);
+ return pixmap;
+ }
+ }
+ }
+
+ priv = xcalloc(1, sizeof (struct intel_pixmap));
+ if (priv == NULL) {
fbDestroyPixmap(pixmap);
return NullPixmap;
}
@@ -941,6 +967,8 @@ i830_uxa_create_pixmap(ScreenPtr screen, int w, int h, int depth,
return NullPixmap;
}
+ priv->stride = stride;
+ priv->tiling = tiling;
if (priv->tiling != I915_TILING_NONE)
drm_intel_bo_set_tiling(priv->bo,
&priv->tiling,
commit 96aa7a236ac0605324a94f7b7d10413cb219f071
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Wed Apr 14 15:04:53 2010 +0100
i830: Allocate bo's for glyphs larger than 32x32.
As we only use the glyph cache for small glyphs, those larger than 32x32
will first be copied to a bo and used as a mask in a composite
operation. We can avoid the allocation and upload per use by allocating
a bo for the over-sized glyph from the start. As the glyph is large
anyway, the excess memory allocation is less significant.
Using normal font sizes, firefox shows no change - as expected. However,
using the 36 font size traces, we see around a 10% improvement on g45.
Before:
xcb firefox-36-20090609 127.333 127.897 0.22%
xcb firefox-36-20090611 87.456 88.624 0.66%
xcb firefox-20090601 19.522 20.194 1.69%
xlib firefox-36-20090609 201.054 201.780 0.18%
xlib firefox-36-20090611 133.468 133.717 0.09%
xlib firefox-20090601 23.740 23.975 0.49%
With large glyphs in bo:
xcb firefox-36-20090609 117.256 118.254 0.42%
xcb firefox-36-20090611 79.462 79.962 0.31%
xcb firefox-20090601 19.658 20.024 0.92%
xlib firefox-36-20090609 185.645 188.202 0.68%
xlib firefox-36-20090611 123.592 124.940 0.54%
xlib firefox-20090601 23.917 24.098 0.38%
Thanks to Owain G. Ainsworth for the suggestion!
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/i830_uxa.c b/src/i830_uxa.c
index 58ed491..f3f0f65 100644
--- a/src/i830_uxa.c
+++ b/src/i830_uxa.c
@@ -876,7 +876,7 @@ i830_uxa_create_pixmap(ScreenPtr screen, int w, int h, int depth,
if (w > 32767 || h > 32767)
return NullPixmap;
- if (usage == CREATE_PIXMAP_USAGE_GLYPH_PICTURE)
+ if (usage == CREATE_PIXMAP_USAGE_GLYPH_PICTURE && w <= 32 && h <= 32)
return fbCreatePixmap(screen, w, h, depth, usage);
pixmap = fbCreatePixmap(screen, 0, 0, depth, usage);
@@ -902,6 +902,14 @@ i830_uxa_create_pixmap(ScreenPtr screen, int w, int h, int depth,
if (usage == UXA_CREATE_PIXMAP_FOR_MAP)
priv->tiling = I915_TILING_NONE;
+ if (priv->tiling != I915_TILING_NONE) {
+ if (w < 256)
+ priv->tiling = I915_TILING_NONE;
+ if (h < 8)
+ priv->tiling = I915_TILING_NONE;
+ if (h < 32 && priv->tiling == I915_TILING_Y)
+ priv->tiling = I915_TILING_X;
+ }
size = i830_uxa_pixmap_compute_size(pixmap, w, h,
&priv->tiling, &stride);
commit 2d17bd50af367bead84985c22fdd43d264a5f072
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Wed Apr 14 14:54:34 2010 +0100
Revert "Revert "uxa: Try using put_image when copying from a memory buffer.""
This reverts commit 6d50553e8f70d8f2142efdfd6c90bc27a599d0bc.
Now that we have taught the fallback path not to infinitely recurse,
re-enable the accelerated path for ShmPutImage and friends.
diff --git a/uxa/uxa-accel.c b/uxa/uxa-accel.c
index cd3e477..4f7fd41 100644
--- a/uxa/uxa-accel.c
+++ b/uxa/uxa-accel.c
@@ -450,18 +450,19 @@ uxa_copy_n_to_n(DrawablePtr pSrcDrawable,
goto fallback;
}
- if (!uxa_pixmap_is_offscreen(pSrcPixmap) ||
- !uxa_pixmap_is_offscreen(pDstPixmap) ||
- !(*uxa_screen->info->prepare_copy) (pSrcPixmap, pDstPixmap,
+ if (!uxa_pixmap_is_offscreen(pDstPixmap))
+ goto fallback;
+
+ if (uxa_pixmap_is_offscreen(pSrcPixmap)) {
+ if (!(*uxa_screen->info->prepare_copy) (pSrcPixmap, pDstPixmap,
reverse ? -1 : 1,
upsidedown ? -1 : 1,
pGC ? pGC->alu : GXcopy,
pGC ? pGC->
- planemask : FB_ALLONES)) {
+ planemask : FB_ALLONES))
goto fallback;
- }
- while (nbox--) {
+ while (nbox--) {
(*uxa_screen->info->copy) (pDstPixmap,
pbox->x1 + dx + src_off_x,
pbox->y1 + dy + src_off_y,
@@ -470,9 +471,43 @@ uxa_copy_n_to_n(DrawablePtr pSrcDrawable,
pbox->x2 - pbox->x1,
pbox->y2 - pbox->y1);
pbox++;
- }
+ }
+
+ (*uxa_screen->info->done_copy) (pDstPixmap);
+ } else {
+ int stride, bpp;
+ char *src;
- (*uxa_screen->info->done_copy) (pDstPixmap);
+ if (!uxa_screen->info->put_image)
+ goto fallback;
+
+ /* Don't bother with under 8bpp, XYPixmaps. */
+ bpp = pSrcPixmap->drawable.bitsPerPixel;
+ if (bpp != pDstDrawable->bitsPerPixel || bpp < 8)
+ goto fallback;
+
+ /* Only accelerate copies: no rop or planemask. */
+ if (pGC && (!UXA_PM_IS_SOLID(pSrcDrawable, pGC->planemask) || pGC->alu != GXcopy))
+ goto fallback;
+
+ src = pSrcPixmap->devPrivate.ptr;
+ stride = pSrcPixmap->devKind;
+ bpp /= 8;
+ while (nbox--) {
+ if (!uxa_screen->info->put_image(pDstPixmap,
+ pbox->x1 + dst_off_x,
+ pbox->y1 + dst_off_y,
+ pbox->x2 - pbox->x1,
+ pbox->y2 - pbox->y1,
+ (char *) src +
+ (pbox->y1 + dy + src_off_y) * stride +
+ (pbox->x1 + dx + src_off_x) * bpp,
+ stride))
+ goto fallback;
+
+ pbox++;
+ }
+ }
return;
commit 1cc2c2c44ac72460cf1c4e6bdc13c612235809c9
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Wed Apr 14 11:18:25 2010 +0100
i830: Use pixman_blt directly for performing the in-memory copy
In order to avoid an infinite recursion after enabling CopyArea to use
the put_image acceleration to either stream a blit or to copy in-place,
we cannot call CopyArea from put_image for the fallback path. Instead,
we can simply call pixman_blt directly, which coincidentally is a tiny
bit faster.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/i830_uxa.c b/src/i830_uxa.c
index 09c2ef3..58ed491 100644
--- a/src/i830_uxa.c
+++ b/src/i830_uxa.c
@@ -767,7 +767,6 @@ static Bool i830_uxa_put_image(PixmapPtr pixmap,
ScrnInfoPtr scrn = xf86Screens[screen->myNum];
PixmapPtr scratch;
struct intel_pixmap *priv;
- Bool scratch_pixmap;
GCPtr gc;
Bool ret;
@@ -804,39 +803,43 @@ static Bool i830_uxa_put_image(PixmapPtr pixmap,
ret = i830_bo_put_image(scratch, bo, src, src_pitch, w, h);
drm_intel_gem_bo_unmap_gtt(bo);
- scratch_pixmap = FALSE;
- if (!ret) {
- (*screen->DestroyPixmap) (scratch);
- return FALSE;
+ if (ret) {
+ gc = GetScratchGC(pixmap->drawable.depth, screen);
+ if (gc) {
+ ValidateGC(&pixmap->drawable, gc);
+
+ (*gc->ops->CopyArea)(&scratch->drawable,
+ &pixmap->drawable,
+ gc, 0, 0, w, h, x, y);
+
+ FreeScratchGC(gc);
+ } else
+ ret = FALSE;
}
+
+ (*screen->DestroyPixmap)(scratch);
} else {
/* bo is not busy so can be mapped without a stall, upload in-place. */
- scratch = GetScratchPixmapHeader(screen, w, h,
- pixmap->drawable.depth,
- pixmap->drawable.bitsPerPixel,
- src_pitch, src);
- scratch_pixmap = TRUE;
- }
-
- ret = FALSE;
- gc = GetScratchGC(pixmap->drawable.depth, screen);
- if (gc) {
- ValidateGC(&pixmap->drawable, gc);
+ if (drm_intel_gem_bo_map_gtt(priv->bo)) {
+ xf86DrvMsg(scrn->scrnIndex, X_WARNING,
+ "%s: bo map failed\n", __FUNCTION__);
+ return FALSE;
+ }
- (*gc->ops->CopyArea)(&scratch->drawable,
- &pixmap->drawable,
- gc, 0, 0, w, h, x, y);
+ pixman_blt((uint32_t *)src, priv->bo->virtual,
+ src_pitch / sizeof(uint32_t),
+ pixmap->devKind / sizeof(uint32_t),
+ pixmap->drawable.bitsPerPixel,
+ pixmap->drawable.bitsPerPixel,
+ 0, 0,
+ x, y,
+ w, h);
- FreeScratchGC(gc);
+ drm_intel_gem_bo_unmap_gtt(priv->bo);
ret = TRUE;
}
- if (scratch_pixmap)
- FreeScratchPixmapHeader(scratch);
- else
- (*screen->DestroyPixmap)(scratch);
-
return ret;
}
More information about the xorg-commit
mailing list