[cairo-commit] 2 commits - perf/cairo-perf.c perf/cairo-perf.h
perf/Makefile.am perf/unaligned-clip.c pixman/src
Carl Worth
cworth at kemper.freedesktop.org
Tue Jan 9 15:47:30 PST 2007
perf/Makefile.am | 9 +
perf/cairo-perf.c | 1
perf/cairo-perf.h | 1
perf/unaligned-clip.c | 66 ++++++++++++++
pixman/src/fbmmx.c | 226 ++++++++++++++++++++++++++++++++++++++++++++++++++
pixman/src/fbmmx.h | 43 +++++++++
pixman/src/fbpict.c | 98 +++++++++++++++++++++
7 files changed, 439 insertions(+), 5 deletions(-)
New commits:
diff-tree cf1d95e714d0814e52910c4306d090ef6d989093 (from d5531c4f506caa9ad66fbeef1822a7036d4dd528)
Author: Soeren Sandmann <sandmann at daimi.au.dk>
Date: Tue Jan 9 15:05:29 2007 -0800
Add SRC and IN implementations to avoid CompositeGeneral in some cases hit by PDF rendering
The patch implements a few more operations with special-case MMX
code. On my laptop, applying the patch to cairo speeds up the
benchmark (rendering page 14 of a PDF file[*]) from 20.9 seconds
to 14.9 seconds, which is an improvement of 28.6%.
[*] http://people.redhat.com/jakub/prelink.pdf
This also benefits the recently added unaligned_clip perf case:
image-rgb unaligned_clip-100 0.11 -> 0.06: 1.65x speedup
▋
image-rgba unaligned_clip-100 0.11 -> 0.06: 1.64x speedup
▋
diff --git a/pixman/src/fbmmx.c b/pixman/src/fbmmx.c
index a99168c..f6f512f 100644
--- a/pixman/src/fbmmx.c
+++ b/pixman/src/fbmmx.c
@@ -2135,6 +2135,232 @@ fbCompositeSolidMask_nx8888x0565Cmmx (pi
}
void
+fbCompositeIn_nx8x8mmx (pixman_operator_t op,
+ PicturePtr pSrc,
+ PicturePtr pMask,
+ PicturePtr pDst,
+ INT16 xSrc,
+ INT16 ySrc,
+ INT16 xMask,
+ INT16 yMask,
+ INT16 xDst,
+ INT16 yDst,
+ CARD16 width,
+ CARD16 height)
+{
+ CARD8 *dstLine, *dst;
+ CARD8 *maskLine, *mask;
+ FbStride dstStride, maskStride;
+ CARD16 w;
+ CARD32 src;
+ CARD8 sa;
+ __m64 vsrc, vsrca;
+
+ fbComposeGetStart (pDst, xDst, yDst, CARD8, dstStride, dstLine, 1);
+ fbComposeGetStart (pMask, xMask, yMask, CARD8, maskStride, maskLine, 1);
+
+ fbComposeGetSolid(pSrc, pDst, src);
+
+ sa = src >> 24;
+ if (sa == 0)
+ return;
+
+ vsrc = load8888(src);
+ vsrca = expand_alpha(vsrc);
+
+ while (height--)
+ {
+ dst = dstLine;
+ dstLine += dstStride;
+ mask = maskLine;
+ maskLine += maskStride;
+ w = width;
+
+ if ((((unsigned long)pDst & 3) == 0) &&
+ (((unsigned long)pSrc & 3) == 0))
+ {
+ while (w >= 4)
+ {
+ CARD32 m;
+ __m64 vmask;
+ __m64 vdest;
+
+ m = 0;
+
+ vmask = load8888 (*(CARD32 *)mask);
+ vdest = load8888 (*(CARD32 *)dst);
+
+ *(CARD32 *)dst = store8888 (in (in (vsrca, vmask), vdest));
+
+ dst += 4;
+ mask += 4;
+ w -= 4;
+ }
+ }
+
+ while (w--)
+ {
+ CARD16 tmp;
+ CARD8 a;
+ CARD32 m, d;
+ CARD32 r;
+
+ a = *mask++;
+ d = *dst;
+
+ m = FbInU (sa, 0, a, tmp);
+ r = FbInU (m, 0, d, tmp);
+
+ *dst++ = r;
+ }
+ }
+
+ _mm_empty();
+}
+
+void
+fbCompositeIn_8x8mmx (pixman_operator_t op,
+ PicturePtr pSrc,
+ PicturePtr pMask,
+ PicturePtr pDst,
+ INT16 xSrc,
+ INT16 ySrc,
+ INT16 xMask,
+ INT16 yMask,
+ INT16 xDst,
+ INT16 yDst,
+ CARD16 width,
+ CARD16 height)
+{
+ CARD8 *dstLine, *dst;
+ CARD8 *srcLine, *src;
+ FbStride srcStride, dstStride;
+ CARD16 w;
+
+ fbComposeGetStart (pDst, xDst, yDst, CARD8, dstStride, dstLine, 1);
+ fbComposeGetStart (pSrc, xSrc, ySrc, CARD8, srcStride, srcLine, 1);
+
+ while (height--)
+ {
+ dst = dstLine;
+ dstLine += dstStride;
+ src = srcLine;
+ srcLine += srcStride;
+ w = width;
+
+ if ((((unsigned long)pDst & 3) == 0) &&
+ (((unsigned long)pSrc & 3) == 0))
+ {
+ while (w >= 4)
+ {
+ CARD32 *s = (CARD32 *)src;
+ CARD32 *d = (CARD32 *)dst;
+
+ *d = store8888 (in (load8888 (*s), load8888 (*d)));
+
+ w -= 4;
+ dst += 4;
+ src += 4;
+ }
+ }
+
+ while (w--)
+ {
+ CARD8 s, d;
+ CARD16 tmp;
+
+ s = *src;
+ d = *dst;
+
+ *dst = FbInU (s, 0, d, tmp);
+
+ src++;
+ dst++;
+ }
+ }
+
+ _mm_empty ();
+}
+
+void
+fbCompositeSrcAdd_8888x8x8mmx (pixman_operator_t op,
+ PicturePtr pSrc,
+ PicturePtr pMask,
+ PicturePtr pDst,
+ INT16 xSrc,
+ INT16 ySrc,
+ INT16 xMask,
+ INT16 yMask,
+ INT16 xDst,
+ INT16 yDst,
+ CARD16 width,
+ CARD16 height)
+{
+ CARD8 *dstLine, *dst;
+ CARD8 *maskLine, *mask;
+ FbStride dstStride, maskStride;
+ CARD16 w;
+ CARD32 src;
+ CARD8 sa;
+ __m64 vsrc, vsrca;
+
+ fbComposeGetStart (pDst, xDst, yDst, CARD8, dstStride, dstLine, 1);
+ fbComposeGetStart (pMask, xMask, yMask, CARD8, maskStride, maskLine, 1);
+
+ fbComposeGetSolid(pSrc, pDst, src);
+
+ sa = src >> 24;
+ if (sa == 0)
+ return;
+
+ vsrc = load8888(src);
+ vsrca = expand_alpha(vsrc);
+
+ while (height--)
+ {
+ dst = dstLine;
+ dstLine += dstStride;
+ mask = maskLine;
+ maskLine += maskStride;
+ w = width;
+
+ if ((((unsigned long)pMask & 3) == 0) &&
+ (((unsigned long)pDst & 3) == 0))
+ {
+ while (w >= 4)
+ {
+ __m64 vmask = load8888 (*(CARD32 *)mask);
+ __m64 vdest = load8888 (*(CARD32 *)dst);
+
+ *(CARD32 *)dst = store8888 (_mm_adds_pu8 (in (vsrca, vmask), vdest));
+
+ w -= 4;
+ dst += 4;
+ mask += 4;
+ }
+ }
+
+ while (w--)
+ {
+ CARD16 tmp;
+ CARD16 a;
+ CARD32 m, d;
+ CARD32 r;
+
+ a = *mask++;
+ d = *dst;
+
+ m = FbInU (sa, 0, a, tmp);
+ r = FbAdd (m, d, 0, tmp);
+
+ *dst++ = r;
+ }
+ }
+
+ _mm_empty();
+}
+
+void
fbCompositeSrcAdd_8000x8000mmx (pixman_operator_t op,
PicturePtr pSrc,
PicturePtr pMask,
diff --git a/pixman/src/fbmmx.h b/pixman/src/fbmmx.h
index 531bcba..5c08180 100644
--- a/pixman/src/fbmmx.h
+++ b/pixman/src/fbmmx.h
@@ -45,6 +45,20 @@ pixman_private
void fbComposeSetupMMX(void);
pixman_private
+void fbCompositeIn_nx8x8mmx (pixman_operator_t op,
+ PicturePtr pSrc,
+ PicturePtr pMask,
+ PicturePtr pDst,
+ INT16 xSrc,
+ INT16 ySrc,
+ INT16 xMask,
+ INT16 yMask,
+ INT16 xDst,
+ INT16 yDst,
+ CARD16 width,
+ CARD16 height);
+
+pixman_private
void fbCompositeSolidMask_nx8888x0565Cmmx (pixman_operator_t op,
PicturePtr pSrc,
PicturePtr pMask,
@@ -109,6 +123,35 @@ void fbCompositeSolidMaskSrc_nx8x8888mmx
INT16 yDst,
CARD16 width,
CARD16 height);
+
+pixman_private
+void fbCompositeSrcAdd_8888x8x8mmx (pixman_operator_t op,
+ PicturePtr pSrc,
+ PicturePtr pMask,
+ PicturePtr pDst,
+ INT16 xSrc,
+ INT16 ySrc,
+ INT16 xMask,
+ INT16 yMask,
+ INT16 xDst,
+ INT16 yDst,
+ CARD16 width,
+ CARD16 height);
+
+pixman_private
+void fbCompositeIn_8x8mmx (pixman_operator_t op,
+ PicturePtr pSrc,
+ PicturePtr pMask,
+ PicturePtr pDst,
+ INT16 xSrc,
+ INT16 ySrc,
+ INT16 xMask,
+ INT16 yMask,
+ INT16 xDst,
+ INT16 yDst,
+ CARD16 width,
+ CARD16 height);
+
pixman_private
void fbCompositeSrcAdd_8000x8000mmx (pixman_operator_t op,
PicturePtr pSrc,
diff --git a/pixman/src/fbpict.c b/pixman/src/fbpict.c
index 0cdec3f..2019cbc 100644
--- a/pixman/src/fbpict.c
+++ b/pixman/src/fbpict.c
@@ -844,6 +844,58 @@ fbCompositeSrcAdd_8888x8888 (pixman_oper
}
static void
+fbCompositeSrcAdd_8888x8x8 (pixman_operator_t op,
+ PicturePtr pSrc,
+ PicturePtr pMask,
+ PicturePtr pDst,
+ INT16 xSrc,
+ INT16 ySrc,
+ INT16 xMask,
+ INT16 yMask,
+ INT16 xDst,
+ INT16 yDst,
+ CARD16 width,
+ CARD16 height)
+{
+ CARD8 *dstLine, *dst;
+ CARD8 *maskLine, *mask;
+ FbStride dstStride, maskStride;
+ CARD16 w;
+ CARD32 src;
+ CARD8 sa;
+
+ fbComposeGetStart (pDst, xDst, yDst, CARD8, dstStride, dstLine, 1);
+ fbComposeGetStart (pMask, xMask, yMask, CARD8, maskStride, maskLine, 1);
+ fbComposeGetSolid (pSrc, pDst, src);
+ sa = (src >> 24);
+
+ while (height--)
+ {
+ dst = dstLine;
+ dstLine += dstStride;
+ mask = maskLine;
+ maskLine += maskStride;
+ w = width;
+
+ while (w--)
+ {
+ CARD16 tmp;
+ CARD16 a;
+ CARD32 m, d;
+ CARD32 r;
+
+ a = *mask++;
+ d = *dst;
+
+ m = FbInU (sa, 0, a, tmp);
+ r = FbAdd (m, d, 0, tmp);
+
+ *dst++ = r;
+ }
+ }
+}
+
+static void
fbCompositeSrcAdd_1000x1000 (pixman_operator_t op,
PicturePtr pSrc,
PicturePtr pMask,
@@ -1759,6 +1811,26 @@ pixman_composite (pixman_operator_t op,
break;
}
}
+ else
+ {
+ if ((pSrc->format_code == PICT_a8r8g8b8 ||
+ pSrc->format_code == PICT_a8b8g8r8) &&
+ srcRepeat &&
+ pMask->format_code == PICT_a8 &&
+ pDst->format_code == PICT_a8)
+ {
+#ifdef USE_MMX
+ if (fbHaveMMX())
+ {
+ srcRepeat = FALSE;
+
+ func = fbCompositeSrcAdd_8888x8x8mmx;
+ }
+ else
+#endif
+ func = fbCompositeSrcAdd_8888x8x8;
+ }
+ }
break;
case PIXMAN_OPERATOR_SRC:
if (pMask)
@@ -1798,10 +1870,34 @@ pixman_composite (pixman_operator_t op,
}
}
break;
+ case PIXMAN_OPERATOR_IN:
+#ifdef USE_MMX
+ if (pSrc->format_code == PICT_a8 &&
+ pDst->format_code == PICT_a8 &&
+ !pMask)
+ {
+ if (fbHaveMMX())
+ func = fbCompositeIn_8x8mmx;
+ }
+ else if (srcRepeat && pMask && !pMask->componentAlpha &&
+ (pSrc->format_code == PICT_a8r8g8b8 ||
+ pSrc->format_code == PICT_a8b8g8r8) &&
+ (pMask->format_code == PICT_a8) &&
+ pDst->format_code == PICT_a8)
+ {
+ if (fbHaveMMX())
+ {
+ srcRepeat = FALSE;
+ func = fbCompositeIn_nx8x8mmx;
+ }
+ }
+#else
+ func = NULL;
+#endif
+ break;
case PIXMAN_OPERATOR_CLEAR:
case PIXMAN_OPERATOR_DST:
case PIXMAN_OPERATOR_OVER_REVERSE:
- case PIXMAN_OPERATOR_IN:
case PIXMAN_OPERATOR_IN_REVERSE:
case PIXMAN_OPERATOR_OUT:
case PIXMAN_OPERATOR_OUT_REVERSE:
diff-tree d5531c4f506caa9ad66fbeef1822a7036d4dd528 (from 504cbdae37232d65f5c1f8913e55ac63397ad4f0)
Author: Carl Worth <cworth at cworth.org>
Date: Tue Jan 9 14:27:32 2007 -0800
Add unaligned_clip perf case courtesy of Jeff Muizelaar
Conflicts:
perf/Makefile.am
perf/cairo-perf.c
perf/cairo-perf.h
diff --git a/perf/Makefile.am b/perf/Makefile.am
index 0d5f244..1ed4c81 100644
--- a/perf/Makefile.am
+++ b/perf/Makefile.am
@@ -22,18 +22,19 @@ cairo_perf_SOURCES = \
box-outline.c \
fill.c \
long-lines.c \
+ mosaic.c \
+ mosaic.h \
paint.c \
+ pattern_create_radial.c \
stroke.c \
subimage_copy.c \
tessellate.c \
- pattern_create_radial.c \
+ unaligned-clip.c \
text.c \
world-map.c \
world-map.h \
zrusin.c \
- zrusin-another.h \
- mosaic.c \
- mosaic.h
+ zrusin-another.h
if CAIRO_HAS_WIN32_SURFACE
cairo_perf_SOURCES += cairo-perf-win32.c
diff --git a/perf/cairo-perf.c b/perf/cairo-perf.c
index 0708ed9..18cc652 100644
--- a/perf/cairo-perf.c
+++ b/perf/cairo-perf.c
@@ -328,5 +328,6 @@ cairo_perf_case_t perf_cases[] = {
{ box_outline, 100, 100},
{ mosaic, 800, 800 },
{ long_lines, 100, 100},
+ { unaligned_clip, 100, 100},
{ NULL }
};
diff --git a/perf/cairo-perf.h b/perf/cairo-perf.h
index 2bfd665..1ef9600 100644
--- a/perf/cairo-perf.h
+++ b/perf/cairo-perf.h
@@ -104,5 +104,6 @@ CAIRO_PERF_DECL (world_map);
CAIRO_PERF_DECL (box_outline);
CAIRO_PERF_DECL (mosaic);
CAIRO_PERF_DECL (long_lines);
+CAIRO_PERF_DECL (unaligned_clip);
#endif
diff --git a/perf/unaligned-clip.c b/perf/unaligned-clip.c
new file mode 100644
index 0000000..c7b9d21
--- /dev/null
+++ b/perf/unaligned-clip.c
@@ -0,0 +1,66 @@
+/*
+ * Copyright © 2006 Jeff Muizelaar <jeff at infidigm.net>
+ * Copyright © 2006 Red Hat, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors: Jeff Muizelaar <jeff at infidigm.net>
+ * Carl Worth <cworth at cworth.org>
+ */
+
+#include "cairo-perf.h"
+
+static cairo_perf_ticks_t
+do_unaligned_clip (cairo_t *cr, int width, int height)
+{
+ cairo_save (cr);
+
+ cairo_perf_timer_start ();
+ /* First a triangular clip that obviously isn't along device-pixel
+ * boundaries. */
+ cairo_move_to (cr, 50, 50);
+ cairo_line_to (cr, 50, 90);
+ cairo_line_to (cr, 90, 90);
+ cairo_close_path (cr);
+ cairo_clip (cr);
+
+ /* Then a rectangular clip that would be but for the non-integer
+ * scaling. */
+ cairo_scale (cr, 1.1, 1.1);
+ cairo_move_to (cr, 55, 55);
+ cairo_line_to (cr, 90, 55);
+ cairo_line_to (cr, 90, 90);
+ cairo_line_to (cr, 90, 55);
+ cairo_close_path (cr);
+
+ cairo_clip (cr);
+ cairo_perf_timer_stop ();
+
+ cairo_restore (cr);
+
+ return cairo_perf_timer_elapsed ();
+}
+
+void
+unaligned_clip (cairo_perf_t *perf, cairo_t *cr, int width, int height)
+{
+ cairo_perf_run (perf, "unaligned_clip", do_unaligned_clip);
+}
More information about the cairo-commit
mailing list