[cairo] Optimize spans in the trapezoid rasterizer
Billy Biggs
vektor at dumbterm.net
Sun Jul 24 20:14:49 PDT 2005
The attached patch modifies the 8-bit alpha trapezoid rasterizer in
libpixman's fbedge.c to specifically handle opaque spans in the middle
of a trapezoid. It significantly speeds up large trapezoids without
slowing down small ones. The code also allows for optimized
implementations of a saturated 8-bit add (MMX paddusb) and memset-to-255
to be easily dropped in.
If accepted, this patch should probably also get applied to the copy
in Xorg.
-Billy
-------------- next part --------------
Index: src/fbedge.c
===================================================================
RCS file: /cvs/cairo/libpixman/src/fbedge.c,v
retrieving revision 1.2
diff -p -u -r1.2 fbedge.c
--- src/fbedge.c 21 Jan 2005 18:26:28 -0000 1.2
+++ src/fbedge.c 25 Jul 2005 02:18:25 -0000
@@ -27,31 +27,6 @@
#ifdef RENDER
/*
- * 8 bit alpha
- */
-
-#define N_BITS 8
-#define rasterizeEdges fbRasterizeEdges8
-
-#define DefineAlpha(line,x) \
- CARD8 *__ap = (CARD8 *) line + (x)
-
-#define StepAlpha __ap++
-
-#define AddAlpha(a) { \
- CARD16 __a = a + *__ap; \
- *__ap = ((CARD8) ((__a) | (0 - ((__a) >> 8)))); \
-}
-
-#include "fbedgeimp.h"
-
-#undef AddAlpha
-#undef StepAlpha
-#undef DefineAlpha
-#undef rasterizeEdges
-#undef N_BITS
-
-/*
* 4 bit alpha
*/
@@ -100,6 +75,220 @@
#undef rasterizeEdges
#undef N_BITS
+/*
+ * 8 bit alpha
+ */
+
+/*
+ * Qualifier for the small helpers below that must always be inlined.
+ *
+ * Only always_inline is requested.  INLINE is also applied to void
+ * helpers whose whole purpose is to store through a pointer argument;
+ * GCC's "const" attribute promises that a function has no observable
+ * effect other than its return value, so combining it with those
+ * helpers would allow the compiler to discard the calls.
+ */
+#if defined (__GNUC__) && !defined (NO_INLINES)
+#define INLINE inline __attribute__ ((always_inline))
+#else
+#define INLINE
+#endif
+
+/* Saturate x at 255; any value of 255 or less is returned as-is
+ * (converted to CARD8). */
+static INLINE CARD8
+clip255 (int x)
+{
+    return (x > 255) ? 255 : x;
+}
+
+/* Add value to each of the length bytes starting at buf, clamping
+ * every sum at 255 via clip255. */
+static INLINE void
+add_saturate_8(CARD8 *buf, int value, int length)
+{
+    for (; length != 0; length--)
+    {
+        int sum = *buf + value;
+
+        *buf++ = clip255 (sum);
+    }
+}
+
+/* Store 255 into each of the length bytes starting at buf. */
+static INLINE void
+memset_255(CARD8 *buf, int length)
+{
+    for (; length != 0; length--)
+    {
+        *buf = 255;
+        buf++;
+    }
+}
+
+/*
+ * Accumulate coverage for the area between edges l and r into an 8-bit
+ * alpha buffer, one sub-pixel scanline at a time, from y = t until y == b.
+ *
+ * We want to detect the case where we add the same value to a long
+ * span of pixels. The triangles on the end are filled in while we
+ * count how many sub-pixel scanlines contribute to the middle section
+ * (the count is kept in fill_size).
+ *
+ *              +--------------------------+
+ * fill_size =  |   \                      /
+ *              +----+------------------+
+ *                   |==================|
+ *               fill_start         fill_end
+ *
+ * The pixels under the left and right edges are updated immediately on
+ * every sub-scanline.  An interior span longer than 4 pixels is not
+ * written right away: it is remembered as [fill_start, fill_end) and
+ * written out when a later span no longer matches it, when the loop
+ * moves on to the next buffer row, or when y reaches b.
+ *
+ * buf     start of the buffer; the row for y begins at
+ *         buf + xFixedToInt (y) * stride, so stride is counted in
+ *         FbBits units.  Within a row, pixels are addressed as CARD8.
+ * width   right clip limit: a right edge whose integer x is >= width
+ *         is clamped to IntToxFixed (width).
+ * stride  distance between consecutive rows, in FbBits units.
+ * l, r    left and right edges; l->x and r->x are read on every
+ *         sub-scanline and both edges are then passed to
+ *         RenderEdgeStepSmall or RenderEdgeStepBig.
+ * t, b    first and last y values processed.
+ *         NOTE(review): the loop only exits when y == b exactly, so
+ *         callers presumably pass t and b aligned to the sample grid
+ *         with t <= b -- confirm against the caller.
+ */
+static void
+fbRasterizeEdges8 (FbBits *buf,
+ int width,
+ int stride,
+ RenderEdge *l,
+ RenderEdge *r,
+ xFixed t,
+ xFixed b)
+{
+ xFixed y = t;
+ FbBits *line;
+ int fill_start = -1, fill_end = -1; /* deferred span [fill_start, fill_end); fill_start < 0 means none */
+ int fill_size = 0; /* number of sub-scanlines folded into the deferred span */
+
+ line = buf + xFixedToInt (y) * stride;
+
+ for (;;)
+ {
+ CARD8 *ap = (CARD8 *) line; /* current row, one byte per pixel */
+ xFixed lx, rx;
+ int lxi, rxi;
+
+ /* clip X: a negative left edge is clamped to 0, and a right edge
+ * at or beyond width is clamped to IntToxFixed (width) */
+ lx = l->x;
+ if (lx < 0)
+ lx = 0;
+ rx = r->x;
+ if (xFixedToInt (rx) >= width)
+ rx = IntToxFixed (width);
+
+ /* Skip empty (or backwards) sections */
+ if (rx > lx)
+ {
+ int lxs, rxs;
+
+ /* Find pixel bounds for span. */
+ lxi = xFixedToInt (lx);
+ rxi = xFixedToInt (rx);
+
+ /* Sample coverage for edge pixels */
+ lxs = RenderSamplesX (lx, 8);
+ rxs = RenderSamplesX (rx, 8);
+
+ /* Add coverage across row */
+ if (lxi == rxi) /* both edges fall in the same pixel */
+ {
+ ap[lxi] = clip255 (ap[lxi] + rxs - lxs);
+ }
+ else
+ {
+ ap[lxi] = clip255 (ap[lxi] + N_X_FRAC(8) - lxs); /* partial coverage of the left edge pixel */
+
+ /* Move forward so that lxi/rxi is the pixel span */
+ lxi++;
+
+ /* Don't bother trying to optimize the fill unless
+ * the span is longer than 4 pixels. */
+ if (rxi - lxi > 4)
+ {
+ if (fill_start < 0) /* nothing deferred yet: start tracking this span */
+ {
+ fill_start = lxi;
+ fill_end = rxi;
+ fill_size++;
+ }
+ else
+ {
+ if (lxi >= fill_end || rxi < fill_start)
+ {
+ /* We're beyond what we saved, just fill it: the
+ * deferred span is written with its accumulated
+ * coverage, then the new span becomes the
+ * deferred one with a count of 1. */
+ add_saturate_8 (ap + fill_start,
+ fill_size * N_X_FRAC(8),
+ fill_end - fill_start);
+ fill_start = lxi;
+ fill_end = rxi;
+ fill_size = 1;
+ }
+ else
+ {
+ /* Update fill_start: when the new span starts later,
+ * the pixels dropped from the left of the deferred
+ * span receive the coverage accumulated so far; when
+ * it starts earlier, the extra pixels receive only
+ * this sub-scanline's coverage. */
+ if (lxi > fill_start)
+ {
+ add_saturate_8 (ap + fill_start,
+ fill_size * N_X_FRAC(8),
+ lxi - fill_start);
+ fill_start = lxi;
+ }
+ else if (lxi < fill_start)
+ {
+ add_saturate_8 (ap + lxi, N_X_FRAC(8),
+ fill_start - lxi);
+ }
+
+ /* Update fill_end: same treatment for the right side
+ * of the deferred span. */
+ if (rxi < fill_end)
+ {
+ add_saturate_8 (ap + rxi,
+ fill_size * N_X_FRAC(8),
+ fill_end - rxi);
+ fill_end = rxi;
+ }
+ else if (fill_end < rxi)
+ {
+ add_saturate_8 (ap + fill_end,
+ N_X_FRAC(8),
+ rxi - fill_end);
+ }
+ fill_size++; /* this sub-scanline now counts toward the deferred span */
+ }
+ }
+ }
+ else /* short span: add this sub-scanline's coverage directly */
+ {
+ add_saturate_8 (ap + lxi, N_X_FRAC(8), rxi - lxi);
+ }
+
+ /* Do not add in a 0 alpha here. This check is
+ * necessary to avoid a buffer overrun, (when rx
+ * is exactly on a pixel boundary). */
+ if (rxs)
+ ap[rxi] = clip255 (ap[rxi] + rxs);
+ }
+ }
+
+ if (y == b) {
+ /* We're done, make sure we clean up any remaining fill.
+ * NOTE(review): fill_size == N_Y_FRAC(8) presumably means every
+ * sample row of the pixel row contributed, i.e. full coverage,
+ * which is why 255 is stored directly -- confirm against the
+ * N_Y_FRAC definition. */
+ if (fill_start != fill_end) {
+ if (fill_size == N_Y_FRAC(8))
+ {
+ memset_255 (ap + fill_start, fill_end - fill_start);
+ }
+ else
+ {
+ add_saturate_8 (ap + fill_start, fill_size * N_X_FRAC(8),
+ fill_end - fill_start);
+ }
+ }
+ break;
+ }
+
+ if (xFixedFrac (y) != Y_FRAC_LAST(8)) /* small step: stay on the same buffer row */
+ {
+ RenderEdgeStepSmall (l);
+ RenderEdgeStepSmall (r);
+ y += STEP_Y_SMALL(8);
+ }
+ else /* big step: flush the deferred span, then move to the next buffer row */
+ {
+ RenderEdgeStepBig (l);
+ RenderEdgeStepBig (r);
+ y += STEP_Y_BIG(8);
+ if (fill_start != fill_end)
+ {
+ if (fill_size == N_Y_FRAC(8))
+ {
+ memset_255 (ap + fill_start, fill_end - fill_start);
+ }
+ else
+ {
+ add_saturate_8 (ap + fill_start, fill_size * N_X_FRAC(8),
+ fill_end - fill_start);
+ }
+ fill_start = fill_end = -1; /* nothing is deferred for the next row */
+ fill_size = 0;
+ }
+ line += stride;
+ }
+ }
+}
+
void
fbRasterizeEdges (FbBits *buf,
int bpp,
More information about the cairo
mailing list