[Pixman] [PATCH] Faster C variant of over_n_8_8888 fast path
Siarhei Siamashka
siarhei.siamashka at gmail.com
Sat Sep 11 05:31:10 PDT 2010
From: Siarhei Siamashka <siarhei.siamashka at nokia.com>
The main loop is split into handling 3 cases:
- opaque source
- translucent source without additive blending
- translucent source with additive blending
When using a normal premultiplied alpha format (for example, after
converting to it from a non-premultiplied format), it is impossible to
have any color component with a higher value than alpha. If any of the
R, G or B color components is increased artificially and becomes
higher than alpha, the blending operation becomes additive.
The distinction between additive and non-additive blending becomes
important if the target CPU does not have special instructions for
saturated addition. Non-additive blending does not require saturation,
so ordinary addition can be used instead, improving performance.
With this patch, the over_n_8_8888 operation becomes ~1.5x faster in
the cases where non-additive alpha blending is used (this includes,
for example, the case of an opaque source with a translucent mask).
Performance of the inner loop goes up from ~77MPix/s to ~122MPix/s
on Core i7 860 with gcc 4.4 as tested using a simple microbenchmark.
---
pixman/pixman-fast-path.c | 113 +++++++++++++++++++++++++++++++++------------
1 files changed, 84 insertions(+), 29 deletions(-)
diff --git a/pixman/pixman-fast-path.c b/pixman/pixman-fast-path.c
index f03752f..1cb0796 100644
--- a/pixman/pixman-fast-path.c
+++ b/pixman/pixman-fast-path.c
@@ -290,6 +290,68 @@ fast_composite_in_8_8 (pixman_implementation_t *imp,
}
}
+/* Variant of 'over' that takes a faster path for non-additive blending on
+ * platforms lacking saturated-add instructions. 'additive_blending' selects
+ * the single-macro path; otherwise dest is scaled and src is added with '+'. */
+static force_inline uint32_t
+over_a (uint32_t src, uint32_t dest, pixman_bool_t additive_blending)
+{
+ uint32_t a = ~src >> 24; /* inverted top (alpha) byte of src, i.e. 255 - alpha */
+ if (additive_blending)
+ {
+ UN8x4_MUL_UN8_ADD_UN8x4 (dest, a, src); /* presumably dest = dest * a + src with saturation -- confirm against the macro */
+ return dest;
+ }
+ else
+ {
+ UN8x4_MUL_UN8 (dest, a); /* presumably scales each dest channel by a, in place -- confirm against the macro */
+ return dest + src; /* plain 32-bit add: only valid when no channel can overflow (caller's responsibility) */
+ }
+}
+
+static force_inline void
+fast_composite_over_n_8_8888_mainloop (uint32_t * dst_line,
+ int dst_stride,
+ uint8_t * mask_line,
+ int mask_stride,
+ int width,
+ int height,
+ uint32_t src,
+ pixman_bool_t opaque_src,
+ pixman_bool_t additive_blending)
+{
+ uint32_t *dst, d;
+ uint8_t *mask, m;
+ int32_t w;
+ /* Blend the solid 'src' onto 'height' rows of 'width' pixels, gated per pixel by an 8-bit mask. */
+ while (height--)
+ {
+ dst = dst_line;
+ dst_line += dst_stride; /* stride is counted in uint32_t elements (pointer arithmetic) */
+ mask = mask_line;
+ mask_line += mask_stride; /* stride is counted in uint8_t elements */
+ w = width;
+ /* Per-pixel pass over the current row. */
+ while (w--)
+ {
+ m = *mask++;
+ if (m == 0xff) /* full mask: src is used unscaled */
+ {
+ if (opaque_src)
+ *dst = src; /* caller-asserted opaque src: plain overwrite, dest is not read */
+ else
+ *dst = over_a (src, *dst, additive_blending);
+ }
+ else if (m) /* partial mask; m == 0 leaves the destination pixel untouched */
+ {
+ d = in (src, m); /* presumably src scaled by m -- 'in' is defined outside this patch, confirm */
+ *dst = over_a (d, *dst, additive_blending);
+ }
+ dst++;
+ }
+ }
+}
+
static void
fast_composite_over_n_8_8888 (pixman_implementation_t *imp,
pixman_op_t op,
@@ -305,46 +367,39 @@ fast_composite_over_n_8_8888 (pixman_implementation_t *imp,
int32_t width,
int32_t height)
{
- uint32_t src, srca;
- uint32_t *dst_line, *dst, d;
- uint8_t *mask_line, *mask, m;
+ uint32_t src;
+ uint32_t *dst_line;
+ uint8_t *mask_line;
int dst_stride, mask_stride;
- int32_t w;
src = _pixman_image_get_solid (src_image, dst_image->bits.format);
- srca = src >> 24;
if (src == 0)
return;
PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
- while (height--)
+ if (src >> 24 == 0xff)
{
- dst = dst_line;
- dst_line += dst_stride;
- mask = mask_line;
- mask_line += mask_stride;
- w = width;
-
- while (w--)
- {
- m = *mask++;
- if (m == 0xff)
- {
- if (srca == 0xff)
- *dst = src;
- else
- *dst = over (src, *dst);
- }
- else if (m)
- {
- d = in (src, m);
- *dst = over (d, *dst);
- }
- dst++;
- }
+ /* opaque source, and as a result also non-additive blending */
+ fast_composite_over_n_8_8888_mainloop (dst_line, dst_stride, mask_line,
+ mask_stride, width, height, src,
+ TRUE, FALSE);
+ }
+ else if (src >= (src << 8) && src >= (src << 16) && src >= (src << 24))
+ {
+ /* non-additive blending */
+ fast_composite_over_n_8_8888_mainloop (dst_line, dst_stride, mask_line,
+ mask_stride, width, height, src,
+ FALSE, FALSE);
+ }
+ else
+ {
+ /* additive blending (the slowest variant, hopefully very uncommon) */
+ fast_composite_over_n_8_8888_mainloop (dst_line, dst_stride, mask_line,
+ mask_stride, width, height, src,
+ FALSE, TRUE);
}
}
--
1.7.2.2
More information about the Pixman
mailing list