[Pixman] [PATCH 2/4] Added fast path for "pad" type repeats.
Ben Avison
bavison at riscosopen.org
Wed Feb 13 06:10:39 PST 2013
Similar in concept to fast_composite_tiled_repeat(), this breaks up any
unscaled composites, where source/mask areas outside the bitmap grid are
not clipped, into a series of simpler composites (either bitmap to bitmap
or solid to bitmap). These simpler composites are likely to match
existing fast path implementations, and so should benefit all platforms.
This produces some significant speedups for some cairo-perf-trace tests.
For example, timings on ARMv6 (using Siarhei's trimmed traces) are
Before:
[ # ] backend test min(s) median(s) stddev. count
[ # ] image: pixman 0.29.3
[ 0] image t-firefox-chalkboard 35.715 35.736 0.03% 6/6
After:
[ # ] backend test min(s) median(s) stddev. count
[ # ] image: pixman 0.29.3
[ 0] image t-firefox-chalkboard 9.254 9.261 0.15% 6/6
That's a speedup of 3.86x.
Also added a simple test program to check different repeat types.
---
.gitignore | 1 +
pixman/pixman-fast-path.c | 293 +++++++++++++++++++++++++++++++++++++++++++++
test/Makefile.sources | 1 +
test/repeat-test.c | 59 +++++++++
4 files changed, 354 insertions(+), 0 deletions(-)
create mode 100644 test/repeat-test.c
diff --git a/.gitignore b/.gitignore
index 648699b..308d4ba 100644
--- a/.gitignore
+++ b/.gitignore
@@ -70,6 +70,7 @@ test/region-contains-test
test/region-test
test/region-translate
test/region-translate-test
+test/repeat-test
test/rotate-test
test/scaling-crash-test
test/scaling-helpers-test
diff --git a/pixman/pixman-fast-path.c b/pixman/pixman-fast-path.c
index 247aea6..59673de 100644
--- a/pixman/pixman-fast-path.c
+++ b/pixman/pixman-fast-path.c
@@ -1367,6 +1367,289 @@ fast_composite_tiled_repeat (pixman_implementation_t *imp,
_pixman_image_fini (&extended_src_image);
}
+/*
+ * Fetch the single pixel at (x, y) of a bits image as a 32-bit value.
+ *
+ * a8r8g8b8 pixels are returned as stored, x8r8g8b8 pixels get their top
+ * byte forced to 0xFF, and a8 pixels are moved into the top byte.  Any
+ * other format is fetched through a one-pixel ITER_NARROW source iterator
+ * obtained from 'imp'.
+ *
+ * The caller must pass coordinates that lie inside the image; no bounds
+ * checking is done here.
+ */
+static uint32_t
+read_bits_sample (pixman_implementation_t *imp, pixman_image_t *image, uint32_t x, uint32_t y)
+{
+    /* Do the row offset in signed arithmetic: multiplying by the unsigned
+     * 'y' would turn a negative rowstride into a huge positive offset.
+     * NOTE(review): assumes rowstride is a signed count of uint32_t units
+     * -- confirm against the bits_image_t definition. */
+    uint32_t *line = image->bits.bits + (int32_t) y * image->bits.rowstride;
+    uint8_t *line8 = (uint8_t *) line;
+
+    switch (image->bits.format)
+    {
+    case PIXMAN_a8r8g8b8:
+        return line[x];
+
+    case PIXMAN_x8r8g8b8:
+        return line[x] | 0xFF000000;
+
+    case PIXMAN_a8:
+        /* Widen before shifting: a uint8_t promotes to signed int, and
+         * shifting a value >= 0x80 left by 24 would overflow it. */
+        return (uint32_t) line8[x] << 24;
+
+    default:
+        break;
+    }
+
+    /* Generic path: let the implementation's source iterator convert the
+     * pixel; 'result' is the one-pixel buffer handed to the iterator. */
+    {
+        uint32_t result;
+        pixman_iter_t iter;
+
+        _pixman_implementation_src_iter_init (
+            imp, &iter, image, x, y, 1, 1,
+            (uint8_t *) &result,
+            ITER_NARROW, image->common.flags);
+        return *iter.get_scanline (&iter, NULL);
+    }
+}
+
+/*
+ * Composite from an untransformed bits source with PAD repeat by cutting
+ * the requested rectangle into up to nine pieces and dispatching each one
+ * to a simpler composite routine:
+ *
+ *   - the part that overlaps the source bitmap is composited directly
+ *     from the bitmap (func);
+ *   - the four corners are composited from a temporary solid image whose
+ *     colour is the corresponding corner pixel (func_border);
+ *   - the bands above/below the bitmap re-composite the first/last source
+ *     row one destination row at a time (func);
+ *   - the bands left/right of the bitmap are composited one row at a time
+ *     from a solid image holding that row's first/last pixel (func_border).
+ *
+ * A mask is only handled here when it is itself an untransformed PAD
+ * repeat bits image with the same size and the same origin as the source;
+ * it is then split exactly like the source.  Any other mask is handed to
+ * the wildcard entry found further down the fallback chain.
+ */
+static void
+fast_composite_pad_repeat (pixman_implementation_t *imp,
+                           pixman_composite_info_t *info)
+{
+    PIXMAN_COMPOSITE_ARGS (info);
+    /* Flags the mask must carry before its bits may be read directly.
+     * This is the same set the fast path table entry demands of the
+     * source; that entry places no requirement at all on the mask. */
+    const uint32_t pad_bits_flags =
+        FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM |
+        FAST_PATH_BITS_IMAGE | FAST_PATH_PAD_REPEAT;
+    pixman_implementation_t *imp_border = imp;
+    pixman_composite_info_t info2 = *info;
+    pixman_composite_func_t func, func_border;
+    pixman_format_code_t mask_format, mask_format_border;
+    uint32_t src_flags, src_flags_border, mask_flags, mask_flags_border;
+    solid_fill_t src_solid, mask_solid;
+    pixman_image_t *src_image_border = (pixman_image_t *) &src_solid;
+    pixman_image_t *mask_image_border;
+
+    /* The pieces taken from the bitmap never sample outside it, so look
+     * them up as a covering, non-repeating source. */
+    src_flags = (info->src_flags & ~FAST_PATH_NORMAL_REPEAT) |
+                FAST_PATH_SAMPLES_COVER_CLIP_NEAREST;
+    src_flags_border = FAST_PATH_STANDARD_FLAGS;
+
+    /* Temporary solid image standing in for the source on border pieces;
+     * its colour is filled in before each use. */
+    _pixman_image_init (src_image_border);
+    src_image_border->type = SOLID;
+    src_image_border->common.extended_format_code = PIXMAN_solid;
+    src_image_border->common.flags = src_flags_border;
+
+    if (mask_image)
+    {
+        /* The flag test must come first: only once the mask is known to
+         * be a bits image is it valid to look at mask_image->bits. */
+        if ((info->mask_flags & pad_bits_flags) != pad_bits_flags ||
+            mask_x != src_x || mask_y != src_y ||
+            mask_image->bits.width != src_image->bits.width ||
+            mask_image->bits.height != src_image->bits.height ||
+            mask_image->common.repeat != PIXMAN_REPEAT_PAD)
+        {
+            /* Have to fall back to general_composite_rect() in this case.
+             * However, that's a static function outside our scope, and
+             * _pixman_implementation_lookup_composite() would just
+             * end up here again, so search for it manually: walk the
+             * fallback chain and use the first entry that accepts any
+             * operator and any formats. */
+            pixman_implementation_t *fallback;
+            const pixman_fast_path_t *fast_path;
+
+            for (fallback = imp->fallback; fallback != NULL;
+                 fallback = fallback->fallback)
+            {
+                for (fast_path = fallback->fast_paths;
+                     fast_path->op != PIXMAN_OP_NONE;
+                     ++fast_path)
+                {
+                    if (fast_path->op == PIXMAN_OP_any &&
+                        fast_path->src_format == PIXMAN_any &&
+                        fast_path->mask_format == PIXMAN_any &&
+                        fast_path->dest_format == PIXMAN_any)
+                    {
+                        fast_path->func (fallback, info);
+                        return;
+                    }
+                }
+            }
+            /* No wildcard entry anywhere in the chain: nothing is drawn. */
+            return;
+        }
+
+        mask_flags = info->mask_flags;
+        mask_flags_border = FAST_PATH_STANDARD_FLAGS;
+        mask_format = mask_image->common.extended_format_code;
+        mask_format_border = PIXMAN_solid;
+        mask_image_border = (pixman_image_t *) &mask_solid;
+        _pixman_image_init (mask_image_border);
+        mask_image_border->type = SOLID;
+        mask_image_border->common.extended_format_code = PIXMAN_solid;
+        mask_image_border->common.flags = mask_flags_border;
+    }
+    else
+    {
+        mask_flags = mask_flags_border = FAST_PATH_IS_OPAQUE;
+        mask_format = mask_format_border = PIXMAN_null;
+        mask_image_border = NULL;
+    }
+
+    /* func/imp handle pieces read from the bitmap, func_border/imp_border
+     * handle pieces read from the temporary solid image(s). */
+    _pixman_implementation_lookup_composite (
+        imp->toplevel, info->op,
+        src_image->common.extended_format_code, src_flags,
+        mask_format, mask_flags,
+        dest_image->common.extended_format_code, info->dest_flags,
+        &imp, &func);
+    _pixman_implementation_lookup_composite (
+        imp_border->toplevel, info->op,
+        PIXMAN_solid, src_flags_border,
+        mask_format_border, mask_flags_border,
+        dest_image->common.extended_format_code, info->dest_flags,
+        &imp_border, &func_border);
+
+    /* [thresh_left, thresh_right) x [thresh_top, thresh_bottom) is the
+     * part of the requested source rectangle that lies inside the bitmap,
+     * in source coordinates.  It collapses to an empty range pinned to the
+     * relevant edge when the request misses the bitmap entirely.
+     * x_skip / y_skip are the bitmap coordinates where that part starts. */
+    int32_t thresh_left, thresh_right, thresh_top, thresh_bottom;
+    int32_t x_skip = src_x < 0 ? 0 : src_x;
+    int32_t y_skip = src_y < 0 ? 0 : src_y;
+    if (src_image->bits.width == 1)
+    {
+        /* A one-pixel-wide source makes every row a single colour: move
+         * the request entirely left of the bitmap so each row is drawn
+         * as a "left border" solid fill sampled from column 0. */
+        src_x = -width;
+        thresh_left = thresh_right = 0;
+    }
+    else
+    {
+        thresh_left = 0;
+        thresh_right = src_image->bits.width;
+        if (src_x > thresh_left)
+            thresh_left = src_x;
+        if (thresh_left > thresh_right)
+            thresh_right = thresh_left;
+        if (src_x + width < thresh_right)
+            thresh_right = src_x + width;
+        if (thresh_right < thresh_left)
+            thresh_left = thresh_right;
+    }
+    thresh_top = 0;
+    thresh_bottom = src_image->bits.height;
+    if (src_y > thresh_top)
+        thresh_top = src_y;
+    if (thresh_top > thresh_bottom)
+        thresh_bottom = thresh_top;
+    if (src_y + height < thresh_bottom)
+        thresh_bottom = src_y + height;
+    if (thresh_bottom < thresh_top)
+        thresh_top = thresh_bottom;
+
+    /* NOTE: info2 is reused for every piece and only the fields that
+     * change are rewritten, so the order of the pieces below matters. */
+
+    /* ---- Band above the bitmap ---- */
+    if (src_y < thresh_top)
+    {
+        if (src_x < thresh_left)
+        {
+            /* Top-left corner: solid fill with pixel (0, 0). */
+            src_solid.color_32 = read_bits_sample (imp, src_image, 0, 0);
+            if (mask_image)
+                mask_solid.color_32 = read_bits_sample (imp, mask_image, 0, 0);
+            info2.src_image = src_image_border;
+            info2.mask_image = mask_image_border;
+            info2.width = thresh_left - src_x;
+            info2.height = thresh_top - src_y;
+            info2.src_flags = src_flags_border;
+            info2.mask_flags = mask_flags_border;
+            func_border (imp_border, &info2);
+        }
+        if (thresh_left < thresh_right)
+        {
+            /* Top edge: repeat source row 0 on every destination row. */
+            info2.src_image = src_image;
+            info2.mask_image = mask_image;
+            info2.src_x = info2.mask_x = x_skip;
+            info2.src_y = info2.mask_y = 0;
+            info2.dest_x = dest_x + thresh_left - src_x;
+            info2.width = thresh_right - thresh_left;
+            info2.height = 1;
+            info2.src_flags = src_flags;
+            info2.mask_flags = mask_flags;
+            for (info2.dest_y = dest_y; info2.dest_y < dest_y + thresh_top - src_y; info2.dest_y++)
+                func (imp, &info2);
+        }
+        if (thresh_right < src_x + width)
+        {
+            /* Top-right corner: solid fill with pixel (width-1, 0). */
+            src_solid.color_32 = read_bits_sample (imp, src_image, src_image->bits.width-1, 0);
+            if (mask_image)
+                mask_solid.color_32 = read_bits_sample (imp, mask_image, src_image->bits.width-1, 0);
+            info2.src_image = src_image_border;
+            info2.mask_image = mask_image_border;
+            info2.dest_x = dest_x + thresh_right - src_x;
+            info2.dest_y = dest_y;
+            info2.width = src_x + width - thresh_right;
+            info2.height = thresh_top - src_y;
+            info2.src_flags = src_flags_border;
+            info2.mask_flags = mask_flags_border;
+            func_border (imp_border, &info2);
+        }
+    }
+
+    /* ---- Band level with the bitmap ---- */
+    if (thresh_top < thresh_bottom)
+    {
+        if (src_x < thresh_left)
+        {
+            /* Left edge: one solid row per source row, coloured with that
+             * row's first pixel. */
+            info2.src_image = src_image_border;
+            info2.mask_image = mask_image_border;
+            info2.dest_x = dest_x;
+            info2.width = thresh_left - src_x;
+            info2.height = 1;
+            info2.src_flags = src_flags_border;
+            info2.mask_flags = mask_flags_border;
+            int32_t y;
+            for (y = 0; y < thresh_bottom - thresh_top; y++)
+            {
+                src_solid.color_32 = read_bits_sample (imp, src_image, 0, y_skip + y);
+                if (mask_image)
+                    mask_solid.color_32 = read_bits_sample (imp, mask_image, 0, y_skip + y);
+                info2.dest_y = dest_y + thresh_top - src_y + y;
+                func_border (imp_border, &info2);
+            }
+        }
+        if (thresh_left < thresh_right)
+        {
+            /* Centre: plain bitmap-to-bitmap composite. */
+            info2.src_image = src_image;
+            info2.mask_image = mask_image;
+            info2.src_x = info2.mask_x = x_skip;
+            info2.src_y = info2.mask_y = y_skip;
+            info2.dest_x = dest_x + thresh_left - src_x;
+            info2.dest_y = dest_y + thresh_top - src_y;
+            info2.width = thresh_right - thresh_left;
+            info2.height = thresh_bottom - thresh_top;
+            info2.src_flags = src_flags;
+            info2.mask_flags = mask_flags;
+            func (imp, &info2);
+        }
+        if (thresh_right < src_x + width)
+        {
+            /* Right edge: one solid row per source row, coloured with
+             * that row's last pixel. */
+            info2.src_image = src_image_border;
+            info2.mask_image = mask_image_border;
+            info2.dest_x = dest_x + thresh_right - src_x;
+            info2.width = src_x + width - thresh_right;
+            info2.height = 1;
+            info2.src_flags = src_flags_border;
+            info2.mask_flags = mask_flags_border;
+            int32_t y;
+            for (y = 0; y < thresh_bottom - thresh_top; y++)
+            {
+                src_solid.color_32 = read_bits_sample (imp, src_image, src_image->bits.width-1, y_skip + y);
+                if (mask_image)
+                    mask_solid.color_32 = read_bits_sample (imp, mask_image, src_image->bits.width-1, y_skip + y);
+                info2.dest_y = dest_y + thresh_top - src_y + y;
+                func_border (imp_border, &info2);
+            }
+        }
+    }
+
+    /* ---- Band below the bitmap ---- */
+    if (thresh_bottom < src_y + height)
+    {
+        if (src_x < thresh_left)
+        {
+            /* Bottom-left corner: solid fill with pixel (0, height-1). */
+            src_solid.color_32 = read_bits_sample (imp, src_image, 0, src_image->bits.height-1);
+            if (mask_image)
+                mask_solid.color_32 = read_bits_sample (imp, mask_image, 0, src_image->bits.height-1);
+            info2.src_image = src_image_border;
+            info2.mask_image = mask_image_border;
+            info2.dest_x = dest_x;
+            info2.dest_y = dest_y + thresh_bottom - src_y;
+            info2.width = thresh_left - src_x;
+            info2.height = src_y + height - thresh_bottom;
+            info2.src_flags = src_flags_border;
+            info2.mask_flags = mask_flags_border;
+            func_border (imp_border, &info2);
+        }
+        if (thresh_left < thresh_right)
+        {
+            /* Bottom edge: repeat the last source row on every
+             * destination row. */
+            info2.src_image = src_image;
+            info2.mask_image = mask_image;
+            info2.src_x = info2.mask_x = x_skip;
+            info2.src_y = info2.mask_y = src_image->bits.height-1;
+            info2.dest_x = dest_x + thresh_left - src_x;
+            info2.width = thresh_right - thresh_left;
+            info2.height = 1;
+            info2.src_flags = src_flags;
+            info2.mask_flags = mask_flags;
+            for (info2.dest_y = dest_y + thresh_bottom - src_y; info2.dest_y < dest_y + height; info2.dest_y++)
+                func (imp, &info2);
+        }
+        if (thresh_right < src_x + width)
+        {
+            /* Bottom-right corner: solid fill with pixel
+             * (width-1, height-1). */
+            src_solid.color_32 = read_bits_sample (imp, src_image, src_image->bits.width-1, src_image->bits.height-1);
+            if (mask_image)
+                mask_solid.color_32 = read_bits_sample (imp, mask_image, src_image->bits.width-1, src_image->bits.height-1);
+            info2.src_image = src_image_border;
+            info2.mask_image = mask_image_border;
+            info2.dest_x = dest_x + thresh_right - src_x;
+            info2.dest_y = dest_y + thresh_bottom - src_y;
+            info2.width = src_x + width - thresh_right;
+            info2.height = src_y + height - thresh_bottom;
+            info2.src_flags = src_flags_border;
+            info2.mask_flags = mask_flags_border;
+            func_border (imp_border, &info2);
+        }
+    }
+}
+
/* Use more unrolling for src_0565_0565 because it is typically CPU bound */
static force_inline void
scaled_nearest_scanline_565_565_SRC (uint16_t * dst,
@@ -1971,6 +2254,16 @@ static const pixman_fast_path_t c_fast_paths[] =
fast_composite_tiled_repeat
},
+ /* Pad repeat fast path entry. */
+ { PIXMAN_OP_any,
+ PIXMAN_any,
+ (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | FAST_PATH_BITS_IMAGE |
+ FAST_PATH_PAD_REPEAT),
+ PIXMAN_any, 0,
+ PIXMAN_any, FAST_PATH_STD_DEST_FLAGS,
+ fast_composite_pad_repeat
+ },
+
{ PIXMAN_OP_NONE },
};
diff --git a/test/Makefile.sources b/test/Makefile.sources
index e323a8e..bcbca37 100644
--- a/test/Makefile.sources
+++ b/test/Makefile.sources
@@ -1,6 +1,7 @@
# Tests (sorted by expected completion time)
TESTPROGRAMS = \
prng-test \
+ repeat-test \
a1-trap-test \
pdf-op-test \
region-test \
diff --git a/test/repeat-test.c b/test/repeat-test.c
new file mode 100644
index 0000000..a5557ae
--- /dev/null
+++ b/test/repeat-test.c
@@ -0,0 +1,59 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include "utils.h"
+
+#define WIDTH 3
+#define HEIGHT 5
+#define BORDER 6
+
+/*
+ * Composite a small WIDTH x HEIGHT a8r8g8b8 source, using the given repeat
+ * type, with PIXMAN_OP_OVER onto a destination that is BORDER pixels larger
+ * on every side, then compare the CRC32 of the destination image with
+ * expected_crc.  Prints a message and exits with EXIT_FAILURE on a
+ * mismatch or if any buffer/image cannot be created.
+ *
+ * The PRNG is reseeded on every call so the random initial destination
+ * contents are the same for each repeat type.
+ */
+void
+check (pixman_repeat_t repeat_type, uint32_t expected_crc)
+{
+    const int dest_width = WIDTH + 2 * BORDER;
+    const int dest_height = HEIGHT + 2 * BORDER;
+    uint32_t *src, *dest;
+    pixman_image_t *s, *d;
+    size_t i;
+    uint32_t crc;
+
+    prng_srand (0);
+
+    src = fence_malloc (WIDTH * HEIGHT * sizeof *src);
+    dest = (uint32_t *) make_random_bytes (dest_width * dest_height * sizeof *dest);
+    if (!src || !dest)
+    {
+        printf ("Failed to allocate pixel buffers\n");
+        exit (EXIT_FAILURE);
+    }
+
+    /* Opaque grey ramp: pixel i has the value i in each colour channel. */
+    for (i = 0; i < WIDTH * HEIGHT; i++)
+    {
+        uint32_t level = (uint32_t) (i & 0xFF);
+
+        src[i] = 0xFF000000 | (level << 16) | (level << 8) | (level << 0);
+    }
+
+    s = pixman_image_create_bits (PIXMAN_a8r8g8b8, WIDTH, HEIGHT, src, WIDTH * sizeof *src);
+    d = pixman_image_create_bits (PIXMAN_a8r8g8b8, dest_width, dest_height, dest, dest_width * sizeof *dest);
+    if (!s || !d)
+    {
+        printf ("Failed to create images\n");
+        exit (EXIT_FAILURE);
+    }
+    pixman_image_set_repeat (s, repeat_type);
+
+    /* Start sampling BORDER pixels above/left of the source so that every
+     * side of the destination is filled by the repeat logic. */
+    pixman_image_composite (PIXMAN_OP_OVER, s, NULL, d, -BORDER, -BORDER, 0, 0, 0, 0, dest_width, dest_height);
+
+#if 0
+    size_t x, y;
+    for (y = 0; y < HEIGHT + 2 * BORDER; y++)
+    {
+        for (x = 0; x < WIDTH + 2 * BORDER; x++)
+            printf (" %08X", dest[y * (WIDTH + 2 * BORDER) + x]);
+        printf ("\n");
+    }
+    printf ("\n");
+#endif
+
+    if ((crc = compute_crc32_for_image (0, d)) != expected_crc)
+    {
+        const char *name[] = { "NONE", "NORMAL", "PAD", "REFLECT" };
+        printf ("Failed CRC for repeat type '%s': got %08X, expected %08X\n",
+                name[repeat_type], (unsigned) crc, (unsigned) expected_crc);
+        exit (EXIT_FAILURE);
+    }
+
+    /* Drop the image objects before releasing the pixel storage they
+     * point at. */
+    pixman_image_unref (s);
+    pixman_image_unref (d);
+    fence_free (src);
+    fence_free (dest);
+}
+
+/*
+ * Run check() once per repeat type with its reference CRC; check() exits
+ * the program itself on the first mismatch.
+ */
+int
+main (void)
+{
+    static const struct
+    {
+        pixman_repeat_t repeat;
+        uint32_t        crc;
+    } cases[] =
+    {
+        { PIXMAN_REPEAT_NONE,    0xCB6B2680 },
+        { PIXMAN_REPEAT_NORMAL,  0x0254C0A1 },
+        { PIXMAN_REPEAT_PAD,     0x29D09CDB },
+        { PIXMAN_REPEAT_REFLECT, 0x85F2E260 },
+    };
+    size_t n;
+
+    for (n = 0; n < sizeof cases / sizeof cases[0]; n++)
+        check (cases[n].repeat, cases[n].crc);
+
+    return EXIT_SUCCESS;
+}
--
1.7.5.4
More information about the Pixman
mailing list