[Pixman] [PATCH/RFC 2/2] C fast path for a simple 90/180/270 degrees rotation.

Siarhei Siamashka siarhei.siamashka at gmail.com
Fri Jul 30 11:22:11 PDT 2010


From: Siarhei Siamashka <siarhei.siamashka at nokia.com>

---
 pixman/pixman-fast-path.c |  295 +++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 295 insertions(+), 0 deletions(-)

diff --git a/pixman/pixman-fast-path.c b/pixman/pixman-fast-path.c
index 6ed1580..df22c2d 100644
--- a/pixman/pixman-fast-path.c
+++ b/pixman/pixman-fast-path.c
@@ -1797,6 +1797,281 @@ fast_composite_scaled_nearest (pixman_implementation_t *imp,
     }
 }
 
+#define CACHE_LINE_SIZE 64 /* bytes; sets TILE_SIZE and the alignment masks below */
+
+#define FAST_SIMPLE_ROTATE(suffix, pix_type)                                  \
+    /* Emits the rotation blitters and composite entry point for pix_type. */ \
+static void                                                                   \
+blt_rotated_180_##suffix (pix_type       *dst,                                \
+			  int             dst_stride,                         \
+			  const pix_type *src,                                \
+			  int             src_stride,                         \
+			  int             w,                                  \
+			  int             h)                                  \
+{                                                                             \
+    int row, col; /* src row r is mirrored into dst row h - 1 - r */          \
+    for (row = 0; row < h; row++)                                             \
+    {                                                                         \
+	const pix_type *in = src + src_stride * row;                          \
+	pix_type *out = dst + dst_stride * (h - 1 - row) + w - 1;             \
+	for (col = 0; col < w; col++)                                         \
+	    *out-- = in[col];                                                 \
+    }                                                                         \
+}                                                                             \
+                                                                              \
+static void                                                                   \
+blt_rotated_90_trivial_##suffix (pix_type       *dst,                         \
+				 int             dst_stride,                  \
+				 const pix_type *src,                         \
+				 int             src_stride,                  \
+				 int             w,                           \
+				 int             h)                           \
+{                                                                             \
+    int row, col;                                                             \
+    /* Copies a source block of w rows x h columns into a destination of      \
+     * h rows x w columns: destination row r is source column h - 1 - r,      \
+     * read from source row 0 onwards. */                                     \
+    for (row = 0; row < h; row++)                                             \
+    {                                                                         \
+	const pix_type *in = src + (h - 1 - row);                             \
+	pix_type *out = dst + dst_stride * row;                               \
+	for (col = 0; col < w; col++, in += src_stride)                       \
+	    out[col] = *in;                                                   \
+    }                                                                         \
+}                                                                             \
+                                                                              \
+static void                                                                   \
+blt_rotated_270_trivial_##suffix (pix_type       *dst,                        \
+				  int             dst_stride,                 \
+				  const pix_type *src,                        \
+				  int             src_stride,                 \
+				  int             w,                          \
+				  int             h)                          \
+{                                                                             \
+    int row, col;                                                             \
+    /* Copies a source block of w rows x h columns into a destination of      \
+     * h rows x w columns: destination row r is source column r, read         \
+     * from source row w - 1 back to row 0. */                                \
+    for (row = 0; row < h; row++)                                             \
+    {                                                                         \
+	const pix_type *in = src + src_stride * (w - 1) + row;                \
+	pix_type *out = dst + dst_stride * row;                               \
+	for (col = 0; col < w; col++, in -= src_stride)                       \
+	    out[col] = *in;                                                   \
+    }                                                                         \
+}                                                                             \
+                                                                              \
+static void                                                                   \
+blt_rotated_90_##suffix (pix_type       *dst,                                 \
+			 int             dst_stride,                          \
+			 const pix_type *src,                                 \
+			 int             src_stride,                          \
+			 int             W,                                   \
+			 int             H)                                   \
+{                                                                             \
+    int x;                                                                    \
+    int leading_pixels = 0, trailing_pixels = 0;                              \
+    const int TILE_SIZE = CACHE_LINE_SIZE / sizeof(pix_type);                 \
+                                                                              \
+    /*                                                                        \
+     * Rotated copy of W source rows x H source columns into H destination    \
+     * rows x W columns (strides in pix_type units).  The destination is      \
+     * processed as TILE_SIZE x H vertical stripes aligned to cache lines,    \
+     * optimistically assuming its stride is a multiple of the cache line     \
+     * size; if it is not, the copy will just be a bit slower.                \
+     */                                                                       \
+    if ((uintptr_t)dst & (CACHE_LINE_SIZE - 1)) {                             \
+	leading_pixels = TILE_SIZE - (((uintptr_t)dst &                       \
+			    (CACHE_LINE_SIZE - 1)) / sizeof(pix_type));       \
+	if (leading_pixels > W)                                               \
+	    leading_pixels = W;                                               \
+                                                                              \
+	/* unaligned leading part NxH (where N < TILE_SIZE) */                \
+	blt_rotated_90_trivial_##suffix (                                     \
+	    dst,                                                              \
+	    dst_stride,                                                       \
+	    src,                                                              \
+	    src_stride,                                                       \
+	    leading_pixels,                                                   \
+	    H);                                                               \
+	/* skip the stripe just copied: N dst columns, N src rows */          \
+	dst += leading_pixels;                                                \
+	src += leading_pixels * src_stride;                                   \
+	W -= leading_pixels;                                                  \
+    }                                                                         \
+    /* pixels beyond the last cache line boundary form a trailing stripe */   \
+    if ((uintptr_t)(dst + W) & (CACHE_LINE_SIZE - 1)) {                       \
+	trailing_pixels = (((uintptr_t)(dst + W) &                            \
+			    (CACHE_LINE_SIZE - 1)) / sizeof(pix_type));       \
+	if (trailing_pixels > W)                                              \
+	    trailing_pixels = W;                                              \
+	W -= trailing_pixels;                                                 \
+    }                                                                         \
+                                                                              \
+    for (x = 0; x < W; x += TILE_SIZE) {                                      \
+	/* aligned middle part TILE_SIZExH */                                 \
+	blt_rotated_90_trivial_##suffix (                                     \
+	    dst + x,                                                          \
+	    dst_stride,                                                       \
+	    src + src_stride * x,                                             \
+	    src_stride,                                                       \
+	    TILE_SIZE,                                                        \
+	    H);                                                               \
+    }                                                                         \
+                                                                              \
+    if (trailing_pixels) {                                                    \
+	/* unaligned trailing part NxH (where N < TILE_SIZE) */               \
+	blt_rotated_90_trivial_##suffix (                                     \
+	    dst + W,                                                          \
+	    dst_stride,                                                       \
+	    src + W * src_stride,                                             \
+	    src_stride,                                                       \
+	    trailing_pixels,                                                  \
+	    H);                                                               \
+    }                                                                         \
+}                                                                             \
+                                                                              \
+static void                                                                   \
+blt_rotated_270_##suffix (pix_type       *dst,                                \
+			  int             dst_stride,                         \
+			  const pix_type *src,                                \
+			  int             src_stride,                         \
+			  int             W,                                  \
+			  int             H)                                  \
+{                                                                             \
+    int x;                                                                    \
+    int leading_pixels = 0, trailing_pixels = 0;                              \
+    const int TILE_SIZE = CACHE_LINE_SIZE / sizeof(pix_type);                 \
+                                                                              \
+    /*                                                                        \
+     * Same stripe scheme as blt_rotated_90, for the opposite direction:      \
+     * destination column x reads source row W - 1 - x, so the leading        \
+     * stripe uses the last source rows and the trailing stripe the first     \
+     * ones.  Optimistically assumes the destination stride is a multiple     \
+     * of the cache line size; if not, it will just be a bit slower.          \
+     */                                                                       \
+    if ((uintptr_t)dst & (CACHE_LINE_SIZE - 1)) {                             \
+	leading_pixels = TILE_SIZE - (((uintptr_t)dst &                       \
+			    (CACHE_LINE_SIZE - 1)) / sizeof(pix_type));       \
+	if (leading_pixels > W)                                               \
+	    leading_pixels = W;                                               \
+                                                                              \
+	/* unaligned leading part NxH (where N < TILE_SIZE) */                \
+	blt_rotated_270_trivial_##suffix (                                    \
+	    dst,                                                              \
+	    dst_stride,                                                       \
+	    src + src_stride * (W - leading_pixels),                          \
+	    src_stride,                                                       \
+	    leading_pixels,                                                   \
+	    H);                                                               \
+	/* src stays put: this stripe used the last N source rows */          \
+	dst += leading_pixels;                                                \
+	W -= leading_pixels;                                                  \
+    }                                                                         \
+    /* trailing stripe reads the first N source rows; src then skips them */  \
+    if ((uintptr_t)(dst + W) & (CACHE_LINE_SIZE - 1)) {                       \
+	trailing_pixels = (((uintptr_t)(dst + W) &                            \
+			    (CACHE_LINE_SIZE - 1)) / sizeof(pix_type));       \
+	if (trailing_pixels > W)                                              \
+	    trailing_pixels = W;                                              \
+	W -= trailing_pixels;                                                 \
+	src += trailing_pixels * src_stride;                                  \
+    }                                                                         \
+                                                                              \
+    for (x = 0; x < W; x += TILE_SIZE) {                                      \
+	/* aligned middle part TILE_SIZExH */                                 \
+	blt_rotated_270_trivial_##suffix (                                    \
+	    dst + x,                                                          \
+	    dst_stride,                                                       \
+	    src + src_stride * (W - x - TILE_SIZE),                           \
+	    src_stride,                                                       \
+	    TILE_SIZE,                                                        \
+	    H);                                                               \
+    }                                                                         \
+                                                                              \
+    if (trailing_pixels) {                                                    \
+	/* unaligned trailing part NxH (where N < TILE_SIZE) */               \
+	blt_rotated_270_trivial_##suffix (                                    \
+	    dst + W,                                                          \
+	    dst_stride,                                                       \
+	    src - trailing_pixels * src_stride,                               \
+	    src_stride,                                                       \
+	    trailing_pixels,                                                  \
+	    H);                                                               \
+    }                                                                         \
+}                                                                             \
+                                                                              \
+static void                                                                   \
+fast_composite_simple_rotate_##suffix (pixman_implementation_t *imp,          \
+				       pixman_op_t              op,           \
+				       pixman_image_t *         src_image,    \
+				       pixman_image_t *         mask_image,   \
+				       pixman_image_t *         dst_image,    \
+				       int32_t                  src_x,        \
+				       int32_t                  src_y,        \
+				       int32_t                  mask_x,       \
+				       int32_t                  mask_y,       \
+				       int32_t                  dest_x,       \
+				       int32_t                  dest_y,       \
+				       int32_t                  width,        \
+				       int32_t                  height)       \
+{                                                                             \
+    int             off_x, off_y;                                             \
+    int             src_x_t, src_y_t;                                         \
+    int             dst_stride, src_stride;                                   \
+    pix_type       *dst_line;                                                 \
+    pix_type       *src_line;                                                 \
+    /*                                                                        \
+     * Composite entry point for sources with a simple rotation transform.    \
+     * The transform translation (matrix[0][2], matrix[1][2]) is reduced      \
+     * to integer pixel offsets, combined with src_x/src_y to locate the      \
+     * source block, and matrix[0][1] selects the blitter.  imp, op and       \
+     * the mask arguments are not used.                                       \
+     */                                                                       \
+    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, pix_type,               \
+			   dst_stride, dst_line, 1);                          \
+                                                                              \
+    off_x = pixman_fixed_to_int (src_image->common.transform->matrix[0][2] +  \
+				 pixman_fixed_1 / 2 - pixman_fixed_e);        \
+    off_y = pixman_fixed_to_int (src_image->common.transform->matrix[1][2] +  \
+				 pixman_fixed_1 / 2 - pixman_fixed_e);        \
+                                                                              \
+    if (src_image->common.transform->matrix[0][1] == -pixman_fixed_1)         \
+    {                                                                         \
+	/* 90 degrees */                                                      \
+	src_x_t = -src_y + off_x - height;                                    \
+	src_y_t = src_x + off_y;                                              \
+	PIXMAN_IMAGE_GET_LINE (src_image, src_x_t, src_y_t, pix_type,         \
+			       src_stride, src_line, 1);                      \
+	blt_rotated_90_##suffix (dst_line, dst_stride, src_line, src_stride,  \
+			width, height);                                       \
+    }                                                                         \
+    else if (src_image->common.transform->matrix[0][1] == pixman_fixed_1)     \
+    {                                                                         \
+	/* 270 degrees */                                                     \
+	src_x_t = src_y + off_x;                                              \
+	src_y_t = -src_x + off_y - width;                                     \
+	PIXMAN_IMAGE_GET_LINE (src_image, src_x_t, src_y_t, pix_type,         \
+			       src_stride, src_line, 1);                      \
+	blt_rotated_270_##suffix (dst_line, dst_stride, src_line, src_stride, \
+			 width, height);                                      \
+    }                                                                         \
+    else                                                                      \
+    {                                                                         \
+	/* 180 degrees (every remaining matrix[0][1] value) */                \
+	src_x_t = -src_x + off_x - width;                                     \
+	src_y_t = -src_y + off_y - height;                                    \
+	PIXMAN_IMAGE_GET_LINE (src_image, src_x_t, src_y_t, pix_type,         \
+				src_stride, src_line, 1);                     \
+	blt_rotated_180_##suffix (dst_line, dst_stride, src_line, src_stride, \
+				 width, height);                              \
+    }                                                                         \
+}
+
+FAST_SIMPLE_ROTATE (8, uint8_t)       /* *_8 functions, 1 byte per pixel */
+FAST_SIMPLE_ROTATE (565, uint16_t)    /* *_565 functions, 2 bytes per pixel */
+FAST_SIMPLE_ROTATE (8888, uint32_t)   /* *_8888 functions, 4 bytes per pixel */
+
 static const pixman_fast_path_t c_fast_paths[] =
 {
     PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, fast_composite_over_n_8_0565),
@@ -1941,6 +2216,26 @@ static const pixman_fast_path_t c_fast_paths[] =
     NEAREST_FAST_PATH (OVER, x8b8g8r8, a8b8g8r8),
     NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8),
 
+#define SIMPLE_ROTATE_FLAGS /* flags for the source slot of SIMPLE_ROTATE_FAST_PATH */ \
+    (FAST_PATH_SIMPLE_ROTATE_TRANSFORM	|				\
+     FAST_PATH_NO_ALPHA_MAP	|					\
+     FAST_PATH_NO_ACCESSORS	|					\
+     FAST_PATH_NO_WIDE_FORMAT)
+
+#define SIMPLE_ROTATE_FAST_PATH(op,s,d,suffix) /* one fast path table entry */ \
+    {   PIXMAN_OP_ ## op,			/* operator */		  \
+	PIXMAN_ ## s, SIMPLE_ROTATE_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \
+	PIXMAN_null, 0,				/* presumably: no mask */ \
+	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,	/* destination format */  \
+	fast_composite_simple_rotate_##suffix,	/* from FAST_SIMPLE_ROTATE */ \
+    }
+
+    SIMPLE_ROTATE_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, 8888),
+    SIMPLE_ROTATE_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, 8888),
+    SIMPLE_ROTATE_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, 8888),
+    SIMPLE_ROTATE_FAST_PATH (SRC, r5g6b5, r5g6b5, 565),
+    SIMPLE_ROTATE_FAST_PATH (SRC, a8, a8, 8),
+
     {   PIXMAN_OP_NONE	},
 };
 
-- 
1.6.4.4



More information about the Pixman mailing list