[Pixman] [PATCH 3/3] MIPS: DSPr2: Added more fast-paths for OVER operation:
Nemanja Lukic
nlukic at mips.com
Mon Nov 12 13:48:53 PST 2012
From: Nemanja Lukic <nemanja.lukic at rt-rk.com>
Performance numbers before/after on MIPS-74kc @ 1GHz:
lowlevel-blt-bench results
Reference (before):
over_n_0565 = L1: 14.48 L2: 21.36 M: 17.57 ( 23.30%) HT: 6.95 VT: 6.44 R: 6.39 RT: 2.16 ( 22Kops/s)
over_n_8888 = L1: 92.60 L2: 86.13 M: 24.41 ( 64.74%) HT: 8.94 VT: 8.06 R: 8.00 RT: 2.53 ( 25Kops/s)
Optimized:
over_n_0565 = L1: 27.65 L2: 189.22 M: 58.19 ( 77.12%) HT: 52.80 VT: 49.88 R: 47.53 RT: 23.67 ( 72Kops/s)
over_n_8888 = L1: 235.99 L2: 230.86 M: 29.09 ( 77.11%) HT: 27.95 VT: 27.24 R: 26.58 RT: 18.10 ( 67Kops/s)
---
pixman/pixman-mips-dspr2-asm.S | 134 ++++++++++++++++++++++++++++++++++++++++
pixman/pixman-mips-dspr2.c | 9 +++-
pixman/pixman-mips-dspr2.h | 36 +++++++++++
3 files changed, 178 insertions(+), 1 deletions(-)
diff --git a/pixman/pixman-mips-dspr2-asm.S b/pixman/pixman-mips-dspr2-asm.S
index d2482e0..ba22e62 100644
--- a/pixman/pixman-mips-dspr2-asm.S
+++ b/pixman/pixman-mips-dspr2-asm.S
@@ -1342,6 +1342,140 @@ LEAF_MIPS_DSPR2(pixman_composite_over_8888_8888_asm_mips)
END(pixman_composite_over_8888_8888_asm_mips)
+LEAF_MIPS_DSPR2(pixman_composite_over_n_0565_asm_mips)
+/* Combine one constant 32-bit source with a scanline of 16-bit pixels (OVER, per the function name).
+ * a0 - dst (r5g6b5), advanced as pixels are written
+ * a1 - src (32bit constant)
+ * a2 - w, number of pixels still to process
+ */
+
+ beqz a2, 5f /* w == 0: nothing to do */
+ nop
+
+ not t0, a1 /* t0 = ~src */
+ srl t0, t0, 24 /* t0 = 255 - top byte of src (presumably the alpha channel) */
+ bgtz t0, 1f /* top byte != 0xff: take the blending path */
+ nop
+ CONVERT_1x8888_TO_1x0565 a1, t1, t2, t3 /* presumably t1 = src packed as r5g6b5; macro defined elsewhere */
+0: /* fill loop: reached only when the top byte of src is 0xff */
+ sh t1, 0(a0) /* overwrite one 16-bit pixel */
+ addiu a2, a2, -1
+ bgtz a2, 0b /* loop while pixels remain */
+ addiu a0, a0, 2 /* next pixel (presumably in the branch delay slot - assuming noreorder) */
+ j ra /* return here: SAVE_REGS_ON_STACK has not run on this path */
+ nop
+
+1: /* blending path */
+ SAVE_REGS_ON_STACK 0, s0, s1, s2 /* s0-s2 are handed to the macros below */
+ li t4, 0x00ff00ff /* passed to both MUL macros below */
+ li t5, 0xf800f800 /* bits 15..11 of each halfword; presumably the r5 field mask */
+ li t6, 0x07e007e0 /* bits 10..5 of each halfword; presumably the g6 field mask */
+ li t7, 0x001F001F /* bits 4..0 of each halfword; presumably the b5 field mask */
+ addiu t1, a2, -1
+ beqz t1, 3f /* w == 1: skip the two-pixel loop */
+ nop
+2: /* main loop: two pixels per iteration */
+ lhu t1, 0(a0) /* t1 = destination (r5g6b5) */
+ lhu t2, 2(a0) /* t2 = destination (r5g6b5) */
+
+ CONVERT_2x0565_TO_2x8888 t1, t2, t3, t8, t6, t7, t9, s0, s1, s2 /* presumably t3, t8 = both dst pixels expanded to 8888 */
+ MIPS_2xUN8x4_MUL_2xUN8 t3, t8, t0, t0, t1, t2, t4, t9, s0, s1, s2, t3, t8 /* presumably t1, t2 = dst pixels scaled by t0 */
+ addu_s.qb t1, t1, a1 /* add src byte-wise with unsigned saturation */
+ addu_s.qb t2, t2, a1 /* same for the second pixel */
+ CONVERT_2x8888_TO_2x0565 t1, t2, t3, t8, t5, t6, t7, s0, s1 /* presumably t3, t8 = results packed back to r5g6b5 */
+
+ sh t3, 0(a0)
+ sh t8, 2(a0)
+
+ addiu a2, a2, -2 /* two pixels done */
+ addiu t1, a2, -1
+ bgtz t1, 2b /* loop while at least two pixels remain */
+ addiu a0, a0, 4 /* advance dst by two 16-bit pixels */
+3: /* tail: at most one pixel left */
+ beqz a2, 4f /* none left */
+ nop
+
+ lhu t1, 0(a0) /* t1 = destination (r5g6b5) */
+
+ CONVERT_1x0565_TO_1x8888 t1, t2, s0, s1 /* presumably t2 = dst pixel expanded to 8888 */
+ MIPS_UN8x4_MUL_UN8 t2, t0, t1, t4, s0, s1, s2 /* presumably t1 = dst pixel scaled by t0 */
+ addu_s.qb t1, t1, a1 /* add src byte-wise with unsigned saturation */
+ CONVERT_1x8888_TO_1x0565 t1, t2, s0, s1 /* presumably t2 = result packed back to r5g6b5 */
+
+ sh t2, 0(a0)
+
+4:
+ RESTORE_REGS_FROM_STACK 0, s0, s1, s2 /* pairs with the SAVE_REGS_ON_STACK on the blending path */
+5:
+ j ra
+ nop
+
+END(pixman_composite_over_n_0565_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_asm_mips)
+/* Combine one constant 32-bit source with a scanline of 32-bit pixels (OVER, per the function name).
+ * a0 - dst (a8r8g8b8), advanced as pixels are written
+ * a1 - src (32bit constant)
+ * a2 - w, number of pixels still to process
+ */
+
+ beqz a2, 5f /* w == 0: nothing to do */
+ nop
+
+ not t0, a1 /* t0 = ~src */
+ srl t0, t0, 24 /* t0 = 255 - top byte of src (presumably the alpha channel) */
+ bgtz t0, 1f /* top byte != 0xff: take the blending path */
+ nop
+0: /* fill loop: reached only when the top byte of src is 0xff */
+ sw a1, 0(a0) /* overwrite one 32-bit pixel with src */
+ addiu a2, a2, -1
+ bgtz a2, 0b /* loop while pixels remain */
+ addiu a0, a0, 4 /* next pixel (presumably in the branch delay slot - assuming noreorder) */
+ j ra /* return here: SAVE_REGS_ON_STACK has not run on this path */
+ nop
+
+1: /* blending path */
+ SAVE_REGS_ON_STACK 0, s0, s1, s2 /* s0-s2 are handed to the two-pixel MUL macro below */
+ li t4, 0x00ff00ff /* passed to both MUL macros below */
+ addiu t1, a2, -1
+ beqz t1, 3f /* w == 1: skip the two-pixel loop */
+ nop
+2: /* main loop: two pixels per iteration */
+ lw t2, 0(a0) /* t2 = destination (a8r8g8b8) */
+ lw t3, 4(a0) /* t3 = destination (a8r8g8b8) */
+
+ MIPS_2xUN8x4_MUL_2xUN8 t2, t3, t0, t0, t7, t8, t4, t9, s0, s1, s2, t2, t3 /* presumably t7, t8 = dst pixels scaled by t0 */
+
+ addu_s.qb t7, t7, a1 /* add src byte-wise with unsigned saturation */
+ addu_s.qb t8, t8, a1 /* same for the second pixel */
+
+ sw t7, 0(a0)
+ sw t8, 4(a0)
+
+ addiu a2, a2, -2 /* two pixels done */
+ addiu t1, a2, -1
+ bgtz t1, 2b /* loop while at least two pixels remain */
+ addiu a0, a0, 8 /* advance dst by two 32-bit pixels */
+3: /* tail: at most one pixel left */
+ beqz a2, 4f /* none left */
+ nop
+
+ lw t1, 0(a0) /* t1 = destination (a8r8g8b8) */
+
+ MIPS_UN8x4_MUL_UN8 t1, t0, t3, t4, t5, t6, t7 /* presumably t3 = dst pixel scaled by t0; t5-t7 presumably scratch */
+
+ addu_s.qb t3, t3, a1 /* add src byte-wise with unsigned saturation */
+
+ sw t3, 0(a0)
+
+4:
+ RESTORE_REGS_FROM_STACK 0, s0, s1, s2 /* pairs with the SAVE_REGS_ON_STACK on the blending path */
+5:
+ j ra
+ nop
+
+END(pixman_composite_over_n_8888_asm_mips)
+
LEAF_MIPS_DSPR2(pixman_composite_add_8_8_8_asm_mips)
/*
* a0 - dst (a8)
diff --git a/pixman/pixman-mips-dspr2.c b/pixman/pixman-mips-dspr2.c
index 161377b..11f1254 100644
--- a/pixman/pixman-mips-dspr2.c
+++ b/pixman/pixman-mips-dspr2.c
@@ -81,6 +81,11 @@ PIXMAN_MIPS_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, over_0565_n_0565,
PIXMAN_MIPS_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, add_8888_n_8888,
uint32_t, 1, uint32_t, 1)
+PIXMAN_MIPS_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, over_n_0565, /* defines mips_composite_over_n_0565 */
+ uint16_t, 1) /* dst_type, dst_cnt */
+PIXMAN_MIPS_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, over_n_8888, /* defines mips_composite_over_n_8888 */
+ uint32_t, 1) /* dst_type, dst_cnt */
+
PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (add_8_8_8, uint8_t, 1,
uint8_t, 1, uint8_t, 1)
PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (add_8888_8_8888, uint32_t, 1,
@@ -283,7 +288,9 @@ static const pixman_fast_path_t mips_dspr2_fast_paths[] =
PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, mips_composite_over_n_8_8888),
PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, mips_composite_over_n_8_0565),
PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, mips_composite_over_n_8_0565),
-
+ PIXMAN_STD_FAST_PATH (OVER, solid, null, r5g6b5, mips_composite_over_n_0565),
+ PIXMAN_STD_FAST_PATH (OVER, solid, null, a8r8g8b8, mips_composite_over_n_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, null, x8r8g8b8, mips_composite_over_n_8888),
PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, a8r8g8b8, mips_composite_over_8888_n_8888),
PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, x8r8g8b8, mips_composite_over_8888_n_8888),
PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, r5g6b5, mips_composite_over_8888_n_0565),
diff --git a/pixman/pixman-mips-dspr2.h b/pixman/pixman-mips-dspr2.h
index 3766850..4ac9ff9 100644
--- a/pixman/pixman-mips-dspr2.h
+++ b/pixman/pixman-mips-dspr2.h
@@ -85,6 +85,42 @@ mips_composite_##name (pixman_implementation_t *imp, \
} \
}
+/****************************************************************/
+/* Declares pixman_composite_<name>_asm_mips and defines mips_composite_<name>, which calls it once per row. */
+#define PIXMAN_MIPS_BIND_FAST_PATH_N_DST(flags, name, \
+ dst_type, dst_cnt) \
+void /* prototype of the per-row routine, defined outside this macro */ \
+pixman_composite_##name##_asm_mips (dst_type *dst, \
+ uint32_t src, \
+ int32_t w); \
+ \
+static void \
+mips_composite_##name (pixman_implementation_t *imp, \
+ pixman_composite_info_t *info) \
+{ \
+ PIXMAN_COMPOSITE_ARGS (info); /* presumably introduces src_image, dest_image, dest_x, dest_y, width, height */ \
+ dst_type *dst_line, *dst; \
+ int32_t dst_stride; \
+ uint32_t src; \
+ \
+ src = _pixman_image_get_solid ( /* solid source value, fetched once before the row loop */ \
+ imp, src_image, dest_image->bits.format); \
+ \
+ if ((flags & SKIP_ZERO_SRC) && src == 0) /* flag set and src is 0: leave dst untouched */ \
+ return; \
+ \
+ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type, /* presumably sets dst_line and dst_stride - TODO confirm */ \
+ dst_stride, dst_line, dst_cnt); \
+ \
+ while (height--) /* one call to the asm routine per row */ \
+ { \
+ dst = dst_line; \
+ dst_line += dst_stride; /* step to the start of the next row */ \
+ \
+ pixman_composite_##name##_asm_mips (dst, src, width); \
+ } \
+}
+
/*******************************************************************/
#define PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST(flags, name, \
--
1.7.3
More information about the Pixman
mailing list