[Pixman] [PATCH 3/3] MIPS: DSPr2: Added more fast-paths for OVER operation: over_n_0565, over_n_8888
Nemanja Lukic
nlukic at mips.com
Sun Nov 4 13:58:21 PST 2012
From: Nemanja Lukic <nemanja.lukic at rt-rk.com>
Performance numbers before/after on MIPS-74kc @ 1GHz:
lowlevel-blt-bench results
Reference (before):
over_n_0565 = L1: 12.04 L2: 21.45 M: 18.50 ( 24.55%) HT: 6.93 VT: 6.45 R: 6.38 RT: 2.16 ( 22Kops/s)
over_n_8888 = L1: 93.76 L2: 85.96 M: 24.41 ( 64.78%) HT: 8.93 VT: 8.08 R: 7.99 RT: 2.54 ( 25Kops/s)
Optimized:
over_n_0565 = L1: 26.19 L2: 24.93 M: 21.28 ( 28.25%) HT: 18.75 VT: 18.07 R: 17.85 RT: 11.34 ( 57Kops/s)
over_n_8888 = L1: 55.31 L2: 49.07 M: 28.60 ( 75.93%) HT: 23.99 VT: 22.95 R: 22.34 RT: 12.85 ( 61Kops/s)
---
pixman/pixman-mips-dspr2-asm.S | 106 ++++++++++++++++++++++++++++++++++++++++
pixman/pixman-mips-dspr2-asm.h | 31 ++++++++++++
pixman/pixman-mips-dspr2.c | 9 +++-
pixman/pixman-mips-dspr2.h | 36 ++++++++++++++
4 files changed, 181 insertions(+), 1 deletions(-)
diff --git a/pixman/pixman-mips-dspr2-asm.S b/pixman/pixman-mips-dspr2-asm.S
index d2482e0..96d3f97 100644
--- a/pixman/pixman-mips-dspr2-asm.S
+++ b/pixman/pixman-mips-dspr2-asm.S
@@ -1342,6 +1342,112 @@ LEAF_MIPS_DSPR2(pixman_composite_over_8888_8888_asm_mips)
END(pixman_composite_over_8888_8888_asm_mips)
+LEAF_MIPS_DSPR2(pixman_composite_over_n_0565_asm_mips)
+/* OVER of one solid 32-bit colour onto a row of w r5g6b5 pixels, in place.
+ * a0 - dst (r5g6b5), loaded and stored in place
+ * a1 - src (32bit constant), the same colour for every pixel
+ * a2 - w, pixel count: consumed two at a time, then at most one leftover
+ */
+
+ SAVE_REGS_ON_STACK 0, s0, s1, s2, s3 /* s0-s3 are handed to the macros below */
+ li t2, 0x00ff00ff /* t2 = maskLSR argument of OVER_2x8888_2x8888 */
+ li t5, 0xf800f800 /* top 5 bits of each halfword; presumably the r5g6b5 field masks the CONVERT macros expect */
+ li t6, 0x07e007e0 /* middle 6 bits of each halfword */
+ li t7, 0x001F001F /* low 5 bits of each halfword */
+ beqz a2, 3f /* w == 0: nothing to composite */
+ nop
+ addiu t1, a2, -1 /* t1 = w - 1 */
+ beqz t1, 2f /* w == 1: only the single-pixel tail runs */
+ nop
+1:
+ /* main loop: two destination pixels per iteration */
+ lhu t0, 0(a0) /* t0 = destination (r5g6b5) */
+ /* a1 = source (32bit constant) */
+ lhu t1, 2(a0) /* t1 = destination (r5g6b5) */
+
+ CONVERT_2x0565_TO_2x8888 t0, t1, t3, t4, t6, t7, t8, t9, s0, s1 /* t3/t4 feed the OVER below as d1/d2 */
+ OVER_2x8888_2x8888 a1, a1, t3, t4, t0, t1, t2, t8, t9, s0, s1, s2, s3 /* out1/out2 = t0/t1 */
+ CONVERT_2x8888_TO_2x0565 t0, t1, t3, t4, t5, t6, t7, t8, t9 /* t3/t4 are the values stored below */
+
+ sh t3, 0(a0)
+ sh t4, 2(a0)
+ addiu a2, a2, -2 /* two pixels done */
+ addiu t1, a2, -1
+ bgtz t1, 1b /* loop while at least two pixels remain */
+ addiu a0, a0, 4 /* (delay slot) advance dst by two 16-bit pixels */
+2:
+ beqz a2, 3f /* no leftover pixel */
+ nop
+ /* a1 = source (32bit constant) */
+ lhu t0, 0(a0) /* t0 = destination (r5g6b5) */
+
+ CONVERT_1x0565_TO_1x8888 t0, t1, t3, t4
+ OVER_8888_8888 a1, t1, t0, t2, t3, t4, t5, t6 /* NOTE(review): t5/t6 look like scratch here; the masks are not read again after this point */
+ CONVERT_1x8888_TO_1x0565 t0, t1, t3, t4 /* t1 is the value stored below */
+
+ sh t1, 0(a0)
+3:
+ RESTORE_REGS_FROM_STACK 0, s0, s1, s2, s3
+ j ra
+ nop
+
+END(pixman_composite_over_n_0565_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_asm_mips)
+/* OVER of one solid 32-bit colour onto a row of w 32-bit pixels, in place.
+ * a0 - dst (a8r8g8b8), loaded and stored in place
+ * a1 - src (32bit constant), the same colour for every pixel
+ * a2 - w, pixel count: consumed in groups of four, then one at a time
+ */
+
+ beqz a2, 4f /* w == 0: return before anything is saved */
+ nop
+
+ SAVE_REGS_ON_STACK 8, s0, s1, s2, s3, s4, s5 /* s0-s5 are used below */
+ li t4, 0x00ff00ff /* t4 = maskLSR argument of OVER_2x8888_2x8888 */
+ srl t9, a2, 2 /* t9 = how many multiples of 4 dst pixels */
+ beqz t9, 2f /* fewer than 4 dst pixels: go straight to the one-pixel tail */
+ nop
+
+1:
+ beqz t9, 2f /* all groups of four done */
+ addiu t9, t9, -1 /* (delay slot) also runs on the exit pass; t9 is not read after label 2 */
+
+ lw t0, 0(a0) /* t0 = destination (a8r8g8b8) */
+ lw t1, 4(a0) /* t1 = destination (a8r8g8b8) */
+ lw s0, 8(a0) /* s0 = destination (a8r8g8b8) */
+ lw s1, 12(a0) /* s1 = destination (a8r8g8b8) */
+
+ OVER_2x8888_2x8888 a1, a1, t0, t1, t2, t3, t4, t5, t6, t7, t8, s4, s5 /* results in t2, t3 */
+ OVER_2x8888_2x8888 a1, a1, s0, s1, s2, s3, t4, t5, t6, t7, t8, s4, s5 /* results in s2, s3 */
+
+ sw t2, 0(a0)
+ sw t3, 4(a0)
+ sw s2, 8(a0)
+ sw s3, 12(a0)
+ addiu a2, a2, -4 /* keep a2 = pixels still to do, so the tail sees the remainder */
+ b 1b
+ addiu a0, a0, 16 /* (delay slot) advance dst by four 32-bit pixels */
+2:
+ beqz a2, 3f /* no leftover pixels */
+ nop
+21:
+ lw t0, 0(a0) /* t0 = destination (a8r8g8b8) */
+
+ OVER_8888_8888 a1, t0, t1, t4, t3, t2, t5, t6 /* result in t1 */
+
+ sw t1, 0(a0)
+ addiu a2, a2, -1
+ bnez a2, 21b /* one pixel per pass until a2 reaches 0 */
+ addiu a0, a0, 4 /* (delay slot) advance dst by one pixel */
+3:
+ RESTORE_REGS_FROM_STACK 8, s0, s1, s2, s3, s4, s5
+4:
+ j ra
+ nop
+
+END(pixman_composite_over_n_8888_asm_mips)
+
LEAF_MIPS_DSPR2(pixman_composite_add_8_8_8_asm_mips)
/*
* a0 - dst (a8)
diff --git a/pixman/pixman-mips-dspr2-asm.h b/pixman/pixman-mips-dspr2-asm.h
index b330c0f..dcbac0b 100644
--- a/pixman/pixman-mips-dspr2-asm.h
+++ b/pixman/pixman-mips-dspr2-asm.h
@@ -587,6 +587,37 @@ LEAF_MIPS32R2(symbol) \
addu_s.qb \out_8888, \out_8888, \s_8888
.endm
+/*
+ * OVER operation on two a8r8g8b8 source pixels (s1_8888 and s2_8888) and two
+ * a8r8g8b8 destination pixels (d1_8888 and d2_8888); the results are written to
+ * out1_8888 and out2_8888. maskLSR is needed for the rounding process and must hold:
+ * li maskLSR, 0x00ff00ff
+ */
+.macro OVER_2x8888_2x8888 s1_8888, \
+ s2_8888, \
+ d1_8888, \
+ d2_8888, \
+ out1_8888, \
+ out2_8888, \
+ maskLSR, \
+ scratch1, scratch2, scratch3, \
+ scratch4, scratch5, scratch6
+ not \scratch1, \s1_8888 /* invert all bits of source 1 */
+ srl \scratch1, \scratch1, 24 /* scratch1 = 255 - alpha(s1): top byte of the inverted pixel */
+ not \scratch2, \s2_8888 /* invert all bits of source 2 */
+ srl \scratch2, \scratch2, 24 /* scratch2 = 255 - alpha(s2) */
+
+ MIPS_2xUN8x4_MUL_2xUN8 \d1_8888, \d2_8888, \
+ \scratch1, \scratch2, \
+ \out1_8888, \out2_8888, \
+ \maskLSR, \
+ \scratch3, \scratch4, \scratch5, \
+ \scratch6, \d1_8888, \d2_8888 /* NOTE(review): d1/d2 are passed again in the last two slots, presumably as scratch - confirm callers may treat them as clobbered */
+
+ addu_s.qb \out1_8888, \out1_8888, \s1_8888 /* out1 += s1, per-byte unsigned add with saturation */
+ addu_s.qb \out2_8888, \out2_8888, \s2_8888 /* out2 += s2, per-byte unsigned add with saturation */
+.endm
+
.macro MIPS_UN8x4_MUL_UN8_ADD_UN8x4 s_8888, \
m_8, \
d_8888, \
diff --git a/pixman/pixman-mips-dspr2.c b/pixman/pixman-mips-dspr2.c
index 161377b..11f1254 100644
--- a/pixman/pixman-mips-dspr2.c
+++ b/pixman/pixman-mips-dspr2.c
@@ -81,6 +81,11 @@ PIXMAN_MIPS_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, over_0565_n_0565,
PIXMAN_MIPS_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, add_8888_n_8888,
uint32_t, 1, uint32_t, 1)
+PIXMAN_MIPS_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, over_n_0565,
+ uint16_t, 1) /* dst_type, dst_cnt: 16-bit destination pixels */
+PIXMAN_MIPS_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, over_n_8888,
+ uint32_t, 1) /* dst_type, dst_cnt: 32-bit destination pixels */
+
PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (add_8_8_8, uint8_t, 1,
uint8_t, 1, uint8_t, 1)
PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (add_8888_8_8888, uint32_t, 1,
@@ -283,7 +288,9 @@ static const pixman_fast_path_t mips_dspr2_fast_paths[] =
PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, mips_composite_over_n_8_8888),
PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, mips_composite_over_n_8_0565),
PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, mips_composite_over_n_8_0565),
-
+ PIXMAN_STD_FAST_PATH (OVER, solid, null, r5g6b5, mips_composite_over_n_0565),
+ PIXMAN_STD_FAST_PATH (OVER, solid, null, a8r8g8b8, mips_composite_over_n_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, null, x8r8g8b8, mips_composite_over_n_8888),
PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, a8r8g8b8, mips_composite_over_8888_n_8888),
PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, x8r8g8b8, mips_composite_over_8888_n_8888),
PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, r5g6b5, mips_composite_over_8888_n_0565),
diff --git a/pixman/pixman-mips-dspr2.h b/pixman/pixman-mips-dspr2.h
index 3766850..4ac9ff9 100644
--- a/pixman/pixman-mips-dspr2.h
+++ b/pixman/pixman-mips-dspr2.h
@@ -85,6 +85,42 @@ mips_composite_##name (pixman_implementation_t *imp, \
} \
}
+/****************************************************************/
+/* Binds a solid-source, unmasked routine: declares pixman_composite_<name>_asm_mips and defines mips_composite_<name>, which calls it once per scanline. */
+#define PIXMAN_MIPS_BIND_FAST_PATH_N_DST(flags, name, \
+ dst_type, dst_cnt) \
+void \
+pixman_composite_##name##_asm_mips (dst_type *dst, \
+ uint32_t src, \
+ int32_t w); /* called below with one row pointer, the solid value and width */ \
+ \
+static void \
+mips_composite_##name (pixman_implementation_t *imp, \
+ pixman_composite_info_t *info) \
+{ \
+ PIXMAN_COMPOSITE_ARGS (info); /* presumably introduces src_image, dest_image, dest_x, dest_y, width, height - none are declared here */ \
+ dst_type *dst_line, *dst; \
+ int32_t dst_stride; \
+ uint32_t src; \
+ \
+ src = _pixman_image_get_solid ( /* NOTE(review): presumably the solid colour, adjusted for the destination format - confirm */ \
+ imp, src_image, dest_image->bits.format); \
+ \
+ if ((flags & SKIP_ZERO_SRC) && src == 0) /* flag set and solid value is all-zero: leave dest untouched */ \
+ return; \
+ \
+ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type, /* presumably sets dst_line and dst_stride (in dst_type units) - confirm */ \
+ dst_stride, dst_line, dst_cnt); \
+ \
+ while (height--) /* one asm call per scanline */ \
+ { \
+ dst = dst_line; \
+ dst_line += dst_stride; /* step to the next row */ \
+ \
+ pixman_composite_##name##_asm_mips (dst, src, width); \
+ } \
+}
+
/*******************************************************************/
#define PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST(flags, name, \
--
1.7.3
More information about the Pixman
mailing list