[Pixman] [PATCH] MIPS: DSPr2: Added two fast paths: - pixbuf - rpixbuf
Nemanja Lukic
nemanja.lukic at rt-rk.com.com
Mon Mar 4 09:25:14 PST 2013
From: Nemanja Lukic <nemanja.lukic at rt-rk.com>
Performance numbers before/after on MIPS-74kc @ 1GHz:
lowlevel-blt-bench results
Referent (before):
pixbuf = L1: 18.18 L2: 16.47 M: 13.36 (107.27%) HT: 10.16 VT: 10.07 R: 9.84 RT: 5.54 ( 35Kops/s)
rpixbuf = L1: 14.63 L2: 13.55 M: 9.91 ( 79.53%) HT: 8.47 VT: 8.32 R: 8.17 RT: 4.90 ( 33Kops/s)
Optimized:
pixbuf = L1: 43.54 L2: 36.02 M: 17.08 (137.09%) HT: 15.58 VT: 14.85 R: 13.87 RT: 8.38 ( 44Kops/s)
rpixbuf = L1: 45.69 L2: 37.30 M: 17.24 (138.31%) HT: 15.66 VT: 14.88 R: 13.97 RT: 8.38 ( 44Kops/s)
---
pixman/pixman-mips-dspr2-asm.S | 121 ++++++++++++++++++++++++++++++++++++++++
pixman/pixman-mips-dspr2.c | 8 +++
2 files changed, 129 insertions(+), 0 deletions(-)
diff --git a/pixman/pixman-mips-dspr2-asm.S b/pixman/pixman-mips-dspr2-asm.S
index 3a4d914..866e93e 100644
--- a/pixman/pixman-mips-dspr2-asm.S
+++ b/pixman/pixman-mips-dspr2-asm.S
@@ -699,6 +699,127 @@ LEAF_MIPS_DSPR2(pixman_composite_src_0888_0565_rev_asm_mips)
END(pixman_composite_src_0888_0565_rev_asm_mips)
#endif
+LEAF_MIPS_DSPR2(pixman_composite_src_pixbuf_8888_asm_mips)
+/*
+ * a0 - dst (a8b8g8r8)
+ * a1 - src (a8r8g8b8)
+ * a2 - w
+ */
+
+ SAVE_REGS_ON_STACK 0, v0
+ li v0, 0x00ff00ff
+
+ beqz a2, 3f
+ nop
+ addiu t1, a2, -1
+ beqz t1, 2f
+ nop
+1:
+ lw t0, 0(a1)
+ lw t1, 4(a1)
+ addiu a1, a1, 8
+ addiu a2, a2, -2
+ srl t2, t0, 24
+ srl t3, t1, 24
+
+ MIPS_2xUN8x4_MUL_2xUN8 t0, t1, t2, t3, t0, t1, v0, t4, t5, t6, t7, t8, t9
+
+ sll t0, t0, 8
+ sll t1, t1, 8
+ andi t2, t2, 0xff
+ andi t3, t3, 0xff
+ or t0, t0, t2
+ or t1, t1, t3
+ wsbh t0, t0
+ wsbh t1, t1
+ rotr t0, t0, 16
+ rotr t1, t1, 16
+ sw t0, 0(a0)
+ sw t1, 4(a0)
+
+ addiu t2, a2, -1
+ bgtz t2, 1b
+ addiu a0, a0, 8
+2:
+ beqz a2, 3f
+ nop
+ lw t0, 0(a1)
+ srl t1, t0, 24
+
+ MIPS_UN8x4_MUL_UN8 t0, t1, t0, v0, t3, t4, t5
+
+ sll t0, t0, 8
+ andi t1, t1, 0xff
+ or t0, t0, t1
+ wsbh t0, t0
+ rotr t0, t0, 16
+ sw t0, 0(a0)
+3:
+ RESTORE_REGS_FROM_STACK 0, v0
+ j ra
+ nop
+
+END(pixman_composite_src_pixbuf_8888_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_composite_src_rpixbuf_8888_asm_mips)
+/*
+ * a0 - dst (a8r8g8b8)
+ * a1 - src (a8r8g8b8)
+ * a2 - w
+ */
+
+ SAVE_REGS_ON_STACK 0, v0
+ li v0, 0x00ff00ff
+
+ beqz a2, 3f
+ nop
+ addiu t1, a2, -1
+ beqz t1, 2f
+ nop
+1:
+ lw t0, 0(a1)
+ lw t1, 4(a1)
+ addiu a1, a1, 8
+ addiu a2, a2, -2
+ srl t2, t0, 24
+ srl t3, t1, 24
+
+ MIPS_2xUN8x4_MUL_2xUN8 t0, t1, t2, t3, t0, t1, v0, t4, t5, t6, t7, t8, t9
+
+ sll t0, t0, 8
+ sll t1, t1, 8
+ andi t2, t2, 0xff
+ andi t3, t3, 0xff
+ or t0, t0, t2
+ or t1, t1, t3
+ rotr t0, t0, 8
+ rotr t1, t1, 8
+ sw t0, 0(a0)
+ sw t1, 4(a0)
+
+ addiu t2, a2, -1
+ bgtz t2, 1b
+ addiu a0, a0, 8
+2:
+ beqz a2, 3f
+ nop
+ lw t0, 0(a1)
+ srl t1, t0, 24
+
+ MIPS_UN8x4_MUL_UN8 t0, t1, t0, v0, t3, t4, t5
+
+ sll t0, t0, 8
+ andi t1, t1, 0xff
+ or t0, t0, t1
+ rotr t0, t0, 8
+ sw t0, 0(a0)
+3:
+ RESTORE_REGS_FROM_STACK 0, v0
+ j ra
+ nop
+
+END(pixman_composite_src_rpixbuf_8888_asm_mips)
+
LEAF_MIPS_DSPR2(pixman_composite_src_n_8_8888_asm_mips)
/*
* a0 - dst (a8r8g8b8)
diff --git a/pixman/pixman-mips-dspr2.c b/pixman/pixman-mips-dspr2.c
index 1949921..e10c9df 100644
--- a/pixman/pixman-mips-dspr2.c
+++ b/pixman/pixman-mips-dspr2.c
@@ -54,6 +54,10 @@ PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, src_0888_8888_rev,
PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, src_0888_0565_rev,
uint8_t, 3, uint16_t, 1)
#endif
+PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, src_pixbuf_8888,
+ uint32_t, 1, uint32_t, 1)
+PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, src_rpixbuf_8888,
+ uint32_t, 1, uint32_t, 1)
PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, over_8888_8888,
uint32_t, 1, uint32_t, 1)
PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, over_8888_0565,
@@ -299,6 +303,10 @@ static const pixman_fast_path_t mips_dspr2_fast_paths[] =
PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, x8r8g8b8, mips_composite_src_0888_8888_rev),
PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, r5g6b5, mips_composite_src_0888_0565_rev),
#endif
+ PIXMAN_STD_FAST_PATH (SRC, pixbuf, pixbuf, a8r8g8b8, mips_composite_src_pixbuf_8888),
+ PIXMAN_STD_FAST_PATH (SRC, pixbuf, pixbuf, a8b8g8r8, mips_composite_src_rpixbuf_8888),
+ PIXMAN_STD_FAST_PATH (SRC, rpixbuf, rpixbuf, a8r8g8b8, mips_composite_src_rpixbuf_8888),
+ PIXMAN_STD_FAST_PATH (SRC, rpixbuf, rpixbuf, a8b8g8r8, mips_composite_src_pixbuf_8888),
PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8r8g8b8, mips_composite_src_n_8_8888),
PIXMAN_STD_FAST_PATH (SRC, solid, a8, x8r8g8b8, mips_composite_src_n_8_8888),
PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8b8g8r8, mips_composite_src_n_8_8888),
--
1.7.3
More information about the Pixman
mailing list