[Pixman] [PATCH] MIPS: DSPr2: Added over_n_8_8888 and over_n_8_0565 fast paths.
Nemanja Lukic
nlukic at mips.com
Tue Apr 3 10:30:21 PDT 2012
From: Nemanja Lukic <nemanja.lukic at rt-rk.com>
Performance numbers before/after on MIPS-74kc @ 1GHz
Reference (before):
lowlevel-blt-bench:
over_n_8_8888 = L1: 10.71 L2: 10.11 M: 8.70 ( 34.57%) HT: 7.82 VT: 7.77 R: 7.66 RT: 5.37 ( 41Kops/s)
over_n_8_0565 = L1: 8.24 L2: 8.04 M: 7.49 ( 19.84%) HT: 6.82 VT: 6.75 R: 6.70 RT: 4.85 ( 40Kops/s)
cairo-perf-trace:
[ # ] backend test min(s) median(s) stddev. count
[ # ] image: pixman 0.25.3
[ 0] image swfdec-giant-steps 76.936 77.822 0.49% 6/6
[ 1] image gnome-system-monitor 277.838 278.500 0.16% 6/6
[ # ] image16: pixman 0.25.3
[ 0] image16 swfdec-giant-steps 60.598 61.966 1.10% 6/6
[ 1] image16 gnome-system-monitor 277.628 277.675 0.02% 6/6
Optimized:
lowlevel-blt-bench:
over_n_8_8888 = L1: 18.38 L2: 17.29 M: 13.49 ( 53.58%) HT: 11.44 VT: 11.31 R: 11.05 RT: 6.65 ( 47Kops/s)
over_n_8_0565 = L1: 12.42 L2: 11.86 M: 10.68 ( 28.28%) HT: 9.27 VT: 9.16 R: 9.04 RT: 5.83 ( 44Kops/s)
cairo-perf-trace:
[ # ] backend test min(s) median(s) stddev. count
[ # ] image: pixman 0.25.3
[ 0] image swfdec-giant-steps 71.430 71.593 0.18% 6/6
[ 1] image gnome-system-monitor 253.903 254.007 0.02% 6/6
[ # ] image16: pixman 0.25.3
[ 0] image16 swfdec-giant-steps 58.791 59.358 0.62% 6/6
[ 1] image16 gnome-system-monitor 253.713 253.863 0.03% 6/6
---
pixman/pixman-mips-dspr2-asm.S | 122 ++++++++++++++++++++++++++++++++++++++++
pixman/pixman-mips-dspr2-asm.h | 26 +++++++++
pixman/pixman-mips-dspr2.c | 10 +++
3 files changed, 158 insertions(+), 0 deletions(-)
diff --git a/pixman/pixman-mips-dspr2-asm.S b/pixman/pixman-mips-dspr2-asm.S
index 6a0fc18..e51a718 100644
--- a/pixman/pixman-mips-dspr2-asm.S
+++ b/pixman/pixman-mips-dspr2-asm.S
@@ -527,3 +527,125 @@ LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_0565_ca_asm_mips)
nop
END(pixman_composite_over_n_8888_0565_ca_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_composite_over_n_8_8888_asm_mips)
+/* OVER-composite a solid source through an a8 mask onto one row of pixels.
+ * a0 - dst (a8r8g8b8), read and written, advanced 4 bytes per pixel
+ * a1 - src (32bit constant), alpha taken from bits 31..24
+ * a2 - mask (a8), one byte read per pixel
+ * a3 - w, pixel count; returns at once when it is zero
+ */
+/* NOTE(review): the instruction after each branch is relied on as its delay slot (noreorder assumed - confirm in LEAF_MIPS_DSPR2). */
+ beqz a3, 3f /* nothing to do for w == 0 */
+ nop
+ li t4, 0x00ff00ff /* rounding mask handed to the multiply macros */
+ li t5, 0xff /* constant compared against srca and each mask byte */
+ srl t6, a1, 24 /* t6 = srca */
+ beq t5, t6, 2f /* if (srca == 0xff) use the opaque-source loop at 2: */
+ nop
+1: /* per-pixel loop for srca != 0xff */
+ /* a1 = src */
+ lbu t0, 0(a2) /* t0 = mask */
+ beqz t0, 111f /* if (t0 == 0) leave dst untouched */
+ addiu a2, a2, 1 /* delay slot: advance mask pointer on both paths */
+ move t3, a1 /* t3 = src, used as-is when mask == 0xff */
+ beq t0, t5, 11f /* if (t0 == 0xff) skip the mask multiply */
+ lw t1, 0(a0) /* t1 = dst (delay slot, so loaded on both paths) */
+
+ MIPS_UN8x4_MUL_UN8 a1, t0, t3, t4, t6, t7, t8 /* presumably t3 = src * mask; t6-t8 scratch (macro defined elsewhere) */
+11:
+ not t2, t3
+ srl t2, t2, 24 /* t2 = 0xff - alpha byte of t3 */
+ MIPS_UN8x4_MUL_UN8 t1, t2, t1, t4, t6, t7, t8 /* presumably t1 = dst * t2 */
+ addu_s.qb t2, t1, t3 /* t2 = t1 + t3, saturating per byte */
+ sw t2, 0(a0) /* write the blended pixel back */
+111:
+ addiu a3, a3, -1 /* one pixel consumed */
+ bgtz a3, 1b /* loop while pixels remain */
+ addiu a0, a0, 4 /* delay slot: next 32-bit dst pixel */
+ b 3f /* done; skip the opaque-source loop */
+ nop
+2: /* per-pixel loop for srca == 0xff */
+ /* a1 = src */
+ lbu t0, 0(a2) /* t0 = mask */
+ beqz t0, 222f /* if (t0 == 0) leave dst untouched */
+ addiu a2, a2, 1 /* delay slot: advance mask pointer on both paths */
+ beq t0, t5, 22f /* if (t0 == 0xff) store src directly */
+ move t2, a1 /* delay slot: t2 = src, the value stored at 22: for a full mask */
+ lw t1, 0(a0) /* t1 = dst */
+
+ OVER_8888_8_8888 a1, t0, t1, t2, t4, t3, t6, t7, t8 /* t2 = src masked by t0, OVER dst t1 */
+22:
+ sw t2, 0(a0) /* store either src or the blended pixel */
+222:
+ addiu a3, a3, -1 /* one pixel consumed */
+ bgtz a3, 2b /* loop while pixels remain */
+ addiu a0, a0, 4 /* delay slot: next 32-bit dst pixel */
+3:
+ j ra /* return to caller */
+ nop
+
+END(pixman_composite_over_n_8_8888_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_composite_over_n_8_0565_asm_mips)
+/* OVER-composite a solid source through an a8 mask onto one row of 16-bit pixels.
+ * a0 - dst (r5g6b5), read and written, advanced 2 bytes per pixel
+ * a1 - src (32bit constant), alpha taken from bits 31..24
+ * a2 - mask (a8), one byte read per pixel
+ * a3 - w, pixel count; returns at once when it is zero
+ */
+/* NOTE(review): the instruction after each branch is relied on as its delay slot (noreorder assumed - confirm in LEAF_MIPS_DSPR2). */
+ beqz a3, 3f /* nothing to do for w == 0 */
+ nop
+ li t4, 0x00ff00ff /* rounding mask handed to the multiply macros */
+ li t5, 0xff /* constant compared against srca and each mask byte */
+ srl t6, a1, 24 /* t6 = srca */
+ beq t6, t5, 2f /* if (srca == 0xff) use the opaque-source loop at 2: */
+ nop
+1: /* per-pixel loop for srca != 0xff */
+ /* a1 = src */
+ lbu t0, 0(a2) /* t0 = mask */
+ beqz t0, 111f /* if (t0 == 0) leave dst untouched */
+ addiu a2, a2, 1 /* delay slot: advance mask pointer on both paths */
+ lhu t1, 0(a0) /* t1 = dst */
+ CONVERT_1x0565_TO_1x8888 t1, t2, t3, t7 /* presumably t2 = dst widened to 8888; t3, t7 scratch (macro defined elsewhere) */
+ beq t0, t5, 11f /* if (t0 == 0xff) skip the mask multiply */
+ move t3, a1 /* delay slot: t3 = src; set only after t3 served as conversion scratch */
+
+ MIPS_UN8x4_MUL_UN8 a1, t0, t3, t4, t7, t8, t9 /* presumably t3 = src * mask; t7-t9 scratch */
+11:
+ not t6, t3 /* t6 is free for reuse: srca was only needed before the loop */
+ srl t6, t6, 24 /* t6 = 0xff - alpha byte of t3 */
+ MIPS_UN8x4_MUL_UN8 t2, t6, t2, t4, t7, t8, t9 /* presumably t2 = dst * t6 */
+ addu_s.qb t1, t2, t3 /* t1 = t2 + t3, saturating per byte */
+ CONVERT_1x8888_TO_1x0565 t1, t2, t3, t7 /* presumably t2 = t1 packed back to 565 */
+ sh t2, 0(a0) /* write the blended pixel back */
+111:
+ addiu a3, a3, -1 /* one pixel consumed */
+ bgtz a3, 1b /* loop while pixels remain */
+ addiu a0, a0, 2 /* delay slot: next 16-bit dst pixel */
+ b 3f /* done; skip the opaque-source loop */
+ nop
+2: /* per-pixel loop for srca == 0xff */
+ /* a1 = src */
+ lbu t0, 0(a2) /* t0 = mask */
+ beqz t0, 222f /* if (t0 == 0) leave dst untouched */
+ addiu a2, a2, 1 /* delay slot: advance mask pointer on both paths */
+ beq t0, t5, 22f /* if (t0 == 0xff) convert and store src directly */
+ move t1, a1 /* delay slot: t1 = src, the value converted at 22: for a full mask */
+ lhu t1, 0(a0) /* t1 = dst (replaces the src copy on this path) */
+
+ CONVERT_1x0565_TO_1x8888 t1, t2, t3, t7 /* presumably t2 = dst widened to 8888 */
+ OVER_8888_8_8888 a1, t0, t2, t1, t4, t3, t7, t8, t9 /* t1 = src masked by t0, OVER dst t2 */
+22:
+ CONVERT_1x8888_TO_1x0565 t1, t9, t3, t7 /* presumably t9 = t1 packed to 565 */
+ sh t9, 0(a0) /* store either src or the blended pixel */
+222:
+ addiu a3, a3, -1 /* one pixel consumed */
+ bgtz a3, 2b /* loop while pixels remain */
+ addiu a0, a0, 2 /* delay slot: next 16-bit dst pixel */
+3:
+ j ra /* return to caller */
+ nop
+
+END(pixman_composite_over_n_8_0565_asm_mips)
diff --git a/pixman/pixman-mips-dspr2-asm.h b/pixman/pixman-mips-dspr2-asm.h
index 12ff42c..03a4bb0 100644
--- a/pixman/pixman-mips-dspr2-asm.h
+++ b/pixman/pixman-mips-dspr2-asm.h
@@ -499,4 +499,30 @@ LEAF_MIPS32R2(symbol) \
precr.qb.ph \d2_8888, \scratch5, \scratch6
.endm
+/*
+ * OVER operation on single a8r8g8b8 source pixel (s_8888) and single a8r8g8b8
+ * destination pixel (d_8888) using a8 mask (m_8); the result is left in
+ * out_8888, while d_8888 and scratch1-4 are overwritten. maskLSR is needed for
+ * the rounding process and must hold: li maskLSR, 0x00ff00ff
+ */
+.macro OVER_8888_8_8888 s_8888, \
+ m_8, \
+ d_8888, \
+ out_8888, \
+ maskLSR, \
+ scratch1, scratch2, scratch3, scratch4
+ MIPS_UN8x4_MUL_UN8 \s_8888, \m_8, \
+ \scratch1, \maskLSR, \
+ \scratch2, \scratch3, \scratch4 /* presumably scratch1 = s_8888 * m_8 (macro defined elsewhere) */
+/* Derive the inverse alpha of the masked source. */
+ not \scratch2, \scratch1
+ srl \scratch2, \scratch2, 24 /* scratch2 = 0xff - alpha byte of scratch1 */
+/* NOTE(review): out_8888 sits in a scratch slot of the multiply below, so it presumably must not alias d_8888 or scratch1-2 - confirm. */
+ MIPS_UN8x4_MUL_UN8 \d_8888, \scratch2, \
+ \d_8888, \maskLSR, \
+ \scratch3, \scratch4, \out_8888 /* presumably d_8888 = d_8888 * scratch2 */
+
+ addu_s.qb \out_8888, \d_8888, \scratch1 /* out = scaled dst + masked src, saturating per byte */
+.endm
+
#endif //PIXMAN_MIPS_DSPR2_ASM_H
diff --git a/pixman/pixman-mips-dspr2.c b/pixman/pixman-mips-dspr2.c
index 018770a..7081734 100644
--- a/pixman/pixman-mips-dspr2.c
+++ b/pixman/pixman-mips-dspr2.c
@@ -53,6 +53,10 @@ PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8888_8888_ca,
uint32_t, 1, uint32_t, 1)
PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8888_0565_ca,
uint32_t, 1, uint16_t, 1)
+PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8_8888,
+ uint8_t, 1, uint32_t, 1) /* a8 mask, 32-bit dst; presumably (mask type, mask stride unit, dst type, dst stride unit) - confirm against the macro definition */
+PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8_0565,
+ uint8_t, 1, uint16_t, 1) /* a8 mask, 16-bit r5g6b5 dst; same argument layout as the binding above (presumed) */
static pixman_bool_t
pixman_fill_mips (uint32_t *bits,
@@ -195,6 +199,12 @@ static const pixman_fast_path_t mips_dspr2_fast_paths[] =
PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, mips_composite_over_n_8888_8888_ca),
PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5, mips_composite_over_n_8888_0565_ca),
PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5, mips_composite_over_n_8888_0565_ca),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, mips_composite_over_n_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, mips_composite_over_n_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, mips_composite_over_n_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, mips_composite_over_n_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, mips_composite_over_n_8_0565),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, mips_composite_over_n_8_0565),
{ PIXMAN_OP_NONE },
};
--
1.7.3
More information about the Pixman
mailing list