[Pixman] [PATCH 4/9] MIPS: DSPr2: Added src_0565_8888 nearest neighbor fast path.
Nemanja Lukic
nemanja.lukic at rt-rk.com
Mon Apr 15 10:32:57 PDT 2013
Performance numbers before/after on MIPS-74kc @ 1GHz:
lowlevel-blt-bench results
Referent (before):
src_0565_8888 = L1: 20.70 L2: 19.22 M: 12.50 ( 49.79%) HT: 10.45 VT: 10.18 R: 9.99 RT: 5.31 ( 31Kops/s)
Optimized:
src_0565_8888 = L1: 62.98 L2: 53.44 M: 23.07 ( 91.87%) HT: 19.85 VT: 19.15 R: 17.70 RT: 9.68 ( 43Kops/s)
---
pixman/pixman-mips-dspr2-asm.S | 59 ++++++++++++++++++++++++++++++++++++++++
pixman/pixman-mips-dspr2-asm.h | 21 +++++++-------
pixman/pixman-mips-dspr2.c | 10 +++++++
3 files changed, 79 insertions(+), 11 deletions(-)
diff --git a/pixman/pixman-mips-dspr2-asm.S b/pixman/pixman-mips-dspr2-asm.S
index 3996756..b94e66f 100644
--- a/pixman/pixman-mips-dspr2-asm.S
+++ b/pixman/pixman-mips-dspr2-asm.S
@@ -3196,6 +3196,65 @@ LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_8888_0565_OVER_asm_mips)
END(pixman_scaled_nearest_scanline_8888_0565_OVER_asm_mips)
+LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_0565_8888_SRC_asm_mips)
+/*
+ * a0 - dst (a8r8g8b8)
+ * a1 - src (r5g6b5)
+ * a2 - w
+ * a3 - vx
+ * 16(sp) - unit_x
+ */
+
+ SAVE_REGS_ON_STACK 0, v0
+ beqz a2, 3f
+ nop
+
+ lw v0, 16(sp) /* v0 = unit_x */
+ addiu t1, a2, -1
+ beqz t1, 2f
+ nop
+
+ li t4, 0x07e007e0
+ li t5, 0x001F001F
+1:
+ sra t0, a3, 16 /* t0 = vx >> 16 */
+ sll t0, t0, 1 /* t0 = t0 * 2 ((r5g6b5)) */
+ addu t0, a1, t0
+ lhu t0, 0(t0) /* t0 = source ((r5g6b5)) */
+ addu a3, a3, v0 /* a3 = vx + unit_x */
+ sra t1, a3, 16 /* t1 = vx >> 16 */
+ sll t1, t1, 1 /* t1 = t1 * 2 ((r5g6b5)) */
+ addu t1, a1, t1
+ lhu t1, 0(t1) /* t1 = source ((r5g6b5)) */
+ addu a3, a3, v0 /* a3 = vx + unit_x */
+ addiu a2, a2, -2
+
+ CONVERT_2x0565_TO_2x8888 t0, t1, t2, t3, t4, t5, t6, t7, t8, t9
+
+ sw t2, 0(a0)
+ sw t3, 4(a0)
+
+ addiu t2, a2, -1
+ bgtz t2, 1b
+ addiu a0, a0, 8
+2:
+ beqz a2, 3f
+ nop
+ sra t0, a3, 16 /* t0 = vx >> 16 */
+ sll t0, t0, 1 /* t0 = t0 * 2 ((r5g6b5)) */
+ addu t0, a1, t0
+ lhu t0, 0(t0) /* t0 = source ((r5g6b5)) */
+
+ CONVERT_1x0565_TO_1x8888 t0, t1, t2, t3
+
+ sw t1, 0(a0)
+3:
+ RESTORE_REGS_FROM_STACK 0, v0
+ j ra
+ nop
+
+END(pixman_scaled_nearest_scanline_0565_8888_SRC_asm_mips)
+
LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_8888_8_0565_OVER_asm_mips)
/*
* a0 - dst (r5g6b5)
diff --git a/pixman/pixman-mips-dspr2-asm.h b/pixman/pixman-mips-dspr2-asm.h
index bc458b6..cab122d 100644
--- a/pixman/pixman-mips-dspr2-asm.h
+++ b/pixman/pixman-mips-dspr2-asm.h
@@ -354,17 +354,16 @@ LEAF_MIPS32R2(symbol) \
out1_565, out2_565, \
maskR, maskG, maskB, \
scratch1, scratch2
- precrq.ph.w \scratch1, \in2_8888, \in1_8888
- precr_sra.ph.w \in2_8888, \in1_8888, 0
- shll.ph \scratch1, \scratch1, 8
- srl \in2_8888, \in2_8888, 3
- and \scratch2, \in2_8888, \maskB
- and \scratch1, \scratch1, \maskR
- srl \in2_8888, \in2_8888, 2
- and \out2_565, \in2_8888, \maskG
- or \out2_565, \out2_565, \scratch2
- or \out1_565, \out2_565, \scratch1
- srl \out2_565, \out1_565, 16
+ precr.qb.ph \scratch1, \in2_8888, \in1_8888
+ precrq.qb.ph \in2_8888, \in2_8888, \in1_8888
+ and \out1_565, \scratch1, \maskR
+ shrl.ph \scratch1, \scratch1, 3
+ shll.ph \in2_8888, \in2_8888, 3
+ and \scratch1, \scratch1, \maskB
+ or \out1_565, \out1_565, \scratch1
+ and \in2_8888, \in2_8888, \maskG
+ or \out1_565, \out1_565, \in2_8888
+ srl \out2_565, \out1_565, 16
.endm
/*
diff --git a/pixman/pixman-mips-dspr2.c b/pixman/pixman-mips-dspr2.c
index c227feb..1949921 100644
--- a/pixman/pixman-mips-dspr2.c
+++ b/pixman/pixman-mips-dspr2.c
@@ -125,6 +125,8 @@ PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_DST (8888_8888, OVER,
uint32_t, uint32_t)
PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_DST (8888_0565, OVER,
uint32_t, uint16_t)
+PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_DST (0565_8888, SRC,
+ uint16_t, uint32_t)
PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (0, 8888_8888, SRC,
uint32_t, uint32_t)
@@ -370,6 +372,14 @@ static const pixman_fast_path_t mips_dspr2_fast_paths[] =
PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, r5g6b5, mips_8888_0565),
PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, b5g6r5, mips_8888_0565),
+ PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (SRC, b5g6r5, x8b8g8r8, mips_0565_8888),
+ PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, x8r8g8b8, mips_0565_8888),
+ /* Note: NONE repeat is not supported yet */
+ SIMPLE_NEAREST_FAST_PATH_COVER (SRC, r5g6b5, a8r8g8b8, mips_0565_8888),
+ SIMPLE_NEAREST_FAST_PATH_COVER (SRC, b5g6r5, a8b8g8r8, mips_0565_8888),
+ SIMPLE_NEAREST_FAST_PATH_PAD (SRC, r5g6b5, a8r8g8b8, mips_0565_8888),
+ SIMPLE_NEAREST_FAST_PATH_PAD (SRC, b5g6r5, a8b8g8r8, mips_0565_8888),
+
PIXMAN_MIPS_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8r8g8b8, r5g6b5, mips_8888_8_0565),
PIXMAN_MIPS_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8b8g8r8, b5g6r5, mips_8888_8_0565),
--
1.7.3
More information about the Pixman
mailing list