pixman: Branch 'master' - 2 commits
Siarhei Siamashka
siamashka at kemper.freedesktop.org
Sun Jul 8 11:42:21 PDT 2012
pixman/pixman-mips-dspr2-asm.S | 704 +++++++++++++++++++++++++++++++++++++++++
pixman/pixman-mips-dspr2-asm.h | 34 +
pixman/pixman-mips-dspr2.c | 51 ++
pixman/pixman-mips-dspr2.h | 49 ++
4 files changed, 838 insertions(+)
New commits:
commit 86ad09b548b45a5a5074d9d83970d5e7e7f89d31
Author: Nemanja Lukic <nemanja.lukic at rt-rk.com>
Date: Mon Jul 2 20:54:20 2012 +0200
MIPS: DSPr2: Added more bilinear fast paths (without mask)
Performance numbers before/after on MIPS-74kc @ 1GHz:
lowlevel-blt-bench -b
Referent (before):
src_8888_8888 = L1: 8.18 L2: 7.79 M: 6.32 ( 33.51%) HT: 5.78 VT: 5.70 R: 5.61 RT: 3.79 ( 29Kops/s)
src_8888_0565 = L1: 6.90 L2: 7.14 M: 6.47 ( 25.75%) HT: 5.54 VT: 5.51 R: 5.46 RT: 3.53 ( 28Kops/s)
src_0565_x888 = L1: 3.76 L2: 3.71 M: 3.37 ( 13.41%) HT: 3.26 VT: 3.22 R: 3.20 RT: 2.58 ( 23Kops/s)
src_0565_0565 = L1: 3.59 L2: 3.56 M: 3.47 ( 9.19%) HT: 3.19 VT: 3.18 R: 3.16 RT: 2.46 ( 22Kops/s)
over_8888_8888 = L1: 5.99 L2: 5.66 M: 4.95 ( 26.28%) HT: 4.40 VT: 4.38 R: 4.31 RT: 3.02 ( 26Kops/s)
add_8888_8888 = L1: 6.84 L2: 6.39 M: 5.48 ( 29.09%) HT: 4.80 VT: 4.79 R: 4.70 RT: 3.20 ( 27Kops/s)
Optimized:
src_8888_8888 = L1: 18.27 L2: 16.69 M: 12.87 ( 68.25%) HT: 11.80 VT: 11.61 R: 10.60 RT: 7.05 ( 41Kops/s)
src_8888_0565 = L1: 15.18 L2: 14.10 M: 11.75 ( 46.71%) HT: 10.64 VT: 10.50 R: 10.03 RT: 7.15 ( 41Kops/s)
src_0565_x888 = L1: 10.45 L2: 9.96 M: 9.23 ( 36.72%) HT: 8.39 VT: 8.29 R: 8.02 RT: 5.75 ( 37Kops/s)
src_0565_0565 = L1: 9.37 L2: 8.98 M: 8.50 ( 22.53%) HT: 7.71 VT: 7.66 R: 7.52 RT: 5.59 ( 37Kops/s)
over_8888_8888 = L1: 12.21 L2: 11.01 M: 8.56 ( 45.36%) HT: 7.71 VT: 7.64 R: 7.43 RT: 5.51 ( 36Kops/s)
add_8888_8888 = L1: 17.72 L2: 15.16 M: 10.78 ( 57.13%) HT: 9.46 VT: 9.30 R: 9.00 RT: 6.03 ( 38Kops/s)
diff --git a/pixman/pixman-mips-dspr2-asm.S b/pixman/pixman-mips-dspr2-asm.S
index ac56746..a8fccd5 100644
--- a/pixman/pixman-mips-dspr2-asm.S
+++ b/pixman/pixman-mips-dspr2-asm.S
@@ -749,6 +749,373 @@ LEAF_MIPS_DSPR2(pixman_composite_over_n_8_0565_asm_mips)
END(pixman_composite_over_n_8_0565_asm_mips)
+LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_mips)
+/*
+ * a0 - *dst
+ * a1 - *src_top
+ * a2 - *src_bottom
+ * a3 - w
+ * 16(sp) - wt
+ * 20(sp) - wb
+ * 24(sp) - vx
+ * 28(sp) - unit_x
+ */
+
+ beqz a3, 1f
+ nop
+
+ SAVE_REGS_ON_STACK 20, v0, s0, s1, s2, s3, s4, s5, s6, s7
+
+ lw s0, 36(sp) /* s0 = wt */
+ lw s1, 40(sp) /* s1 = wb */
+ lw s2, 44(sp) /* s2 = vx */
+ lw s3, 48(sp) /* s3 = unit_x */
+ li v0, BILINEAR_INTERPOLATION_RANGE
+
+ sll s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
+ sll s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
+0:
+ andi t4, s2, 0xffff /* t4 = (short)vx */
+ srl t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = vx >> 8 */
+ subu t5, v0, t4 /* t5 = ( 256 - (vx>>8)) */
+
+ mul s4, s0, t5 /* s4 = wt*(256-(vx>>8)) */
+ mul s5, s0, t4 /* s5 = wt*(vx>>8) */
+ mul s6, s1, t5 /* s6 = wb*(256-(vx>>8)) */
+ mul s7, s1, t4 /* s7 = wb*(vx>>8) */
+
+ sra t9, s2, 16
+ sll t9, t9, 2
+ addiu t8, t9, 4
+ lwx t0, t9(a1) /* t0 = tl */
+ lwx t1, t8(a1) /* t1 = tr */
+ addiu a3, a3, -1
+ lwx t2, t9(a2) /* t2 = bl */
+ lwx t3, t8(a2) /* t3 = br */
+
+ BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
+
+ addu s2, s2, s3 /* vx += unit_x; */
+ sw t0, 0(a0)
+ bnez a3, 0b
+ addiu a0, a0, 4
+
+ RESTORE_REGS_FROM_STACK 20, v0, s0, s1, s2, s3, s4, s5, s6, s7
+1:
+ j ra
+ nop
+
+END(pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_0565_SRC_asm_mips)
+/*
+ * a0 - *dst
+ * a1 - *src_top
+ * a2 - *src_bottom
+ * a3 - w
+ * 16(sp) - wt
+ * 20(sp) - wb
+ * 24(sp) - vx
+ * 28(sp) - unit_x
+ */
+
+ beqz a3, 1f
+ nop
+
+ SAVE_REGS_ON_STACK 20, v0, s0, s1, s2, s3, s4, s5, s6, s7
+
+ lw s0, 36(sp) /* s0 = wt */
+ lw s1, 40(sp) /* s1 = wb */
+ lw s2, 44(sp) /* s2 = vx */
+ lw s3, 48(sp) /* s3 = unit_x */
+ li v0, BILINEAR_INTERPOLATION_RANGE
+
+ sll s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
+ sll s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
+0:
+ andi t4, s2, 0xffff /* t4 = (short)vx */
+ srl t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = vx >> 8 */
+ subu t5, v0, t4 /* t5 = ( 256 - (vx>>8)) */
+
+ mul s4, s0, t5 /* s4 = wt*(256-(vx>>8)) */
+ mul s5, s0, t4 /* s5 = wt*(vx>>8) */
+ mul s6, s1, t5 /* s6 = wb*(256-(vx>>8)) */
+ mul s7, s1, t4 /* s7 = wb*(vx>>8) */
+
+ sra t9, s2, 16
+ sll t9, t9, 2
+ addiu t8, t9, 4
+ lwx t0, t9(a1) /* t0 = tl */
+ lwx t1, t8(a1) /* t1 = tr */
+ addiu a3, a3, -1
+ lwx t2, t9(a2) /* t2 = bl */
+ lwx t3, t8(a2) /* t3 = br */
+
+ BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
+ CONVERT_1x8888_TO_1x0565 t0, t1, t2, t3
+
+ addu s2, s2, s3 /* vx += unit_x; */
+ sh t1, 0(a0)
+ bnez a3, 0b
+ addiu a0, a0, 2
+
+ RESTORE_REGS_FROM_STACK 20, v0, s0, s1, s2, s3, s4, s5, s6, s7
+1:
+ j ra
+ nop
+
+END(pixman_scaled_bilinear_scanline_8888_0565_SRC_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_0565_8888_SRC_asm_mips)
+/*
+ * a0 - *dst
+ * a1 - *src_top
+ * a2 - *src_bottom
+ * a3 - w
+ * 16(sp) - wt
+ * 20(sp) - wb
+ * 24(sp) - vx
+ * 28(sp) - unit_x
+ */
+
+ beqz a3, 1f
+ nop
+
+ SAVE_REGS_ON_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
+
+ lw s0, 44(sp) /* s0 = wt */
+ lw s1, 48(sp) /* s1 = wb */
+ lw s2, 52(sp) /* s2 = vx */
+ lw s3, 56(sp) /* s3 = unit_x */
+ li v0, BILINEAR_INTERPOLATION_RANGE
+ li v1, 0x07e007e0
+ li s8, 0x001f001f
+
+ sll s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
+ sll s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
+0:
+ andi t4, s2, 0xffff /* t4 = (short)vx */
+ srl t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = vx >> 8 */
+ subu t5, v0, t4 /* t5 = ( 256 - (vx>>8)) */
+
+ mul s4, s0, t5 /* s4 = wt*(256-(vx>>8)) */
+ mul s5, s0, t4 /* s5 = wt*(vx>>8) */
+ mul s6, s1, t5 /* s6 = wb*(256-(vx>>8)) */
+ mul s7, s1, t4 /* s7 = wb*(vx>>8) */
+
+ sra t9, s2, 16
+ sll t9, t9, 1
+ addiu t8, t9, 2
+ lhx t0, t9(a1) /* t0 = tl */
+ lhx t1, t8(a1) /* t1 = tr */
+ andi t1, t1, 0xffff
+ addiu a3, a3, -1
+ lhx t2, t9(a2) /* t2 = bl */
+ lhx t3, t8(a2) /* t3 = br */
+ andi t3, t3, 0xffff
+
+ CONVERT_2x0565_TO_2x8888 t0, t1, t0, t1, v1, s8, t4, t5, t6, t7
+ CONVERT_2x0565_TO_2x8888 t2, t3, t2, t3, v1, s8, t4, t5, t6, t7
+ BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
+
+ addu s2, s2, s3 /* vx += unit_x; */
+ sw t0, 0(a0)
+ bnez a3, 0b
+ addiu a0, a0, 4
+
+ RESTORE_REGS_FROM_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
+1:
+ j ra
+ nop
+
+END(pixman_scaled_bilinear_scanline_0565_8888_SRC_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_0565_0565_SRC_asm_mips)
+/*
+ * a0 - *dst
+ * a1 - *src_top
+ * a2 - *src_bottom
+ * a3 - w
+ * 16(sp) - wt
+ * 20(sp) - wb
+ * 24(sp) - vx
+ * 28(sp) - unit_x
+ */
+
+ beqz a3, 1f
+ nop
+
+ SAVE_REGS_ON_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
+
+ lw s0, 44(sp) /* s0 = wt */
+ lw s1, 48(sp) /* s1 = wb */
+ lw s2, 52(sp) /* s2 = vx */
+ lw s3, 56(sp) /* s3 = unit_x */
+ li v0, BILINEAR_INTERPOLATION_RANGE
+ li v1, 0x07e007e0
+ li s8, 0x001f001f
+
+ sll s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
+ sll s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
+0:
+ andi t4, s2, 0xffff /* t4 = (short)vx */
+ srl t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = vx >> 8 */
+ subu t5, v0, t4 /* t5 = ( 256 - (vx>>8)) */
+
+ mul s4, s0, t5 /* s4 = wt*(256-(vx>>8)) */
+ mul s5, s0, t4 /* s5 = wt*(vx>>8) */
+ mul s6, s1, t5 /* s6 = wb*(256-(vx>>8)) */
+ mul s7, s1, t4 /* s7 = wb*(vx>>8) */
+
+ sra t9, s2, 16
+ sll t9, t9, 1
+ addiu t8, t9, 2
+ lhx t0, t9(a1) /* t0 = tl */
+ lhx t1, t8(a1) /* t1 = tr */
+ andi t1, t1, 0xffff
+ addiu a3, a3, -1
+ lhx t2, t9(a2) /* t2 = bl */
+ lhx t3, t8(a2) /* t3 = br */
+ andi t3, t3, 0xffff
+
+ CONVERT_2x0565_TO_2x8888 t0, t1, t0, t1, v1, s8, t4, t5, t6, t7
+ CONVERT_2x0565_TO_2x8888 t2, t3, t2, t3, v1, s8, t4, t5, t6, t7
+ BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
+ CONVERT_1x8888_TO_1x0565 t0, t1, t2, t3
+
+ addu s2, s2, s3 /* vx += unit_x; */
+ sh t1, 0(a0)
+ bnez a3, 0b
+ addiu a0, a0, 2
+
+ RESTORE_REGS_FROM_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
+1:
+ j ra
+ nop
+
+END(pixman_scaled_bilinear_scanline_0565_0565_SRC_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8888_OVER_asm_mips)
+/*
+ * a0 - *dst
+ * a1 - *src_top
+ * a2 - *src_bottom
+ * a3 - w
+ * 16(sp) - wt
+ * 20(sp) - wb
+ * 24(sp) - vx
+ * 28(sp) - unit_x
+ */
+
+ beqz a3, 1f
+ nop
+
+ SAVE_REGS_ON_STACK 24, v0, s0, s1, s2, s3, s4, s5, s6, s7, s8
+
+ lw s0, 40(sp) /* s0 = wt */
+ lw s1, 44(sp) /* s1 = wb */
+ lw s2, 48(sp) /* s2 = vx */
+ lw s3, 52(sp) /* s3 = unit_x */
+ li v0, BILINEAR_INTERPOLATION_RANGE
+ li s8, 0x00ff00ff
+
+ sll s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
+ sll s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
+0:
+ andi t4, s2, 0xffff /* t4 = (short)vx */
+ srl t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = vx >> 8 */
+ subu t5, v0, t4 /* t5 = ( 256 - (vx>>8)) */
+
+ mul s4, s0, t5 /* s4 = wt*(256-(vx>>8)) */
+ mul s5, s0, t4 /* s5 = wt*(vx>>8) */
+ mul s6, s1, t5 /* s6 = wb*(256-(vx>>8)) */
+ mul s7, s1, t4 /* s7 = wb*(vx>>8) */
+
+ sra t9, s2, 16
+ sll t9, t9, 2
+ addiu t8, t9, 4
+ lwx t0, t9(a1) /* t0 = tl */
+ lwx t1, t8(a1) /* t1 = tr */
+ addiu a3, a3, -1
+ lwx t2, t9(a2) /* t2 = bl */
+ lwx t3, t8(a2) /* t3 = br */
+
+ BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
+ lw t1, 0(a0) /* t1 = dest */
+ OVER_8888_8888 t0, t1, t2, s8, t3, t4, t5, t6
+
+ addu s2, s2, s3 /* vx += unit_x; */
+ sw t2, 0(a0)
+ bnez a3, 0b
+ addiu a0, a0, 4
+
+ RESTORE_REGS_FROM_STACK 24, v0, s0, s1, s2, s3, s4, s5, s6, s7, s8
+1:
+ j ra
+ nop
+
+END(pixman_scaled_bilinear_scanline_8888_8888_OVER_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8888_ADD_asm_mips)
+/*
+ * a0 - *dst
+ * a1 - *src_top
+ * a2 - *src_bottom
+ * a3 - w
+ * 16(sp) - wt
+ * 20(sp) - wb
+ * 24(sp) - vx
+ * 28(sp) - unit_x
+ */
+
+ beqz a3, 1f
+ nop
+
+ SAVE_REGS_ON_STACK 20, v0, s0, s1, s2, s3, s4, s5, s6, s7
+
+ lw s0, 36(sp) /* s0 = wt */
+ lw s1, 40(sp) /* s1 = wb */
+ lw s2, 44(sp) /* s2 = vx */
+ lw s3, 48(sp) /* s3 = unit_x */
+ li v0, BILINEAR_INTERPOLATION_RANGE
+
+ sll s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
+ sll s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
+0:
+ andi t4, s2, 0xffff /* t4 = (short)vx */
+ srl t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = vx >> 8 */
+ subu t5, v0, t4 /* t5 = ( 256 - (vx>>8)) */
+
+ mul s4, s0, t5 /* s4 = wt*(256-(vx>>8)) */
+ mul s5, s0, t4 /* s5 = wt*(vx>>8) */
+ mul s6, s1, t5 /* s6 = wb*(256-(vx>>8)) */
+ mul s7, s1, t4 /* s7 = wb*(vx>>8) */
+
+ sra t9, s2, 16
+ sll t9, t9, 2
+ addiu t8, t9, 4
+ lwx t0, t9(a1) /* t0 = tl */
+ lwx t1, t8(a1) /* t1 = tr */
+ addiu a3, a3, -1
+ lwx t2, t9(a2) /* t2 = bl */
+ lwx t3, t8(a2) /* t3 = br */
+
+ BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
+ lw t1, 0(a0)
+ addu_s.qb t2, t0, t1
+
+ addu s2, s2, s3 /* vx += unit_x; */
+ sw t2, 0(a0)
+ bnez a3, 0b
+ addiu a0, a0, 4
+
+ RESTORE_REGS_FROM_STACK 20, v0, s0, s1, s2, s3, s4, s5, s6, s7
+1:
+ j ra
+ nop
+
+END(pixman_scaled_bilinear_scanline_8888_8888_ADD_asm_mips)
+
LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_8888_SRC_asm_mips)
/*
* a0 - *dst
diff --git a/pixman/pixman-mips-dspr2-asm.h b/pixman/pixman-mips-dspr2-asm.h
index 24b049e..7327dc6 100644
--- a/pixman/pixman-mips-dspr2-asm.h
+++ b/pixman/pixman-mips-dspr2-asm.h
@@ -566,6 +566,27 @@ LEAF_MIPS32R2(symbol) \
addu_s.qb \out2_8888, \d2_8888, \scratch2
.endm
+/*
+ * OVER operation on single a8r8g8b8 source pixel (s_8888) and single a8r8g8b8
+ * destination pixel (d_8888). It also requires maskLSR needed for rounding
+ * process. maskLSR must have following value:
+ * li maskLSR, 0x00ff00ff
+ */
+.macro OVER_8888_8888 s_8888, \
+ d_8888, \
+ out_8888, \
+ maskLSR, \
+ scratch1, scratch2, scratch3, scratch4
+ not \scratch1, \s_8888
+ srl \scratch1, \scratch1, 24
+
+ MIPS_UN8x4_MUL_UN8 \d_8888, \scratch1, \
+ \out_8888, \maskLSR, \
+ \scratch2, \scratch3, \scratch4
+
+ addu_s.qb \out_8888, \out_8888, \s_8888
+.endm
+
.macro MIPS_UN8x4_MUL_UN8_ADD_UN8x4 s_8888, \
m_8, \
d_8888, \
diff --git a/pixman/pixman-mips-dspr2.c b/pixman/pixman-mips-dspr2.c
index 66c0e5d..63a0225 100644
--- a/pixman/pixman-mips-dspr2.c
+++ b/pixman/pixman-mips-dspr2.c
@@ -58,6 +58,19 @@ PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8_8888,
PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8_0565,
uint8_t, 1, uint16_t, 1)
+PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (0, 8888_8888, SRC,
+ uint32_t, uint32_t)
+PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (0, 8888_0565, SRC,
+ uint32_t, uint16_t)
+PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (0, 0565_8888, SRC,
+ uint16_t, uint32_t)
+PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (0, 0565_0565, SRC,
+ uint16_t, uint16_t)
+PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (SKIP_ZERO_SRC, 8888_8888, OVER,
+ uint32_t, uint32_t)
+PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (SKIP_ZERO_SRC, 8888_8888, ADD,
+ uint32_t, uint32_t)
+
PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_A8_DST (0, 8888_8_8888, SRC,
uint32_t, uint32_t)
PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_A8_DST (0, 8888_8_0565, SRC,
@@ -219,6 +232,22 @@ static const pixman_fast_path_t mips_dspr2_fast_paths[] =
PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, mips_composite_over_n_8_0565),
PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, mips_composite_over_n_8_0565),
+ SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, mips_8888_8888),
+ SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, mips_8888_8888),
+ SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, mips_8888_8888),
+
+ SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, r5g6b5, mips_8888_0565),
+ SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, r5g6b5, mips_8888_0565),
+
+ SIMPLE_BILINEAR_FAST_PATH (SRC, r5g6b5, x8r8g8b8, mips_0565_8888),
+ SIMPLE_BILINEAR_FAST_PATH (SRC, r5g6b5, r5g6b5, mips_0565_0565),
+
+ SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, mips_8888_8888),
+ SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, mips_8888_8888),
+
+ SIMPLE_BILINEAR_FAST_PATH (ADD, a8r8g8b8, a8r8g8b8, mips_8888_8888),
+ SIMPLE_BILINEAR_FAST_PATH (ADD, a8r8g8b8, x8r8g8b8, mips_8888_8888),
+
SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, mips_8888_8_8888),
SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, mips_8888_8_8888),
SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, mips_8888_8_8888),
diff --git a/pixman/pixman-mips-dspr2.h b/pixman/pixman-mips-dspr2.h
index 5036938..a3d774f 100644
--- a/pixman/pixman-mips-dspr2.h
+++ b/pixman/pixman-mips-dspr2.h
@@ -127,6 +127,55 @@ mips_composite_##name (pixman_implementation_t *imp, \
} \
}
+/****************************************************************************/
+
+#define PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST(flags, name, op, \
+ src_type, dst_type) \
+void \
+pixman_scaled_bilinear_scanline_##name##_##op##_asm_mips( \
+ dst_type * dst, \
+ const src_type * src_top, \
+ const src_type * src_bottom, \
+ int32_t w, \
+ int wt, \
+ int wb, \
+ pixman_fixed_t vx, \
+ pixman_fixed_t unit_x); \
+static force_inline void \
+scaled_bilinear_scanline_mips_##name##_##op (dst_type * dst, \
+ const uint32_t * mask, \
+ const src_type * src_top, \
+ const src_type * src_bottom, \
+ int32_t w, \
+ int wt, \
+ int wb, \
+ pixman_fixed_t vx, \
+ pixman_fixed_t unit_x, \
+ pixman_fixed_t max_vx, \
+ pixman_bool_t zero_src) \
+{ \
+ if ((flags & SKIP_ZERO_SRC) && zero_src) \
+ return; \
+ pixman_scaled_bilinear_scanline_##name##_##op##_asm_mips (dst, src_top, \
+ src_bottom, w, \
+ wt, wb, \
+ vx, unit_x); \
+} \
+ \
+FAST_BILINEAR_MAINLOOP_COMMON (mips_##name##_cover_##op, \
+ scaled_bilinear_scanline_mips_##name##_##op, \
+ src_type, uint32_t, dst_type, COVER, FLAG_NONE) \
+FAST_BILINEAR_MAINLOOP_COMMON (mips_##name##_none_##op, \
+ scaled_bilinear_scanline_mips_##name##_##op, \
+ src_type, uint32_t, dst_type, NONE, FLAG_NONE) \
+FAST_BILINEAR_MAINLOOP_COMMON (mips_##name##_pad_##op, \
+ scaled_bilinear_scanline_mips_##name##_##op, \
+ src_type, uint32_t, dst_type, PAD, FLAG_NONE) \
+FAST_BILINEAR_MAINLOOP_COMMON (mips_##name##_normal_##op, \
+ scaled_bilinear_scanline_mips_##name##_##op, \
+ src_type, uint32_t, dst_type, NORMAL, \
+ FLAG_NONE)
+
/*****************************************************************************/
#define PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_A8_DST(flags, name, op, \
commit 707a8be11280c4d395e662e869d4a98d75bb5571
Author: Nemanja Lukic <nemanja.lukic at rt-rk.com>
Date: Mon Jul 2 20:54:19 2012 +0200
MIPS: DSPr2: Added several bilinear fast paths with a8 mask
Performance numbers before/after on MIPS-74kc @ 1GHz:
lowlevel-blt-bench -b
Referent (before):
src_8888_8_8888 = L1: 6.37 L2: 6.08 M: 5.46 ( 32.57%) HT: 4.64 VT: 4.61 R: 4.52 RT: 2.85 ( 23Kops/s)
src_8888_8_0565 = L1: 5.89 L2: 5.66 M: 5.11 ( 23.71%) HT: 4.36 VT: 4.34 R: 4.26 RT: 2.71 ( 22Kops/s)
src_0565_8_x888 = L1: 3.32 L2: 3.27 M: 3.17 ( 14.71%) HT: 2.86 VT: 2.84 R: 2.81 RT: 2.07 ( 19Kops/s)
src_0565_8_0565 = L1: 3.19 L2: 3.15 M: 3.05 ( 10.11%) HT: 2.75 VT: 2.74 R: 2.71 RT: 2.00 ( 18Kops/s)
over_8888_8_8888 = L1: 4.99 L2: 4.71 M: 4.11 ( 27.22%) HT: 3.59 VT: 3.58 R: 3.50 RT: 2.36 ( 21Kops/s)
add_8888_8_8888 = L1: 5.60 L2: 5.26 M: 4.52 ( 29.95%) HT: 3.92 VT: 3.89 R: 3.80 RT: 2.49 ( 21Kops/s)
Optimized:
src_8888_8_8888 = L1: 13.19 L2: 12.13 M: 9.75 ( 58.22%) HT: 8.60 VT: 8.44 R: 7.90 RT: 5.06 ( 33Kops/s)
src_8888_8_0565 = L1: 11.64 L2: 10.81 M: 9.18 ( 42.63%) HT: 8.04 VT: 7.90 R: 7.57 RT: 5.02 ( 32Kops/s)
src_0565_8_x888 = L1: 8.34 L2: 7.95 M: 7.29 ( 33.85%) HT: 6.55 VT: 6.48 R: 6.25 RT: 4.35 ( 30Kops/s)
src_0565_8_0565 = L1: 7.71 L2: 7.35 M: 6.90 ( 22.90%) HT: 6.14 VT: 6.10 R: 5.94 RT: 4.07 ( 29Kops/s)
over_8888_8_8888 = L1: 9.73 L2: 8.99 M: 7.15 ( 47.41%) HT: 6.40 VT: 6.30 R: 6.11 RT: 4.28 ( 30Kops/s)
add_8888_8_8888 = L1: 13.01 L2: 11.72 M: 8.70 ( 57.68%) HT: 7.59 VT: 7.46 R: 7.20 RT: 4.74 ( 32Kops/s)
diff --git a/pixman/pixman-mips-dspr2-asm.S b/pixman/pixman-mips-dspr2-asm.S
index 48f108e..ac56746 100644
--- a/pixman/pixman-mips-dspr2-asm.S
+++ b/pixman/pixman-mips-dspr2-asm.S
@@ -749,6 +749,278 @@ LEAF_MIPS_DSPR2(pixman_composite_over_n_8_0565_asm_mips)
END(pixman_composite_over_n_8_0565_asm_mips)
+LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_8888_SRC_asm_mips)
+/*
+ * a0 - *dst
+ * a1 - *mask
+ * a2 - *src_top
+ * a3 - *src_bottom
+ * 16(sp) - wt
+ * 20(sp) - wb
+ * 24(sp) - vx
+ * 28(sp) - unit_x
+ * 32(sp) - w
+ */
+
+ lw v1, 32(sp)
+ beqz v1, 1f
+ nop
+
+ SAVE_REGS_ON_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
+
+ lw s0, 44(sp) /* s0 = wt */
+ lw s1, 48(sp) /* s1 = wb */
+ lw s2, 52(sp) /* s2 = vx */
+ lw s3, 56(sp) /* s3 = unit_x */
+ li v0, BILINEAR_INTERPOLATION_RANGE
+ li s8, 0x00ff00ff
+
+ sll s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
+ sll s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
+0:
+ andi t4, s2, 0xffff /* t4 = (short)vx */
+ srl t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = vx >> 8 */
+ subu t5, v0, t4 /* t5 = ( 256 - (vx>>8)) */
+
+ mul s4, s0, t5 /* s4 = wt*(256-(vx>>8)) */
+ mul s5, s0, t4 /* s5 = wt*(vx>>8) */
+ mul s6, s1, t5 /* s6 = wb*(256-(vx>>8)) */
+ mul s7, s1, t4 /* s7 = wb*(vx>>8) */
+
+ sra t9, s2, 16
+ sll t9, t9, 2
+ addiu t8, t9, 4
+ lwx t0, t9(a2) /* t0 = tl */
+ lwx t1, t8(a2) /* t1 = tr */
+ addiu v1, v1, -1
+ lwx t2, t9(a3) /* t2 = bl */
+ lwx t3, t8(a3) /* t3 = br */
+
+ BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
+ lbu t1, 0(a1) /* t1 = mask */
+ addiu a1, a1, 1
+ MIPS_UN8x4_MUL_UN8 t0, t1, t0, s8, t2, t3, t4
+
+ addu s2, s2, s3 /* vx += unit_x; */
+ sw t0, 0(a0)
+ bnez v1, 0b
+ addiu a0, a0, 4
+
+ RESTORE_REGS_FROM_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
+1:
+ j ra
+ nop
+
+END(pixman_scaled_bilinear_scanline_8888_8_8888_SRC_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_0565_SRC_asm_mips)
+/*
+ * a0 - *dst
+ * a1 - *mask
+ * a2 - *src_top
+ * a3 - *src_bottom
+ * 16(sp) - wt
+ * 20(sp) - wb
+ * 24(sp) - vx
+ * 28(sp) - unit_x
+ * 32(sp) - w
+ */
+
+ lw v1, 32(sp)
+ beqz v1, 1f
+ nop
+
+ SAVE_REGS_ON_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
+
+ lw s0, 44(sp) /* s0 = wt */
+ lw s1, 48(sp) /* s1 = wb */
+ lw s2, 52(sp) /* s2 = vx */
+ lw s3, 56(sp) /* s3 = unit_x */
+ li v0, BILINEAR_INTERPOLATION_RANGE
+ li s8, 0x00ff00ff
+
+ sll s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
+ sll s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
+0:
+ andi t4, s2, 0xffff /* t4 = (short)vx */
+ srl t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = vx >> 8 */
+ subu t5, v0, t4 /* t5 = ( 256 - (vx>>8)) */
+
+ mul s4, s0, t5 /* s4 = wt*(256-(vx>>8)) */
+ mul s5, s0, t4 /* s5 = wt*(vx>>8) */
+ mul s6, s1, t5 /* s6 = wb*(256-(vx>>8)) */
+ mul s7, s1, t4 /* s7 = wb*(vx>>8) */
+
+ sra t9, s2, 16
+ sll t9, t9, 2
+ addiu t8, t9, 4
+ lwx t0, t9(a2) /* t0 = tl */
+ lwx t1, t8(a2) /* t1 = tr */
+ addiu v1, v1, -1
+ lwx t2, t9(a3) /* t2 = bl */
+ lwx t3, t8(a3) /* t3 = br */
+
+ BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
+ lbu t1, 0(a1) /* t1 = mask */
+ addiu a1, a1, 1
+ MIPS_UN8x4_MUL_UN8 t0, t1, t0, s8, t2, t3, t4
+ CONVERT_1x8888_TO_1x0565 t0, t1, t2, t3
+
+ addu s2, s2, s3 /* vx += unit_x; */
+ sh t1, 0(a0)
+ bnez v1, 0b
+ addiu a0, a0, 2
+
+ RESTORE_REGS_FROM_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
+1:
+ j ra
+ nop
+
+END(pixman_scaled_bilinear_scanline_8888_8_0565_SRC_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_0565_8_x888_SRC_asm_mips)
+/*
+ * a0 - *dst
+ * a1 - *mask
+ * a2 - *src_top
+ * a3 - *src_bottom
+ * 16(sp) - wt
+ * 20(sp) - wb
+ * 24(sp) - vx
+ * 28(sp) - unit_x
+ * 32(sp) - w
+ */
+
+ lw t0, 32(sp)
+ beqz t0, 1f
+ nop
+
+ SAVE_REGS_ON_STACK 32, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8, ra
+
+ lw s0, 48(sp) /* s0 = wt */
+ lw s1, 52(sp) /* s1 = wb */
+ lw s2, 56(sp) /* s2 = vx */
+ lw s3, 60(sp) /* s3 = unit_x */
+ lw ra, 64(sp) /* ra = w */
+ li v0, 0x00ff00ff
+ li v1, 0x07e007e0
+ li s8, 0x001f001f
+
+ sll s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
+ sll s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
+0:
+ andi t4, s2, 0xffff /* t4 = (short)vx */
+ srl t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = vx >> 8 */
+ li t5, BILINEAR_INTERPOLATION_RANGE
+ subu t5, t5, t4 /* t5 = ( 256 - (vx>>8)) */
+
+ mul s4, s0, t5 /* s4 = wt*(256-(vx>>8)) */
+ mul s5, s0, t4 /* s5 = wt*(vx>>8) */
+ mul s6, s1, t5 /* s6 = wb*(256-(vx>>8)) */
+ mul s7, s1, t4 /* s7 = wb*(vx>>8) */
+
+ sra t9, s2, 16
+ sll t9, t9, 1
+ addiu t8, t9, 2
+ lhx t0, t9(a2) /* t0 = tl */
+ lhx t1, t8(a2) /* t1 = tr */
+ andi t1, t1, 0xffff
+ addiu ra, ra, -1
+ lhx t2, t9(a3) /* t2 = bl */
+ lhx t3, t8(a3) /* t3 = br */
+ andi t3, t3, 0xffff
+
+ CONVERT_2x0565_TO_2x8888 t0, t1, t0, t1, v1, s8, t4, t5, t6, t7
+ CONVERT_2x0565_TO_2x8888 t2, t3, t2, t3, v1, s8, t4, t5, t6, t7
+ BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
+ lbu t1, 0(a1) /* t1 = mask */
+ addiu a1, a1, 1
+ MIPS_UN8x4_MUL_UN8 t0, t1, t0, v0, t2, t3, t4
+
+ addu s2, s2, s3 /* vx += unit_x; */
+ sw t0, 0(a0)
+ bnez ra, 0b
+ addiu a0, a0, 4
+
+ RESTORE_REGS_FROM_STACK 32, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8, ra
+1:
+ j ra
+ nop
+
+END(pixman_scaled_bilinear_scanline_0565_8_x888_SRC_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_0565_8_0565_SRC_asm_mips)
+/*
+ * a0 - *dst
+ * a1 - *mask
+ * a2 - *src_top
+ * a3 - *src_bottom
+ * 16(sp) - wt
+ * 20(sp) - wb
+ * 24(sp) - vx
+ * 28(sp) - unit_x
+ * 32(sp) - w
+ */
+
+ lw t0, 32(sp)
+ beqz t0, 1f
+ nop
+
+ SAVE_REGS_ON_STACK 32, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8, ra
+
+ lw s0, 48(sp) /* s0 = wt */
+ lw s1, 52(sp) /* s1 = wb */
+ lw s2, 56(sp) /* s2 = vx */
+ lw s3, 60(sp) /* s3 = unit_x */
+ lw ra, 64(sp) /* ra = w */
+ li v0, 0x00ff00ff
+ li v1, 0x07e007e0
+ li s8, 0x001f001f
+
+ sll s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
+ sll s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
+0:
+ andi t4, s2, 0xffff /* t4 = (short)vx */
+ srl t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = vx >> 8 */
+ li t5, BILINEAR_INTERPOLATION_RANGE
+ subu t5, t5, t4 /* t5 = ( 256 - (vx>>8)) */
+
+ mul s4, s0, t5 /* s4 = wt*(256-(vx>>8)) */
+ mul s5, s0, t4 /* s5 = wt*(vx>>8) */
+ mul s6, s1, t5 /* s6 = wb*(256-(vx>>8)) */
+ mul s7, s1, t4 /* s7 = wb*(vx>>8) */
+
+ sra t9, s2, 16
+ sll t9, t9, 1
+ addiu t8, t9, 2
+ lhx t0, t9(a2) /* t0 = tl */
+ lhx t1, t8(a2) /* t1 = tr */
+ andi t1, t1, 0xffff
+ addiu ra, ra, -1
+ lhx t2, t9(a3) /* t2 = bl */
+ lhx t3, t8(a3) /* t3 = br */
+ andi t3, t3, 0xffff
+
+ CONVERT_2x0565_TO_2x8888 t0, t1, t0, t1, v1, s8, t4, t5, t6, t7
+ CONVERT_2x0565_TO_2x8888 t2, t3, t2, t3, v1, s8, t4, t5, t6, t7
+ BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
+ lbu t1, 0(a1) /* t1 = mask */
+ addiu a1, a1, 1
+ MIPS_UN8x4_MUL_UN8 t0, t1, t0, v0, t2, t3, t4
+ CONVERT_1x8888_TO_1x0565 t0, t1, t2, t3
+
+ addu s2, s2, s3 /* vx += unit_x; */
+ sh t1, 0(a0)
+ bnez ra, 0b
+ addiu a0, a0, 2
+
+ RESTORE_REGS_FROM_STACK 32, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8, ra
+1:
+ j ra
+ nop
+
+END(pixman_scaled_bilinear_scanline_0565_8_0565_SRC_asm_mips)
+
LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_8888_OVER_asm_mips)
/*
* a0 - dst (a8r8g8b8)
@@ -815,3 +1087,68 @@ LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_8888_OVER_asm_mips)
nop
END(pixman_scaled_bilinear_scanline_8888_8_8888_OVER_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_8888_ADD_asm_mips)
+/*
+ * a0 - *dst
+ * a1 - *mask
+ * a2 - *src_top
+ * a3 - *src_bottom
+ * 16(sp) - wt
+ * 20(sp) - wb
+ * 24(sp) - vx
+ * 28(sp) - unit_x
+ * 32(sp) - w
+ */
+
+ lw v1, 32(sp)
+ beqz v1, 1f
+ nop
+
+ SAVE_REGS_ON_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
+
+ lw s0, 44(sp) /* s0 = wt */
+ lw s1, 48(sp) /* s1 = wb */
+ lw s2, 52(sp) /* s2 = vx */
+ lw s3, 56(sp) /* s3 = unit_x */
+ li v0, BILINEAR_INTERPOLATION_RANGE
+ li s8, 0x00ff00ff
+
+ sll s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
+ sll s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS))
+0:
+ andi t4, s2, 0xffff /* t4 = (short)vx */
+ srl t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = vx >> 8 */
+ subu t5, v0, t4 /* t5 = ( 256 - (vx>>8)) */
+
+ mul s4, s0, t5 /* s4 = wt*(256-(vx>>8)) */
+ mul s5, s0, t4 /* s5 = wt*(vx>>8) */
+ mul s6, s1, t5 /* s6 = wb*(256-(vx>>8)) */
+ mul s7, s1, t4 /* s7 = wb*(vx>>8) */
+
+ sra t9, s2, 16
+ sll t9, t9, 2
+ addiu t8, t9, 4
+ lwx t0, t9(a2) /* t0 = tl */
+ lwx t1, t8(a2) /* t1 = tr */
+ addiu v1, v1, -1
+ lwx t2, t9(a3) /* t2 = bl */
+ lwx t3, t8(a3) /* t3 = br */
+
+ BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
+ lbu t1, 0(a1) /* t1 = mask */
+ lw t2, 0(a0) /* t2 = dst */
+ addiu a1, a1, 1
+ MIPS_UN8x4_MUL_UN8_ADD_UN8x4 t0, t1, t2, t0, s8, t3, t4, t5
+
+ addu s2, s2, s3 /* vx += unit_x; */
+ sw t0, 0(a0)
+ bnez v1, 0b
+ addiu a0, a0, 4
+
+ RESTORE_REGS_FROM_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
+1:
+ j ra
+ nop
+
+END(pixman_scaled_bilinear_scanline_8888_8_8888_ADD_asm_mips)
diff --git a/pixman/pixman-mips-dspr2-asm.h b/pixman/pixman-mips-dspr2-asm.h
index 7cf3281..24b049e 100644
--- a/pixman/pixman-mips-dspr2-asm.h
+++ b/pixman/pixman-mips-dspr2-asm.h
@@ -566,6 +566,19 @@ LEAF_MIPS32R2(symbol) \
addu_s.qb \out2_8888, \d2_8888, \scratch2
.endm
+.macro MIPS_UN8x4_MUL_UN8_ADD_UN8x4 s_8888, \
+ m_8, \
+ d_8888, \
+ out_8888, \
+ maskLSR, \
+ scratch1, scratch2, scratch3
+ MIPS_UN8x4_MUL_UN8 \s_8888, \m_8, \
+ \out_8888, \maskLSR, \
+ \scratch1, \scratch2, \scratch3
+
+ addu_s.qb \out_8888, \out_8888, \d_8888
+.endm
+
.macro BILINEAR_INTERPOLATE_SINGLE_PIXEL tl, tr, bl, br, \
scratch1, scratch2, \
alpha, red, green, blue \
diff --git a/pixman/pixman-mips-dspr2.c b/pixman/pixman-mips-dspr2.c
index 06d4335..66c0e5d 100644
--- a/pixman/pixman-mips-dspr2.c
+++ b/pixman/pixman-mips-dspr2.c
@@ -58,8 +58,18 @@ PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8_8888,
PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8_0565,
uint8_t, 1, uint16_t, 1)
+PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_A8_DST (0, 8888_8_8888, SRC,
+ uint32_t, uint32_t)
+PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_A8_DST (0, 8888_8_0565, SRC,
+ uint32_t, uint16_t)
+PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_A8_DST (0, 0565_8_x888, SRC,
+ uint16_t, uint32_t)
+PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_A8_DST (0, 0565_8_0565, SRC,
+ uint16_t, uint16_t)
PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_A8_DST (SKIP_ZERO_SRC, 8888_8_8888, OVER,
uint32_t, uint32_t)
+PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_A8_DST (SKIP_ZERO_SRC, 8888_8_8888, ADD,
+ uint32_t, uint32_t)
static pixman_bool_t
pixman_fill_mips (uint32_t *bits,
@@ -209,9 +219,21 @@ static const pixman_fast_path_t mips_dspr2_fast_paths[] =
PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, mips_composite_over_n_8_0565),
PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, mips_composite_over_n_8_0565),
+ SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, mips_8888_8_8888),
+ SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, mips_8888_8_8888),
+ SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, mips_8888_8_8888),
+
+ SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, a8r8g8b8, r5g6b5, mips_8888_8_0565),
+ SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, x8r8g8b8, r5g6b5, mips_8888_8_0565),
+
+ SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, r5g6b5, x8r8g8b8, mips_0565_8_x888),
+ SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, r5g6b5, r5g6b5, mips_0565_8_0565),
+
SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, mips_8888_8_8888),
SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, mips_8888_8_8888),
+ SIMPLE_BILINEAR_A8_MASK_FAST_PATH (ADD, a8r8g8b8, a8r8g8b8, mips_8888_8_8888),
+ SIMPLE_BILINEAR_A8_MASK_FAST_PATH (ADD, a8r8g8b8, x8r8g8b8, mips_8888_8_8888),
{ PIXMAN_OP_NONE },
};
More information about the xorg-commit
mailing list