[Pixman] [PATCH] MIPS: DSPr2: Added more bilinear fast paths (without mask pixels): - src_8888_8888 - src_8888_0565 - src_0565_8888 - src_0565_0565 - over_8888_8888 - add_8888_8888

Nemanja Lukic nlukic at mips.com
Thu Jun 28 07:23:34 PDT 2012


From: Nemanja Lukic <nemanja.lukic at rt-rk.com>

Performance numbers before/after on MIPS-74kc @ 1GHz:

Reference (before):

           src_8888_8888 =  L1:   2.02  L2:   7.90  M:  6.39 ( 33.91%)  HT:  5.83  VT:  5.75  R:  5.67  RT:  3.86 (  30Kops/s)
           src_8888_0565 =  L1:   7.10  L2:   7.23  M:  6.57 ( 26.12%)  HT:  5.58  VT:  5.55  R:  5.49  RT:  3.56 (  28Kops/s)
           src_0565_x888 =  L1:   3.78  L2:   3.73  M:  3.38 ( 13.46%)  HT:  3.27  VT:  3.23  R:  3.21  RT:  2.59 (  23Kops/s)
           src_0565_0565 =  L1:   3.60  L2:   3.57  M:  3.49 (  9.24%)  HT:  3.20  VT:  3.19  R:  3.17  RT:  2.46 (  22Kops/s)
          over_8888_8888 =  L1:   6.12  L2:   5.72  M:  5.00 ( 26.53%)  HT:  4.42  VT:  4.41  R:  4.34  RT:  3.08 (  26Kops/s)
           add_8888_8888 =  L1:   6.99  L2:   6.46  M:  5.54 ( 29.41%)  HT:  4.85  VT:  4.83  R:  4.73  RT:  3.25 (  27Kops/s)

Optimized:

           src_8888_8888 =  L1:  18.27  L2:  16.69  M: 12.87 ( 68.25%)  HT: 11.80  VT: 11.61  R: 10.60  RT:  7.05 (  41Kops/s)
           src_8888_0565 =  L1:  15.18  L2:  14.10  M: 11.75 ( 46.71%)  HT: 10.64  VT: 10.50  R: 10.03  RT:  7.15 (  41Kops/s)
           src_0565_x888 =  L1:  10.45  L2:   9.96  M:  9.23 ( 36.72%)  HT:  8.39  VT:  8.29  R:  8.02  RT:  5.75 (  37Kops/s)
           src_0565_0565 =  L1:   9.37  L2:   8.98  M:  8.50 ( 22.53%)  HT:  7.71  VT:  7.66  R:  7.52  RT:  5.59 (  37Kops/s)
          over_8888_8888 =  L1:  12.21  L2:  11.01  M:  8.56 ( 45.36%)  HT:  7.71  VT:  7.64  R:  7.43  RT:  5.51 (  36Kops/s)
           add_8888_8888 =  L1:  17.72  L2:  15.16  M: 10.78 ( 57.13%)  HT:  9.46  VT:  9.30  R:  9.00  RT:  6.03 (  38Kops/s)

Benchmark results were obtained using a tweaked version of lowlevel-blt-bench (which does bilinear scaling using an almost-identity matrix).
---
 pixman/pixman-mips-dspr2-asm.S |  350 ++++++++++++++++++++++++++++++++++++++++
 pixman/pixman-mips-dspr2-asm.h |   21 +++
 pixman/pixman-mips-dspr2.c     |   29 ++++
 pixman/pixman-mips-dspr2.h     |   49 ++++++
 4 files changed, 449 insertions(+), 0 deletions(-)

diff --git a/pixman/pixman-mips-dspr2-asm.S b/pixman/pixman-mips-dspr2-asm.S
index 74d1ee9..a3110e8 100644
--- a/pixman/pixman-mips-dspr2-asm.S
+++ b/pixman/pixman-mips-dspr2-asm.S
@@ -748,6 +748,356 @@ LEAF_MIPS_DSPR2(pixman_composite_over_n_8_0565_asm_mips)
 
 END(pixman_composite_over_n_8_0565_asm_mips)
 
+LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_mips)
+/* Bilinear SRC scanline: 32-bit source pixels -> 32-bit destination pixels.
+ * a0     - *dst
+ * a1     - *src_top
+ * a2     - *src_bottom
+ * a3     - w
+ * 16(sp) - wt
+ * 20(sp) - wb
+ * 24(sp) - vx
+ * 28(sp) - unit_x
+ */
+
+    beqz     a3, 1f         /* w == 0: return without touching the stack */
+     nop
+
+    SAVE_REGS_ON_STACK 20, v0, s0, s1, s2, s3, s4, s5, s6, s7
+    /* Stack args are read at entry offset + 20 (assumes the macro lowers sp by 20). */
+    lw       s0, 36(sp)     /* s0 = wt */
+    lw       s1, 40(sp)     /* s1 = wb */
+    lw       s2, 44(sp)     /* s2 = vx */
+    lw       s3, 48(sp)     /* s3 = unit_x */
+    li       v0, 256        /* v0 = 256 (loop-invariant) */
+0:
+    andi     t4, s2, 0xffff /* t4 = vx & 0xffff */
+    srl      t4, t4, 8      /* t4 = (vx & 0xffff) >> 8 */
+    subu     t5, v0, t4     /* t5 = 256 - t4 */
+
+    mul      s4, s0, t5     /* s4 = wt * (256 - t4) */
+    mul      s5, s0, t4     /* s5 = wt * t4 */
+    mul      s6, s1, t5     /* s6 = wb * (256 - t4) */
+    mul      s7, s1, t4     /* s7 = wb * t4 */
+
+    sra      t9, s2, 16     /* t9 = vx >> 16 (pixel index) */
+    sll      t9, t9, 2      /* t9 = byte offset of left pixel (4 bytes each) */
+    addiu    t8, t9, 4      /* t8 = byte offset of right pixel */
+    lwx      t0, t9(a1)     /* t0 = tl */
+    lwx      t1, t8(a1)     /* t1 = tr */
+    addiu    a3, a3, -1     /* w-- */
+    lwx      t2, t9(a2)     /* t2 = bl */
+    lwx      t3, t8(a2)     /* t3 = br */
+
+    BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
+
+    addu     s2, s2, s3     /* vx += unit_x; */
+    sw       t0, 0(a0)      /* *dst = t0 (presumably the macro's result) */
+    bnez     a3, 0b         /* loop while w != 0 */
+     addiu   a0, a0, 4      /* delay slot: dst += 4 bytes */
+
+    RESTORE_REGS_FROM_STACK 20, v0, s0, s1, s2, s3, s4, s5, s6, s7
+1:
+    j        ra
+     nop
+
+END(pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_0565_SRC_asm_mips)
+/* Bilinear SRC scanline: 32-bit source pixels -> 16-bit destination pixels.
+ * a0     - *dst
+ * a1     - *src_top
+ * a2     - *src_bottom
+ * a3     - w
+ * 16(sp) - wt
+ * 20(sp) - wb
+ * 24(sp) - vx
+ * 28(sp) - unit_x
+ */
+
+    beqz     a3, 1f         /* w == 0: return without touching the stack */
+     nop
+
+    SAVE_REGS_ON_STACK 20, v0, s0, s1, s2, s3, s4, s5, s6, s7
+    /* Stack args are read at entry offset + 20 (assumes the macro lowers sp by 20). */
+    lw       s0, 36(sp)     /* s0 = wt */
+    lw       s1, 40(sp)     /* s1 = wb */
+    lw       s2, 44(sp)     /* s2 = vx */
+    lw       s3, 48(sp)     /* s3 = unit_x */
+    li       v0, 256        /* v0 = 256 (loop-invariant) */
+0:
+    andi     t4, s2, 0xffff /* t4 = vx & 0xffff */
+    srl      t4, t4, 8      /* t4 = (vx & 0xffff) >> 8 */
+    subu     t5, v0, t4     /* t5 = 256 - t4 */
+
+    mul      s4, s0, t5     /* s4 = wt * (256 - t4) */
+    mul      s5, s0, t4     /* s5 = wt * t4 */
+    mul      s6, s1, t5     /* s6 = wb * (256 - t4) */
+    mul      s7, s1, t4     /* s7 = wb * t4 */
+
+    sra      t9, s2, 16     /* t9 = vx >> 16 (pixel index) */
+    sll      t9, t9, 2      /* t9 = byte offset of left pixel (4 bytes each) */
+    addiu    t8, t9, 4      /* t8 = byte offset of right pixel */
+    lwx      t0, t9(a1)     /* t0 = tl */
+    lwx      t1, t8(a1)     /* t1 = tr */
+    addiu    a3, a3, -1     /* w-- */
+    lwx      t2, t9(a2)     /* t2 = bl */
+    lwx      t3, t8(a2)     /* t3 = br */
+
+    BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
+    CONVERT_1x8888_TO_1x0565 t0, t1, t2, t3
+
+    addu     s2, s2, s3     /* vx += unit_x; */
+    sh       t1, 0(a0)      /* *dst = low halfword of t1 (presumably the 0565 pixel) */
+    bnez     a3, 0b         /* loop while w != 0 */
+     addiu   a0, a0, 2      /* delay slot: dst += 2 bytes */
+
+    RESTORE_REGS_FROM_STACK 20, v0, s0, s1, s2, s3, s4, s5, s6, s7
+1:
+    j        ra
+     nop
+
+END(pixman_scaled_bilinear_scanline_8888_0565_SRC_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_0565_8888_SRC_asm_mips)
+/* Bilinear SRC scanline: 16-bit source pixels -> 32-bit destination pixels.
+ * a0     - *dst
+ * a1     - *src_top
+ * a2     - *src_bottom
+ * a3     - w
+ * 16(sp) - wt
+ * 20(sp) - wb
+ * 24(sp) - vx
+ * 28(sp) - unit_x
+ */
+
+    beqz     a3, 1f         /* w == 0: return without touching the stack */
+     nop
+
+    SAVE_REGS_ON_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
+    /* Stack args are read at entry offset + 28 (assumes the macro lowers sp by 28). */
+    lw       s0, 44(sp)     /* s0 = wt */
+    lw       s1, 48(sp)     /* s1 = wb */
+    lw       s2, 52(sp)     /* s2 = vx */
+    lw       s3, 56(sp)     /* s3 = unit_x */
+    li       v0, 256        /* v0 = 256 (loop-invariant) */
+    li       v1, 0x07e007e0 /* v1 = 0x07e0 per halfword (bits 5-10) */
+    li       s8, 0x001f001f /* s8 = 0x001f per halfword (bits 0-4) */
+0:
+    andi     t4, s2, 0xffff /* t4 = vx & 0xffff */
+    srl      t4, t4, 8      /* t4 = (vx & 0xffff) >> 8 */
+    subu     t5, v0, t4     /* t5 = 256 - t4 */
+
+    mul      s4, s0, t5     /* s4 = wt * (256 - t4) */
+    mul      s5, s0, t4     /* s5 = wt * t4 */
+    mul      s6, s1, t5     /* s6 = wb * (256 - t4) */
+    mul      s7, s1, t4     /* s7 = wb * t4 */
+
+    sra      t9, s2, 16     /* t9 = vx >> 16 (pixel index) */
+    sll      t9, t9, 1      /* t9 = byte offset of left pixel (2 bytes each) */
+    addiu    t8, t9, 2      /* t8 = byte offset of right pixel */
+    lhx      t0, t9(a1)     /* t0 = tl */
+    lhx      t1, t8(a1)     /* t1 = tr */
+    andi     t1, t1, 0xffff /* clear the sign extension left by lhx */
+    addiu    a3, a3, -1     /* w-- */
+    lhx      t2, t9(a2)     /* t2 = bl */
+    lhx      t3, t8(a2)     /* t3 = br */
+    andi     t3, t3, 0xffff /* clear the sign extension left by lhx */
+    /* Widen tl/tr and bl/br in place (per the macro name, 0565 -> 8888); t4-t7 are scratch. */
+    CONVERT_2x0565_TO_2x8888 t0, t1, t0, t1, v1, s8, t4, t5, t6, t7
+    CONVERT_2x0565_TO_2x8888 t2, t3, t2, t3, v1, s8, t4, t5, t6, t7
+    BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
+
+    addu     s2, s2, s3     /* vx += unit_x; */
+    sw       t0, 0(a0)      /* *dst = t0 (presumably the macro's result) */
+    bnez     a3, 0b         /* loop while w != 0 */
+     addiu   a0, a0, 4      /* delay slot: dst += 4 bytes */
+
+    RESTORE_REGS_FROM_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
+1:
+    j        ra
+     nop
+
+END(pixman_scaled_bilinear_scanline_0565_8888_SRC_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_0565_0565_SRC_asm_mips)
+/* Bilinear SRC scanline: 16-bit source pixels -> 16-bit destination pixels.
+ * a0     - *dst
+ * a1     - *src_top
+ * a2     - *src_bottom
+ * a3     - w
+ * 16(sp) - wt
+ * 20(sp) - wb
+ * 24(sp) - vx
+ * 28(sp) - unit_x
+ */
+
+    beqz     a3, 1f         /* w == 0: return without touching the stack */
+     nop
+
+    SAVE_REGS_ON_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
+    /* Stack args are read at entry offset + 28 (assumes the macro lowers sp by 28). */
+    lw       s0, 44(sp)     /* s0 = wt */
+    lw       s1, 48(sp)     /* s1 = wb */
+    lw       s2, 52(sp)     /* s2 = vx */
+    lw       s3, 56(sp)     /* s3 = unit_x */
+    li       v0, 256        /* v0 = 256 (loop-invariant) */
+    li       v1, 0x07e007e0 /* v1 = 0x07e0 per halfword (bits 5-10) */
+    li       s8, 0x001f001f /* s8 = 0x001f per halfword (bits 0-4) */
+
+0:
+    andi     t4, s2, 0xffff /* t4 = vx & 0xffff */
+    srl      t4, t4, 8      /* t4 = (vx & 0xffff) >> 8 */
+    subu     t5, v0, t4     /* t5 = 256 - t4 */
+
+    mul      s4, s0, t5     /* s4 = wt * (256 - t4) */
+    mul      s5, s0, t4     /* s5 = wt * t4 */
+    mul      s6, s1, t5     /* s6 = wb * (256 - t4) */
+    mul      s7, s1, t4     /* s7 = wb * t4 */
+
+    sra      t9, s2, 16     /* t9 = vx >> 16 (pixel index) */
+    sll      t9, t9, 1      /* t9 = byte offset of left pixel (2 bytes each) */
+    addiu    t8, t9, 2      /* t8 = byte offset of right pixel */
+    lhx      t0, t9(a1)     /* t0 = tl */
+    lhx      t1, t8(a1)     /* t1 = tr */
+    andi     t1, t1, 0xffff /* clear the sign extension left by lhx */
+    addiu    a3, a3, -1     /* w-- */
+    lhx      t2, t9(a2)     /* t2 = bl */
+    lhx      t3, t8(a2)     /* t3 = br */
+    andi     t3, t3, 0xffff /* clear the sign extension left by lhx */
+    /* Widen tl/tr and bl/br in place (per the macro name, 0565 -> 8888); t4-t7 are scratch. */
+    CONVERT_2x0565_TO_2x8888 t0, t1, t0, t1, v1, s8, t4, t5, t6, t7
+    CONVERT_2x0565_TO_2x8888 t2, t3, t2, t3, v1, s8, t4, t5, t6, t7
+    BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
+    CONVERT_1x8888_TO_1x0565 t0, t1, t2, t3
+
+    addu     s2, s2, s3     /* vx += unit_x; */
+    sh       t1, 0(a0)      /* *dst = low halfword of t1 (presumably the 0565 pixel) */
+    bnez     a3, 0b         /* loop while w != 0 */
+     addiu   a0, a0, 2      /* delay slot: dst += 2 bytes */
+
+    RESTORE_REGS_FROM_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
+1:
+    j        ra
+     nop
+
+END(pixman_scaled_bilinear_scanline_0565_0565_SRC_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8888_OVER_asm_mips)
+/* Bilinear OVER scanline: 32-bit source pixels combined with 32-bit *dst.
+ * a0     - *dst
+ * a1     - *src_top
+ * a2     - *src_bottom
+ * a3     - w
+ * 16(sp) - wt
+ * 20(sp) - wb
+ * 24(sp) - vx
+ * 28(sp) - unit_x
+ */
+
+    beqz     a3, 1f         /* w == 0: return without touching the stack */
+     nop
+
+    SAVE_REGS_ON_STACK 24, v0, s0, s1, s2, s3, s4, s5, s6, s7, s8
+    /* Stack args are read at entry offset + 24 (assumes the macro lowers sp by 24). */
+    lw       s0, 40(sp)     /* s0 = wt */
+    lw       s1, 44(sp)     /* s1 = wb */
+    lw       s2, 48(sp)     /* s2 = vx */
+    lw       s3, 52(sp)     /* s3 = unit_x */
+    li       v0, 256        /* v0 = 256 (loop-invariant) */
+    li       s8, 0x00ff00ff /* s8 = maskLSR value required by OVER_8888_8888 */
+0:
+    andi     t4, s2, 0xffff /* t4 = vx & 0xffff */
+    srl      t4, t4, 8      /* t4 = (vx & 0xffff) >> 8 */
+    subu     t5, v0, t4     /* t5 = 256 - t4 */
+
+    mul      s4, s0, t5     /* s4 = wt * (256 - t4) */
+    mul      s5, s0, t4     /* s5 = wt * t4 */
+    mul      s6, s1, t5     /* s6 = wb * (256 - t4) */
+    mul      s7, s1, t4     /* s7 = wb * t4 */
+
+    sra      t9, s2, 16     /* t9 = vx >> 16 (pixel index) */
+    sll      t9, t9, 2      /* t9 = byte offset of left pixel (4 bytes each) */
+    addiu    t8, t9, 4      /* t8 = byte offset of right pixel */
+    lwx      t0, t9(a1)     /* t0 = tl */
+    lwx      t1, t8(a1)     /* t1 = tr */
+    addiu    a3, a3, -1     /* w-- */
+    lwx      t2, t9(a2)     /* t2 = bl */
+    lwx      t3, t8(a2)     /* t3 = br */
+
+    BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
+    lw       t1, 0(a0)      /* t1 = dest */
+    OVER_8888_8888 t0, t1, t2, s8, t3, t4, t5, t6
+    /* OVER_8888_8888 operands: source t0, destination t1, output t2; t3-t6 are scratch. */
+    addu     s2, s2, s3     /* vx += unit_x; */
+    sw       t2, 0(a0)      /* *dst = t2 */
+    bnez     a3, 0b         /* loop while w != 0 */
+     addiu   a0, a0, 4      /* delay slot: dst += 4 bytes */
+
+    RESTORE_REGS_FROM_STACK 24, v0, s0, s1, s2, s3, s4, s5, s6, s7, s8
+1:
+    j        ra
+     nop
+
+END(pixman_scaled_bilinear_scanline_8888_8888_OVER_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8888_ADD_asm_mips)
+/* Bilinear ADD scanline: 32-bit source pixels added byte-wise to 32-bit *dst.
+ * a0     - *dst
+ * a1     - *src_top
+ * a2     - *src_bottom
+ * a3     - w
+ * 16(sp) - wt
+ * 20(sp) - wb
+ * 24(sp) - vx
+ * 28(sp) - unit_x
+ */
+
+    beqz         a3, 1f         /* w == 0: return without touching the stack */
+     nop
+
+    SAVE_REGS_ON_STACK 20, v0, s0, s1, s2, s3, s4, s5, s6, s7
+    /* Stack args are read at entry offset + 20 (assumes the macro lowers sp by 20). */
+    lw           s0, 36(sp)     /* s0 = wt */
+    lw           s1, 40(sp)     /* s1 = wb */
+    lw           s2, 44(sp)     /* s2 = vx */
+    lw           s3, 48(sp)     /* s3 = unit_x */
+    li           v0, 256        /* v0 = 256 (loop-invariant) */
+0:
+    andi         t4, s2, 0xffff /* t4 = vx & 0xffff */
+    srl          t4, t4, 8      /* t4 = (vx & 0xffff) >> 8 */
+    subu         t5, v0, t4     /* t5 = 256 - t4 */
+
+    mul          s4, s0, t5     /* s4 = wt * (256 - t4) */
+    mul          s5, s0, t4     /* s5 = wt * t4 */
+    mul          s6, s1, t5     /* s6 = wb * (256 - t4) */
+    mul          s7, s1, t4     /* s7 = wb * t4 */
+
+    sra          t9, s2, 16     /* t9 = vx >> 16 (pixel index) */
+    sll          t9, t9, 2      /* t9 = byte offset of left pixel (4 bytes each) */
+    addiu        t8, t9, 4      /* t8 = byte offset of right pixel */
+    lwx          t0, t9(a1)     /* t0 = tl */
+    lwx          t1, t8(a1)     /* t1 = tr */
+    addiu        a3, a3, -1     /* w-- */
+    lwx          t2, t9(a2)     /* t2 = bl */
+    lwx          t3, t8(a2)     /* t3 = br */
+
+    BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
+    lw           t1, 0(a0)      /* t1 = dest */
+    addu_s.qb    t2, t0, t1     /* t2 = t0 + t1, per byte, unsigned saturating */
+
+    addu         s2, s2, s3     /* vx += unit_x; */
+    sw           t2, 0(a0)      /* *dst = t2 */
+    bnez         a3, 0b         /* loop while w != 0 */
+     addiu       a0, a0, 4      /* delay slot: dst += 4 bytes */
+
+    RESTORE_REGS_FROM_STACK 20, v0, s0, s1, s2, s3, s4, s5, s6, s7
+1:
+    j            ra
+     nop
+
+END(pixman_scaled_bilinear_scanline_8888_8888_ADD_asm_mips)
+
 LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_8888_SRC_asm_mips)
 /*
  * a0     - *dst
diff --git a/pixman/pixman-mips-dspr2-asm.h b/pixman/pixman-mips-dspr2-asm.h
index 24b049e..7327dc6 100644
--- a/pixman/pixman-mips-dspr2-asm.h
+++ b/pixman/pixman-mips-dspr2-asm.h
@@ -566,6 +566,27 @@ LEAF_MIPS32R2(symbol)                                   \
     addu_s.qb              \out2_8888, \d2_8888,  \scratch2
 .endm
 
+/*
+ * OVER of one a8r8g8b8 source pixel (s_8888) onto one a8r8g8b8 destination
+ * pixel (d_8888): out_8888 = s_8888 + d_8888 scaled by (255 - source alpha).
+ * maskLSR is forwarded to MIPS_UN8x4_MUL_UN8 for rounding; preload it with:
+ *   li       maskLSR, 0x00ff00ff
+ */
+.macro OVER_8888_8888 s_8888,   \
+                      d_8888,   \
+                      out_8888, \
+                      maskLSR,  \
+                      scratch1, scratch2, scratch3, scratch4
+    not                \scratch1, \s_8888 /* scratch1 = ~s_8888 */
+    srl                \scratch1, \scratch1, 24 /* top byte only: 255 - alpha */
+
+    MIPS_UN8x4_MUL_UN8 \d_8888,   \scratch1, \
+                       \out_8888, \maskLSR, \
+                       \scratch2, \scratch3, \scratch4
+
+    addu_s.qb          \out_8888, \out_8888, \s_8888 /* per-byte saturating add */
+.endm
+
 .macro MIPS_UN8x4_MUL_UN8_ADD_UN8x4 s_8888,   \
                                     m_8,      \
                                     d_8888,   \
diff --git a/pixman/pixman-mips-dspr2.c b/pixman/pixman-mips-dspr2.c
index 66c0e5d..63a0225 100644
--- a/pixman/pixman-mips-dspr2.c
+++ b/pixman/pixman-mips-dspr2.c
@@ -58,6 +58,19 @@ PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8_8888,
 PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8_0565,
                                        uint8_t, 1, uint16_t, 1)
 
+PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (0, 8888_8888, SRC,
+                                          uint32_t, uint32_t) /* src_type, dst_type */
+PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (0, 8888_0565, SRC,
+                                          uint32_t, uint16_t)
+PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (0, 0565_8888, SRC,
+                                          uint16_t, uint32_t)
+PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (0, 0565_0565, SRC,
+                                          uint16_t, uint16_t)
+PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (SKIP_ZERO_SRC, 8888_8888, OVER,
+                                          uint32_t, uint32_t) /* returns early on zero_src */
+PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (SKIP_ZERO_SRC, 8888_8888, ADD,
+                                          uint32_t, uint32_t) /* returns early on zero_src */
+
 PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_A8_DST (0, 8888_8_8888, SRC,
                                              uint32_t, uint32_t)
 PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_A8_DST (0, 8888_8_0565, SRC,
@@ -219,6 +232,22 @@ static const pixman_fast_path_t mips_dspr2_fast_paths[] =
     PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       r5g6b5,   mips_composite_over_n_8_0565),
     PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       b5g6r5,   mips_composite_over_n_8_0565),
 
+    SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, mips_8888_8888),
+    SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, mips_8888_8888),
+    SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, mips_8888_8888),
+
+    SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, r5g6b5, mips_8888_0565),
+    SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, r5g6b5, mips_8888_0565),
+
+    SIMPLE_BILINEAR_FAST_PATH (SRC, r5g6b5, x8r8g8b8, mips_0565_8888),
+    SIMPLE_BILINEAR_FAST_PATH (SRC, r5g6b5, r5g6b5, mips_0565_0565),
+
+    SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, mips_8888_8888),
+    SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, mips_8888_8888),
+
+    SIMPLE_BILINEAR_FAST_PATH (ADD, a8r8g8b8, a8r8g8b8, mips_8888_8888),
+    SIMPLE_BILINEAR_FAST_PATH (ADD, a8r8g8b8, x8r8g8b8, mips_8888_8888),
+
     SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, mips_8888_8_8888),
     SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, mips_8888_8_8888),
     SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, mips_8888_8_8888),
diff --git a/pixman/pixman-mips-dspr2.h b/pixman/pixman-mips-dspr2.h
index 5036938..a3d774f 100644
--- a/pixman/pixman-mips-dspr2.h
+++ b/pixman/pixman-mips-dspr2.h
@@ -127,6 +127,55 @@ mips_composite_##name (pixman_implementation_t *imp,                \
     }                                                               \
 }
 
+/****************************************************************************/
+/* Declares an asm scanline routine, wraps it, and instantiates 4 main loops. */
+#define PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST(flags, name, op,            \
+                                                 src_type, dst_type)         \
+void                                                                         \
+pixman_scaled_bilinear_scanline_##name##_##op##_asm_mips(                    \
+                                             dst_type *       dst,           \
+                                             const src_type * src_top,       \
+                                             const src_type * src_bottom,    \
+                                             int32_t          w,             \
+                                             int              wt,            \
+                                             int              wb,            \
+                                             pixman_fixed_t   vx,            \
+                                             pixman_fixed_t   unit_x);       \
+static force_inline void                                                     \
+scaled_bilinear_scanline_mips_##name##_##op (dst_type *       dst,           \
+                                             const uint32_t * mask, /* unused */ \
+                                             const src_type * src_top,       \
+                                             const src_type * src_bottom,    \
+                                             int32_t          w,             \
+                                             int              wt,            \
+                                             int              wb,            \
+                                             pixman_fixed_t   vx,            \
+                                             pixman_fixed_t   unit_x,        \
+                                             pixman_fixed_t   max_vx, /* unused */ \
+                                             pixman_bool_t    zero_src)      \
+{                                                                            \
+    if ((flags & SKIP_ZERO_SRC) && zero_src) /* early out only if opted in */ \
+        return;                                                              \
+    pixman_scaled_bilinear_scanline_##name##_##op##_asm_mips (dst, src_top,  \
+                                                              src_bottom, w, \
+                                                              wt, wb,        \
+                                                              vx, unit_x);   \
+}                                                                            \
+                                                                             \
+FAST_BILINEAR_MAINLOOP_COMMON (mips_##name##_cover_##op,                     \
+                       scaled_bilinear_scanline_mips_##name##_##op,          \
+                       src_type, uint32_t, dst_type, COVER, FLAG_NONE)       \
+FAST_BILINEAR_MAINLOOP_COMMON (mips_##name##_none_##op,                      \
+                       scaled_bilinear_scanline_mips_##name##_##op,          \
+                       src_type, uint32_t, dst_type, NONE, FLAG_NONE)        \
+FAST_BILINEAR_MAINLOOP_COMMON (mips_##name##_pad_##op,                       \
+                       scaled_bilinear_scanline_mips_##name##_##op,          \
+                       src_type, uint32_t, dst_type, PAD, FLAG_NONE)         \
+FAST_BILINEAR_MAINLOOP_COMMON (mips_##name##_normal_##op,                    \
+                       scaled_bilinear_scanline_mips_##name##_##op,          \
+                       src_type, uint32_t, dst_type, NORMAL,                 \
+                       FLAG_NONE)
+
 /*****************************************************************************/
 
 #define PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_A8_DST(flags, name, op,          \
-- 
1.7.3



More information about the Pixman mailing list