[Pixman] [PATCH 2/3] MIPS: DSPr2: Added more fast-paths for SRC operation:

Nemanja Lukic nlukic at mips.com
Sun Nov 4 13:58:20 PST 2012


From: Nemanja Lukic <nemanja.lukic at rt-rk.com>

Performance numbers before/after on MIPS-74kc @ 1GHz:

lowlevel-blt-bench results

Referent (before):
        src_n_8_8888 =  L1:  13.79  L2:  22.47  M: 17.55 ( 58.28%)  HT:  6.95  VT:  6.46  R:  6.34  RT:  2.07 (  20Kops/s)
           src_n_8_8 =  L1:  20.22  L2:  20.21  M: 18.20 ( 24.17%)  HT:  6.65  VT:  6.22  R:  6.11  RT:  2.03 (  20Kops/s)

Optimized:
        src_n_8_8888 =  L1:  58.31  L2:  53.34  M: 25.69 ( 85.29%)  HT: 22.55  VT: 21.44  R: 19.91  RT: 10.34 (  48Kops/s)
           src_n_8_8 =  L1: 102.60  L2:  89.43  M: 65.01 ( 86.32%)  HT: 37.87  VT: 37.02  R: 32.43  RT: 12.41 (  51Kops/s)
---
 pixman/pixman-mips-dspr2-asm.S |  133 ++++++++++++++++++++++++++++++++++++++++
 pixman/pixman-mips-dspr2.c     |    9 +++
 2 files changed, 142 insertions(+), 0 deletions(-)

diff --git a/pixman/pixman-mips-dspr2-asm.S b/pixman/pixman-mips-dspr2-asm.S
index 9b9b449..d2482e0 100644
--- a/pixman/pixman-mips-dspr2-asm.S
+++ b/pixman/pixman-mips-dspr2-asm.S
@@ -310,6 +310,139 @@ LEAF_MIPS_DSPR2(pixman_composite_src_x888_8888_asm_mips)
 
 END(pixman_composite_src_x888_8888_asm_mips)
 
+LEAF_MIPS_DSPR2(pixman_composite_src_n_8_8888_asm_mips)
+/*
+ * a0 - dst  (a8r8g8b8)
+ * a1 - src  (32bit constant)
+ * a2 - mask (a8)
+ * a3 - w
+ */
+
+
+    SAVE_REGS_ON_STACK 0, v0
+    li       v0, 0x00ff00ff
+
+    beqz     a3, 3f
+     nop
+    addiu    t1, a3, -1
+    beqz     t1, 2f
+     nop
+
+1:
+                       /* a1 = source      (32bit constant) */
+    lbu      t0, 0(a2) /* t2 = mask        (a8) */
+    lbu      t1, 1(a2) /* t3 = mask        (a8) */
+    addiu    a2, a2, 2
+
+    MIPS_2xUN8x4_MUL_2xUN8 a1, a1, t0, t1, t2, t3, v0, t4, t5, t6, t7, t8, t9
+
+    sw       t2, 0(a0)
+    sw       t3, 4(a0)
+    addiu    a3, a3, -2
+    addiu    t2, a3, -1
+    bgtz     t2, 1b
+     addiu   a0, a0, 8
+
+    beqz     a3, 3f
+     nop
+
+2:
+    lbu      t0, 0(a2)
+    addiu    a2, a2, 1
+
+    MIPS_UN8x4_MUL_UN8 a1, t0, t1, v0, t3, t4, t5
+
+    sw       t1, 0(a0)
+    addiu    a3, a3, -1
+    addiu    a0, a0, 4
+
+3:
+    RESTORE_REGS_FROM_STACK 0, v0
+    j        ra
+     nop
+
+END(pixman_composite_src_n_8_8888_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_composite_src_n_8_8_asm_mips)
+/*
+ * a0 - dst  (a8)
+ * a1 - src  (32bit constant)
+ * a2 - mask (a8)
+ * a3 - w
+ */
+
+    li                t9, 0x00ff00ff
+    beqz              a3, 3f
+     nop
+    srl               t7, a3, 2   /* t7 = how many multiples of 4 dst pixels */
+    beqz              t7, 1f      /* branch if less than 4 src pixels */
+     nop
+
+    srl               t8, a1, 24
+    replv.ph          t8, t8
+
+0:
+    beqz              t7, 1f
+     addiu            t7, t7, -1
+    lbu               t0, 0(a2)
+    lbu               t1, 1(a2)
+    lbu               t2, 2(a2)
+    lbu               t3, 3(a2)
+
+    addiu             a2, a2, 4
+
+    precr_sra.ph.w    t1, t0, 0
+    precr_sra.ph.w    t3, t2, 0
+    precr.qb.ph       t0, t3, t1
+
+    muleu_s.ph.qbl    t2, t0, t8
+    muleu_s.ph.qbr    t3, t0, t8
+    shra_r.ph         t4, t2, 8
+    shra_r.ph         t5, t3, 8
+    and               t4, t4, t9
+    and               t5, t5, t9
+    addq.ph           t2, t2, t4
+    addq.ph           t3, t3, t5
+    shra_r.ph         t2, t2, 8
+    shra_r.ph         t3, t3, 8
+    precr.qb.ph       t2, t2, t3
+
+    sb                t2, 0(a0)
+    srl               t2, t2, 8
+    sb                t2, 1(a0)
+    srl               t2, t2, 8
+    sb                t2, 2(a0)
+    srl               t2, t2, 8
+    sb                t2, 3(a0)
+    addiu             a3, a3, -4
+    b                 0b
+     addiu            a0, a0, 4
+
+1:
+    beqz              a3, 3f
+     nop
+    srl               t8, a1, 24
+2:
+    lbu               t0, 0(a2)
+    addiu             a2, a2, 1
+
+    mul               t2, t0, t8
+    shra_r.ph         t3, t2, 8
+    andi              t3, t3, 0x00ff
+    addq.ph           t2, t2, t3
+    shra_r.ph         t2, t2, 8
+
+    sb                t2, 0(a0)
+    addiu             a3, a3, -1
+    bnez              a3, 2b
+     addiu            a0, a0, 1
+
+3:
+    j                 ra
+     nop
+
+END(pixman_composite_src_n_8_8_asm_mips)
+
 LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_8888_ca_asm_mips)
 /*
  * a0 - dst  (a8r8g8b8)
diff --git a/pixman/pixman-mips-dspr2.c b/pixman/pixman-mips-dspr2.c
index bd828df..161377b 100644
--- a/pixman/pixman-mips-dspr2.c
+++ b/pixman/pixman-mips-dspr2.c
@@ -55,6 +55,10 @@ PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, add_8_8,
 PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, add_8888_8888,
                                     uint32_t, 1, uint32_t, 1)
 
+PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (0, src_n_8_8888,
+                                       uint8_t, 1, uint32_t, 1)
+PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (0, src_n_8_8,
+                                       uint8_t, 1, uint8_t, 1)
 PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8888_8888_ca,
                                        uint32_t, 1, uint32_t, 1)
 PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8888_0565_ca,
@@ -261,6 +265,11 @@ static const pixman_fast_path_t mips_dspr2_fast_paths[] =
     PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, mips_composite_src_x888_8888),
     PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, mips_composite_src_x888_8888),
     PIXMAN_STD_FAST_PATH (SRC, r8g8b8,   null, r8g8b8,   mips_composite_src_0888_0888),
+    PIXMAN_STD_FAST_PATH (SRC, solid,    a8,   a8r8g8b8, mips_composite_src_n_8_8888),
+    PIXMAN_STD_FAST_PATH (SRC, solid,    a8,   x8r8g8b8, mips_composite_src_n_8_8888),
+    PIXMAN_STD_FAST_PATH (SRC, solid,    a8,   a8b8g8r8, mips_composite_src_n_8_8888),
+    PIXMAN_STD_FAST_PATH (SRC, solid,    a8,   x8b8g8r8, mips_composite_src_n_8_8888),
+    PIXMAN_STD_FAST_PATH (SRC, solid,    a8,   a8,       mips_composite_src_n_8_8),
 
     PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, mips_composite_over_n_8888_8888_ca),
     PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, mips_composite_over_n_8888_8888_ca),
-- 
1.7.3



More information about the Pixman mailing list