[Pixman] [PATCH] MIPS: DSPr2: Added over_n_8_8888 and over_n_8_0565 fast paths.

Nemanja Lukic nlukic at mips.com
Tue Apr 3 10:30:21 PDT 2012


From: Nemanja Lukic <nemanja.lukic at rt-rk.com>

Performance numbers before/after on MIPS-74kc @ 1GHz

Reference (before):

lowlevel-blt-bench:
     over_n_8_8888 =  L1:  10.71  L2:  10.11  M:  8.70 ( 34.57%)  HT:  7.82  VT:  7.77  R:  7.66  RT:  5.37 (  41Kops/s)
     over_n_8_0565 =  L1:   8.24  L2:   8.04  M:  7.49 ( 19.84%)  HT:  6.82  VT:  6.75  R:  6.70  RT:  4.85 (  40Kops/s)
cairo-perf-trace:
[ # ]  backend                         test   min(s) median(s) stddev. count
[ # ]    image: pixman 0.25.3
[  0]    image           swfdec-giant-steps   76.936   77.822   0.49%    6/6
[  1]    image         gnome-system-monitor  277.838  278.500   0.16%    6/6
[ # ]  image16: pixman 0.25.3
[  0]    image16         swfdec-giant-steps   60.598   61.966   1.10%    6/6
[  1]    image16       gnome-system-monitor  277.628  277.675   0.02%    6/6

Optimized:

lowlevel-blt-bench:
     over_n_8_8888 =  L1:  18.38  L2:  17.29  M: 13.49 ( 53.58%)  HT: 11.44  VT: 11.31  R: 11.05  RT:  6.65 (  47Kops/s)
     over_n_8_0565 =  L1:  12.42  L2:  11.86  M: 10.68 ( 28.28%)  HT:  9.27  VT:  9.16  R:  9.04  RT:  5.83 (  44Kops/s)
cairo-perf-trace:
[ # ]  backend                         test   min(s) median(s) stddev. count
[ # ]    image: pixman 0.25.3
[  0]    image           swfdec-giant-steps   71.430   71.593   0.18%    6/6
[  1]    image         gnome-system-monitor  253.903  254.007   0.02%    6/6
[ # ]  image16: pixman 0.25.3
[  0]  image16           swfdec-giant-steps   58.791   59.358   0.62%    6/6
[  1]  image16         gnome-system-monitor  253.713  253.863   0.03%    6/6
---
 pixman/pixman-mips-dspr2-asm.S |  122 ++++++++++++++++++++++++++++++++++++++++
 pixman/pixman-mips-dspr2-asm.h |   26 +++++++++
 pixman/pixman-mips-dspr2.c     |   10 +++
 3 files changed, 158 insertions(+), 0 deletions(-)

diff --git a/pixman/pixman-mips-dspr2-asm.S b/pixman/pixman-mips-dspr2-asm.S
index 6a0fc18..e51a718 100644
--- a/pixman/pixman-mips-dspr2-asm.S
+++ b/pixman/pixman-mips-dspr2-asm.S
@@ -527,3 +527,125 @@ LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_0565_ca_asm_mips)
      nop
 
 END(pixman_composite_over_n_8888_0565_ca_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_composite_over_n_8_8888_asm_mips)
+/* OVER of a constant source through an a8 mask onto w destination pixels.
+ * a0 - dst  (a8r8g8b8), advanced by 4 bytes per pixel
+ * a1 - src  (32bit constant), never modified
+ * a2 - mask (a8), advanced by 1 byte per pixel
+ * a3 - w    (pixel count, decremented to 0)
+ */
+/* NOTE(review): an instruction indented one extra space follows a branch and is presumably its delay slot - confirm LEAF_MIPS_DSPR2 selects noreorder. */
+    beqz      a3, 3f         /* nothing to do when w == 0 */
+     nop
+    li        t4, 0x00ff00ff /* t4 = maskLSR argument of the multiply macros */
+    li        t5, 0xff       /* t5 = 0xff, compared with srca and with mask */
+    srl       t6, a1, 24     /* t6 = srca */
+    beq       t5, t6, 2f     /* if (srca == 0xff) */
+     nop
+1:                           /* loop used when srca != 0xff */
+                             /* a1 = src */
+    lbu       t0, 0(a2)      /* t0 = mask */
+    beqz      t0, 111f       /* if (t0 == 0) leave dst untouched */
+     addiu    a2, a2, 1      /* mask++ */
+    move      t3, a1         /* t3 = src, used unscaled when mask == 0xff */
+    beq       t0, t5, 11f    /* if (t0 == 0xff) */
+     lw       t1, 0(a0)      /* t1 = dst */
+                             /* mask is 0x01..0xfe: scale src by mask */
+    MIPS_UN8x4_MUL_UN8 a1, t0, t3, t4, t6, t7, t8 /* presumably t3 = src * mask per channel; t6-t8 scratch */
+11:
+    not       t2, t3         /* t2 = ~t3 */
+    srl       t2, t2, 24     /* t2 = 255 - (top byte of t3) */
+    MIPS_UN8x4_MUL_UN8 t1, t2, t1, t4, t6, t7, t8 /* presumably t1 = dst * t2 per channel */
+    addu_s.qb t2, t1, t3     /* t2 = t1 + t3, saturating per byte */
+    sw        t2, 0(a0)      /* store the blended pixel */
+111:
+    addiu     a3, a3, -1     /* w-- */
+    bgtz      a3, 1b         /* repeat while w > 0 */
+     addiu    a0, a0, 4      /* dst++ */
+    b         3f             /* done, skip the srca == 0xff loop */
+     nop
+2:                           /* loop used when srca == 0xff */
+                             /* a1 = src */
+    lbu       t0, 0(a2)      /* t0 = mask */
+    beqz      t0, 222f       /* if (t0 == 0) leave dst untouched */
+     addiu    a2, a2, 1      /* mask++ */
+    beq       t0, t5, 22f    /* if (t0 == 0xff) store src directly */
+     move     t2, a1         /* t2 = src */
+    lw        t1, 0(a0)      /* t1 = dst */
+                             /* mask is 0x01..0xfe: full blend, result in t2 */
+    OVER_8888_8_8888   a1, t0, t1, t2, t4, t3, t6, t7, t8 /* t1 (dst) and t3, t6-t8 are clobbered */
+22:
+    sw        t2, 0(a0)      /* store src or the blended pixel */
+222:
+    addiu     a3, a3, -1     /* w-- */
+    bgtz      a3, 2b         /* repeat while w > 0 */
+     addiu    a0, a0, 4      /* dst++ */
+3:
+    j         ra             /* return to caller */
+     nop
+
+END(pixman_composite_over_n_8_8888_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_composite_over_n_8_0565_asm_mips)
+/* OVER of a constant source through an a8 mask onto w r5g6b5 pixels.
+ * a0 - dst  (r5g6b5), advanced by 2 bytes per pixel
+ * a1 - src  (32bit constant), never modified
+ * a2 - mask (a8), advanced by 1 byte per pixel
+ * a3 - w    (pixel count, decremented to 0)
+ */
+/* NOTE(review): an instruction indented one extra space follows a branch and is presumably its delay slot - confirm LEAF_MIPS_DSPR2 selects noreorder. */
+    beqz     a3, 3f         /* nothing to do when w == 0 */
+     nop
+    li       t4, 0x00ff00ff /* t4 = maskLSR argument of the multiply macros */
+    li       t5, 0xff       /* t5 = 0xff, compared with srca and with mask */
+    srl      t6, a1, 24     /* t6 = srca */
+    beq      t6, t5, 2f     /* if (srca == 0xff) */
+     nop
+1:                          /* loop used when srca != 0xff */
+                            /* a1 = src */
+    lbu      t0, 0(a2)      /* t0 = mask */
+    beqz     t0, 111f       /* if (t0 == 0) leave dst untouched */
+     addiu   a2, a2, 1      /* mask++ */
+    lhu     t1, 0(a0)       /* t1 = dst */
+    CONVERT_1x0565_TO_1x8888 t1, t2, t3, t7 /* presumably t2 = dst as 8888; t3, t7 scratch */
+    beq      t0, t5, 11f    /* if (t0 == 0xff) */
+     move    t3, a1         /* t3 = src, used unscaled when mask == 0xff */
+                            /* mask is 0x01..0xfe: scale src by mask */
+    MIPS_UN8x4_MUL_UN8       a1, t0, t3, t4, t7, t8, t9 /* presumably t3 = src * mask per channel */
+11:
+    not      t6, t3         /* t6 = ~t3 */
+    srl      t6, t6, 24     /* t6 = 255 - (top byte of t3) */
+    MIPS_UN8x4_MUL_UN8       t2, t6, t2, t4, t7, t8, t9 /* presumably t2 = dst * t6 per channel */
+    addu_s.qb                t1, t2, t3 /* t1 = t2 + t3, saturating per byte */
+    CONVERT_1x8888_TO_1x0565 t1, t2, t3, t7 /* presumably t2 = t1 packed to 0565; t3, t7 scratch */
+    sh       t2, 0(a0)      /* store the blended pixel */
+111:
+    addiu    a3, a3, -1     /* w-- */
+    bgtz     a3, 1b         /* repeat while w > 0 */
+     addiu   a0, a0, 2      /* dst++ */
+    b        3f             /* done, skip the srca == 0xff loop */
+     nop
+2:                          /* loop used when srca == 0xff */
+                            /* a1 = src */
+    lbu      t0, 0(a2)      /* t0 = mask */
+    beqz     t0, 222f       /* if (t0 == 0) leave dst untouched */
+     addiu   a2, a2, 1      /* mask++ */
+    beq      t0, t5, 22f    /* if (t0 == 0xff) convert and store src */
+     move    t1, a1         /* t1 = src */
+    lhu      t1, 0(a0)      /* t1 = dst */
+                            /* mask is 0x01..0xfe: full blend, result in t1 */
+    CONVERT_1x0565_TO_1x8888 t1, t2, t3, t7 /* presumably t2 = dst as 8888 */
+    OVER_8888_8_8888         a1, t0, t2, t1, t4, t3, t7, t8, t9 /* t2 and t3, t7-t9 are clobbered */
+22:
+    CONVERT_1x8888_TO_1x0565 t1, t9, t3, t7 /* presumably t9 = t1 packed to 0565 */
+    sh       t9, 0(a0)      /* store src or the blended pixel */
+222:
+    addiu    a3, a3, -1     /* w-- */
+    bgtz     a3, 2b         /* repeat while w > 0 */
+     addiu   a0, a0, 2      /* dst++ */
+3:
+    j        ra             /* return to caller */
+     nop
+
+END(pixman_composite_over_n_8_0565_asm_mips)
diff --git a/pixman/pixman-mips-dspr2-asm.h b/pixman/pixman-mips-dspr2-asm.h
index 12ff42c..03a4bb0 100644
--- a/pixman/pixman-mips-dspr2-asm.h
+++ b/pixman/pixman-mips-dspr2-asm.h
@@ -499,4 +499,30 @@ LEAF_MIPS32R2(symbol)                                   \
     precr.qb.ph       \d2_8888,  \scratch5, \scratch6
 .endm
 
+/*
+ * OVER of one a8r8g8b8 source pixel (s_8888) onto one a8r8g8b8 destination
+ * pixel (d_8888) through an a8 mask (m_8). Result goes to out_8888; d_8888 and
+ * the scratch registers are clobbered. maskLSR is used for rounding and must be:
+ *   li       maskLSR, 0x00ff00ff
+ */
+.macro OVER_8888_8_8888 s_8888,   \
+                        m_8,      \
+                        d_8888,   \
+                        out_8888, \
+                        maskLSR,  \
+                        scratch1, scratch2, scratch3, scratch4
+    MIPS_UN8x4_MUL_UN8 \s_8888,   \m_8, \
+                       \scratch1, \maskLSR, \
+                       \scratch2, \scratch3, \scratch4
+    /* scratch1 now presumably holds s_8888 scaled by m_8 per channel */
+    not                \scratch2, \scratch1 /* scratch2 = ~scratch1 */
+    srl                \scratch2, \scratch2, 24 /* scratch2 = 255 - (top byte of scratch1) */
+    /* d_8888 is scaled in place; out_8888 is only a scratch register here */
+    MIPS_UN8x4_MUL_UN8 \d_8888,   \scratch2, \
+                       \d_8888,   \maskLSR,  \
+                       \scratch3, \scratch4, \out_8888
+    /* out_8888 = d_8888 + scratch1, saturating per byte */
+    addu_s.qb          \out_8888, \d_8888,   \scratch1
+.endm
+
 #endif //PIXMAN_MIPS_DSPR2_ASM_H
diff --git a/pixman/pixman-mips-dspr2.c b/pixman/pixman-mips-dspr2.c
index 018770a..7081734 100644
--- a/pixman/pixman-mips-dspr2.c
+++ b/pixman/pixman-mips-dspr2.c
@@ -53,6 +53,10 @@ PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8888_8888_ca,
                                        uint32_t, 1, uint32_t, 1)
 PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8888_0565_ca,
                                        uint32_t, 1, uint16_t, 1)
+PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8_8888,
+                                       uint8_t, 1, uint32_t, 1) /* uint8_t mask, uint32_t dst; presumably (mask type, count, dst type, count) - confirm against the macro */
+PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8_0565,
+                                       uint8_t, 1, uint16_t, 1) /* uint8_t mask, uint16_t dst; same argument layout as the binding above */
 
 static pixman_bool_t
 pixman_fill_mips (uint32_t *bits,
@@ -195,6 +199,12 @@ static const pixman_fast_path_t mips_dspr2_fast_paths[] =
     PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, mips_composite_over_n_8888_8888_ca),
     PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5,   mips_composite_over_n_8888_0565_ca),
     PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5,   mips_composite_over_n_8888_0565_ca),
+    PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       a8r8g8b8, mips_composite_over_n_8_8888),
+    PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       x8r8g8b8, mips_composite_over_n_8_8888),
+    PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       a8b8g8r8, mips_composite_over_n_8_8888),
+    PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       x8b8g8r8, mips_composite_over_n_8_8888),
+    PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       r5g6b5,   mips_composite_over_n_8_0565),
+    PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       b5g6r5,   mips_composite_over_n_8_0565),
 
     { PIXMAN_OP_NONE },
 };
-- 
1.7.3



More information about the Pixman mailing list