[Pixman] [PATCH 3/3] MIPS: DSPr2: Added more fast-paths for OVER operation: over_n_0565, over_n_8888

Nemanja Lukic nlukic at mips.com
Sun Nov 4 13:58:21 PST 2012


From: Nemanja Lukic <nemanja.lukic at rt-rk.com>

Performance numbers before/after on MIPS-74kc @ 1GHz:

lowlevel-blt-bench results

Reference (before):
        over_n_0565 =  L1:  12.04  L2:  21.45  M: 18.50 ( 24.55%)  HT:  6.93  VT:  6.45  R:  6.38  RT:  2.16 (  22Kops/s)
        over_n_8888 =  L1:  93.76  L2:  85.96  M: 24.41 ( 64.78%)  HT:  8.93  VT:  8.08  R:  7.99  RT:  2.54 (  25Kops/s)

Optimized:
        over_n_0565 =  L1:  26.19  L2:  24.93  M: 21.28 ( 28.25%)  HT: 18.75  VT: 18.07  R: 17.85  RT: 11.34 (  57Kops/s)
        over_n_8888 =  L1:  55.31  L2:  49.07  M: 28.60 ( 75.93%)  HT: 23.99  VT: 22.95  R: 22.34  RT: 12.85 (  61Kops/s)
---
 pixman/pixman-mips-dspr2-asm.S |  106 ++++++++++++++++++++++++++++++++++++++++
 pixman/pixman-mips-dspr2-asm.h |   31 ++++++++++++
 pixman/pixman-mips-dspr2.c     |    9 +++-
 pixman/pixman-mips-dspr2.h     |   36 ++++++++++++++
 4 files changed, 181 insertions(+), 1 deletions(-)

diff --git a/pixman/pixman-mips-dspr2-asm.S b/pixman/pixman-mips-dspr2-asm.S
index d2482e0..96d3f97 100644
--- a/pixman/pixman-mips-dspr2-asm.S
+++ b/pixman/pixman-mips-dspr2-asm.S
@@ -1342,6 +1342,112 @@ LEAF_MIPS_DSPR2(pixman_composite_over_8888_8888_asm_mips)
 
 END(pixman_composite_over_8888_8888_asm_mips)
 
+LEAF_MIPS_DSPR2(pixman_composite_over_n_0565_asm_mips)
+/*
+ * a0 - dst  (r5g6b5), read and written in place, 2 bytes per pixel
+ * a1 - src  (32bit constant), composited OVER every dst pixel
+ * a2 - w    (number of dst pixels to process; 0 is accepted)
+ */
+
+    SAVE_REGS_ON_STACK 0, s0, s1, s2, s3
+    li       t2, 0x00ff00ff /* maskLSR expected by OVER_2x8888_2x8888 */
+    li       t5, 0xf800f800 /* bits 15..11 of each halfword */
+    li       t6, 0x07e007e0 /* bits 10..5  of each halfword */
+    li       t7, 0x001F001F /* bits  4..0  of each halfword */
+    beqz     a2, 3f         /* w == 0: nothing to do */
+     nop
+    addiu    t1, a2, -1
+    beqz     t1, 2f         /* w == 1: only the single-pixel tail */
+     nop
+1:                         /* main loop: two dst pixels per pass */
+                       /* a1 = source      (32bit constant) */
+    lhu      t0, 0(a0) /* t0 = destination (r5g6b5) */
+                       /* a1 = source      (same constant, 2nd pixel) */
+    lhu      t1, 2(a0) /* t1 = destination (r5g6b5) */
+
+    CONVERT_2x0565_TO_2x8888 t0, t1, t3, t4, t6, t7, t8, t9, s0, s1
+    OVER_2x8888_2x8888       a1, a1, t3, t4, t0, t1, t2, t8, t9, s0, s1, s2, s3
+    CONVERT_2x8888_TO_2x0565 t0, t1, t3, t4, t5, t6, t7, t8, t9
+
+    sh       t3, 0(a0)
+    sh       t4, 2(a0)
+    addiu    a2, a2, -2
+    addiu    t1, a2, -1
+    bgtz     t1, 1b    /* loop while at least two pixels remain */
+     addiu   a0, a0, 4 /* advance dst by two 16-bit pixels */
+2:
+    beqz     a2, 3f    /* no odd pixel left */
+     nop
+                       /* a1 = source      (32bit constant) */
+    lhu      t0, 0(a0) /* t0 = destination (r5g6b5) */
+
+    CONVERT_1x0565_TO_1x8888 t0, t1, t3, t4
+    OVER_8888_8888           a1, t1, t0, t2, t3, t4, t5, t6
+    CONVERT_1x8888_TO_1x0565 t0, t1, t3, t4
+
+    sh       t1, 0(a0)
+3:
+    RESTORE_REGS_FROM_STACK 0, s0, s1, s2, s3
+    j        ra
+     nop
+
+END(pixman_composite_over_n_0565_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_asm_mips)
+/*
+ * a0 - dst  (a8r8g8b8), read and written in place, 4 bytes per pixel
+ * a1 - src  (32bit constant), composited OVER every dst pixel
+ * a2 - w    (number of dst pixels to process; 0 is accepted)
+ */
+
+    beqz     a2, 4f     /* w == 0: return before touching the stack */
+     nop
+
+    SAVE_REGS_ON_STACK 8, s0, s1, s2, s3, s4, s5
+    li       t4, 0x00ff00ff /* maskLSR expected by OVER_2x8888_2x8888 */
+    srl      t9, a2, 2  /* t9 = how many multiples of 4 dst pixels */
+    beqz     t9, 2f     /* branch if less than 4 dst pixels */
+     nop
+
+1:                      /* main loop: four dst pixels per pass */
+    beqz     t9, 2f     /* all groups of four are done */
+     addiu   t9, t9, -1
+
+    lw       t0, 0(a0)  /* t0 = destination (a8r8g8b8) */
+    lw       t1, 4(a0)  /* t1 = destination (a8r8g8b8) */
+    lw       s0, 8(a0)  /* s0 = destination (a8r8g8b8) */
+    lw       s1, 12(a0) /* s1 = destination (a8r8g8b8) */
+
+    OVER_2x8888_2x8888 a1, a1, t0, t1, t2, t3, t4, t5, t6, t7, t8, s4, s5
+    OVER_2x8888_2x8888 a1, a1, s0, s1, s2, s3, t4, t5, t6, t7, t8, s4, s5
+
+    sw       t2, 0(a0)
+    sw       t3, 4(a0)
+    sw       s2, 8(a0)
+    sw       s3, 12(a0)
+    addiu    a2, a2, -4 /* a2 = dst pixels still to process */
+    b        1b
+     addiu   a0, a0, 16 /* advance dst by four 32-bit pixels */
+2:
+    beqz     a2, 3f     /* no leftover pixels after the groups of four */
+     nop
+21:                     /* tail loop: one dst pixel per pass */
+    lw       t0, 0(a0) /* t0 = destination (a8r8g8b8) */
+
+    OVER_8888_8888 a1, t0, t1, t4, t3, t2, t5, t6
+
+    sw       t1, 0(a0)
+    addiu    a2, a2, -1
+    bnez     a2, 21b
+     addiu   a0, a0, 4
+3:
+    RESTORE_REGS_FROM_STACK 8, s0, s1, s2, s3, s4, s5
+4:                      /* w == 0 lands here: no registers were saved */
+    j        ra
+     nop
+
+END(pixman_composite_over_n_8888_asm_mips)
+
 LEAF_MIPS_DSPR2(pixman_composite_add_8_8_8_asm_mips)
 /*
  * a0 - dst  (a8)
diff --git a/pixman/pixman-mips-dspr2-asm.h b/pixman/pixman-mips-dspr2-asm.h
index b330c0f..dcbac0b 100644
--- a/pixman/pixman-mips-dspr2-asm.h
+++ b/pixman/pixman-mips-dspr2-asm.h
@@ -587,6 +587,37 @@ LEAF_MIPS32R2(symbol)                                   \
     addu_s.qb          \out_8888, \out_8888, \s_8888
 .endm
 
+/*
+ * OVER operation on two a8r8g8b8 source pixels (s1_8888 and s2_8888) and two
+ * a8r8g8b8 destination pixels (d1_8888 and d2_8888); the results are written
+ * to out1_8888 and out2_8888. maskLSR is needed for rounding and must hold:
+ *   li       maskLSR, 0x00ff00ff
+ */
+.macro OVER_2x8888_2x8888 s1_8888,   \
+                          s2_8888,   \
+                          d1_8888,   \
+                          d2_8888,   \
+                          out1_8888, \
+                          out2_8888, \
+                          maskLSR,   \
+                          scratch1, scratch2, scratch3, \
+                          scratch4, scratch5, scratch6
+    not                   \scratch1,  \s1_8888
+    srl                   \scratch1,  \scratch1, 24
+    not                   \scratch2,  \s2_8888
+    srl                   \scratch2,  \scratch2, 24
+
+    MIPS_2xUN8x4_MUL_2xUN8 \d1_8888,   \d2_8888, \
+                           \scratch1,  \scratch2, \
+                           \out1_8888, \out2_8888, \
+                           \maskLSR, \
+                           \scratch3,  \scratch4, \scratch5, \
+                           \scratch6,  \d1_8888,  \d2_8888
+
+    addu_s.qb             \out1_8888, \out1_8888, \s1_8888
+    addu_s.qb             \out2_8888, \out2_8888, \s2_8888
+.endm
+
 .macro MIPS_UN8x4_MUL_UN8_ADD_UN8x4 s_8888,   \
                                     m_8,      \
                                     d_8888,   \
diff --git a/pixman/pixman-mips-dspr2.c b/pixman/pixman-mips-dspr2.c
index 161377b..11f1254 100644
--- a/pixman/pixman-mips-dspr2.c
+++ b/pixman/pixman-mips-dspr2.c
@@ -81,6 +81,11 @@ PIXMAN_MIPS_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, over_0565_n_0565,
 PIXMAN_MIPS_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, add_8888_n_8888,
                                       uint32_t, 1, uint32_t, 1)
 
+PIXMAN_MIPS_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, over_n_0565,
+                                  uint16_t, 1)
+PIXMAN_MIPS_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, over_n_8888,
+                                  uint32_t, 1)
+
 PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (add_8_8_8, uint8_t,  1,
                                          uint8_t,  1, uint8_t,  1)
 PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (add_8888_8_8888, uint32_t, 1,
@@ -283,7 +288,9 @@ static const pixman_fast_path_t mips_dspr2_fast_paths[] =
     PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       x8b8g8r8, mips_composite_over_n_8_8888),
     PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       r5g6b5,   mips_composite_over_n_8_0565),
     PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       b5g6r5,   mips_composite_over_n_8_0565),
-
+    PIXMAN_STD_FAST_PATH (OVER, solid,    null,     r5g6b5,   mips_composite_over_n_0565),
+    PIXMAN_STD_FAST_PATH (OVER, solid,    null,     a8r8g8b8, mips_composite_over_n_8888),
+    PIXMAN_STD_FAST_PATH (OVER, solid,    null,     x8r8g8b8, mips_composite_over_n_8888),
     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid,    a8r8g8b8, mips_composite_over_8888_n_8888),
     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid,    x8r8g8b8, mips_composite_over_8888_n_8888),
     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid,    r5g6b5,   mips_composite_over_8888_n_0565),
diff --git a/pixman/pixman-mips-dspr2.h b/pixman/pixman-mips-dspr2.h
index 3766850..4ac9ff9 100644
--- a/pixman/pixman-mips-dspr2.h
+++ b/pixman/pixman-mips-dspr2.h
@@ -85,6 +85,42 @@ mips_composite_##name (pixman_implementation_t *imp,             \
     }                                                            \
 }
 
+/****************************************************************/
+
+#define PIXMAN_MIPS_BIND_FAST_PATH_N_DST(flags, name,            \
+                                         dst_type, dst_cnt)      \
+void                                                             \
+pixman_composite_##name##_asm_mips (dst_type *dst,               \
+                                    uint32_t  src,               \
+                                    int32_t   w);                \
+    /* C wrapper referenced from the fast-path table. */         \
+static void                                                      \
+mips_composite_##name (pixman_implementation_t *imp,             \
+                       pixman_composite_info_t *info)            \
+{                                                                \
+    PIXMAN_COMPOSITE_ARGS (info);                                \
+    dst_type  *dst_line, *dst;                                   \
+    int32_t    dst_stride;                                       \
+    uint32_t   src;                                              \
+    /* Solid source value, read once before the row loop. */     \
+    src = _pixman_image_get_solid (                              \
+    imp, src_image, dest_image->bits.format);                    \
+    /* Skip an all-zero source if the binding requests it. */    \
+    if ((flags & SKIP_ZERO_SRC) && src == 0)                     \
+        return;                                                  \
+                                                                 \
+    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type, \
+                           dst_stride, dst_line, dst_cnt);       \
+    /* One asm call per destination scanline. */                 \
+    while (height--)                                             \
+    {                                                            \
+        dst = dst_line;                                          \
+        dst_line += dst_stride;                                  \
+                                                                 \
+        pixman_composite_##name##_asm_mips (dst, src, width);    \
+    }                                                            \
+}
+
 /*******************************************************************/
 
 #define PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST(flags, name,          \
-- 
1.7.3



More information about the Pixman mailing list