[Pixman] [PATCH 1/4] MIPS: DSPr2: Added fast-paths for OVER operation: - over_8888_n_8888 - over_8888_8_8888

Nemanja Lukic nlukic at mips.com
Fri Sep 14 00:31:23 PDT 2012


From: Nemanja Lukic <nemanja.lukic at rt-rk.com>

Performance numbers before/after on MIPS-74kc @ 1GHz:

lowlevel-blt-bench results

Reference (before):
        over_8888_n_8888 =  L1:   9.92  L2:  11.27  M:  8.50 ( 45.23%)  HT:  4.70  VT:  4.45  R:  4.49  RT:  1.85 (  20Kops/s)
        over_8888_8_8888 =  L1:  12.54  L2:  10.86  M:  8.18 ( 54.36%)  HT:  6.53  VT:  6.45  R:  6.41  RT:  3.83 (  33Kops/s)

Optimized:
        over_8888_n_8888 =  L1:  28.02  L2:  24.92  M: 14.72 ( 78.15%)  HT: 13.03  VT: 12.65  R: 12.00  RT:  7.49 (  49Kops/s)
        over_8888_8_8888 =  L1:  26.92  L2:  23.93  M: 13.65 ( 90.58%)  HT: 11.68  VT: 11.29  R: 10.56  RT:  6.37 (  45Kops/s)
---
 pixman/pixman-mips-dspr2-asm.S |  102 ++++++++++++++++++++++++++++++++++++++++
 pixman/pixman-mips-dspr2.c     |   13 +++++
 pixman/pixman-mips-dspr2.h     |   83 ++++++++++++++++++++++++++++++++
 3 files changed, 198 insertions(+), 0 deletions(-)

diff --git a/pixman/pixman-mips-dspr2-asm.S b/pixman/pixman-mips-dspr2-asm.S
index a8fccd5..165f177 100644
--- a/pixman/pixman-mips-dspr2-asm.S
+++ b/pixman/pixman-mips-dspr2-asm.S
@@ -749,6 +749,108 @@ LEAF_MIPS_DSPR2(pixman_composite_over_n_8_0565_asm_mips)
 
 END(pixman_composite_over_n_8_0565_asm_mips)
 
+LEAF_MIPS_DSPR2(pixman_composite_over_8888_n_8888_asm_mips)
+/* Handles one scanline: two pixels per loop pass (1:), then an odd tail (2:).
+ * a0 - dst  (a8r8g8b8)
+ * a1 - src  (a8r8g8b8)
+ * a2 - mask (32bit constant; only its top byte is kept, see srl below)
+ * a3 - w    (pixel count; reduced by 2 per loop pass)
+ */
+
+    SAVE_REGS_ON_STACK 0, s0 /* s0 is handed to the 2-pixel macro below */
+    li       t4, 0x00ff00ff  /* constant passed to both OVER_* macros */
+    beqz     a3, 3f          /* w == 0: go straight to the epilogue */
+     nop
+    addiu    t1, a3, -1
+    srl      a2, a2, 24      /* a2 = mask >> 24 */
+    beqz     t1, 2f          /* w == 1: skip the 2-pixel loop */
+     nop
+
+1:
+    lw       t0, 0(a1) /* t0 = source      (a8r8g8b8) */
+    lw       t1, 4(a1) /* t1 = source      (a8r8g8b8) */
+                       /* a2 = mask >> 24 (same value used for both pixels) */
+    lw       t2, 0(a0) /* t2 = destination (a8r8g8b8) */
+    lw       t3, 4(a0) /* t3 = destination (a8r8g8b8) */
+    addiu    a1, a1, 8 /* src += 2 pixels */
+
+    OVER_2x8888_2x8_2x8888 t0, t1, a2, a2, t2, t3, \
+                           t5, t6, t4, t7, t8, t9, t0, t1, s0
+
+    sw       t5, 0(a0) /* t5, t6 = macro outputs, written back to dst */
+    sw       t6, 4(a0)
+    addiu    a3, a3, -2
+    addiu    t1, a3, -1
+    bgtz     t1, 1b    /* loop while at least 2 pixels remain */
+     addiu   a0, a0, 8 /* dst += 2 pixels (presumably in the branch delay slot) */
+2:
+    beqz     a3, 3f    /* no odd pixel left */
+     nop
+    lw       t0, 0(a1) /* t0 = source      (a8r8g8b8) */
+                       /* a2 = mask >> 24 */
+    lw       t1, 0(a0) /* t1 = destination (a8r8g8b8) */
+
+    OVER_8888_8_8888 t0, a2, t1, t3, t4, t5, t6, t7, t8
+
+    sw       t3, 0(a0) /* t3 = macro output for the last pixel */
+3:
+    RESTORE_REGS_FROM_STACK 0, s0
+    j        ra
+     nop
+
+END(pixman_composite_over_8888_n_8888_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_composite_over_8888_8_8888_asm_mips)
+/* Handles one scanline: two pixels per loop pass (1:), then an odd tail (2:).
+ * a0 - dst  (a8r8g8b8)
+ * a1 - src  (a8r8g8b8)
+ * a2 - mask (a8, read one byte per pixel)
+ * a3 - w    (pixel count; reduced by 2 per loop pass)
+ */
+
+    SAVE_REGS_ON_STACK 0, s0, s1 /* s0, s1 are handed to the 2-pixel macro */
+    li       t4, 0x00ff00ff      /* constant passed to both OVER_* macros */
+    beqz     a3, 3f              /* w == 0: go straight to the epilogue */
+     nop
+    addiu    t1, a3, -1
+    beqz     t1, 2f              /* w == 1: skip the 2-pixel loop */
+     nop
+1:
+    lw       t0, 0(a1) /* t0 = source      (a8r8g8b8) */
+    lw       t1, 4(a1) /* t1 = source      (a8r8g8b8) */
+    lbu      t2, 0(a2) /* t2 = mask        (a8) */
+    lbu      t3, 1(a2) /* t3 = mask        (a8) */
+    lw       t5, 0(a0) /* t5 = destination (a8r8g8b8) */
+    lw       t6, 4(a0) /* t6 = destination (a8r8g8b8) */
+    addiu    a1, a1, 8 /* src  += 2 pixels */
+    addiu    a2, a2, 2 /* mask += 2 bytes */
+
+    OVER_2x8888_2x8_2x8888 t0, t1, t2, t3, t5, t6, \
+                           t7, t8, t4, t9, s0, s1, t0, t1, t2
+
+    sw       t7, 0(a0) /* t7, t8 = macro outputs, written back to dst */
+    sw       t8, 4(a0)
+    addiu    a3, a3, -2
+    addiu    t1, a3, -1
+    bgtz     t1, 1b    /* loop while at least 2 pixels remain */
+     addiu   a0, a0, 8 /* dst += 2 pixels (presumably in the branch delay slot) */
+2:
+    beqz     a3, 3f    /* no odd pixel left */
+     nop
+    lw       t0, 0(a1) /* t0 = source      (a8r8g8b8) */
+    lbu      t1, 0(a2) /* t1 = mask        (a8) */
+    lw       t2, 0(a0) /* t2 = destination (a8r8g8b8) */
+
+    OVER_8888_8_8888 t0, t1, t2, t3, t4, t5, t6, t7, t8
+
+    sw       t3, 0(a0) /* t3 = macro output for the last pixel */
+3:
+    RESTORE_REGS_FROM_STACK 0, s0, s1
+    j        ra
+     nop
+
+END(pixman_composite_over_8888_8_8888_asm_mips)
+
 LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_mips)
 /*
  * a0     - *dst
diff --git a/pixman/pixman-mips-dspr2.c b/pixman/pixman-mips-dspr2.c
index 63a0225..1c0baa6 100644
--- a/pixman/pixman-mips-dspr2.c
+++ b/pixman/pixman-mips-dspr2.c
@@ -58,6 +58,12 @@ PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8_8888,
 PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8_0565,
                                        uint8_t, 1, uint16_t, 1)
 
+PIXMAN_MIPS_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, over_8888_n_8888,
+                                      uint32_t, 1, uint32_t, 1) /* src_type, src_cnt, dst_type, dst_cnt */
+
+PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (over_8888_8_8888, uint32_t, 1,
+                                         uint8_t, 1, uint32_t, 1) /* mask_type, mask_cnt, dst_type, dst_cnt */
+
 PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (0, 8888_8888, SRC,
                                           uint32_t, uint32_t)
 PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (0, 8888_0565, SRC,
@@ -232,6 +238,13 @@ static const pixman_fast_path_t mips_dspr2_fast_paths[] =
     PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       r5g6b5,   mips_composite_over_n_8_0565),
     PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       b5g6r5,   mips_composite_over_n_8_0565),
 
+    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid,    a8r8g8b8, mips_composite_over_8888_n_8888),
+    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid,    x8r8g8b8, mips_composite_over_8888_n_8888),
+    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8,       a8r8g8b8, mips_composite_over_8888_8_8888),
+    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8,       x8r8g8b8, mips_composite_over_8888_8_8888),
+    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8,       a8b8g8r8, mips_composite_over_8888_8_8888),
+    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8,       x8b8g8r8, mips_composite_over_8888_8_8888),
+
     SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, mips_8888_8888),
     SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, mips_8888_8888),
     SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, mips_8888_8888),
diff --git a/pixman/pixman-mips-dspr2.h b/pixman/pixman-mips-dspr2.h
index a3d774f..bddcfd8 100644
--- a/pixman/pixman-mips-dspr2.h
+++ b/pixman/pixman-mips-dspr2.h
@@ -127,6 +127,89 @@ mips_composite_##name (pixman_implementation_t *imp,                \
     }                                                               \
 }
 
+/**** Bind an asm scanline routine: (dst, src, solid mask, w) ******/
+
+#define PIXMAN_MIPS_BIND_FAST_PATH_SRC_N_DST(flags, name,           \
+                                            src_type, src_cnt,      \
+                                            dst_type, dst_cnt)      \
+void /* prototype of the asm routine */                             \
+pixman_composite_##name##_asm_mips (dst_type  *dst,                 \
+                                    src_type  *src,                 \
+                                    uint32_t   mask,                \
+                                    int32_t    w);                  \
+                                                                    \
+static void                                                         \
+mips_composite_##name (pixman_implementation_t *imp,                \
+                       pixman_composite_info_t *info)               \
+{                                                                   \
+    PIXMAN_COMPOSITE_ARGS (info); /* presumably unpacks info */     \
+    dst_type  *dst_line, *dst;                                      \
+    src_type  *src_line, *src;                                      \
+    int32_t    dst_stride, src_stride;                              \
+    uint32_t   mask;                                                \
+                                                                    \
+    mask = _pixman_image_get_solid ( /* solid mask value */         \
+        imp, mask_image, dest_image->bits.format);                  \
+                                                                    \
+    if ((flags & SKIP_ZERO_MASK) && mask == 0)                      \
+        return; /* leave dest untouched */                          \
+                                                                    \
+    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type,    \
+                           dst_stride, dst_line, dst_cnt);          \
+    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type,       \
+                           src_stride, src_line, src_cnt);          \
+                                                                    \
+    while (height--) /* one asm call per row */                     \
+    {                                                               \
+        dst = dst_line;                                             \
+        dst_line += dst_stride;                                     \
+        src = src_line;                                             \
+        src_line += src_stride;                                     \
+                                                                    \
+        pixman_composite_##name##_asm_mips (dst, src, mask, width); \
+    }                                                               \
+}
+
+/**** Bind an asm scanline routine: (dst, src, mask pointer, w) *********/
+
+#define PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST(name, src_type, src_cnt, \
+                                                mask_type, mask_cnt,     \
+                                                dst_type, dst_cnt)       \
+void /* prototype of the asm routine */                                  \
+pixman_composite_##name##_asm_mips (dst_type  *dst,                      \
+                                    src_type  *src,                      \
+                                    mask_type *mask,                     \
+                                    int32_t   w);                        \
+                                                                         \
+static void                                                              \
+mips_composite_##name (pixman_implementation_t *imp,                     \
+                       pixman_composite_info_t *info)                    \
+{                                                                        \
+    PIXMAN_COMPOSITE_ARGS (info); /* presumably unpacks info */          \
+    dst_type  *dst_line, *dst;                                           \
+    src_type  *src_line, *src;                                           \
+    mask_type *mask_line, *mask;                                         \
+    int32_t    dst_stride, src_stride, mask_stride;                      \
+                                                                         \
+    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type,         \
+                           dst_stride, dst_line, dst_cnt);               \
+    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type,            \
+                           src_stride, src_line, src_cnt);               \
+    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type,        \
+                           mask_stride, mask_line, mask_cnt);            \
+                                                                         \
+    while (height--) /* one asm call per row */                          \
+    {                                                                    \
+        dst = dst_line;                                                  \
+        dst_line += dst_stride;                                          \
+        mask = mask_line;                                                \
+        mask_line += mask_stride;                                        \
+        src = src_line;                                                  \
+        src_line += src_stride;                                          \
+        pixman_composite_##name##_asm_mips (dst, src, mask, width);      \
+    }                                                                    \
+}
+
 /****************************************************************************/
 
 #define PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST(flags, name, op,            \
-- 
1.7.3



More information about the Pixman mailing list