[Pixman] [PATCH] MIPS: DSPr2: Added mips_dspr2_blt and mips_dspr2_fill routines.

Nemanja Lukic nlukic at mips.com
Wed Feb 29 03:04:33 PST 2012


From: Nemanja Lukic <nemanja.lukic at rt-rk.com>

Performance numbers before/after on MIPS-74kc @ 1GHz

Reference (before):

lowlevel-blt-bench:
              src_n_0565 =  L1: 238.14  L2: 233.15  M: 57.88 ( 77.23%)  HT: 53.22  VT: 49.99  R: 47.73  RT: 24.79 (  91Kops/s)
              src_n_8888 =  L1: 190.19  L2: 187.57  M: 28.94 ( 77.23%)  HT: 27.91  VT: 27.33  R: 26.64  RT: 14.68 (  77Kops/s)
cairo-perf-trace:
[ # ]  backend                         test   min(s) median(s) stddev. count
[ # ]    image: pixman 0.25.1
[  0]    image         gnome-system-monitor  268.460  269.712   0.22%    6/6

Optimized:

lowlevel-blt-bench:
              src_n_0565 =  L1:1081.39  L2: 258.22  M:189.59 (252.91%)  HT: 60.23  VT: 55.01  R: 53.44  RT: 23.68 (  89Kops/s)
              src_n_8888 =  L1: 653.46  L2: 113.55  M:135.26 (360.86%)  HT: 38.99  VT: 37.38  R: 34.95  RT: 18.67 (  84Kops/s)
cairo-perf-trace:
[ # ]  backend                         test   min(s) median(s) stddev. count
[ # ]    image: pixman 0.25.1
[  0]    image         gnome-system-monitor  246.565  246.706   0.04%    6/6
---
 pixman/pixman-mips-dspr2-asm.S |  105 ++++++++++++++++++++++++++
 pixman/pixman-mips-dspr2.c     |  163 ++++++++++++++++++++++++++++++++++++++++
 pixman/pixman-mips-dspr2.h     |    4 +
 3 files changed, 272 insertions(+), 0 deletions(-)

diff --git a/pixman/pixman-mips-dspr2-asm.S b/pixman/pixman-mips-dspr2-asm.S
index 0a4c87e..f1087a7 100644
--- a/pixman/pixman-mips-dspr2-asm.S
+++ b/pixman/pixman-mips-dspr2-asm.S
@@ -31,6 +31,111 @@
 
 #include "pixman-mips-dspr2-asm.h"
 
+LEAF_MIPS_DSPR2(pixman_fill_buff16_mips)
+/*
+ * Fill a buffer with a 16-bit value, 32 bytes at a time where possible.
+ * a0 - *dest (only address bit 1 is tested; presumably 2-byte aligned)
+ * a1 - count (bytes)
+ * a2 - value to fill buffer with
+ */
+    beqz     a1, 3f          /* nothing to do for a zero byte count */
+     andi    t1, a0, 0x0002  /* (delay slot) t1 = bit 1 of dest address */
+    beqz     t1, 0f          /* check if address is 4-byte aligned */
+     nop
+    sh       a2, 0(a0)       /* one halfword brings dest to 4-byte alignment */
+    addiu    a0, a0, 2
+    addiu    a1, a1, -2
+0:
+    srl      t1, a1, 5       /* t1 how many multiples of 32 bytes */
+    replv.ph a2, a2          /* replicate fill value (16bit) in a2 */
+    beqz     t1, 2f          /* fewer than 32 bytes: halfword tail loop only */
+     nop
+1:                           /* main loop: one 32-byte chunk per pass */
+    addiu    t1, t1, -1
+    beqz     t1, 11f         /* final chunk is stored at 11: without pref */
+     addiu   a1, a1, -32     /* (delay slot) runs for every chunk */
+    pref     30, 32(a0)      /* hint 30 on the chunk after this one */
+    sw       a2, 0(a0)
+    sw       a2, 4(a0)
+    sw       a2, 8(a0)
+    sw       a2, 12(a0)
+    sw       a2, 16(a0)
+    sw       a2, 20(a0)
+    sw       a2, 24(a0)
+    sw       a2, 28(a0)
+    b        1b
+     addiu   a0, a0, 32      /* (delay slot) advance dest */
+11:                          /* NOTE(review): pref presumably skipped so no */
+    sw       a2, 0(a0)       /* line past the last full chunk is prepared   */
+    sw       a2, 4(a0)
+    sw       a2, 8(a0)
+    sw       a2, 12(a0)
+    sw       a2, 16(a0)
+    sw       a2, 20(a0)
+    sw       a2, 24(a0)
+    sw       a2, 28(a0)
+    addiu    a0, a0, 32
+2:                           /* tail: remaining bytes, one halfword a pass */
+    blez     a1, 3f          /* done once no bytes remain */
+     addiu   a1, a1, -2      /* (delay slot) also runs on the exit branch */
+    sh       a2, 0(a0)
+    b        2b
+     addiu   a0, a0, 2       /* (delay slot) advance dest */
+3:
+    jr       ra
+     nop
+
+END(pixman_fill_buff16_mips)
+
+LEAF_MIPS32R2(pixman_fill_buff32_mips)
+/*
+ * Fill a buffer with a 32-bit value, 32 bytes at a time where possible.
+ * a0 - *dest (no alignment fix-up is done; presumably 4-byte aligned)
+ * a1 - count (bytes)
+ * a2 - value to fill buffer with
+ */
+    beqz     a1, 3f          /* nothing to do for a zero byte count */
+     nop
+    srl      t1, a1, 5 /* t1 how many multiples of 32 bytes */
+    beqz     t1, 2f          /* fewer than 32 bytes: word tail loop only */
+     nop
+1:                           /* main loop: one 32-byte chunk per pass */
+    addiu    t1, t1, -1
+    beqz     t1, 11f         /* final chunk is stored at 11: without pref */
+     addiu   a1, a1, -32     /* (delay slot) runs for every chunk */
+    pref     30, 32(a0)      /* hint 30 on the chunk after this one */
+    sw       a2, 0(a0)
+    sw       a2, 4(a0)
+    sw       a2, 8(a0)
+    sw       a2, 12(a0)
+    sw       a2, 16(a0)
+    sw       a2, 20(a0)
+    sw       a2, 24(a0)
+    sw       a2, 28(a0)
+    b        1b
+     addiu   a0, a0, 32      /* (delay slot) advance dest */
+11:                          /* NOTE(review): pref presumably skipped so no */
+    sw       a2, 0(a0)       /* line past the last full chunk is prepared   */
+    sw       a2, 4(a0)
+    sw       a2, 8(a0)
+    sw       a2, 12(a0)
+    sw       a2, 16(a0)
+    sw       a2, 20(a0)
+    sw       a2, 24(a0)
+    sw       a2, 28(a0)
+    addiu    a0, a0, 32
+2:                           /* tail: remaining bytes, one word per pass */
+    blez     a1, 3f          /* done once no bytes remain */
+     addiu   a1, a1, -4      /* (delay slot) also runs on the exit branch */
+    sw       a2, 0(a0)
+    b        2b
+     addiu   a0, a0, 4       /* (delay slot) advance dest */
+3:
+    jr       ra
+     nop
+
+END(pixman_fill_buff32_mips)
+
 LEAF_MIPS_DSPR2(pixman_composite_src_8888_0565_asm_mips)
 /*
  * a0 - dst (r5g6b5)
diff --git a/pixman/pixman-mips-dspr2.c b/pixman/pixman-mips-dspr2.c
index e331853..2beada3 100644
--- a/pixman/pixman-mips-dspr2.c
+++ b/pixman/pixman-mips-dspr2.c
@@ -49,6 +49,119 @@ PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (DO_FAST_MEMCPY, src_8888_8888,
 PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (DO_FAST_MEMCPY, src_0888_0888,
                                     uint8_t, 3, uint8_t, 3)
 
+/*
+ * Solid-fill a width x height rectangle at (x, y) of a 16 or 32 bpp image.
+ * Returns FALSE for any other bpp. stride is scaled by sizeof (uint32_t).
+ */
+static pixman_bool_t
+pixman_fill_mips (uint32_t *bits,
+                  int       stride,
+                  int       bpp,
+                  int       x,
+                  int       y,
+                  int       width,
+                  int       height,
+                  uint32_t  _xor)
+{
+    uint8_t *row;
+    uint32_t row_bytes;
+
+    if (bpp == 16)
+    {
+        /* Row pitch in uint16_t units, used to locate pixel (x, y). */
+        int pitch = stride * (int) sizeof (uint32_t) / 2;
+
+        row = (uint8_t *)(((uint16_t *)bits) + pitch * y + x);
+        row_bytes = width * 2;
+        pitch *= 2;    /* now in bytes, for stepping the uint8_t pointer */
+        for (; height != 0; height--, row += pitch)
+            pixman_fill_buff16_mips (row, row_bytes, _xor & 0xffff);
+        return TRUE;
+    }
+
+    if (bpp == 32)
+    {
+        int pitch = stride * (int) sizeof (uint32_t) / 4;
+
+        row = (uint8_t *)(((uint32_t *)bits) + pitch * y + x);
+        row_bytes = width * 4;
+        pitch *= 4;    /* now in bytes */
+        for (; height != 0; height--, row += pitch)
+            pixman_fill_buff32_mips (row, row_bytes, _xor);
+        return TRUE;
+    }
+
+    return FALSE;
+}
+
+/*
+ * Copy a width x height rectangle between two images of the same depth
+ * (16 or 32 bpp), one scanline at a time. Strides are in uint32_t units.
+ * Returns FALSE, before writing anything, for any other combination.
+ */
+static pixman_bool_t
+pixman_blt_mips (uint32_t *src_bits,
+                 uint32_t *dst_bits,
+                 int       src_stride,
+                 int       dst_stride,
+                 int       src_bpp,
+                 int       dst_bpp,
+                 int       src_x,
+                 int       src_y,
+                 int       dest_x,
+                 int       dest_y,
+                 int       width,
+                 int       height)
+{
+    /* Declarations precede all statements so the function is valid C89. */
+    uint8_t *src_bytes;
+    uint8_t *dst_bytes;
+    uint32_t byte_width;
+
+    if (src_bpp != dst_bpp)
+        return FALSE;
+
+    switch (src_bpp)
+    {
+    case 16:
+        src_stride = src_stride * (int) sizeof (uint32_t) / 2;
+        dst_stride = dst_stride * (int) sizeof (uint32_t) / 2;
+        src_bytes = (uint8_t *)(((uint16_t *)src_bits)
+                                + src_stride * (src_y) + (src_x));
+        dst_bytes = (uint8_t *)(((uint16_t *)dst_bits)
+                                + dst_stride * (dest_y) + (dest_x));
+        byte_width = width * 2;
+        src_stride *= 2;    /* pixel units -> bytes for the row step */
+        dst_stride *= 2;
+        while (height--)
+        {
+            pixman_mips_fast_memcpy (dst_bytes, src_bytes, byte_width);
+            src_bytes += src_stride;
+            dst_bytes += dst_stride;
+        }
+        return TRUE;
+    case 32:
+        src_stride = src_stride * (int) sizeof (uint32_t) / 4;
+        dst_stride = dst_stride * (int) sizeof (uint32_t) / 4;
+        src_bytes = (uint8_t *)(((uint32_t *)src_bits)
+                                + src_stride * (src_y) + (src_x));
+        dst_bytes = (uint8_t *)(((uint32_t *)dst_bits)
+                                + dst_stride * (dest_y) + (dest_x));
+        byte_width = width * 4;
+        src_stride *= 4;    /* pixel units -> bytes for the row step */
+        dst_stride *= 4;
+        while (height--)
+        {
+            pixman_mips_fast_memcpy (dst_bytes, src_bytes, byte_width);
+            src_bytes += src_stride;
+            dst_bytes += dst_stride;
+        }
+        return TRUE;
+    default:
+        return FALSE;
+    }
+}
+
 static const pixman_fast_path_t mips_dspr2_fast_paths[] =
 {
     PIXMAN_STD_FAST_PATH (SRC, r5g6b5,   null, r5g6b5,   mips_composite_src_0565_0565),
@@ -74,11 +187,61 @@ static const pixman_fast_path_t mips_dspr2_fast_paths[] =
     { PIXMAN_OP_NONE },
 };
 
+/* blt hook: try the MIPS copy first, otherwise hand off to the delegate. */
+static pixman_bool_t
+mips_dspr2_blt (pixman_implementation_t *imp,
+                uint32_t *               src_bits,
+                uint32_t *               dst_bits,
+                int                      src_stride,
+                int                      dst_stride,
+                int                      src_bpp,
+                int                      dst_bpp,
+                int                      src_x,
+                int                      src_y,
+                int                      dest_x,
+                int                      dest_y,
+                int                      width,
+                int                      height)
+{
+    if (pixman_blt_mips (
+            src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp,
+            src_x, src_y, dest_x, dest_y, width, height))
+    {
+        return TRUE;
+    }
+
+    return _pixman_implementation_blt (
+        imp->delegate,
+        src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp,
+        src_x, src_y, dest_x, dest_y, width, height);
+}
+
+/* fill hook: use the MIPS fill when it handles bpp, otherwise delegate. */
+static pixman_bool_t
+mips_dspr2_fill (pixman_implementation_t *imp,
+                 uint32_t *               bits,
+                 int                      stride,
+                 int                      bpp,
+                 int                      x,
+                 int                      y,
+                 int                      width,
+                 int                      height,
+                 uint32_t                 xor)
+{
+    return pixman_fill_mips (bits, stride, bpp, x, y, width, height, xor)
+        ? TRUE
+        : _pixman_implementation_fill (
+            imp->delegate, bits, stride, bpp, x, y, width, height, xor);
+}
+
 pixman_implementation_t *
 _pixman_implementation_create_mips_dspr2 (pixman_implementation_t *fallback)
 {
     pixman_implementation_t *imp =
         _pixman_implementation_create (fallback, mips_dspr2_fast_paths);
 
+    imp->blt = mips_dspr2_blt;      /* route blt requests through this file */
+    imp->fill = mips_dspr2_fill;    /* route fill requests through this file */
+
     return imp;
 }
diff --git a/pixman/pixman-mips-dspr2.h b/pixman/pixman-mips-dspr2.h
index 449c42a..a40e7c8 100644
--- a/pixman/pixman-mips-dspr2.h
+++ b/pixman/pixman-mips-dspr2.h
@@ -41,6 +41,10 @@
 
 void
 pixman_mips_fast_memcpy (void *dst, void *src, uint32_t n_bytes);
+void
+pixman_fill_buff16_mips (void *dst, uint32_t n_bytes, uint16_t value);
+void
+pixman_fill_buff32_mips (void *dst, uint32_t n_bytes, uint32_t value);
 
 /****************************************************************/
 
-- 
1.7.3



More information about the Pixman mailing list