[Pixman] [PATCH 11/13] MIPS: mips32r2: Added optimization for function pixman_fill_buff16

Nemanja Lukic nemanja.lukic at rt-rk.com
Fri Jun 27 09:05:48 PDT 2014


Performance numbers before/after on MIPS-24kc @ 500 MHz

Reference (before):

    src_n_0565    =  L1: 117.24  L2: 110.68  M:115.83 ( 96.31%)  HT: 78.96  VT: 75.03  R: 65.98  RT: 24.94 ( 164Kops/s)

Optimized (with these optimizations):

    src_n_0565    =  L1: 429.43  L2: 299.39  M:346.21 (287.61%)  HT: 90.68  VT: 80.23  R: 70.99  RT: 23.13 ( 156Kops/s)
---
 pixman/pixman-mips-common.h  |    2 +
 pixman/pixman-mips32r2-asm.S |   55 ++++++++++++++++++++++++++++++++++++++++++
 pixman/pixman-mips32r2.c     |   19 +++++---------
 3 files changed, 64 insertions(+), 12 deletions(-)

diff --git a/pixman/pixman-mips-common.h b/pixman/pixman-mips-common.h
index 70af1f7..05ff7ad 100644
--- a/pixman/pixman-mips-common.h
+++ b/pixman/pixman-mips-common.h
@@ -43,6 +43,8 @@ void
 pixman_fast_memcpy_mips32r2 (void *dst, void *src, uint32_t n_bytes);
 void
 pixman_fill_buff32_mips32r2 (void *dst, uint32_t n_bytes, uint32_t value);
+void
+pixman_fill_buff16_mips32r2 (void *dst, uint32_t n_bytes, uint16_t value);
 
 #ifdef USE_MIPS_DSPR1
 void
diff --git a/pixman/pixman-mips32r2-asm.S b/pixman/pixman-mips32r2-asm.S
index 3f73e41..75ff9e2 100644
--- a/pixman/pixman-mips32r2-asm.S
+++ b/pixman/pixman-mips32r2-asm.S
@@ -381,6 +381,61 @@ $ua_smallCopy_loop:
 
 END_MIPS32R2(pixman_fast_memcpy)
 
+LEAF_MIPS32R2(pixman_fill_buff16)
+/*
+ * a0 - *dest (only bit 1 of the address is tested for alignment below)
+ * a1 - count (bytes); consumed in steps of 2 (halfword) and 32 (block)
+ * a2 - value to fill buffer with (low 16 bits; copied into the high half below)
+ */
+
+    beqz     a1, 3f          /* zero byte count: return without storing */
+     andi    t1, a0, 0x0002  /* (delay slot) t1 = bit 1 of the dest address */
+    beqz     t1, 0f          /* check if address is 4-byte aligned */
+     nop
+    sh       a2, 0(a0)       /* store one leading halfword so dest becomes 4-byte aligned */
+    addiu    a0, a0, 2       /* advance dest past that halfword */
+    addiu    a1, a1, -2      /* two bytes fewer remain */
+0:
+    srl      t1, a1, 5       /* t1 = number of whole 32-byte blocks left to fill */
+    beqz     t1, 2f          /* no whole block: go straight to the halfword tail */
+     ins     a2, a2, 16, 16  /* (delay slot) duplicate low 16 bits into bits 16..31 so each sw writes the value twice */
+1:                           /* block loop: every block except the last one */
+    addiu    t1, t1, -1      /* one block fewer to go */
+    beqz     t1, 11f         /* last block is stored at 11: without the prefetch */
+     addiu   a1, a1, -32     /* (delay slot) account for the 32 bytes stored on either path */
+    pref     30, 32(a0)      /* hint 30 (PrepareForStore in the MIPS32 ISA) on the following block */
+    sw       a2, 0(a0)       /* eight word stores fill 32 bytes */
+    sw       a2, 4(a0)
+    sw       a2, 8(a0)
+    sw       a2, 12(a0)
+    sw       a2, 16(a0)
+    sw       a2, 20(a0)
+    sw       a2, 24(a0)
+    sw       a2, 28(a0)
+    b        1b              /* next block */
+     addiu   a0, a0, 32      /* (delay slot) advance dest by one block */
+11:                          /* final whole block: same stores, no pref past it */
+    sw       a2, 0(a0)
+    sw       a2, 4(a0)
+    sw       a2, 8(a0)
+    sw       a2, 12(a0)
+    sw       a2, 16(a0)
+    sw       a2, 20(a0)
+    sw       a2, 24(a0)
+    sw       a2, 28(a0)
+    addiu    a0, a0, 32      /* advance dest past the final block */
+2:                           /* tail loop: remaining bytes, one halfword at a time */
+    blez     a1, 3f          /* done once the remaining count is <= 0 (signed compare) */
+     addiu   a1, a1, -2      /* (delay slot) runs on the exit path too; a1 is not used after 3: */
+    sh       a2, 0(a0)       /* store the low 16 bits of a2 */
+    b        2b
+     addiu   a0, a0, 2       /* (delay slot) advance dest by one halfword */
+3:
+    jr       ra              /* return to caller */
+     nop
+
+END_MIPS32R2(pixman_fill_buff16)
+
 LEAF_MIPS32R2(pixman_fill_buff32)
 /*
  * a0 - *dest
diff --git a/pixman/pixman-mips32r2.c b/pixman/pixman-mips32r2.c
index 99ceb85..18fc786 100644
--- a/pixman/pixman-mips32r2.c
+++ b/pixman/pixman-mips32r2.c
@@ -58,25 +58,20 @@ mips32r2_fill (pixman_implementation_t *imp,
 {
     uint8_t *byte_line;
     uint32_t byte_width;
-    int i, short_stride;
-    uint16_t *dst;
-    uint16_t v;
 
     switch (bpp)
     {
     case 16:
-        short_stride = (stride * (int)sizeof (uint32_t)) / (int)sizeof (uint16_t);
-        dst = (uint16_t *)bits;
-        v = _xor & 0xffff;
-
-        dst = dst + y * short_stride + x;
+        stride = stride * (int) sizeof (uint32_t) / 2;
+        byte_line = (uint8_t *)(((uint16_t *)bits) + stride * y + x);
+        byte_width = width * 2;
+        stride *= 2;
 
         while (height--)
         {
-            for (i = 0; i < width; ++i)
-                dst[i] = v;
-
-            dst += short_stride;
+            uint8_t *dst = byte_line;
+            byte_line += stride;
+            pixman_fill_buff16_mips32r2 (dst, byte_width, _xor & 0xffff);
         }
         return TRUE;
     case 32:
-- 
1.7.3



More information about the Pixman mailing list