[Pixman] [PATCH 13/13] MIPS: enable prefetch for store only for CPU with 32 byte cache line

Nemanja Lukic nemanja.lukic at rt-rk.com
Fri Jun 27 09:05:50 PDT 2014


The PrepareForStore prefetch is destructive and affects the whole
cache line. Running code that assumes a 32-byte cache line size on
a system with 64-byte cache lines may cause data corruption.
Added a mechanism to allow prefetch only if the cache line size is 32 bytes.
Added no_pref versions of the functions that use prefetch.
---
 pixman/pixman-mips-common.h    |   29 +++++++++--
 pixman/pixman-mips-dspr1-asm.S |   59 +++++++++++++++++++++-
 pixman/pixman-mips-dspr1.c     |    9 +++
 pixman/pixman-mips.c           |   30 ++++++++++-
 pixman/pixman-mips32r2-asm.S   |  110 ++++++++++++++++++++++++++++++++++++++--
 pixman/pixman-mips32r2.c       |   24 +++++++--
 6 files changed, 243 insertions(+), 18 deletions(-)

diff --git a/pixman/pixman-mips-common.h b/pixman/pixman-mips-common.h
index 05ff7ad..e9a180f 100644
--- a/pixman/pixman-mips-common.h
+++ b/pixman/pixman-mips-common.h
@@ -39,16 +39,35 @@
 #define SKIP_ZERO_MASK 2
 #define DO_FAST_MEMCPY 3
 
-void
+#if defined(USE_MIPS_DSPR2) || defined(USE_MIPS_DSPR1) || \
+    defined(USE_MIPS32R2)
+extern int allow_prefetch;
+#endif
+
+void*
 pixman_fast_memcpy_mips32r2 (void *dst, void *src, uint32_t n_bytes);
+
+void
+(*pixman_fill_buff32_mips32r2) (void *dst, uint32_t n_bytes, uint32_t value);
 void
-pixman_fill_buff32_mips32r2 (void *dst, uint32_t n_bytes, uint32_t value);
+pixman_fill_buff32_pref_mips32r2 (void *dst, uint32_t n_bytes, uint32_t value);
 void
-pixman_fill_buff16_mips32r2 (void *dst, uint32_t n_bytes, uint16_t value);
+pixman_fill_buff32_no_pref_mips32r2 (void *dst, uint32_t n_bytes, uint32_t value);
+
+void
+(*pixman_fill_buff16_mips32r2) (void *dst, uint32_t n_bytes, uint16_t value);
+void
+pixman_fill_buff16_pref_mips32r2 (void *dst, uint32_t n_bytes, uint16_t value);
+void
+pixman_fill_buff16_no_pref_mips32r2 (void *dst, uint32_t n_bytes, uint16_t value);
 
 #ifdef USE_MIPS_DSPR1
 void
-pixman_fill_buff16_mips_dspr1 (void *dst, uint32_t n_bytes, uint16_t value);
+(*pixman_fill_buff16_mips_dspr1) (void *dst, uint32_t n_bytes, uint16_t value);
+void
+pixman_fill_buff16_pref_mips_dspr1 (void *dst, uint32_t n_bytes, uint16_t value);
+void
+pixman_fill_buff16_no_pref_mips_dspr1 (void *dst, uint32_t n_bytes, uint16_t value);
 #endif
 
 /****************************************************************/
@@ -85,7 +104,7 @@ mips_composite_##name (pixman_implementation_t *imp,             \
       src_line += src_stride;                                    \
                                                                  \
       if (flags == DO_FAST_MEMCPY)                               \
-        pixman_fast_memcpy_mips32r2 (dst, src, width * bpp);     \
+        pixman_memcpy (dst, src, width * bpp);                   \
       else                                                       \
         pixman_composite_##name##_asm##suffix (dst, src, width); \
     }                                                            \
diff --git a/pixman/pixman-mips-dspr1-asm.S b/pixman/pixman-mips-dspr1-asm.S
index a4b9ebc..91fae9a 100644
--- a/pixman/pixman-mips-dspr1-asm.S
+++ b/pixman/pixman-mips-dspr1-asm.S
@@ -32,7 +32,62 @@
 #include "pixman-private.h"
 #include "pixman-mips-dspr1-asm.h"
 
-LEAF_MIPS_DSPR1(pixman_fill_buff16)
+LEAF_MIPS_DSPR1(pixman_fill_buff16_no_pref)
+/*
+ * a0 - *dest
+ * a1 - count (bytes)
+ * a2 - value to fill buffer with
+ */
+
+    beqz     a1, 3f
+     andi    t1, a0, 0x0002
+    beqz     t1, 0f          /* check if address is 4-byte aligned */
+     nop
+    sh       a2, 0(a0)
+    addiu    a0, a0, 2
+    addiu    a1, a1, -2
+0:
+    srl      t1, a1, 5       /* t1 how many multiples of 32 bytes */
+    replv.ph a2, a2          /* replicate fill value (16bit) in a2 */
+    beqz     t1, 2f
+     nop
+1:
+    addiu    t1, t1, -1
+    beqz     t1, 11f
+     addiu   a1, a1, -32
+    sw       a2, 0(a0)
+    sw       a2, 4(a0)
+    sw       a2, 8(a0)
+    sw       a2, 12(a0)
+    sw       a2, 16(a0)
+    sw       a2, 20(a0)
+    sw       a2, 24(a0)
+    sw       a2, 28(a0)
+    b        1b
+     addiu   a0, a0, 32
+11:
+    sw       a2, 0(a0)
+    sw       a2, 4(a0)
+    sw       a2, 8(a0)
+    sw       a2, 12(a0)
+    sw       a2, 16(a0)
+    sw       a2, 20(a0)
+    sw       a2, 24(a0)
+    sw       a2, 28(a0)
+    addiu    a0, a0, 32
+2:
+    blez     a1, 3f
+     addiu   a1, a1, -2
+    sh       a2, 0(a0)
+    b        2b
+     addiu   a0, a0, 2
+3:
+    jr       ra
+     nop
+
+END_MIPS_DSPR1(pixman_fill_buff16_no_pref)
+
+LEAF_MIPS_DSPR1(pixman_fill_buff16_pref)
 /*
  * a0 - *dest
  * a1 - count (bytes)
@@ -86,7 +141,7 @@ LEAF_MIPS_DSPR1(pixman_fill_buff16)
     jr       ra
      nop
 
-END_MIPS_DSPR1(pixman_fill_buff16)
+END_MIPS_DSPR1(pixman_fill_buff16_pref)
 
 LEAF_MIPS_DSPR1(pixman_composite_add_8888_8888_asm)
 /*
diff --git a/pixman/pixman-mips-dspr1.c b/pixman/pixman-mips-dspr1.c
index 2db4cb1..5e26caa 100644
--- a/pixman/pixman-mips-dspr1.c
+++ b/pixman/pixman-mips-dspr1.c
@@ -85,6 +85,15 @@ _pixman_implementation_create_mips_dspr1 (pixman_implementation_t *fallback)
     pixman_implementation_t *imp =
         _pixman_implementation_create (fallback, mips_dspr1_fast_paths);
 
+    if (allow_prefetch)
+    {
+        pixman_fill_buff16_mips_dspr1 = pixman_fill_buff16_pref_mips_dspr1;
+    }
+    else
+    {
+        pixman_fill_buff16_mips_dspr1 = pixman_fill_buff16_no_pref_mips_dspr1;
+    }
+
     imp->fill = mips_dspr1_fill;
 
     return imp;
diff --git a/pixman/pixman-mips.c b/pixman/pixman-mips.c
index 4c31a80..d69ad2e 100644
--- a/pixman/pixman-mips.c
+++ b/pixman/pixman-mips.c
@@ -24,9 +24,15 @@
 #endif
 
 #include "pixman-private.h"
+#include "pixman-mips-common.h"
 #include <string.h>
 #include <stdlib.h>
 
+#if defined(USE_MIPS_DSPR2) || defined(USE_MIPS_DSPR1) || \
+    defined(USE_MIPS32R2)
+int allow_prefetch;
+#endif
+
 #ifdef USE_MIPS_DSPR2
 static const char *mips_dspr2_cores[] =
 {
@@ -146,16 +152,36 @@ _pixman_mips_get_implementations (pixman_implementation_t *imp)
     if (sizeof (uintptr_t) != 4)
         return imp;
 
+#if defined(USE_MIPS_DSPR2) || defined(USE_MIPS_DSPR1) || \
+    defined(USE_MIPS32R2)
+    allow_prefetch = 0;
+#endif
+
 #ifdef USE_MIPS32R2
     if (!_pixman_disabled ("mips32r2"))
     {
         int already_compiling_everything_for_mips32r2 = 0;
+        pixman_bool_t temp = FALSE;
 #if defined(__mips__) && (__mips_isa_rev >= 2)
         already_compiling_everything_for_mips32r2 = 1;
 #endif
-        if (already_compiling_everything_for_mips32r2 ||
-            have_feature (mips32r2_cores, "mips32r2")
+        temp = have_feature (mips32r2_cores, "mips32r2");
+        if (already_compiling_everything_for_mips32r2 || temp)
         {
+            if (temp)
+            {
+                int cache_line_size;
+
+                __asm__ volatile (
+                    ".set   arch=mips32r2 \n\t"
+                    "rdhwr  %0, $1        \n\t"
+                    : "=r" (cache_line_size)
+                    :
+                );
+
+                if (cache_line_size == 32)
+                    allow_prefetch = 1;
+            }
             imp = _pixman_implementation_create_mips32r2 (imp);
         }
     }
diff --git a/pixman/pixman-mips32r2-asm.S b/pixman/pixman-mips32r2-asm.S
index 75ff9e2..38048a0 100644
--- a/pixman/pixman-mips32r2-asm.S
+++ b/pixman/pixman-mips32r2-asm.S
@@ -381,7 +381,61 @@ $ua_smallCopy_loop:
 
 END_MIPS32R2(pixman_fast_memcpy)
 
-LEAF_MIPS32R2(pixman_fill_buff16)
+LEAF_MIPS32R2(pixman_fill_buff16_no_pref)
+/*
+ * a0 - *dest
+ * a1 - count (bytes)
+ * a2 - value to fill buffer with
+ */
+
+    beqz     a1, 3f
+     andi    t1, a0, 0x0002
+    beqz     t1, 0f          /* check if address is 4-byte aligned */
+     nop
+    sh       a2, 0(a0)
+    addiu    a0, a0, 2
+    addiu    a1, a1, -2
+0:
+    srl      t1, a1, 5       /* t1 how many multiples of 32 bytes */
+    beqz     t1, 2f
+     ins     a2, a2, 16, 16
+1:
+    addiu    t1, t1, -1
+    beqz     t1, 11f
+     addiu   a1, a1, -32
+    sw       a2, 0(a0)
+    sw       a2, 4(a0)
+    sw       a2, 8(a0)
+    sw       a2, 12(a0)
+    sw       a2, 16(a0)
+    sw       a2, 20(a0)
+    sw       a2, 24(a0)
+    sw       a2, 28(a0)
+    b        1b
+     addiu   a0, a0, 32
+11:
+    sw       a2, 0(a0)
+    sw       a2, 4(a0)
+    sw       a2, 8(a0)
+    sw       a2, 12(a0)
+    sw       a2, 16(a0)
+    sw       a2, 20(a0)
+    sw       a2, 24(a0)
+    sw       a2, 28(a0)
+    addiu    a0, a0, 32
+2:
+    blez     a1, 3f
+     addiu   a1, a1, -2
+    sh       a2, 0(a0)
+    b        2b
+     addiu   a0, a0, 2
+3:
+    jr       ra
+     nop
+
+END_MIPS32R2(pixman_fill_buff16_no_pref)
+
+LEAF_MIPS32R2(pixman_fill_buff16_pref)
 /*
  * a0 - *dest
  * a1 - count (bytes)
@@ -434,9 +488,57 @@ LEAF_MIPS32R2(pixman_fill_buff16)
     jr       ra
      nop
 
-END_MIPS32R2(pixman_fill_buff16)
+END_MIPS32R2(pixman_fill_buff16_pref)
+
+LEAF_MIPS32R2(pixman_fill_buff32_no_pref)
+/*
+ * a0 - *dest
+ * a1 - count (bytes)
+ * a2 - value to fill buffer with
+ */
+
+    beqz     a1, 3f
+     nop
+    srl      t1, a1, 5 /* t1 how many multiples of 32 bytes */
+    beqz     t1, 2f
+     nop
+1:
+    addiu    t1, t1, -1
+    beqz     t1, 11f
+     addiu   a1, a1, -32
+    sw       a2, 0(a0)
+    sw       a2, 4(a0)
+    sw       a2, 8(a0)
+    sw       a2, 12(a0)
+    sw       a2, 16(a0)
+    sw       a2, 20(a0)
+    sw       a2, 24(a0)
+    sw       a2, 28(a0)
+    b        1b
+     addiu   a0, a0, 32
+11:
+    sw       a2, 0(a0)
+    sw       a2, 4(a0)
+    sw       a2, 8(a0)
+    sw       a2, 12(a0)
+    sw       a2, 16(a0)
+    sw       a2, 20(a0)
+    sw       a2, 24(a0)
+    sw       a2, 28(a0)
+    addiu    a0, a0, 32
+2:
+    blez     a1, 3f
+     addiu   a1, a1, -4
+    sw       a2, 0(a0)
+    b        2b
+     addiu   a0, a0, 4
+3:
+    jr       ra
+     nop
+
+END_MIPS32R2(pixman_fill_buff32_no_pref)
 
-LEAF_MIPS32R2(pixman_fill_buff32)
+LEAF_MIPS32R2(pixman_fill_buff32_pref)
 /*
  * a0 - *dest
  * a1 - count (bytes)
@@ -483,7 +585,7 @@ LEAF_MIPS32R2(pixman_fill_buff32)
     jr       ra
      nop
 
-END_MIPS32R2(pixman_fill_buff32)
+END_MIPS32R2(pixman_fill_buff32_pref)
 
 LEAF_MIPS32R2(pixman_composite_src_x888_8888_asm)
 /*
diff --git a/pixman/pixman-mips32r2.c b/pixman/pixman-mips32r2.c
index 18fc786..0786436 100644
--- a/pixman/pixman-mips32r2.c
+++ b/pixman/pixman-mips32r2.c
@@ -153,6 +153,10 @@ mips32r2_blt (pixman_implementation_t *imp,
 
 static const pixman_fast_path_t mips32r2_fast_paths[] =
 {
+    /* prefetch-only entries: registered only when allow_prefetch is set */
+    PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, mips_composite_src_x888_8888),
+    PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, mips_composite_src_x888_8888),
+    /* entries registered whether or not prefetch is allowed */
     PIXMAN_STD_FAST_PATH (SRC, r5g6b5,   null, r5g6b5,   mips_composite_src_0565_0565),
     PIXMAN_STD_FAST_PATH (SRC, b5g6r5,   null, b5g6r5,   mips_composite_src_0565_0565),
     PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, mips_composite_src_8888_8888),
@@ -162,18 +166,28 @@ static const pixman_fast_path_t mips32r2_fast_paths[] =
     PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, mips_composite_src_8888_8888),
     PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, mips_composite_src_8888_8888),
     PIXMAN_STD_FAST_PATH (SRC, r8g8b8,   null, r8g8b8,   mips_composite_src_0888_0888),
-    PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, mips_composite_src_x888_8888),
-    PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, mips_composite_src_x888_8888),
     { PIXMAN_OP_NONE },
 };
 
 pixman_implementation_t *
 _pixman_implementation_create_mips32r2 (pixman_implementation_t *fallback)
 {
-    pixman_implementation_t *imp =
-        _pixman_implementation_create (fallback, mips32r2_fast_paths);
+    pixman_implementation_t *imp;
+
+    if (allow_prefetch)
+    {
+        imp = _pixman_implementation_create (fallback, &mips32r2_fast_paths[0]);
+        pixman_fill_buff16_mips32r2 = pixman_fill_buff16_pref_mips32r2;
+        pixman_fill_buff32_mips32r2 = pixman_fill_buff32_pref_mips32r2;
+        imp->memcpy = (void *)pixman_fast_memcpy_mips32r2;
+    }
+    else
+    {
+        imp = _pixman_implementation_create (fallback, &mips32r2_fast_paths[2]);
+        pixman_fill_buff16_mips32r2 = pixman_fill_buff16_no_pref_mips32r2;
+        pixman_fill_buff32_mips32r2 = pixman_fill_buff32_no_pref_mips32r2;
+    }
 
-    imp->memcpy = pixman_fast_memcpy_mips32r2;
     imp->blt = mips32r2_blt;
     imp->fill = mips32r2_fill;
 
-- 
1.7.3



More information about the Pixman mailing list