[Pixman] [PATCH 12/12] MIPS: enable prefetch for store only for CPU with 32 byte cache line
Nemanja Lukic
nemanja.lukic at rt-rk.com
Sun Sep 8 15:52:51 PDT 2013
---
pixman/pixman-mips-common.h | 31 +++++++++--
pixman/pixman-mips-dspr1-asm.S | 59 +++++++++++++++++++++-
pixman/pixman-mips-dspr1.c | 15 ++++--
pixman/pixman-mips-dspr2.c | 6 +--
pixman/pixman-mips.c | 31 +++++++++++-
pixman/pixman-mips32r2-asm.S | 110 ++++++++++++++++++++++++++++++++++++++--
pixman/pixman-mips32r2.c | 25 +++++++--
7 files changed, 249 insertions(+), 28 deletions(-)
diff --git a/pixman/pixman-mips-common.h b/pixman/pixman-mips-common.h
index 05ff7ad..a141226 100644
--- a/pixman/pixman-mips-common.h
+++ b/pixman/pixman-mips-common.h
@@ -39,16 +39,37 @@
#define SKIP_ZERO_MASK 2
#define DO_FAST_MEMCPY 3
-void
+#if defined(USE_MIPS_DSPR2) || defined(USE_MIPS_DSPR1) || \
+ defined(USE_MIPS32R2)
+extern int allow_prefetch; /* non-zero selects the *_pref_* fill variants */
+#endif
+/* NOTE(review): the (*name) declarations below are not extern, so each file including this header gets a tentative definition -- confirm this links under -fno-common. */
+void*
+(*pixman_fast_memcpy_mips) (void *dst, void *src, uint32_t n_bytes);
+void*
pixman_fast_memcpy_mips32r2 (void *dst, void *src, uint32_t n_bytes);
+/* Dispatch pointer for the 32-bit fill, followed by its prefetch and no-prefetch implementations. */
+void
+(*pixman_fill_buff32_mips32r2) (void *dst, uint32_t n_bytes, uint32_t value);
void
-pixman_fill_buff32_mips32r2 (void *dst, uint32_t n_bytes, uint32_t value);
+pixman_fill_buff32_pref_mips32r2 (void *dst, uint32_t n_bytes, uint32_t value);
void
-pixman_fill_buff16_mips32r2 (void *dst, uint32_t n_bytes, uint16_t value);
+pixman_fill_buff32_no_pref_mips32r2 (void *dst, uint32_t n_bytes, uint32_t value);
+/* Dispatch pointer for the 16-bit fill, followed by its prefetch and no-prefetch implementations. */
+void
+(*pixman_fill_buff16_mips32r2) (void *dst, uint32_t n_bytes, uint16_t value);
+void
+pixman_fill_buff16_pref_mips32r2 (void *dst, uint32_t n_bytes, uint16_t value);
+void
+pixman_fill_buff16_no_pref_mips32r2 (void *dst, uint32_t n_bytes, uint16_t value);
#ifdef USE_MIPS_DSPR1
void
-pixman_fill_buff16_mips_dspr1 (void *dst, uint32_t n_bytes, uint16_t value);
+(*pixman_fill_buff16_mips_dspr1) (void *dst, uint32_t n_bytes, uint16_t value);
+void
+pixman_fill_buff16_pref_mips_dspr1 (void *dst, uint32_t n_bytes, uint16_t value);
+void
+pixman_fill_buff16_no_pref_mips_dspr1 (void *dst, uint32_t n_bytes, uint16_t value);
#endif
/****************************************************************/
@@ -85,7 +106,7 @@ mips_composite_##name (pixman_implementation_t *imp, \
src_line += src_stride; \
\
if (flags == DO_FAST_MEMCPY) \
- pixman_fast_memcpy_mips32r2 (dst, src, width * bpp); \
+ pixman_fast_memcpy_mips (dst, src, width * bpp); \
else \
pixman_composite_##name##_asm##suffix (dst, src, width); \
} \
diff --git a/pixman/pixman-mips-dspr1-asm.S b/pixman/pixman-mips-dspr1-asm.S
index a4b9ebc..91fae9a 100644
--- a/pixman/pixman-mips-dspr1-asm.S
+++ b/pixman/pixman-mips-dspr1-asm.S
@@ -32,7 +32,62 @@
#include "pixman-private.h"
#include "pixman-mips-dspr1-asm.h"
-LEAF_MIPS_DSPR1(pixman_fill_buff16)
+LEAF_MIPS_DSPR1(pixman_fill_buff16_no_pref)
+/*
+ * Fill a buffer with a 16-bit value using plain stores only (no prefetch).
+ * a0 - *dest, a1 - count (bytes), a2 - value to fill buffer with.
+ * The instruction after each branch is presumably its delay slot (.set noreorder assumed).
+ */
+
+ beqz a1, 3f /* zero count: nothing to store */
+ andi t1, a0, 0x0002 /* t1 = bit 1 of the destination address */
+ beqz t1, 0f /* check if address is 4-byte aligned */
+ nop
+ sh a2, 0(a0) /* one halfword store to reach 4-byte alignment (dest assumed 2-byte aligned) */
+ addiu a0, a0, 2
+ addiu a1, a1, -2
+0:
+ srl t1, a1, 5 /* t1 how many multiples of 32 bytes */
+ replv.ph a2, a2 /* replicate fill value (16bit) in a2 */
+ beqz t1, 2f /* fewer than 32 bytes left: go straight to the tail loop */
+ nop
+1:
+ addiu t1, t1, -1
+ beqz t1, 11f /* the last 32-byte block is stored at label 11 */
+ addiu a1, a1, -32
+ sw a2, 0(a0) /* eight word stores = 32 bytes */
+ sw a2, 4(a0)
+ sw a2, 8(a0)
+ sw a2, 12(a0)
+ sw a2, 16(a0)
+ sw a2, 20(a0)
+ sw a2, 24(a0)
+ sw a2, 28(a0)
+ b 1b
+ addiu a0, a0, 32
+11: /* final 32-byte block; the stores are the same as in the loop body */
+ sw a2, 0(a0)
+ sw a2, 4(a0)
+ sw a2, 8(a0)
+ sw a2, 12(a0)
+ sw a2, 16(a0)
+ sw a2, 20(a0)
+ sw a2, 24(a0)
+ sw a2, 28(a0)
+ addiu a0, a0, 32
+2: /* tail: one halfword per pass while the remaining count is > 0 */
+ blez a1, 3f
+ addiu a1, a1, -2
+ sh a2, 0(a0)
+ b 2b
+ addiu a0, a0, 2
+3:
+ jr ra
+ nop
+
+END_MIPS_DSPR1(pixman_fill_buff16_no_pref)
+
+LEAF_MIPS_DSPR1(pixman_fill_buff16_pref)
/*
* a0 - *dest
* a1 - count (bytes)
@@ -86,7 +141,7 @@ LEAF_MIPS_DSPR1(pixman_fill_buff16)
jr ra
nop
-END_MIPS_DSPR1(pixman_fill_buff16)
+END_MIPS_DSPR1(pixman_fill_buff16_pref)
LEAF_MIPS_DSPR1(pixman_composite_add_8888_8888_asm)
/*
diff --git a/pixman/pixman-mips-dspr1.c b/pixman/pixman-mips-dspr1.c
index 638d993..8b04fe6 100644
--- a/pixman/pixman-mips-dspr1.c
+++ b/pixman/pixman-mips-dspr1.c
@@ -152,11 +152,7 @@ mips_dspr1_blt (pixman_implementation_t *imp,
uint8_t *dst = dst_bytes;
src_bytes += src_stride;
dst_bytes += dst_stride;
-#ifdef USE_MIPS32R2
- pixman_fast_memcpy_mips32r2 (dst, src, byte_width);
-#else
- memcpy (dst, src, byte_width);
-#endif
+ pixman_fast_memcpy_mips (dst, src, byte_width);
}
return TRUE;
@@ -175,6 +171,15 @@ _pixman_implementation_create_mips_dspr1 (pixman_implementation_t *fallback)
pixman_implementation_t *imp =
_pixman_implementation_create (fallback, mips_dspr1_fast_paths);
+ if (allow_prefetch)
+ {
+ pixman_fill_buff16_mips_dspr1 = pixman_fill_buff16_pref_mips_dspr1;
+ }
+ else
+ {
+ pixman_fill_buff16_mips_dspr1 = pixman_fill_buff16_no_pref_mips_dspr1;
+ }
+
imp->blt = mips_dspr1_blt;
imp->fill = mips_dspr1_fill;
diff --git a/pixman/pixman-mips-dspr2.c b/pixman/pixman-mips-dspr2.c
index a9773b7..a1551ca 100644
--- a/pixman/pixman-mips-dspr2.c
+++ b/pixman/pixman-mips-dspr2.c
@@ -292,11 +292,7 @@ mips_dspr2_blt (pixman_implementation_t *imp,
uint8_t *dst = dst_bytes;
src_bytes += src_stride;
dst_bytes += dst_stride;
-#ifdef USE_MIPS32R2
- pixman_fast_memcpy_mips32r2 (dst, src, byte_width);
-#else
- memcpy (dst, src, byte_width);
-#endif
+ pixman_fast_memcpy_mips (dst, src, byte_width);
}
return TRUE;
diff --git a/pixman/pixman-mips.c b/pixman/pixman-mips.c
index 53c1023..7557ce3 100644
--- a/pixman/pixman-mips.c
+++ b/pixman/pixman-mips.c
@@ -24,10 +24,16 @@
#endif
#include "pixman-private.h"
+#include "pixman-mips-common.h"
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
+#if defined(USE_MIPS_DSPR2) || defined(USE_MIPS_DSPR1) || \
+ defined(USE_MIPS32R2)
+int allow_prefetch;
+#endif
+
#ifdef USE_MIPS_DSPR2
static const char *mips_dspr2_cores[] = {"MIPS 74K", NULL};
#endif
@@ -137,16 +143,37 @@ _pixman_mips_get_implementations (pixman_implementation_t *imp)
if (long_bit != 32)
return imp;
+#if defined(USE_MIPS_DSPR2) || defined(USE_MIPS_DSPR1) || \
+ defined(USE_MIPS32R2)
+ allow_prefetch = 0;
+ pixman_fast_memcpy_mips = (void*)memcpy;
+#endif
+
#ifdef USE_MIPS32R2
if (!_pixman_disabled ("mips32r2"))
{
int already_compiling_everything_for_mips32r2 = 0;
+ pixman_bool_t temp = FALSE;
#if defined(__mips__) && (__mips_isa_rev >= 2)
already_compiling_everything_for_mips32r2 = 1;
#endif
- if (already_compiling_everything_for_mips32r2 ||
- have_feature (mips32r2_cores, " mips32r2"))
+ temp = have_feature (mips32r2_cores, " mips32r2");
+ if (already_compiling_everything_for_mips32r2 || temp)
{
+ if (temp)
+ {
+ int cache_line_size; /* receives hardware register $1; NOTE(review): the MIPS32 ISA names it SYNCI_Step -- confirm it equals the D-cache line size */
+ __asm__ volatile (
+ ".set push \n\t" /* save assembler options so the arch override below stays local */
+ ".set arch=mips32r2 \n\t"
+ "rdhwr %0, $1 \n\t"
+ ".set pop \n\t" /* restore the options the compiler was using */
+ : "=r" (cache_line_size)
+ );
+
+ if (cache_line_size == 32) /* prefetch variants are enabled only for this value */
+ allow_prefetch = 1;
+ }
imp = _pixman_implementation_create_mips32r2 (imp);
}
}
diff --git a/pixman/pixman-mips32r2-asm.S b/pixman/pixman-mips32r2-asm.S
index 75ff9e2..38048a0 100644
--- a/pixman/pixman-mips32r2-asm.S
+++ b/pixman/pixman-mips32r2-asm.S
@@ -381,7 +381,61 @@ $ua_smallCopy_loop:
END_MIPS32R2(pixman_fast_memcpy)
-LEAF_MIPS32R2(pixman_fill_buff16)
+LEAF_MIPS32R2(pixman_fill_buff16_no_pref)
+/*
+ * Fill a buffer with a 16-bit value using plain stores only (no prefetch).
+ * a0 - *dest, a1 - count (bytes), a2 - value to fill buffer with.
+ * The instruction after each branch is presumably its delay slot (.set noreorder assumed).
+ */
+
+ beqz a1, 3f /* zero count: nothing to store */
+ andi t1, a0, 0x0002 /* t1 = bit 1 of the destination address */
+ beqz t1, 0f /* check if address is 4-byte aligned */
+ nop
+ sh a2, 0(a0) /* one halfword store to reach 4-byte alignment (dest assumed 2-byte aligned) */
+ addiu a0, a0, 2
+ addiu a1, a1, -2
+0:
+ srl t1, a1, 5 /* t1 how many multiples of 32 bytes */
+ beqz t1, 2f /* fewer than 32 bytes left: go straight to the tail loop */
+ ins a2, a2, 16, 16 /* copy the low 16 bits of a2 into its upper half */
+1:
+ addiu t1, t1, -1
+ beqz t1, 11f /* the last 32-byte block is stored at label 11 */
+ addiu a1, a1, -32
+ sw a2, 0(a0) /* eight word stores = 32 bytes */
+ sw a2, 4(a0)
+ sw a2, 8(a0)
+ sw a2, 12(a0)
+ sw a2, 16(a0)
+ sw a2, 20(a0)
+ sw a2, 24(a0)
+ sw a2, 28(a0)
+ b 1b
+ addiu a0, a0, 32
+11: /* final 32-byte block; the stores are the same as in the loop body */
+ sw a2, 0(a0)
+ sw a2, 4(a0)
+ sw a2, 8(a0)
+ sw a2, 12(a0)
+ sw a2, 16(a0)
+ sw a2, 20(a0)
+ sw a2, 24(a0)
+ sw a2, 28(a0)
+ addiu a0, a0, 32
+2: /* tail: one halfword per pass while the remaining count is > 0 */
+ blez a1, 3f
+ addiu a1, a1, -2
+ sh a2, 0(a0)
+ b 2b
+ addiu a0, a0, 2
+3:
+ jr ra
+ nop
+
+END_MIPS32R2(pixman_fill_buff16_no_pref)
+
+LEAF_MIPS32R2(pixman_fill_buff16_pref)
/*
* a0 - *dest
* a1 - count (bytes)
@@ -434,9 +488,57 @@ LEAF_MIPS32R2(pixman_fill_buff16)
jr ra
nop
-END_MIPS32R2(pixman_fill_buff16)
+END_MIPS32R2(pixman_fill_buff16_pref)
+
+LEAF_MIPS32R2(pixman_fill_buff32_no_pref)
+/*
+ * Fill a buffer with a 32-bit value using plain stores only (no prefetch).
+ * a0 - *dest, a1 - count (bytes), a2 - value to fill buffer with.
+ * The instruction after each branch is presumably its delay slot (.set noreorder assumed).
+ */
+
+ beqz a1, 3f /* zero count: nothing to store */
+ nop
+ srl t1, a1, 5 /* t1 how many multiples of 32 bytes */
+ beqz t1, 2f /* fewer than 32 bytes: go straight to the tail loop */
+ nop
+1:
+ addiu t1, t1, -1
+ beqz t1, 11f /* the last 32-byte block is stored at label 11 */
+ addiu a1, a1, -32
+ sw a2, 0(a0) /* eight word stores = 32 bytes */
+ sw a2, 4(a0)
+ sw a2, 8(a0)
+ sw a2, 12(a0)
+ sw a2, 16(a0)
+ sw a2, 20(a0)
+ sw a2, 24(a0)
+ sw a2, 28(a0)
+ b 1b
+ addiu a0, a0, 32
+11: /* final 32-byte block; the stores are the same as in the loop body */
+ sw a2, 0(a0)
+ sw a2, 4(a0)
+ sw a2, 8(a0)
+ sw a2, 12(a0)
+ sw a2, 16(a0)
+ sw a2, 20(a0)
+ sw a2, 24(a0)
+ sw a2, 28(a0)
+ addiu a0, a0, 32
+2: /* tail: one word per pass while the remaining count is > 0 */
+ blez a1, 3f
+ addiu a1, a1, -4
+ sw a2, 0(a0)
+ b 2b
+ addiu a0, a0, 4
+3:
+ jr ra
+ nop
+
+END_MIPS32R2(pixman_fill_buff32_no_pref)
-LEAF_MIPS32R2(pixman_fill_buff32)
+LEAF_MIPS32R2(pixman_fill_buff32_pref)
/*
* a0 - *dest
* a1 - count (bytes)
@@ -483,7 +585,7 @@ LEAF_MIPS32R2(pixman_fill_buff32)
jr ra
nop
-END_MIPS32R2(pixman_fill_buff32)
+END_MIPS32R2(pixman_fill_buff32_pref)
LEAF_MIPS32R2(pixman_composite_src_x888_8888_asm)
/*
diff --git a/pixman/pixman-mips32r2.c b/pixman/pixman-mips32r2.c
index 26b62f7..4b27608 100644
--- a/pixman/pixman-mips32r2.c
+++ b/pixman/pixman-mips32r2.c
@@ -146,13 +146,17 @@ mips32r2_blt (pixman_implementation_t *imp,
uint8_t *dst = dst_bytes;
src_bytes += src_stride;
dst_bytes += dst_stride;
- pixman_fast_memcpy_mips32r2 (dst, src, byte_width);
+ pixman_fast_memcpy_mips (dst, src, byte_width);
}
return TRUE;
}
static const pixman_fast_path_t mips32r2_fast_paths[] =
{
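+ /* pref ON: these first two entries are registered only when allow_prefetch is set; otherwise the table is passed starting at index 2 */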
+ PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, mips_composite_src_x888_8888),
+ PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, mips_composite_src_x888_8888),
+ /* pref OFF: entries from here on (index 2 onward) are registered whether or not allow_prefetch is set */
PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, mips_composite_src_0565_0565),
PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, mips_composite_src_0565_0565),
PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, mips_composite_src_8888_8888),
@@ -162,16 +166,27 @@ static const pixman_fast_path_t mips32r2_fast_paths[] =
PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, mips_composite_src_8888_8888),
PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, mips_composite_src_8888_8888),
PIXMAN_STD_FAST_PATH (SRC, r8g8b8, null, r8g8b8, mips_composite_src_0888_0888),
- PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, mips_composite_src_x888_8888),
- PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, mips_composite_src_x888_8888),
{ PIXMAN_OP_NONE },
};
pixman_implementation_t *
_pixman_implementation_create_mips32r2 (pixman_implementation_t *fallback)
{
- pixman_implementation_t *imp =
- _pixman_implementation_create (fallback, mips32r2_fast_paths);
+ pixman_implementation_t *imp;
+
+ if (allow_prefetch)
+ {
+ imp = _pixman_implementation_create (fallback, &mips32r2_fast_paths[0]);
+ pixman_fill_buff16_mips32r2 = pixman_fill_buff16_pref_mips32r2;
+ pixman_fill_buff32_mips32r2 = pixman_fill_buff32_pref_mips32r2;
+ pixman_fast_memcpy_mips = pixman_fast_memcpy_mips32r2;
+ }
+ else
+ {
+ imp = _pixman_implementation_create (fallback, &mips32r2_fast_paths[2]);
+ pixman_fill_buff16_mips32r2 = pixman_fill_buff16_no_pref_mips32r2;
+ pixman_fill_buff32_mips32r2 = pixman_fill_buff32_no_pref_mips32r2;
+ }
imp->blt = mips32r2_blt;
imp->fill = mips32r2_fill;
--
1.7.3
More information about the Pixman
mailing list