[Pixman] [PATCH 09/11] MIPS: mips32r2: Added optimization for function pixman_fill_buff16
Nemanja Lukic
nemanja.lukic at rt-rk.com
Thu Mar 13 06:13:21 PDT 2014
Performance numbers before/after on MIPS-24kc @ 500 MHz
Reference (before):
src_n_0565 = L1: 117.24 L2: 110.68 M:115.83 ( 96.31%) HT: 78.96 VT: 75.03 R: 65.98 RT: 24.94 ( 164Kops/s)
Optimized (with this patch):
src_n_0565 = L1: 429.43 L2: 299.39 M:346.21 (287.61%) HT: 90.68 VT: 80.23 R: 70.99 RT: 23.13 ( 156Kops/s)
---
pixman/pixman-mips-common.h | 2 +
pixman/pixman-mips-dspr2.c | 12 +++++----
pixman/pixman-mips32r2-asm.S | 55 ++++++++++++++++++++++++++++++++++++++++++
pixman/pixman-mips32r2.c | 19 +++++---------
4 files changed, 71 insertions(+), 17 deletions(-)
diff --git a/pixman/pixman-mips-common.h b/pixman/pixman-mips-common.h
index 70af1f7..05ff7ad 100644
--- a/pixman/pixman-mips-common.h
+++ b/pixman/pixman-mips-common.h
@@ -43,6 +43,8 @@ void
pixman_fast_memcpy_mips32r2 (void *dst, void *src, uint32_t n_bytes);
void
pixman_fill_buff32_mips32r2 (void *dst, uint32_t n_bytes, uint32_t value);
+void
+pixman_fill_buff16_mips32r2 (void *dst, uint32_t n_bytes, uint16_t value);
#ifdef USE_MIPS_DSPR1
void
diff --git a/pixman/pixman-mips-dspr2.c b/pixman/pixman-mips-dspr2.c
index 3554f8d..a9773b7 100644
--- a/pixman/pixman-mips-dspr2.c
+++ b/pixman/pixman-mips-dspr2.c
@@ -167,9 +167,7 @@ mips_dspr2_fill (pixman_implementation_t *imp,
#if defined(USE_MIPS32R2) || defined(USE_MIPS_DSPR1)
uint8_t *byte_line;
uint32_t byte_width;
-#endif
-
-#ifndef USE_MIPS_DSPR1
+#else
int32_t short_stride;
uint16_t *dst;
uint16_t v;
@@ -178,7 +176,7 @@ mips_dspr2_fill (pixman_implementation_t *imp,
switch (bpp)
{
case 16:
-#ifdef USE_MIPS_DSPR1
+#if defined(USE_MIPS32R2) || defined(USE_MIPS_DSPR1)
stride = stride * (int) sizeof (uint32_t) / 2;
byte_line = (uint8_t *)(((uint16_t *)bits) + stride * y + x);
byte_width = width * 2;
@@ -192,11 +190,15 @@ mips_dspr2_fill (pixman_implementation_t *imp,
while (height--)
{
-#ifdef USE_MIPS_DSPR1
+#if defined(USE_MIPS32R2) || defined(USE_MIPS_DSPR1)
uint8_t *dst = byte_line;
byte_line += stride;
+#ifdef USE_MIPS_DSPR1
pixman_fill_buff16_mips_dspr1 (dst, byte_width, _xor & 0xffff);
#else
+ pixman_fill_buff16_mips32r2 (dst, byte_width, _xor & 0xffff);
+#endif
+#else
int i;
for (i = 0; i < width; ++i)
diff --git a/pixman/pixman-mips32r2-asm.S b/pixman/pixman-mips32r2-asm.S
index 3f73e41..75ff9e2 100644
--- a/pixman/pixman-mips32r2-asm.S
+++ b/pixman/pixman-mips32r2-asm.S
@@ -381,6 +381,61 @@ $ua_smallCopy_loop:
END_MIPS32R2(pixman_fast_memcpy)
+LEAF_MIPS32R2(pixman_fill_buff16)
+/* Fill a1 bytes starting at a0 with the 16-bit value held in a2.
+ * a0 - *dest (advanced as the buffer is written)
+ * a1 - count (bytes); decremented by 2 or by 32 as data is stored
+ * a2 - value to fill buffer with (its low 16 bits get duplicated into the high half)
+ */
+
+ beqz a1, 3f /* zero byte count: return without storing */
+ andi t1, a0, 0x0002 /* t1 = dest & 2; NOTE(review): relies on being the branch delay slot (noreorder) - confirm */
+ beqz t1, 0f /* check if address is 4-byte aligned */
+ nop
+ sh a2, 0(a0) /* one halfword store brings dest to a 4-byte boundary */
+ addiu a0, a0, 2
+ addiu a1, a1, -2
+0:
+ srl t1, a1, 5 /* t1 how many multiples of 32 bytes */
+ beqz t1, 2f /* fewer than 32 bytes left: go straight to the halfword tail */
+ ins a2, a2, 16, 16 /* a2[31:16] = a2[15:0], so each sw writes two pixels */
+1:
+ addiu t1, t1, -1 /* account for the block about to be written */
+ beqz t1, 11f /* last full block: handled at 11 without a prefetch */
+ addiu a1, a1, -32 /* 32 bytes fewer remaining (runs on both paths of the branch above) */
+ pref 30, 32(a0) /* hint 30 (PrepareForStore in the MIPS32 ISA) for the following block */
+ sw a2, 0(a0) /* eight word stores = one 32-byte block */
+ sw a2, 4(a0)
+ sw a2, 8(a0)
+ sw a2, 12(a0)
+ sw a2, 16(a0)
+ sw a2, 20(a0)
+ sw a2, 24(a0)
+ sw a2, 28(a0)
+ b 1b
+ addiu a0, a0, 32 /* advance dest past the block just written */
+11:
+ sw a2, 0(a0) /* final 32-byte block */
+ sw a2, 4(a0)
+ sw a2, 8(a0)
+ sw a2, 12(a0)
+ sw a2, 16(a0)
+ sw a2, 20(a0)
+ sw a2, 24(a0)
+ sw a2, 28(a0)
+ addiu a0, a0, 32
+2:
+ blez a1, 3f /* tail loop: exit once no bytes remain */
+ addiu a1, a1, -2 /* two bytes per iteration */
+ sh a2, 0(a0) /* leftover bytes are written one halfword at a time */
+ b 2b
+ addiu a0, a0, 2
+3:
+ jr ra
+ nop
+
+END_MIPS32R2(pixman_fill_buff16)
+
LEAF_MIPS32R2(pixman_fill_buff32)
/*
* a0 - *dest
diff --git a/pixman/pixman-mips32r2.c b/pixman/pixman-mips32r2.c
index 4a6d29e..26b62f7 100644
--- a/pixman/pixman-mips32r2.c
+++ b/pixman/pixman-mips32r2.c
@@ -58,25 +58,20 @@ mips32r2_fill (pixman_implementation_t *imp,
{
uint8_t *byte_line;
uint32_t byte_width;
- int i, short_stride;
- uint16_t *dst;
- uint16_t v;
switch (bpp)
{
case 16:
- short_stride = (stride * (int)sizeof (uint32_t)) / (int)sizeof (uint16_t);
- dst = (uint16_t *)bits;
- v = _xor & 0xffff;
-
- dst = dst + y * short_stride + x;
+ stride = stride * (int) sizeof (uint32_t) / 2;
+ byte_line = (uint8_t *)(((uint16_t *)bits) + stride * y + x);
+ byte_width = width * 2;
+ stride *= 2;
while (height--)
{
- for (i = 0; i < width; ++i)
- dst[i] = v;
-
- dst += short_stride;
+ uint8_t *dst = byte_line;
+ byte_line += stride;
+ pixman_fill_buff16_mips32r2 (dst, byte_width, _xor & 0xffff);
}
return TRUE;
case 32:
--
1.7.3
More information about the Pixman
mailing list