[Pixman] [PATCH] MIPS: DSPr2: Added mips_dspr2_blt and mips_dspr2_fill routines.
Nemanja Lukic
nlukic at mips.com
Tue Feb 28 04:47:28 PST 2012
From: Nemanja Lukic <nemanja.lukic at rt-rk.com>
Performance numbers before/after on MIPS-74kc @ 1GHz
Referent (before):
cairo-perf-trace:
[ # ] backend test min(s) median(s) stddev. count
[ # ] image: pixman 0.25.1
[ 0] image gnome-system-monitor 268.460 269.712 0.22% 6/6
Optimized:
cairo-perf-trace:
[ # ] backend test min(s) median(s) stddev. count
[ # ] image: pixman 0.25.1
[ 0] image gnome-system-monitor 246.565 246.706 0.04% 6/6
---
pixman/pixman-mips-dspr2-asm.S | 114 ++++++++++++++++++++++++++++
pixman/pixman-mips-dspr2.c | 163 ++++++++++++++++++++++++++++++++++++++++
pixman/pixman-mips-dspr2.h | 4 +
3 files changed, 281 insertions(+), 0 deletions(-)
diff --git a/pixman/pixman-mips-dspr2-asm.S b/pixman/pixman-mips-dspr2-asm.S
index 0a4c87e..4125beb 100644
--- a/pixman/pixman-mips-dspr2-asm.S
+++ b/pixman/pixman-mips-dspr2-asm.S
@@ -31,6 +31,120 @@
#include "pixman-mips-dspr2-asm.h"
+LEAF_MIPS32R2(pixman_fill_buff16_mips)
+/*
+ * a0 - *dest
+ * a1 - count (bytes)
+ * a2 - value to fill buffer with
+ */
+
+ beqz a1, 3f
+ nop
+ srl t1, a1, 5 /* t1 how many multiples of 32 bytes */
+ beqz t1, 2f
+ nop
+1:
+ addiu t1, t1, -1
+ beqz t1, 11f
+ addiu a1, a1, -32
+ pref 30, 32(a0)
+ sh a2, 0(a0)
+ sh a2, 2(a0)
+ sh a2, 4(a0)
+ sh a2, 6(a0)
+ sh a2, 8(a0)
+ sh a2, 10(a0)
+ sh a2, 12(a0)
+ sh a2, 14(a0)
+ sh a2, 16(a0)
+ sh a2, 18(a0)
+ sh a2, 20(a0)
+ sh a2, 22(a0)
+ sh a2, 24(a0)
+ sh a2, 26(a0)
+ sh a2, 28(a0)
+ sh a2, 30(a0)
+ b 1b
+ addiu a0, a0, 32
+11:
+ sh a2, 0(a0)
+ sh a2, 2(a0)
+ sh a2, 4(a0)
+ sh a2, 6(a0)
+ sh a2, 8(a0)
+ sh a2, 10(a0)
+ sh a2, 12(a0)
+ sh a2, 14(a0)
+ sh a2, 16(a0)
+ sh a2, 18(a0)
+ sh a2, 20(a0)
+ sh a2, 22(a0)
+ sh a2, 24(a0)
+ sh a2, 26(a0)
+ sh a2, 28(a0)
+ sh a2, 30(a0)
+ addiu a0, a0, 32
+2:
+ blez a1, 3f
+ addiu a1, a1, -2
+ sh a2, 0(a0)
+ b 2b
+ addiu a0,a0, 2
+3:
+ jr ra
+ nop
+
+END(pixman_fill_buff16_mips)
+
+LEAF_MIPS32R2(pixman_fill_buff32_mips)
+/*
+ * a0 - *dest
+ * a1 - count (bytes)
+ * a2 - value to fill buffer with
+ */
+
+ beqz a1, 3f
+ nop
+ srl t1, a1, 5 /* t1 how many multiples of 32 bytes */
+ beqz t1, 2f
+ nop
+1:
+ addiu t1, t1, -1
+ beqz t1, 11f
+ addiu a1, a1, -32
+ pref 30, 32(a0)
+ sw a2, 0(a0)
+ sw a2, 4(a0)
+ sw a2, 8(a0)
+ sw a2, 12(a0)
+ sw a2, 16(a0)
+ sw a2, 20(a0)
+ sw a2, 24(a0)
+ sw a2, 28(a0)
+ b 1b
+ addiu a0, a0, 32
+11:
+ sw a2, 0(a0)
+ sw a2, 4(a0)
+ sw a2, 8(a0)
+ sw a2, 12(a0)
+ sw a2, 16(a0)
+ sw a2, 20(a0)
+ sw a2, 24(a0)
+ sw a2, 28(a0)
+ addiu a0, a0, 32
+2:
+ blez a1, 3f
+ addiu a1, a1, -4
+ sw a2, 0(a0)
+ b 2b
+ addiu a0,a0, 4
+3:
+ jr ra
+ nop
+
+END(pixman_fill_buff32_mips)
+
LEAF_MIPS_DSPR2(pixman_composite_src_8888_0565_asm_mips)
/*
* a0 - dst (r5g6b5)
diff --git a/pixman/pixman-mips-dspr2.c b/pixman/pixman-mips-dspr2.c
index e331853..2beada3 100644
--- a/pixman/pixman-mips-dspr2.c
+++ b/pixman/pixman-mips-dspr2.c
@@ -49,6 +49,119 @@ PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (DO_FAST_MEMCPY, src_8888_8888,
PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (DO_FAST_MEMCPY, src_0888_0888,
uint8_t, 3, uint8_t, 3)
+static pixman_bool_t
+pixman_fill_mips (uint32_t *bits,
+ int stride,
+ int bpp,
+ int x,
+ int y,
+ int width,
+ int height,
+ uint32_t _xor)
+{
+ uint8_t *byte_line;
+ uint32_t byte_width;
+ switch (bpp)
+ {
+ case 16:
+ stride = stride * (int) sizeof (uint32_t) / 2;
+ byte_line = (uint8_t *)(((uint16_t *)bits) + stride * y + x);
+ byte_width = width * 2;
+ stride *= 2;
+
+ while (height--)
+ {
+ uint8_t *dst = byte_line;
+ byte_line += stride;
+ pixman_fill_buff16_mips (dst, byte_width, _xor & 0xffff);
+ }
+ return TRUE;
+ case 32:
+ stride = stride * (int) sizeof (uint32_t) / 4;
+ byte_line = (uint8_t *)(((uint32_t *)bits) + stride * y + x);
+ byte_width = width * 4;
+ stride *= 4;
+
+ while (height--)
+ {
+ uint8_t *dst = byte_line;
+ byte_line += stride;
+ pixman_fill_buff32_mips (dst, byte_width, _xor);
+ }
+ return TRUE;
+ default:
+ return FALSE;
+ }
+}
+
+static pixman_bool_t
+pixman_blt_mips (uint32_t *src_bits,
+ uint32_t *dst_bits,
+ int src_stride,
+ int dst_stride,
+ int src_bpp,
+ int dst_bpp,
+ int src_x,
+ int src_y,
+ int dest_x,
+ int dest_y,
+ int width,
+ int height)
+{
+ if (src_bpp != dst_bpp)
+ return FALSE;
+
+ uint8_t *src_bytes;
+ uint8_t *dst_bytes;
+ uint32_t byte_width;
+
+ switch (src_bpp)
+ {
+ case 16:
+ src_stride = src_stride * (int) sizeof (uint32_t) / 2;
+ dst_stride = dst_stride * (int) sizeof (uint32_t) / 2;
+ src_bytes =(uint8_t *)(((uint16_t *)src_bits)
+ + src_stride * (src_y) + (src_x));
+ dst_bytes = (uint8_t *)(((uint16_t *)dst_bits)
+ + dst_stride * (dest_y) + (dest_x));
+ byte_width = width * 2;
+ src_stride *= 2;
+ dst_stride *= 2;
+
+ while (height--)
+ {
+ uint8_t *src = src_bytes;
+ uint8_t *dst = dst_bytes;
+ src_bytes += src_stride;
+ dst_bytes += dst_stride;
+ pixman_mips_fast_memcpy (dst, src, byte_width);
+ }
+ return TRUE;
+ case 32:
+ src_stride = src_stride * (int) sizeof (uint32_t) / 4;
+ dst_stride = dst_stride * (int) sizeof (uint32_t) / 4;
+ src_bytes = (uint8_t *)(((uint32_t *)src_bits)
+ + src_stride * (src_y) + (src_x));
+ dst_bytes = (uint8_t *)(((uint32_t *)dst_bits)
+ + dst_stride * (dest_y) + (dest_x));
+ byte_width = width * 4;
+ src_stride *= 4;
+ dst_stride *= 4;
+
+ while (height--)
+ {
+ uint8_t *src = src_bytes;
+ uint8_t *dst = dst_bytes;
+ src_bytes += src_stride;
+ dst_bytes += dst_stride;
+ pixman_mips_fast_memcpy (dst, src, byte_width);
+ }
+ return TRUE;
+ default:
+ return FALSE;
+ }
+}
+
static const pixman_fast_path_t mips_dspr2_fast_paths[] =
{
PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, mips_composite_src_0565_0565),
@@ -74,11 +187,61 @@ static const pixman_fast_path_t mips_dspr2_fast_paths[] =
{ PIXMAN_OP_NONE },
};
+static pixman_bool_t
+mips_dspr2_blt (pixman_implementation_t *imp,
+ uint32_t * src_bits,
+ uint32_t * dst_bits,
+ int src_stride,
+ int dst_stride,
+ int src_bpp,
+ int dst_bpp,
+ int src_x,
+ int src_y,
+ int dest_x,
+ int dest_y,
+ int width,
+ int height)
+{
+ if (!pixman_blt_mips (
+ src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp,
+ src_x, src_y, dest_x, dest_y, width, height))
+
+ {
+ return _pixman_implementation_blt (
+ imp->delegate,
+ src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp,
+ src_x, src_y, dest_x, dest_y, width, height);
+ }
+
+ return TRUE;
+}
+
+static pixman_bool_t
+mips_dspr2_fill (pixman_implementation_t *imp,
+ uint32_t * bits,
+ int stride,
+ int bpp,
+ int x,
+ int y,
+ int width,
+ int height,
+ uint32_t xor)
+{
+ if (pixman_fill_mips (bits, stride, bpp, x, y, width, height, xor))
+ return TRUE;
+
+ return _pixman_implementation_fill (
+ imp->delegate, bits, stride, bpp, x, y, width, height, xor);
+}
+
pixman_implementation_t *
_pixman_implementation_create_mips_dspr2 (pixman_implementation_t *fallback)
{
pixman_implementation_t *imp =
_pixman_implementation_create (fallback, mips_dspr2_fast_paths);
+ imp->blt = mips_dspr2_blt;
+ imp->fill = mips_dspr2_fill;
+
return imp;
}
diff --git a/pixman/pixman-mips-dspr2.h b/pixman/pixman-mips-dspr2.h
index 449c42a..a40e7c8 100644
--- a/pixman/pixman-mips-dspr2.h
+++ b/pixman/pixman-mips-dspr2.h
@@ -41,6 +41,10 @@
void
pixman_mips_fast_memcpy (void *dst, void *src, uint32_t n_bytes);
+void
+pixman_fill_buff16_mips (void *dst, uint32_t n_bytes, uint16_t value);
+void
+pixman_fill_buff32_mips (void *dst, uint32_t n_bytes, uint32_t value);
/****************************************************************/
--
1.7.3
More information about the Pixman
mailing list