[PATCH] Added MIPS32R2 and MIPS DSP ASE optimized functions.
Georgi Beloev
gb at beloev.net
Wed Sep 8 17:34:22 PDT 2010
The following functions were implemented for MIPS32R2:
- pixman_fill32()
- fast_composite_over_n_8_8888()
The following functions were implemented for MIPS DSP ASE:
- combine_over_u()
- fast_composite_over_n_8_8888()
Additionally, MIPS DSP ASE uses the MIPS32R2 pixman_fill32() function.
Use configure commands similar to the ones below to select the target
processor and, correspondingly, the target instruction set:
- MIPS32R2: configure CFLAGS='-march=24kc -O2'
- MIPS DSP ASE: configure CFLAGS='-march=24kec -O2'
---
configure.ac | 63 +++++++++++++
pixman/Makefile.am | 22 +++++
pixman/pixman-cpu.c | 21 ++++
pixman/pixman-mips-dspase1-asm.S | 189 ++++++++++++++++++++++++++++++++++++++
pixman/pixman-mips-dspase1.c | 107 +++++++++++++++++++++
pixman/pixman-mips32r2-asm.S | 180 ++++++++++++++++++++++++++++++++++++
pixman/pixman-mips32r2.c | 112 ++++++++++++++++++++++
pixman/pixman-private.h | 11 ++
8 files changed, 705 insertions(+), 0 deletions(-)
create mode 100644 pixman/pixman-mips-dspase1-asm.S
create mode 100644 pixman/pixman-mips-dspase1.c
create mode 100644 pixman/pixman-mips32r2-asm.S
create mode 100644 pixman/pixman-mips32r2.c
diff --git a/configure.ac b/configure.ac
index 5242799..2a7e49a 100644
--- a/configure.ac
+++ b/configure.ac
@@ -565,6 +565,69 @@ fi
AM_CONDITIONAL(USE_GCC_INLINE_ASM, test $have_gcc_inline_asm = yes)
+dnl ==========================================================================
+dnl Check if the compiler supports MIPS32R2 instructions
+dnl The probe compiles a function whose inline asm uses the ext instruction.
+AC_MSG_CHECKING(whether to use MIPS32R2 instructions)
+AC_COMPILE_IFELSE([[
+void test()
+{
+ asm("ext \$v0,\$a0,8,8");
+}
+]], have_mips32r2=yes, have_mips32r2=no)
+dnl --disable-mips32r2 switches the fast paths off; the default value is auto.
+AC_ARG_ENABLE(mips32r2,
+ [AC_HELP_STRING([--disable-mips32r2],
+ [disable MIPS32R2 fast paths])],
+ [enable_mips32r2=$enableval], [enable_mips32r2=auto])
+dnl An explicit no overrides whatever the compile probe found.
+if test $enable_mips32r2 = no ; then
+ have_mips32r2=disabled
+fi
+dnl Publish the result to C code as the USE_MIPS32R2 preprocessor define ...
+if test $have_mips32r2 = yes ; then
+ AC_DEFINE(USE_MIPS32R2, 1, [use MIPS32R2 optimizations])
+fi
+dnl ... and to automake as the USE_MIPS32R2 conditional.
+AM_CONDITIONAL(USE_MIPS32R2, test $have_mips32r2 = yes)
+dnl Report the outcome; an explicit --enable-mips32r2 with a failed probe is fatal.
+AC_MSG_RESULT($have_mips32r2)
+if test $enable_mips32r2 = yes && test $have_mips32r2 = no ; then
+ AC_MSG_ERROR([MIPS32R2 not detected])
+fi
+
+
+dnl ==========================================================================
+dnl Check if the compiler supports MIPS DSP ASE Rev 1 instructions
+dnl The probe compiles a function whose inline asm uses the addu.qb instruction.
+AC_MSG_CHECKING(whether to use MIPS DSP ASE Rev 1 instructions)
+AC_COMPILE_IFELSE([[
+void test()
+{
+ asm("addu.qb \$v0,\$a0,\$a1");
+}
+]], have_mips_dspase1=yes, have_mips_dspase1=no)
+dnl --disable-mips-dspase1 switches the fast paths off; the default value is auto.
+AC_ARG_ENABLE(mips-dspase1,
+ [AC_HELP_STRING([--disable-mips-dspase1],
+ [disable MIPS DSP ASE Rev 1 fast paths])],
+ [enable_mips_dspase1=$enableval], [enable_mips_dspase1=auto])
+dnl An explicit no overrides whatever the compile probe found.
+if test $enable_mips_dspase1 = no ; then
+ have_mips_dspase1=disabled
+fi
+dnl Publish the result to C code as the USE_MIPS_DSPASE1 preprocessor define ...
+if test $have_mips_dspase1 = yes ; then
+ AC_DEFINE(USE_MIPS_DSPASE1, 1, [use MIPS DSP ASE Rev 1 optimizations])
+fi
+dnl ... and to automake as the USE_MIPS_DSPASE1 conditional.
+AM_CONDITIONAL(USE_MIPS_DSPASE1, test $have_mips_dspase1 = yes)
+dnl Report the outcome; an explicit --enable-mips-dspase1 with a failed probe is fatal.
+AC_MSG_RESULT($have_mips_dspase1)
+if test $enable_mips_dspase1 = yes && test $have_mips_dspase1 = no ; then
+ AC_MSG_ERROR([MIPS DSP ASE Rev 1 not detected])
+fi
+
dnl ==============================================
dnl Static test programs
diff --git a/pixman/Makefile.am b/pixman/Makefile.am
index ca31301..d832db1 100644
--- a/pixman/Makefile.am
+++ b/pixman/Makefile.am
@@ -123,5 +123,27 @@ libpixman_1_la_LIBADD += libpixman-arm-neon.la
ASM_CFLAGS_arm_neon=
endif
+# MIPS32R2: C glue plus assembly, built as a non-installed libtool library and added to libpixman-1
+if USE_MIPS32R2
+noinst_LTLIBRARIES += libpixman-mips32r2.la
+libpixman_mips32r2_la_SOURCES = \
+ pixman-mips32r2.c \
+ pixman-mips32r2-asm.S
+libpixman_mips32r2_la_CFLAGS = $(DEP_CFLAGS)
+libpixman_mips32r2_la_LIBADD = $(DEP_LIBS)
+libpixman_1_la_LIBADD += libpixman-mips32r2.la
+endif
+
+# MIPS DSP ASE Rev 1: C glue plus assembly, built as a non-installed libtool library and added to libpixman-1
+if USE_MIPS_DSPASE1
+noinst_LTLIBRARIES += libpixman-mips-dspase1.la
+libpixman_mips_dspase1_la_SOURCES = \
+ pixman-mips-dspase1.c \
+ pixman-mips-dspase1-asm.S
+libpixman_mips_dspase1_la_CFLAGS = $(DEP_CFLAGS)
+libpixman_mips_dspase1_la_LIBADD = $(DEP_LIBS)
+libpixman_1_la_LIBADD += libpixman-mips-dspase1.la
+endif
+
.c.s : $(libpixmaninclude_HEADERS) $(BUILT_SOURCES)
$(CC) $(CFLAGS) $(ASM_CFLAGS_$(@:pixman-%.s=%)) $(ASM_CFLAGS_$(@:pixman-arm-%.s=arm_%)) -DHAVE_CONFIG_H -I$(srcdir) -I$(builddir) -I$(top_builddir) -S -o $@ $<
diff --git a/pixman/pixman-cpu.c b/pixman/pixman-cpu.c
index 0e14ecb..ee6dc1c 100644
--- a/pixman/pixman-cpu.c
+++ b/pixman/pixman-cpu.c
@@ -573,6 +573,17 @@ pixman_have_sse2 (void)
#endif /* __amd64__ */
#endif
+#ifdef USE_MIPS32R2
+// No runtime CPU detection: when the MIPS32R2 code is compiled in, it is assumed usable (always TRUE).
+#define pixman_have_mips32r2() TRUE
+#endif /* USE_MIPS32R2 */
+
+#ifdef USE_MIPS_DSPASE1
+// No runtime CPU detection: when the DSP ASE Rev 1 code is compiled in, it is assumed usable (always TRUE).
+#define pixman_have_mips_dspase1() TRUE
+#endif /* USE_MIPS_DSPASE1 */
+
+
pixman_implementation_t *
_pixman_choose_implementation (void)
{
@@ -606,6 +617,16 @@ _pixman_choose_implementation (void)
imp = _pixman_implementation_create_vmx (imp);
#endif
+#ifdef USE_MIPS32R2
+ if (pixman_have_mips32r2 ())
+ imp = _pixman_implementation_create_mips32r2 (imp);
+#endif
+
+#ifdef USE_MIPS_DSPASE1
+ if (pixman_have_mips_dspase1 ())
+ imp = _pixman_implementation_create_mips_dspase1 (imp);
+#endif
+
return imp;
}
diff --git a/pixman/pixman-mips-dspase1-asm.S b/pixman/pixman-mips-dspase1-asm.S
new file mode 100644
index 0000000..b96fe83
--- /dev/null
+++ b/pixman/pixman-mips-dspase1-asm.S
@@ -0,0 +1,189 @@
+
+ .text
+ .set noreorder
+ .set nomacro
+
+
+// void
+// mips_dspase1_combine_over_u_nomask(uint32_t *dest, const uint32_t *src,
+// const uint32_t *mask, int width)
+// Per pixel and channel: dest = src + dest * ALPHA_8(~src) / 255 (saturating add).
+ .global mips_dspase1_combine_over_u_nomask
+ .ent mips_dspase1_combine_over_u_nomask
+
+// note: this version to be used only when mask = NULL ($a2 is never read)
+
+mips_dspase1_combine_over_u_nomask:
+ beqz $a3, 1f // nothing to do when width == 0
+ subu $v0, $a1, $a0 // diff = src - dest (for LWX); branch delay slot
+
+ sll $a3, $a3, 2 // width <<= 2
+ addu $a3, $a0, $a3 // dest_end = dest + width
+
+ li $t9, 0x00800080 // rounding bias for the divide by 255
+
+// Both loads happen at the top of the iteration that consumes them, so the
+// loop never reads dest[width] or src[width] past the end of the buffers.
+0:
+ lw $t0, 0($a0) // dest
+ lwx $t1, $v0($a0) // src (dest + diff)
+
+ not $t2, $t1 // ~src
+ srl $t2, $t2, 24 // ALPHA_8(~src)
+ ins $t2, $t2, 16, 8 // 0:a:0:a; equivalent to replv.ph
+
+ muleu_s.ph.qbl $t3, $t0, $t2
+ muleu_s.ph.qbr $t4, $t0, $t2
+
+ addiu $a0, $a0, 4 // dest++
+
+// x / 255 with rounding: x += 0x80; x += x >> 8; the result is the high byte
+ addu $t3, $t3, $t9 // can't overflow; rev2: addu_s.ph
+ addu $t4, $t4, $t9 // can't overflow; rev2: addu_s.ph
+ preceu.ph.qbla $t5, $t3 // rev2: shrl.ph
+ preceu.ph.qbla $t6, $t4 // rev2: shrl.ph
+ addu $t3, $t3, $t5 // can't overflow; rev2: addu_s.ph
+ addu $t4, $t4, $t6 // can't overflow; rev2: addu_s.ph
+
+ precrq.qb.ph $t3, $t3, $t4
+ addu_s.qb $t3, $t3, $t1
+
+ bne $a0, $a3, 0b
+ sw $t3, -4($a0) // dest; delay slot, $a0 already advanced
+
+1:
+ jr $ra
+ nop
+
+ .end mips_dspase1_combine_over_u_nomask
+
+
+// void
+// mips_dspase1_combine_over_u_mask(uint32_t *dest, const uint32_t *src,
+// const uint32_t *mask, int width)
+// Per pixel and channel: s = src * ALPHA_8(mask) / 255; dest = s + dest * ALPHA_8(~s) / 255.
+ .global mips_dspase1_combine_over_u_mask
+ .ent mips_dspase1_combine_over_u_mask
+
+// note: this version to be used only when mask != NULL
+
+mips_dspase1_combine_over_u_mask:
+ beqz $a3, 1f
+ subu $v0, $a1, $a0 // sdiff = src - dest (for LWX); branch delay slot
+
+ subu $v1, $a2, $a0 // mdiff = mask - dest (for LWX)
+
+ sll $a3, $a3, 2 // width <<= 2
+ addu $a3, $a0, $a3 // dest_end = dest + width
+
+ li $t9, 0x00800080
+
+0:
+ lwx $t8, $v1($a0) // mask (dest + mdiff)
+ lwx $t1, $v0($a0) // src (dest + sdiff)
+
+ srl $t8, $t8, 24 // mask >>= A_SHIFT
+ ins $t8, $t8, 16, 8 // 0:m:0:m; equivalent to replv.ph
+// in(): 16-bit products of each src channel with the mask alpha
+ muleu_s.ph.qbl $t3, $t1, $t8
+ muleu_s.ph.qbr $t4, $t1, $t8
+
+ lw $t0, 0($a0) // dest
+// x / 255 with rounding: x += 0x80; x += x >> 8; the result is the high byte
+ addu $t3, $t3, $t9 // can't overflow; rev2: addu_s.ph
+ addu $t4, $t4, $t9 // can't overflow; rev2: addu_s.ph
+ preceu.ph.qbla $t5, $t3 // rev2: shrl.ph
+ preceu.ph.qbla $t6, $t4 // rev2: shrl.ph
+ addu $t3, $t3, $t5 // can't overflow; rev2: addu_s.ph
+ addu $t4, $t4, $t6 // can't overflow; rev2: addu_s.ph
+ precrq.qb.ph $t1, $t3, $t4
+// over(): scale dest by the inverse alpha of the masked source, then add it
+ not $t2, $t1 // ~src
+ srl $t2, $t2, 24 // ALPHA_8(~src)
+ ins $t2, $t2, 16, 8 // 0:a:0:a; equivalent to replv.ph
+
+ muleu_s.ph.qbl $t3, $t0, $t2
+ muleu_s.ph.qbr $t4, $t0, $t2
+
+ addiu $a0, $a0, 4 // dest++
+
+ addu $t3, $t3, $t9 // can't overflow; rev2: addu_s.ph
+ addu $t4, $t4, $t9 // can't overflow; rev2: addu_s.ph
+ preceu.ph.qbla $t5, $t3 // rev2: shrl.ph
+ preceu.ph.qbla $t6, $t4 // rev2: shrl.ph
+ addu $t3, $t3, $t5 // can't overflow; rev2: addu_s.ph
+ addu $t4, $t4, $t6 // can't overflow; rev2: addu_s.ph
+ precrq.qb.ph $t3, $t3, $t4
+ addu_s.qb $t3, $t3, $t1
+// the store below sits in the branch delay slot; $a0 was already advanced, hence -4
+ bne $a0, $a3, 0b
+ sw $t3, -4($a0) // dest
+
+1:
+ jr $ra
+ nop
+
+ .end mips_dspase1_combine_over_u_mask
+
+
+////////////////////////////////////////////////////////////////////////////////
+
+// void
+// mips_dspase1_composite_over_n_8_8888_inner(uint32_t *dest, const uint32_t src,
+// const uint8_t *mask, int width)
+// One scanline: the solid src ($a1), scaled by one mask byte per pixel, is composited OVER dest.
+ .global mips_dspase1_composite_over_n_8_8888_inner
+ .ent mips_dspase1_composite_over_n_8_8888_inner
+
+mips_dspase1_composite_over_n_8_8888_inner:
+ beqz $a3, 1f
+ sll $a3, $a3, 2 // width <<= 2; branch delay slot
+
+ addu $a3, $a0, $a3 // dest_end = dest + width
+
+ li $t9, 0x00800080
+
+0:
+ lbu $t8, 0($a2) // mask
+ lw $t0, 0($a0) // dest
+ ins $t8, $t8, 16, 8 // 0:m:0:m; equivalent to replv.ph
+// in(src, m): 16-bit products of each src channel with the mask byte
+ muleu_s.ph.qbl $t3, $a1, $t8
+ muleu_s.ph.qbr $t4, $a1, $t8
+
+ addiu $a0, $a0, 4 // dest++
+ addiu $a2, $a2, 1 // mask++
+// x / 255 with rounding: x += 0x80; x += x >> 8; the result is the high byte
+ addu $t3, $t3, $t9 // can't overflow; rev2: addu_s.ph
+ addu $t4, $t4, $t9 // can't overflow; rev2: addu_s.ph
+ preceu.ph.qbla $t5, $t3 // rev2: shrl.ph
+ preceu.ph.qbla $t6, $t4 // rev2: shrl.ph
+ addu $t3, $t3, $t5 // can't overflow; rev2: addu_s.ph
+ addu $t4, $t4, $t6 // can't overflow; rev2: addu_s.ph
+ precrq.qb.ph $t1, $t3, $t4 // in(src,m)
+
+ not $t2, $t1 // ~in(src,m)
+ srl $t2, $t2, 24
+ ins $t2, $t2, 16, 8 // 0:a:0:a; equivalent to replv.ph
+
+ muleu_s.ph.qbl $t3, $t0, $t2
+ muleu_s.ph.qbr $t4, $t0, $t2
+
+ addu $t3, $t3, $t9 // can't overflow; rev2: addu_s.ph
+ addu $t4, $t4, $t9 // can't overflow; rev2: addu_s.ph
+ preceu.ph.qbla $t5, $t3 // rev2: shrl.ph
+ preceu.ph.qbla $t6, $t4 // rev2: shrl.ph
+ addu $t3, $t3, $t5 // can't overflow; rev2: addu_s.ph
+ addu $t4, $t4, $t6 // can't overflow; rev2: addu_s.ph
+ precrq.qb.ph $t3, $t3, $t4
+ addu_s.qb $t3, $t3, $t1 // over(in(src,m),dest)
+// the store below sits in the branch delay slot; $a0 was already advanced, hence -4
+ bne $a0, $a3, 0b
+ sw $t3, -4($a0) // dest
+
+1:
+ jr $ra
+ nop
+
+ .end mips_dspase1_composite_over_n_8_8888_inner
+
diff --git a/pixman/pixman-mips-dspase1.c b/pixman/pixman-mips-dspase1.c
new file mode 100644
index 0000000..59722d2
--- /dev/null
+++ b/pixman/pixman-mips-dspase1.c
@@ -0,0 +1,107 @@
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include "pixman-private.h"
+
+
+// assembly-language functions (defined in pixman-mips-dspase1-asm.S)
+// OVER combiner for the mask == NULL case
+void
+mips_dspase1_combine_over_u_nomask(uint32_t *dest, const uint32_t *src,
+ const uint32_t *mask, int width);
+// OVER combiner for the mask != NULL case; src is first scaled by the mask alpha
+void
+mips_dspase1_combine_over_u_mask(uint32_t *dest, const uint32_t *src,
+ const uint32_t *mask, int width);
+// one scanline of solid-src OVER dest, with one 8-bit mask value per pixel
+void
+mips_dspase1_composite_over_n_8_8888_inner(uint32_t *dest, uint32_t src,
+ const uint8_t *mask, int width);
+
+
+////////////////////////////////////////////////////////////////////////////////
+
+
+static void
+mips_dspase1_combine_over_u(pixman_implementation_t *imp,
+                            pixman_op_t              op,
+                            uint32_t *               dest,
+                            const uint32_t *         src,
+                            const uint32_t *         mask,
+                            int                      width)
+{
+    /*
+     * OVER combiner entry point: hand the scanline to the assembly routine
+     * that matches whether a mask was supplied.  imp and op are not used.
+     */
+    if (!mask)
+    {
+        mips_dspase1_combine_over_u_nomask(dest, src, mask, width);
+        return;
+    }
+    mips_dspase1_combine_over_u_mask(dest, src, mask, width);
+}
+
+
+/*
+ * Fast path for OVER with a solid source, an 8-bit mask and a 32-bit
+ * destination: each scanline is handed to the assembly inner loop.
+ */
+static void
+mips_dspase1_fast_composite_over_n_8_8888(pixman_implementation_t *imp,
+                                          pixman_op_t              op,
+                                          pixman_image_t *         src_image,
+                                          pixman_image_t *         mask_image,
+                                          pixman_image_t *         dst_image,
+                                          int32_t                  src_x,
+                                          int32_t                  src_y,
+                                          int32_t                  mask_x,
+                                          int32_t                  mask_y,
+                                          int32_t                  dest_x,
+                                          int32_t                  dest_y,
+                                          int32_t                  width,
+                                          int32_t                  height)
+{
+    uint32_t src;
+    uint32_t *dst_line;
+    uint8_t *mask_line;
+    int dst_stride, mask_stride;
+
+    src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
+    /* an all-zero source adds nothing under OVER, so there is nothing to do */
+    if (src == 0)
+        return;
+
+    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
+
+    while (height--)
+    {
+        mips_dspase1_composite_over_n_8_8888_inner(dst_line, src, mask_line, width);
+        dst_line += dst_stride;
+        mask_line += mask_stride;
+    }
+}
+
+
+////////////////////////////////////////////////////////////////////////////////
+
+/* Fast-path table for this implementation; the PIXMAN_OP_NONE entry ends it. */
+static const pixman_fast_path_t mips_dspase1_fast_paths[] =
+{
+ PIXMAN_STD_FAST_PATH(OVER, solid, a8, a8r8g8b8, mips_dspase1_fast_composite_over_n_8_8888),
+ { PIXMAN_OP_NONE }
+};
+
+
+pixman_implementation_t *
+_pixman_implementation_create_mips_dspase1 (pixman_implementation_t *delegate)
+{
+    pixman_implementation_t *self;
+
+    /* wrap the delegate with the DSP ASE fast paths, then install the OVER combiner */
+    self = _pixman_implementation_create (delegate, mips_dspase1_fast_paths);
+    self->combine_32[PIXMAN_OP_OVER] = mips_dspase1_combine_over_u;
+    return self;
+}
diff --git a/pixman/pixman-mips32r2-asm.S b/pixman/pixman-mips32r2-asm.S
new file mode 100644
index 0000000..e5b4a6c
--- /dev/null
+++ b/pixman/pixman-mips32r2-asm.S
@@ -0,0 +1,180 @@
+
+ .text
+ .set noreorder
+ .set nomacro
+
+
+// pixman_bool_t
+// mips32r2_pixman_fill32(uint32_t *bits, int stride, int x, int y,
+// int width, int height, uint32_t xor)
+// Stores xor into a width x height block of 32-bit words (stride counted in words); always returns 1.
+ .global mips32r2_pixman_fill32
+ .ent mips32r2_pixman_fill32
+// bits, stride, x, y arrive in $a0-$a3; width, height and xor are loaded from the stack
+mips32r2_pixman_fill32:
+ mul $a3, $a1, $a3
+ addu $a3, $a3, $a2
+ sll $a3, $a3, 2
+ addu $a0, $a0, $a3 // bits = bits + y * stride + x
+
+ lw $a2, 16($sp) // width
+ lw $a3, 20($sp) // height
+ lw $v0, 24($sp) // xor
+
+ li $t0, ~7
+ beqz $a3, 5f // exit if height = 0
+ and $t0, $a2, $t0 // width8 = width & ~7
+
+ sll $a1, $a1, 2 // stride <<= 2
+ sll $t0, $t0, 2 // width8 <<= 2
+ sll $a2, $a2, 2 // width <<= 2
+// per row: eight stores per pass over the width8 part, then one store per pass for the rest
+0:
+ move $t1, $a0 // b = bits
+ addu $t2, $t1, $t0 // b + width8
+
+ beq $t1, $t2, 2f // skip unrolled loop if not enough samples
+ addu $t3, $t1, $a2 // b + width
+
+1:
+ sw $v0, 0($t1)
+ sw $v0, 4($t1)
+ sw $v0, 8($t1)
+ sw $v0,12($t1)
+ sw $v0,16($t1)
+ sw $v0,20($t1)
+ sw $v0,24($t1)
+// the eighth store sits in the branch delay slot below (offset -4 after b += 8)
+ addiu $t1, $t1, 32 // b += 8
+ bne $t1, $t2, 1b // b = (bits + width8)?
+ sw $v0, -4($t1)
+
+2:
+ beq $t1, $t3, 4f // skip single-sample loop if all work done
+ addiu $a3, $a3, -1 // height--
+
+3:
+ addiu $t1, $t1, 4
+ bne $t1, $t3, 3b // b = (bits + width)?
+ sw $v0, -4($t1)
+
+4:
+ bnez $a3, 0b
+ addu $a0, $a0, $a1 // bits += stride
+
+5:
+ jr $ra
+ li $v0, 1
+
+ .end mips32r2_pixman_fill32
+
+
+////////////////////////////////////////////////////////////////////////////////
+
+// void
+// mips32r2_composite_over_n_8_8888_inner(uint32_t *dest, const uint32_t src,
+// const uint8_t *mask, int width)
+// Per pixel and channel: s = src * mask / 255; dest = s + dest * (~s >> 24) / 255 (saturating add).
+ .global mips32r2_composite_over_n_8_8888_inner
+ .ent mips32r2_composite_over_n_8_8888_inner
+
+mips32r2_composite_over_n_8_8888_inner:
+ beqz $a3, 1f
+ sll $a3, $a3, 2 // width <<= 2; branch delay slot
+
+ addu $a3, $a0, $a3 // dest_end = dest + width
+
+ li $t7, 0x01000100 // 0x100 per 16-bit lane, used by the saturating add
+ li $t8, 0x00FF00FF // RB_MASK
+ li $t9, 0x00800080 // rounding bias for the divide by 255
+
+0:
+ lbu $t2, 0($a2) // mask
+
+ // in()
+
+ and $t5, $a1, $t8
+ mul $t3, $t5, $t2
+
+ lw $t0, 0($a0) // dest
+ addiu $a2, $a2, 1 // mask++
+
+ srl $t6, $a1, 8
+ and $t6, $t6, $t8
+ mul $t4, $t6, $t2
+// x / 255 with rounding on two lanes at once: t += 0x80; t = (t + (t >> 8)) >> 8
+ addu $t3, $t3, $t9
+ srl $t5, $t3, 8
+ and $t5, $t5, $t8
+ addu $t3, $t3, $t5
+ srl $t3, $t3, 8
+ and $t3, $t3, $t8
+// same divide for the other pair of channels (bytes 1 and 3 of src)
+ addu $t4, $t4, $t9
+ srl $t6, $t4, 8
+ and $t6, $t6, $t8
+ addu $t4, $t4, $t6
+ srl $t4, $t4, 8
+ and $t4, $t4, $t8
+// interleave the two channel pairs back into one 32-bit pixel
+ sll $t4, $t4, 8
+ or $t1, $t3, $t4
+
+
+ not $t2, $t1 // ~in()
+ srl $t2, $t2, 24
+
+ // over(): UN8_rb_MUL_UN8() and UN8_rb_ADD_UN8_rb()
+
+ and $t5, $t0, $t8
+ mul $t3, $t5, $t2
+
+ addiu $a0, $a0, 4 // dest++
+
+ srl $t6, $t0, 8
+ and $t6, $t6, $t8
+ mul $t4, $t6, $t2
+
+ addu $t3, $t3, $t9
+ srl $t5, $t3, 8
+ and $t5, $t5, $t8
+ addu $t3, $t3, $t5
+ srl $t3, $t3, 8
+ and $t3, $t3, $t8
+// saturating add of in(): a carry out of a lane turns that lane into 0xFF
+ and $t5, $t1, $t8
+ addu $t3, $t3, $t5
+ srl $t5, $t3, 8
+ and $t5, $t5, $t8
+ subu $t5, $t7, $t5
+ or $t3, $t3, $t5
+ and $t3, $t3, $t8
+
+ addu $t4, $t4, $t9
+ srl $t6, $t4, 8
+ and $t6, $t6, $t8
+ addu $t4, $t4, $t6
+ srl $t4, $t4, 8
+ and $t4, $t4, $t8
+
+ srl $t6, $t1, 8
+ and $t6, $t6, $t8
+ addu $t4, $t4, $t6
+ srl $t6, $t4, 8
+ and $t6, $t6, $t8
+ subu $t6, $t7, $t6
+ or $t4, $t4, $t6
+ and $t4, $t4, $t8
+
+ sll $t4, $t4, 8
+ or $t3, $t3, $t4
+// the store below sits in the branch delay slot; $a0 was already advanced, hence -4
+ bne $a0, $a3, 0b
+ sw $t3, -4($a0) // dest
+
+1:
+ jr $ra
+ nop
+
+ .end mips32r2_composite_over_n_8_8888_inner
+
diff --git a/pixman/pixman-mips32r2.c b/pixman/pixman-mips32r2.c
new file mode 100644
index 0000000..ec56a18
--- /dev/null
+++ b/pixman/pixman-mips32r2.c
@@ -0,0 +1,112 @@
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include "pixman-private.h"
+
+
+// assembly-language functions (defined in pixman-mips32r2-asm.S)
+// fills a width x height block of 32-bit words at (x, y) with xor; always returns 1
+pixman_bool_t
+mips32r2_pixman_fill32(uint32_t *bits, int stride, int x, int y,
+ int width, int height, uint32_t xor);
+// one scanline of solid-src OVER dest, with one 8-bit mask value per pixel
+void
+mips32r2_composite_over_n_8_8888_inner(uint32_t *dest, uint32_t src,
+ const uint8_t *mask, int width);
+
+
+////////////////////////////////////////////////////////////////////////////////
+
+
+static pixman_bool_t
+mips32r2_fill(pixman_implementation_t *imp,
+              uint32_t *               bits,
+              int                      stride,
+              int                      bpp,
+              int                      x,
+              int                      y,
+              int                      width,
+              int                      height,
+              uint32_t                 xor)
+{
+    pixman_bool_t filled;
+
+    /*
+     * Only 32 bits-per-pixel fills go to the assembly routine; every
+     * other depth is forwarded unchanged to the delegate implementation.
+     */
+    if (bpp != 32)
+    {
+        filled = _pixman_implementation_fill(imp->delegate, bits, stride, bpp,
+                                             x, y, width, height, xor);
+    }
+    else
+    {
+        filled = mips32r2_pixman_fill32(bits, stride, x, y, width, height, xor);
+    }
+    return filled;
+}
+
+
+/*
+ * Fast path for OVER with a solid source, an 8-bit mask and a 32-bit
+ * destination: each scanline is handed to the assembly inner loop.
+ */
+static void
+mips32r2_fast_composite_over_n_8_8888(pixman_implementation_t *imp,
+                                      pixman_op_t              op,
+                                      pixman_image_t *         src_image,
+                                      pixman_image_t *         mask_image,
+                                      pixman_image_t *         dst_image,
+                                      int32_t                  src_x,
+                                      int32_t                  src_y,
+                                      int32_t                  mask_x,
+                                      int32_t                  mask_y,
+                                      int32_t                  dest_x,
+                                      int32_t                  dest_y,
+                                      int32_t                  width,
+                                      int32_t                  height)
+{
+    uint32_t src;
+    uint32_t *dst_line;
+    uint8_t *mask_line;
+    int dst_stride, mask_stride;
+
+    src = _pixman_image_get_solid (imp, src_image, dst_image->bits.format);
+    /* an all-zero source adds nothing under OVER, so there is nothing to do */
+    if (src == 0)
+        return;
+
+    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
+
+    while (height--)
+    {
+        mips32r2_composite_over_n_8_8888_inner(dst_line, src, mask_line, width);
+        dst_line += dst_stride;
+        mask_line += mask_stride;
+    }
+}
+
+
+////////////////////////////////////////////////////////////////////////////////
+
+/* Fast-path table for this implementation; the PIXMAN_OP_NONE entry ends it. */
+static const pixman_fast_path_t mips32r2_fast_paths[] =
+{
+ PIXMAN_STD_FAST_PATH(OVER, solid, a8, a8r8g8b8, mips32r2_fast_composite_over_n_8_8888),
+ { PIXMAN_OP_NONE }
+};
+
+
+pixman_implementation_t *
+_pixman_implementation_create_mips32r2 (pixman_implementation_t *delegate)
+{
+    pixman_implementation_t *self;
+
+    /* wrap the delegate with the MIPS32R2 fast paths, then install the fill hook */
+    self = _pixman_implementation_create (delegate, mips32r2_fast_paths);
+    self->fill = mips32r2_fill;
+    return self;
+}
diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
index 1473dc4..813598f 100644
--- a/pixman/pixman-private.h
+++ b/pixman/pixman-private.h
@@ -567,6 +567,17 @@ pixman_implementation_t *
_pixman_implementation_create_vmx (pixman_implementation_t *fallback);
#endif
+#ifdef USE_MIPS32R2
+pixman_implementation_t *
+_pixman_implementation_create_mips32r2 (pixman_implementation_t *delegate);
+#endif /* USE_MIPS32R2 */
+
+#ifdef USE_MIPS_DSPASE1
+pixman_implementation_t *
+_pixman_implementation_create_mips_dspase1 (pixman_implementation_t *delegate);
+#endif /* USE_MIPS_DSPASE1 */
+
+
pixman_implementation_t *
_pixman_choose_implementation (void);
--
1.7.0.4
More information about the Pixman
mailing list