[Pixman] [PATCH 3/8] MIPS: MIPS32r2: Basic infrastructure for MIPS32r2 optimizations

Nemanja Lukic nemanja.lukic at rt-rk.com
Mon Aug 19 07:16:51 PDT 2013


Some of the optimizations introduced in previous DSPr2 commits were not DSPr2
specific. Some of the fast-paths didn't use DSPr2 instructions at all, and
instead relied on the more generic MIPS32r2 instruction set. Since Pixman's
run-time CPU detection only added DSPr2 fast-paths on 74K MIPS cores, these
optimizations couldn't be used on cores that don't support DSPr2 but do
support MIPS32r2 instructions (almost all newer MIPS CPU cores, e.g.
4K, 24K, 34K, 1004K, etc.).
This patch extracts those MIPS32r2-specific optimizations into a new mips32r2
set of fast-paths, and adds infrastructure for future MIPS32r2-only
optimizations with appropriate build-time and run-time support.
The following MIPS32r2 optimizations, introduced in previous DSPr2 patches,
were tested on a MIPS 24Kc core:

Performance numbers before/after on MIPS-24kc @ 500 MHz

Reference (before):

    src_x888_8888 =  L1:  62.41  L2:  25.76  M: 26.77 ( 89.31%)  HT: 23.82  VT: 24.27  R: 20.11  RT: 11.37 ( 109Kops/s)
    src_0565_0565 =  L1: 253.40  L2:  60.85  M: 60.00 (100.03%)  HT: 31.04  VT: 30.59  R: 29.73  RT: 13.50 ( 122Kops/s)
    src_8888_8888 =  L1: 190.67  L2:  30.14  M: 30.43 (101.24%)  HT: 20.07  VT: 19.03  R: 19.04  RT: 10.20 ( 100Kops/s)
    src_0888_0888 =  L1: 180.32  L2:  40.88  M: 40.77 (101.87%)  HT: 23.84  VT: 23.56  R: 24.04  RT: 11.36 ( 108Kops/s)
    src_n_8888    =  L1:  94.93  L2:  56.78  M: 58.23 ( 96.88%)  HT: 49.54  VT: 47.02  R: 42.94  RT: 21.58 ( 154Kops/s)

Optimized (with these optimizations):

    src_x888_8888 =  L1: 124.93  L2:  42.84  M: 44.22 (146.85%)  HT: 26.36  VT: 27.24  R: 24.26  RT: 11.77 ( 111Kops/s)
    src_0565_0565 =  L1: 205.31  L2:  90.44  M:105.47 (175.71%)  HT: 37.51  VT: 34.80  R: 31.44  RT: 13.42 ( 121Kops/s)
    src_8888_8888 =  L1: 180.13  L2:  50.91  M: 53.58 (178.16%)  HT: 24.70  VT: 22.78  R: 21.63  RT: 11.53 ( 109Kops/s)
    src_0888_0888 =  L1: 142.02  L2:  67.91  M: 71.48 (178.40%)  HT: 29.96  VT: 26.80  R: 25.44  RT: 11.94 ( 112Kops/s)
    src_n_8888    =  L1: 250.50  L2: 143.04  M:181.86 (302.13%)  HT: 69.00  VT: 61.73  R: 55.14  RT: 20.77 ( 148Kops/s)
---
 configure.ac                    |   41 +++
 pixman/Makefile.am              |   20 ++-
 pixman/pixman-mips-common-asm.h |  282 +++++++++++++++++++++
 pixman/pixman-mips-common.h     |  450 +++++++++++++++++++++++++++++++++
 pixman/pixman-mips-dspr2-asm.S  |  361 +++++++++-------------------
 pixman/pixman-mips-dspr2-asm.h  |  280 +---------------------
 pixman/pixman-mips-dspr2.c      |  219 ++++++++---------
 pixman/pixman-mips-dspr2.h      |  438 ---------------------------------
 pixman/pixman-mips-memcpy-asm.S |  382 ----------------------------
 pixman/pixman-mips.c            |   58 ++++-
 pixman/pixman-mips32r2-asm.S    |  519 +++++++++++++++++++++++++++++++++++++++
 pixman/pixman-mips32r2-asm.h    |   84 +++++++
 pixman/pixman-mips32r2.c        |  185 ++++++++++++++
 pixman/pixman-private.h         |    5 +
 14 files changed, 1850 insertions(+), 1474 deletions(-)
 create mode 100644 pixman/pixman-mips-common-asm.h
 create mode 100644 pixman/pixman-mips-common.h
 delete mode 100644 pixman/pixman-mips-dspr2.h
 delete mode 100644 pixman/pixman-mips-memcpy-asm.S
 create mode 100644 pixman/pixman-mips32r2-asm.S
 create mode 100644 pixman/pixman-mips32r2-asm.h
 create mode 100644 pixman/pixman-mips32r2.c

diff --git a/configure.ac b/configure.ac
index f9390e9..08b5987 100644
--- a/configure.ac
+++ b/configure.ac
@@ -663,6 +663,47 @@ fi
 AM_CONDITIONAL(USE_ARM_IWMMXT, test $have_iwmmxt_intrinsics = yes)
 
 dnl ==========================================================================
+dnl Check if assembler is gas compatible and supports MIPS32r2 instructions
+dnl by test-compiling "ins" (bit-field insert, added in MIPS32 Release 2).
+have_mips32r2=no
+AC_MSG_CHECKING(whether to use MIPS32r2 assembler)
+xserver_save_CFLAGS=$CFLAGS
+CFLAGS="-mips32r2 $CFLAGS"
+
+AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
+int
+main () {
+    int a = 0, b = 0;
+    __asm__ __volatile__ (
+        "ins  %[b], %[a], 14, 14   \n\t"
+        : [b] "+r" (b)
+        : [a] "r" (a)
+    );
+    return b;
+}]])], have_mips32r2=yes)
+CFLAGS=$xserver_save_CFLAGS
+
+AC_ARG_ENABLE(mips32r2,
+   [AC_HELP_STRING([--disable-mips32r2],
+                   [disable MIPS32r2 fast paths])],
+   [enable_mips32r2=$enableval], [enable_mips32r2=auto])
+
+if test $enable_mips32r2 = no ; then
+   have_mips32r2=disabled
+fi
+
+if test $have_mips32r2 = yes ; then
+   AC_DEFINE(USE_MIPS32R2, 1, [use MIPS32r2 assembly optimizations])
+fi
+
+AM_CONDITIONAL(USE_MIPS32R2, test $have_mips32r2 = yes)
+
+AC_MSG_RESULT($have_mips32r2)
+if test $enable_mips32r2 = yes && test $have_mips32r2 = no ; then
+   AC_MSG_ERROR([MIPS32r2 instructions not detected])
+fi
+
+dnl ==========================================================================
 dnl Check if assembler is gas compatible and supports MIPS DSPr2 instructions
 
 have_mips_dspr2=no
diff --git a/pixman/Makefile.am b/pixman/Makefile.am
index b9ea754..894d111 100644
--- a/pixman/Makefile.am
+++ b/pixman/Makefile.am
@@ -100,15 +100,29 @@ libpixman-iwmmxt.la: libpixman_iwmmxt_la-pixman-mmx.lo $(libpixman_iwmmxt_la_DEP
 	$(AM_V_CCLD)$(libpixman_iwmmxt_la_LINK) libpixman_iwmmxt_la-pixman-mmx.lo $(libpixman_iwmmxt_la_LIBADD) $(LIBS)
 endif
 
+# mips32r2 code
+if USE_MIPS32R2
+noinst_LTLIBRARIES += libpixman-mips32r2.la
+libpixman_mips32r2_la_SOURCES = \
+        pixman-mips32r2.c \
+        pixman-mips-common.h \
+        pixman-mips-common-asm.h \
+        pixman-mips32r2-asm.S \
+        pixman-mips32r2-asm.h
+libpixman_1_la_LIBADD += libpixman-mips32r2.la
+
+ASM_CFLAGS_mips32r2=
+endif
+
 # mips dspr2 code
 if USE_MIPS_DSPR2
 noinst_LTLIBRARIES += libpixman-mips-dspr2.la
 libpixman_mips_dspr2_la_SOURCES = \
         pixman-mips-dspr2.c \
-        pixman-mips-dspr2.h \
+        pixman-mips-common.h \
+        pixman-mips-common-asm.h \
         pixman-mips-dspr2-asm.S \
-        pixman-mips-dspr2-asm.h \
-        pixman-mips-memcpy-asm.S
+        pixman-mips-dspr2-asm.h
 libpixman_1_la_LIBADD += libpixman-mips-dspr2.la
 
 ASM_CFLAGS_mips_dspr2=
diff --git a/pixman/pixman-mips-common-asm.h b/pixman/pixman-mips-common-asm.h
new file mode 100644
index 0000000..186f17a
--- /dev/null
+++ b/pixman/pixman-mips-common-asm.h
@@ -0,0 +1,282 @@
+/*
+ * Copyright (c) 2012-2013
+ *      MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Author:  Nemanja Lukic (nemanja.lukic at rt-rk.com)
+ */
+
+#ifndef PIXMAN_MIPS_COMMON_ASM_H
+#define PIXMAN_MIPS_COMMON_ASM_H
+
+#define zero $0
+#define AT   $1
+#define v0   $2
+#define v1   $3
+#define a0   $4
+#define a1   $5
+#define a2   $6
+#define a3   $7
+#define t0   $8
+#define t1   $9
+#define t2   $10
+#define t3   $11
+#define t4   $12
+#define t5   $13
+#define t6   $14
+#define t7   $15
+#define s0   $16
+#define s1   $17
+#define s2   $18
+#define s3   $19
+#define s4   $20
+#define s5   $21
+#define s6   $22
+#define s7   $23
+#define t8   $24
+#define t9   $25
+#define k0   $26
+#define k1   $27
+#define gp   $28
+#define sp   $29
+#define fp   $30
+#define s8   $30
+#define ra   $31
+
+/*
+ * LEAF_MIPS - declare global leaf routine symbol##suffix for MIPS; pushes
+ * assembler options and selects arch=mips32r2, noreorder, noat (see END)
+ */
+#define LEAF_MIPS(symbol, suffix)                       \
+                .globl  symbol##suffix;                 \
+                .align  2;                              \
+                .type   symbol##suffix, @function;      \
+                .ent    symbol##suffix, 0;              \
+symbol##suffix: .frame  sp, 0, ra;                      \
+                .set    push;                           \
+                .set    arch=mips32r2;                  \
+                .set    noreorder;                      \
+                .set    noat;
+
+/*
+ * LEAF_MIPS32R2 - declare leaf routine for MIPS32r2
+ */
+#define LEAF_MIPS32R2(symbol)                           \
+LEAF_MIPS(symbol, _mips32r2)
+
+/*
+ * LEAF_MIPS_DSPR2 - declare leaf routine for MIPS DSPr2
+ */
+#define LEAF_MIPS_DSPR2(symbol)                         \
+LEAF_MIPS(symbol, _mips_dspr2)                          \
+                .set    dspr2;
+
+/*
+ * END - mark end of function##suffix; pops options pushed by LEAF_MIPS
+ */
+#define END(function, suffix)                           \
+                .set    pop;                            \
+                .end    function##suffix;               \
+                .size   function##suffix,.-function##suffix
+
+/*
+ * END_MIPS32R2 - mark end of mips32r2 function
+ */
+#define END_MIPS32R2(function)                          \
+END(function, _mips32r2)
+
+/*
+ * END_MIPS_DSPR2 - mark end of mips_dspr2 function
+ */
+#define END_MIPS_DSPR2(function)                        \
+END(function, _mips_dspr2)
+
+
+/*
+ * Checks that the stack offset is big enough for storing/restoring regs_num
+ * registers to/from the stack. Nominally regs_num*4 bytes are needed, but
+ * since the MIPS ABI allows usage of the first 16 bytes of the stack frame
+ * (reserved for the input arguments of the function, already held in a0-a3),
+ * the offset only has to cover regs_num*4 - 16 bytes. Assembly stops with
+ * an error when the offset is smaller than that.
+ */
+.macro CHECK_STACK_OFFSET regs_num, stack_offset
+.if \stack_offset < \regs_num * 4 - 16
+.error "Stack offset too small."
+.endif
+.endm
+
+/*
+ * Saves set of registers on stack. Maximum number of registers that
+ * can be saved on stack is limited to 14 (a0-a3, v0-v1 and s0-s7).
+ * Stack offset is number of bytes that are subtracted from stack pointer
+ * (sp) before registers are pushed in order to provide enough space on stack
+ * (offset must be multiple of 4, and must be big enough, as described by
+ * CHECK_STACK_OFFSET macro). This macro is intended to be used in
+ * combination with RESTORE_REGS_FROM_STACK macro. Example:
+ *  SAVE_REGS_ON_STACK      4, v0, v1, s0, s1
+ *  RESTORE_REGS_FROM_STACK 4, v0, v1, s0, s1
+ */
+.macro SAVE_REGS_ON_STACK stack_offset = 0, r1, \
+                          r2  = 0, r3  = 0, r4  = 0, \
+                          r5  = 0, r6  = 0, r7  = 0, \
+                          r8  = 0, r9  = 0, r10 = 0, \
+                          r11 = 0, r12 = 0, r13 = 0, \
+                          r14 = 0
+    .if (\stack_offset < 0) || (\stack_offset - (\stack_offset / 4) * 4)
+    .error "Stack offset must be non-negative and a multiple of 4."
+    .endif
+    .if \stack_offset != 0
+    addiu           sp, sp, -\stack_offset
+    .endif
+    sw              \r1, 0(sp)
+    .if \r2 != 0
+    sw              \r2, 4(sp)
+    .endif
+    .if \r3 != 0
+    sw              \r3, 8(sp)
+    .endif
+    .if \r4 != 0
+    sw              \r4, 12(sp)
+    .endif
+    .if \r5 != 0
+    CHECK_STACK_OFFSET 5, \stack_offset
+    sw              \r5, 16(sp)
+    .endif
+    .if \r6 != 0
+    CHECK_STACK_OFFSET 6, \stack_offset
+    sw              \r6, 20(sp)
+    .endif
+    .if \r7 != 0
+    CHECK_STACK_OFFSET 7, \stack_offset
+    sw              \r7, 24(sp)
+    .endif
+    .if \r8 != 0
+    CHECK_STACK_OFFSET 8, \stack_offset
+    sw              \r8, 28(sp)
+    .endif
+    .if \r9 != 0
+    CHECK_STACK_OFFSET 9, \stack_offset
+    sw              \r9, 32(sp)
+    .endif
+    .if \r10 != 0
+    CHECK_STACK_OFFSET 10, \stack_offset
+    sw              \r10, 36(sp)
+    .endif
+    .if \r11 != 0
+    CHECK_STACK_OFFSET 11, \stack_offset
+    sw              \r11, 40(sp)
+    .endif
+    .if \r12 != 0
+    CHECK_STACK_OFFSET 12, \stack_offset
+    sw              \r12, 44(sp)
+    .endif
+    .if \r13 != 0
+    CHECK_STACK_OFFSET 13, \stack_offset
+    sw              \r13, 48(sp)
+    .endif
+    .if \r14 != 0
+    CHECK_STACK_OFFSET 14, \stack_offset
+    sw              \r14, 52(sp)
+    .endif
+.endm
+
+/*
+ * Restores set of registers from stack. Maximum number of registers that
+ * can be restored from stack is limited to 14 (a0-a3, v0-v1 and s0-s7).
+ * Stack offset is number of bytes that are added to stack pointer (sp)
+ * after registers are restored (offset must be multiple of 4, and must
+ * be big enough, as described by CHECK_STACK_OFFSET macro). This macro is
+ * intended to be used in combination with SAVE_REGS_ON_STACK macro.
+ * Example:
+ *  SAVE_REGS_ON_STACK      4, v0, v1, s0, s1
+ *  RESTORE_REGS_FROM_STACK 4, v0, v1, s0, s1
+ */
+.macro RESTORE_REGS_FROM_STACK stack_offset = 0, r1, \
+                               r2  = 0, r3  = 0, r4  = 0, \
+                               r5  = 0, r6  = 0, r7  = 0, \
+                               r8  = 0, r9  = 0, r10 = 0, \
+                               r11 = 0, r12 = 0, r13 = 0, \
+                               r14 = 0
+    .if (\stack_offset < 0) || (\stack_offset - (\stack_offset/4)*4)
+    .error "Stack offset must be non-negative and a multiple of 4."
+    .endif
+    lw              \r1, 0(sp)
+    .if \r2 != 0
+    lw              \r2, 4(sp)
+    .endif
+    .if \r3 != 0
+    lw              \r3, 8(sp)
+    .endif
+    .if \r4 != 0
+    lw              \r4, 12(sp)
+    .endif
+    .if \r5 != 0
+    CHECK_STACK_OFFSET 5, \stack_offset
+    lw              \r5, 16(sp)
+    .endif
+    .if \r6 != 0
+    CHECK_STACK_OFFSET 6, \stack_offset
+    lw              \r6, 20(sp)
+    .endif
+    .if \r7 != 0
+    CHECK_STACK_OFFSET 7, \stack_offset
+    lw              \r7, 24(sp)
+    .endif
+    .if \r8 != 0
+    CHECK_STACK_OFFSET 8, \stack_offset
+    lw              \r8, 28(sp)
+    .endif
+    .if \r9 != 0
+    CHECK_STACK_OFFSET 9, \stack_offset
+    lw              \r9, 32(sp)
+    .endif
+    .if \r10 != 0
+    CHECK_STACK_OFFSET 10, \stack_offset
+    lw              \r10, 36(sp)
+    .endif
+    .if \r11 != 0
+    CHECK_STACK_OFFSET 11, \stack_offset
+    lw              \r11, 40(sp)
+    .endif
+    .if \r12 != 0
+    CHECK_STACK_OFFSET 12, \stack_offset
+    lw              \r12, 44(sp)
+    .endif
+    .if \r13 != 0
+    CHECK_STACK_OFFSET 13, \stack_offset
+    lw              \r13, 48(sp)
+    .endif
+    .if \r14 != 0
+    CHECK_STACK_OFFSET 14, \stack_offset
+    lw              \r14, 52(sp)
+    .endif
+    .if \stack_offset != 0
+    addiu           sp, sp, \stack_offset
+    .endif
+.endm
+
+#endif /* PIXMAN_MIPS_COMMON_ASM_H */
diff --git a/pixman/pixman-mips-common.h b/pixman/pixman-mips-common.h
new file mode 100644
index 0000000..1c64964
--- /dev/null
+++ b/pixman/pixman-mips-common.h
@@ -0,0 +1,450 @@
+/*
+ * Copyright (c) 2012-2013
+ *      MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Author:  Nemanja Lukic (nemanja.lukic at rt-rk.com)
+ */
+
+#ifndef PIXMAN_MIPS_COMMON_H
+#define PIXMAN_MIPS_COMMON_H
+
+#include "pixman-private.h"
+#include "pixman-inlines.h"
+
+#define SKIP_ZERO_SRC  1
+#define SKIP_ZERO_MASK 2
+#define DO_FAST_MEMCPY 3
+
+void
+pixman_fast_memcpy_mips32r2 (void *dst, void *src, uint32_t n_bytes);
+void
+pixman_fill_buff32_mips32r2 (void *dst, uint32_t n_bytes, uint32_t value);
+
+#ifdef USE_MIPS_DSPR2
+void
+pixman_fill_buff16_mips_dspr2 (void *dst, uint32_t n_bytes, uint16_t value);
+#endif
+
+/* Declare pixman_composite_<name>_asm<suffix> and wrap it per scanline in
+ * mips_composite_<name>; flags == DO_FAST_MEMCPY uses the memcpy helper. */
+#define PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST(flags, name,          \
+                                           src_type, src_cnt,    \
+                                           dst_type, dst_cnt,    \
+                                           suffix)               \
+void                                                             \
+pixman_composite_##name##_asm##suffix (dst_type *dst,            \
+                                       src_type *src,            \
+                                       int32_t   w);             \
+                                                                 \
+static void                                                      \
+mips_composite_##name (pixman_implementation_t *imp,             \
+                       pixman_composite_info_t *info)            \
+{                                                                \
+    PIXMAN_COMPOSITE_ARGS (info);                                \
+    dst_type *dst_line, *dst;                                    \
+    src_type *src_line, *src;                                    \
+    int32_t dst_stride, src_stride;                              \
+    int bpp = PIXMAN_FORMAT_BPP (dest_image->bits.format) / 8;   \
+                                                                 \
+    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type,    \
+                           src_stride, src_line, src_cnt);       \
+    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type, \
+                           dst_stride, dst_line, dst_cnt);       \
+                                                                 \
+    while (height--)                                             \
+    {                                                            \
+      dst = dst_line;                                            \
+      dst_line += dst_stride;                                    \
+      src = src_line;                                            \
+      src_line += src_stride;                                    \
+                                                                 \
+      if (flags == DO_FAST_MEMCPY)                               \
+        pixman_fast_memcpy_mips32r2 (dst, src, width * bpp);     \
+      else                                                       \
+        pixman_composite_##name##_asm##suffix (dst, src, width); \
+    }                                                            \
+}
+
+/* Solid-source variant: src is fetched via _pixman_image_get_solid and passed
+ * by value; SKIP_ZERO_SRC in flags returns early when that value is 0. */
+#define PIXMAN_MIPS_BIND_FAST_PATH_N_DST(flags, name,            \
+                                         dst_type, dst_cnt,      \
+                                         suffix)                 \
+void                                                             \
+pixman_composite_##name##_asm##suffix (dst_type *dst,            \
+                                       uint32_t  src,            \
+                                       int32_t   w);             \
+                                                                 \
+static void                                                      \
+mips_composite_##name (pixman_implementation_t *imp,             \
+                       pixman_composite_info_t *info)            \
+{                                                                \
+    PIXMAN_COMPOSITE_ARGS (info);                                \
+    dst_type  *dst_line, *dst;                                   \
+    int32_t    dst_stride;                                       \
+    uint32_t   src;                                              \
+                                                                 \
+    src = _pixman_image_get_solid (                              \
+    imp, src_image, dest_image->bits.format);                    \
+                                                                 \
+    if ((flags & SKIP_ZERO_SRC) && src == 0)                     \
+        return;                                                  \
+                                                                 \
+    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type, \
+                           dst_stride, dst_line, dst_cnt);       \
+                                                                 \
+    while (height--)                                             \
+    {                                                            \
+        dst = dst_line;                                          \
+        dst_line += dst_stride;                                  \
+                                                                 \
+        pixman_composite_##name##_asm##suffix (dst, src, width); \
+    }                                                            \
+}
+
+/* Solid source plus per-pixel mask: calls the asm routine once per scanline
+ * with (dst, src, mask, width); SKIP_ZERO_SRC returns early when src == 0. */
+#define PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST(flags, name,             \
+                                              mask_type, mask_cnt,     \
+                                              dst_type, dst_cnt,       \
+                                              suffix)                  \
+void                                                                   \
+pixman_composite_##name##_asm##suffix (dst_type  *dst,                 \
+                                       uint32_t  src,                  \
+                                       mask_type *mask,                \
+                                       int32_t   w);                   \
+                                                                       \
+static void                                                            \
+mips_composite_##name (pixman_implementation_t *imp,                   \
+                       pixman_composite_info_t *info)                  \
+{                                                                      \
+    PIXMAN_COMPOSITE_ARGS (info);                                      \
+    dst_type  *dst_line, *dst;                                         \
+    mask_type *mask_line, *mask;                                       \
+    int32_t    dst_stride, mask_stride;                                \
+    uint32_t   src;                                                    \
+                                                                       \
+    src = _pixman_image_get_solid (                                    \
+        imp, src_image, dest_image->bits.format);                      \
+                                                                       \
+    if ((flags & SKIP_ZERO_SRC) && src == 0)                           \
+        return;                                                        \
+                                                                       \
+    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type,       \
+                           dst_stride, dst_line, dst_cnt);             \
+    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type,      \
+                           mask_stride, mask_line, mask_cnt);          \
+                                                                       \
+    while (height--)                                                   \
+    {                                                                  \
+        dst = dst_line;                                                \
+        dst_line += dst_stride;                                        \
+        mask = mask_line;                                              \
+        mask_line += mask_stride;                                      \
+        pixman_composite_##name##_asm##suffix (dst, src, mask, width); \
+    }                                                                  \
+}
+
+/* Per-pixel source with solid mask (from _pixman_image_get_solid), one asm
+ * call per scanline; SKIP_ZERO_MASK returns early when mask == 0. */
+#define PIXMAN_MIPS_BIND_FAST_PATH_SRC_N_DST(flags, name,              \
+                                            src_type, src_cnt,         \
+                                            dst_type, dst_cnt,         \
+                                            suffix)                    \
+void                                                                   \
+pixman_composite_##name##_asm##suffix (dst_type  *dst,                 \
+                                       src_type  *src,                 \
+                                       uint32_t   mask,                \
+                                       int32_t    w);                  \
+                                                                       \
+static void                                                            \
+mips_composite_##name (pixman_implementation_t *imp,                   \
+                       pixman_composite_info_t *info)                  \
+{                                                                      \
+    PIXMAN_COMPOSITE_ARGS (info);                                      \
+    dst_type  *dst_line, *dst;                                         \
+    src_type  *src_line, *src;                                         \
+    int32_t    dst_stride, src_stride;                                 \
+    uint32_t   mask;                                                   \
+                                                                       \
+    mask = _pixman_image_get_solid (                                   \
+        imp, mask_image, dest_image->bits.format);                     \
+                                                                       \
+    if ((flags & SKIP_ZERO_MASK) && mask == 0)                         \
+        return;                                                        \
+                                                                       \
+    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type,       \
+                           dst_stride, dst_line, dst_cnt);             \
+    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type,          \
+                           src_stride, src_line, src_cnt);             \
+                                                                       \
+    while (height--)                                                   \
+    {                                                                  \
+        dst = dst_line;                                                \
+        dst_line += dst_stride;                                        \
+        src = src_line;                                                \
+        src_line += src_stride;                                        \
+                                                                       \
+        pixman_composite_##name##_asm##suffix (dst, src, mask, width); \
+    }                                                                  \
+}
+
+/* Per-pixel source and mask: fetches dst, src and mask line pointers, then
+ * calls the asm routine once per scanline with (dst, src, mask, width). */
+#define PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST(name, src_type, src_cnt, \
+                                                mask_type, mask_cnt,     \
+                                                dst_type, dst_cnt,       \
+                                                suffix)                  \
+void                                                                     \
+pixman_composite_##name##_asm##suffix (dst_type  *dst,                   \
+                                       src_type  *src,                   \
+                                       mask_type *mask,                  \
+                                       int32_t   w);                     \
+                                                                         \
+static void                                                              \
+mips_composite_##name (pixman_implementation_t *imp,                     \
+                       pixman_composite_info_t *info)                    \
+{                                                                        \
+    PIXMAN_COMPOSITE_ARGS (info);                                        \
+    dst_type  *dst_line, *dst;                                           \
+    src_type  *src_line, *src;                                           \
+    mask_type *mask_line, *mask;                                         \
+    int32_t    dst_stride, src_stride, mask_stride;                      \
+                                                                         \
+    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type,         \
+                           dst_stride, dst_line, dst_cnt);               \
+    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type,            \
+                           src_stride, src_line, src_cnt);               \
+    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type,        \
+                           mask_stride, mask_line, mask_cnt);            \
+                                                                         \
+    while (height--)                                                     \
+    {                                                                    \
+        dst = dst_line;                                                  \
+        dst_line += dst_stride;                                          \
+        mask = mask_line;                                                \
+        mask_line += mask_stride;                                        \
+        src = src_line;                                                  \
+        src_line += src_stride;                                          \
+        pixman_composite_##name##_asm##suffix (dst, src, mask, width);   \
+    }                                                                    \
+}
+
+/****************************************************************************/
+
+/*
+ * PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_DST (name, op, src_type, dst_type,
+ *                                          suffix)
+ *
+ * Declares the external scanline routine
+ * pixman_scaled_nearest_scanline_<name>_<op>_asm<suffix> and defines the
+ * inline wrapper scaled_nearest_scanline_mips_<name>_<op> around it.  The
+ * wrapper has two extra trailing parameters, max_vx and zero_src, which it
+ * accepts but does not pass on to the routine.
+ *
+ * The wrapper is then handed to FAST_NEAREST_MAINLOOP three times, once
+ * each for the COVER, NONE and PAD variants, under the names
+ * mips_<name>_cover_<op>, mips_<name>_none_<op> and mips_<name>_pad_<op>.
+ */
+#define PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_DST(name, op, src_type,          \
+                                                dst_type, suffix)            \
+void                                                                         \
+pixman_scaled_nearest_scanline_##name##_##op##_asm##suffix (                 \
+    dst_type       *dst,                                                     \
+    const src_type *src,                                                     \
+    int32_t         w,                                                       \
+    pixman_fixed_t  vx,                                                      \
+    pixman_fixed_t  unit_x);                                                 \
+                                                                             \
+static force_inline void                                                     \
+scaled_nearest_scanline_mips_##name##_##op (                                 \
+    dst_type       *dst,                                                     \
+    const src_type *src,                                                     \
+    int32_t         w,                                                       \
+    pixman_fixed_t  vx,                                                      \
+    pixman_fixed_t  unit_x,                                                  \
+    pixman_fixed_t  max_vx,                                                  \
+    pixman_bool_t   zero_src)                                                \
+{                                                                            \
+    /* max_vx and zero_src are accepted but not forwarded. */                \
+    pixman_scaled_nearest_scanline_##name##_##op##_asm##suffix (             \
+        dst, src, w, vx, unit_x);                                            \
+}                                                                            \
+                                                                             \
+FAST_NEAREST_MAINLOOP (mips_##name##_cover_##op,                             \
+                       scaled_nearest_scanline_mips_##name##_##op,           \
+                       src_type, dst_type, COVER)                            \
+FAST_NEAREST_MAINLOOP (mips_##name##_none_##op,                              \
+                       scaled_nearest_scanline_mips_##name##_##op,           \
+                       src_type, dst_type, NONE)                             \
+FAST_NEAREST_MAINLOOP (mips_##name##_pad_##op,                               \
+                       scaled_nearest_scanline_mips_##name##_##op,           \
+                       src_type, dst_type, PAD)
+
+/* Provide entries for the fast path table.
+ *
+ * Expands to three comma-separated entries -- the COVER, NONE and PAD
+ * SIMPLE_NEAREST_FAST_PATH_* variants -- each receiving the same
+ * (op, s, d, func) arguments.  There is no trailing comma after the last
+ * entry.
+ */
+#define PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH(op,s,d,func)                    \
+    SIMPLE_NEAREST_FAST_PATH_COVER (op,s,d,func),                            \
+    SIMPLE_NEAREST_FAST_PATH_NONE (op,s,d,func),                             \
+    SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func)
+
+
+/*****************************************************************************/
+
+/*
+ * PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_A8_DST (flags, name, op,
+ *                                             src_type, dst_type, suffix)
+ *
+ * Declares the external scanline routine
+ * pixman_scaled_nearest_scanline_<name>_<op>_asm<suffix>, which takes a
+ * uint8_t mask pointer in addition to dst/src, and defines the inline
+ * wrapper scaled_nearest_scanline_mips_<name>_<op> around it.
+ *
+ * The wrapper returns without calling the routine when SKIP_ZERO_SRC is set
+ * in `flags' and zero_src is true.  It receives the mask as its first
+ * argument but passes it third; max_vx is accepted and not forwarded.
+ *
+ * `flags' is parenthesized in the expansion so that a compound argument
+ * such as A | B is tested as a whole against SKIP_ZERO_SRC.
+ *
+ * Finally instantiates FAST_NEAREST_MAINLOOP_COMMON for the COVER, NONE and
+ * PAD variants with a uint8_t mask type.  NOTE(review): the trailing
+ * TRUE, FALSE arguments presumably mean "has mask" / "mask is not solid" --
+ * confirm against the FAST_NEAREST_MAINLOOP_COMMON definition.
+ */
+#define PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_A8_DST(flags, name, op,           \
+                                                  src_type, dst_type,         \
+                                                  suffix)                     \
+void                                                                          \
+pixman_scaled_nearest_scanline_##name##_##op##_asm##suffix (                  \
+                                                   dst_type *       dst,      \
+                                                   const src_type * src,      \
+                                                   const uint8_t *  mask,     \
+                                                   int32_t          w,        \
+                                                   pixman_fixed_t   vx,       \
+                                                   pixman_fixed_t   unit_x);  \
+                                                                              \
+static force_inline void                                                      \
+scaled_nearest_scanline_mips_##name##_##op (const uint8_t *  mask,            \
+                                            dst_type *       pd,              \
+                                            const src_type * ps,              \
+                                            int32_t          w,               \
+                                            pixman_fixed_t   vx,              \
+                                            pixman_fixed_t   unit_x,          \
+                                            pixman_fixed_t   max_vx,          \
+                                            pixman_bool_t    zero_src)        \
+{                                                                             \
+    if (((flags) & SKIP_ZERO_SRC) && zero_src)                                \
+        return;                                                               \
+    pixman_scaled_nearest_scanline_##name##_##op##_asm##suffix (pd, ps,       \
+                                                                mask, w,      \
+                                                                vx, unit_x);  \
+}                                                                             \
+                                                                              \
+FAST_NEAREST_MAINLOOP_COMMON (mips_##name##_cover_##op,                       \
+                              scaled_nearest_scanline_mips_##name##_##op,     \
+                              src_type, uint8_t, dst_type, COVER, TRUE, FALSE)\
+FAST_NEAREST_MAINLOOP_COMMON (mips_##name##_none_##op,                        \
+                              scaled_nearest_scanline_mips_##name##_##op,     \
+                              src_type, uint8_t, dst_type, NONE, TRUE, FALSE) \
+FAST_NEAREST_MAINLOOP_COMMON (mips_##name##_pad_##op,                         \
+                              scaled_nearest_scanline_mips_##name##_##op,     \
+                              src_type, uint8_t, dst_type, PAD, TRUE, FALSE)
+
+/* Provide entries for the fast path table.
+ *
+ * Expands to three comma-separated entries -- the COVER, NONE and PAD
+ * SIMPLE_NEAREST_A8_MASK_FAST_PATH_* variants -- each receiving the same
+ * (op, s, d, func) arguments.  There is no trailing comma after the last
+ * entry.
+ */
+#define PIXMAN_MIPS_SIMPLE_NEAREST_A8_MASK_FAST_PATH(op,s,d,func)             \
+    SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER (op,s,d,func),                     \
+    SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE (op,s,d,func),                      \
+    SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD (op,s,d,func)
+
+/*******************************************************************************/
+
+/*
+ * PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (flags, name, op,
+ *                                           src_type, dst_type, suffix)
+ *
+ * Declares the external scanline routine
+ * pixman_scaled_bilinear_scanline_<name>_<op>_asm<suffix>, which takes a
+ * destination pointer, top and bottom source row pointers, the width w,
+ * the two weights wt/wb and the vx/unit_x fixed-point values, and defines
+ * the inline wrapper scaled_bilinear_scanline_mips_<name>_<op> around it.
+ *
+ * The wrapper returns without calling the routine when SKIP_ZERO_SRC is set
+ * in `flags' and zero_src is true.  Its mask (const uint32_t *) and max_vx
+ * parameters are accepted but not forwarded.
+ *
+ * `flags' is parenthesized in the expansion so that a compound argument
+ * such as A | B is tested as a whole against SKIP_ZERO_SRC.
+ *
+ * Finally instantiates FAST_BILINEAR_MAINLOOP_COMMON for the COVER, NONE,
+ * PAD and NORMAL variants with a uint32_t mask type and FLAG_NONE.
+ */
+#define PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST(flags, name, op,               \
+                                                 src_type, dst_type,            \
+                                                 suffix)                        \
+void                                                                            \
+pixman_scaled_bilinear_scanline_##name##_##op##_asm##suffix (                   \
+                                             dst_type *       dst,              \
+                                             const src_type * src_top,          \
+                                             const src_type * src_bottom,       \
+                                             int32_t          w,                \
+                                             int              wt,               \
+                                             int              wb,               \
+                                             pixman_fixed_t   vx,               \
+                                             pixman_fixed_t   unit_x);          \
+static force_inline void                                                        \
+scaled_bilinear_scanline_mips_##name##_##op (dst_type *       dst,              \
+                                             const uint32_t * mask,             \
+                                             const src_type * src_top,          \
+                                             const src_type * src_bottom,       \
+                                             int32_t          w,                \
+                                             int              wt,               \
+                                             int              wb,               \
+                                             pixman_fixed_t   vx,               \
+                                             pixman_fixed_t   unit_x,           \
+                                             pixman_fixed_t   max_vx,           \
+                                             pixman_bool_t    zero_src)         \
+{                                                                               \
+    if (((flags) & SKIP_ZERO_SRC) && zero_src)                                  \
+        return;                                                                 \
+    pixman_scaled_bilinear_scanline_##name##_##op##_asm##suffix (dst, src_top,  \
+                                                                 src_bottom, w, \
+                                                                 wt, wb,        \
+                                                                 vx, unit_x);   \
+}                                                                               \
+                                                                                \
+FAST_BILINEAR_MAINLOOP_COMMON (mips_##name##_cover_##op,                        \
+                       scaled_bilinear_scanline_mips_##name##_##op,             \
+                       src_type, uint32_t, dst_type, COVER, FLAG_NONE)          \
+FAST_BILINEAR_MAINLOOP_COMMON (mips_##name##_none_##op,                         \
+                       scaled_bilinear_scanline_mips_##name##_##op,             \
+                       src_type, uint32_t, dst_type, NONE, FLAG_NONE)           \
+FAST_BILINEAR_MAINLOOP_COMMON (mips_##name##_pad_##op,                          \
+                       scaled_bilinear_scanline_mips_##name##_##op,             \
+                       src_type, uint32_t, dst_type, PAD, FLAG_NONE)            \
+FAST_BILINEAR_MAINLOOP_COMMON (mips_##name##_normal_##op,                       \
+                       scaled_bilinear_scanline_mips_##name##_##op,             \
+                       src_type, uint32_t, dst_type, NORMAL,                    \
+                       FLAG_NONE)
+
+/*****************************************************************************/
+
+/*
+ * PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_A8_DST (flags, name, op,
+ *                                              src_type, dst_type, suffix)
+ *
+ * Declares the external scanline routine
+ * pixman_scaled_bilinear_scanline_<name>_<op>_asm<suffix>, which takes a
+ * uint8_t mask pointer, and defines the inline wrapper
+ * scaled_bilinear_scanline_mips_<name>_<op> around it.
+ *
+ * Note the argument order of the external routine: the width comes LAST
+ * (dst, mask, top, bottom, wt, wb, x, ux, width), whereas the wrapper
+ * receives w directly after the two source row pointers.  The wrapper
+ * reorders accordingly; max_vx is accepted but not forwarded.
+ *
+ * The wrapper returns without calling the routine when SKIP_ZERO_SRC is set
+ * in `flags' and zero_src is true.  `flags' is parenthesized in the
+ * expansion so that a compound argument such as A | B is tested as a whole
+ * against SKIP_ZERO_SRC.
+ *
+ * Finally instantiates FAST_BILINEAR_MAINLOOP_COMMON for the COVER, NONE,
+ * PAD and NORMAL variants with a uint8_t mask type and
+ * FLAG_HAVE_NON_SOLID_MASK.
+ */
+#define PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_A8_DST(flags, name, op,          \
+                                                src_type, dst_type,           \
+                                                suffix)                       \
+void                                                                          \
+pixman_scaled_bilinear_scanline_##name##_##op##_asm##suffix (                 \
+                                             dst_type *       dst,            \
+                                             const uint8_t *  mask,           \
+                                             const src_type * top,            \
+                                             const src_type * bottom,         \
+                                             int              wt,             \
+                                             int              wb,             \
+                                             pixman_fixed_t   x,              \
+                                             pixman_fixed_t   ux,             \
+                                             int              width);         \
+                                                                              \
+static force_inline void                                                      \
+scaled_bilinear_scanline_mips_##name##_##op (dst_type *       dst,            \
+                                             const uint8_t *  mask,           \
+                                             const src_type * src_top,        \
+                                             const src_type * src_bottom,     \
+                                             int32_t          w,              \
+                                             int              wt,             \
+                                             int              wb,             \
+                                             pixman_fixed_t   vx,             \
+                                             pixman_fixed_t   unit_x,         \
+                                             pixman_fixed_t   max_vx,         \
+                                             pixman_bool_t    zero_src)       \
+{                                                                             \
+    if (((flags) & SKIP_ZERO_SRC) && zero_src)                                \
+        return;                                                               \
+    pixman_scaled_bilinear_scanline_##name##_##op##_asm##suffix (             \
+                      dst, mask, src_top, src_bottom, wt, wb, vx, unit_x, w); \
+}                                                                             \
+                                                                              \
+FAST_BILINEAR_MAINLOOP_COMMON (mips_##name##_cover_##op,                      \
+                       scaled_bilinear_scanline_mips_##name##_##op,           \
+                       src_type, uint8_t, dst_type, COVER,                    \
+                       FLAG_HAVE_NON_SOLID_MASK)                              \
+FAST_BILINEAR_MAINLOOP_COMMON (mips_##name##_none_##op,                       \
+                       scaled_bilinear_scanline_mips_##name##_##op,           \
+                       src_type, uint8_t, dst_type, NONE,                     \
+                       FLAG_HAVE_NON_SOLID_MASK)                              \
+FAST_BILINEAR_MAINLOOP_COMMON (mips_##name##_pad_##op,                        \
+                       scaled_bilinear_scanline_mips_##name##_##op,           \
+                       src_type, uint8_t, dst_type, PAD,                      \
+                       FLAG_HAVE_NON_SOLID_MASK)                              \
+FAST_BILINEAR_MAINLOOP_COMMON (mips_##name##_normal_##op,                     \
+                       scaled_bilinear_scanline_mips_##name##_##op,           \
+                       src_type, uint8_t, dst_type, NORMAL,                   \
+                       FLAG_HAVE_NON_SOLID_MASK)
+
+#endif /* PIXMAN_MIPS_COMMON_H */
diff --git a/pixman/pixman-mips-dspr2-asm.S b/pixman/pixman-mips-dspr2-asm.S
index 866e93e..f8eadf1 100644
--- a/pixman/pixman-mips-dspr2-asm.S
+++ b/pixman/pixman-mips-dspr2-asm.S
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012
+ * Copyright (c) 2012-2013
  *      MIPS Technologies, Inc., California.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -26,13 +26,13 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * Author:  Nemanja Lukic (nlukic at mips.com)
+ * Author:  Nemanja Lukic (nemanja.lukic at rt-rk.com)
  */
 
 #include "pixman-private.h"
 #include "pixman-mips-dspr2-asm.h"
 
-LEAF_MIPS_DSPR2(pixman_fill_buff16_mips)
+LEAF_MIPS_DSPR2(pixman_fill_buff16)
 /*
  * a0 - *dest
  * a1 - count (bytes)
@@ -86,58 +86,9 @@ LEAF_MIPS_DSPR2(pixman_fill_buff16_mips)
     jr       ra
      nop
 
-END(pixman_fill_buff16_mips)
+END_MIPS_DSPR2(pixman_fill_buff16)
 
-LEAF_MIPS32R2(pixman_fill_buff32_mips)
-/*
- * a0 - *dest
- * a1 - count (bytes)
- * a2 - value to fill buffer with
- */
-
-    beqz     a1, 3f
-     nop
-    srl      t1, a1, 5 /* t1 how many multiples of 32 bytes */
-    beqz     t1, 2f
-     nop
-1:
-    addiu    t1, t1, -1
-    beqz     t1, 11f
-     addiu   a1, a1, -32
-    pref     30, 32(a0)
-    sw       a2, 0(a0)
-    sw       a2, 4(a0)
-    sw       a2, 8(a0)
-    sw       a2, 12(a0)
-    sw       a2, 16(a0)
-    sw       a2, 20(a0)
-    sw       a2, 24(a0)
-    sw       a2, 28(a0)
-    b        1b
-     addiu   a0, a0, 32
-11:
-    sw       a2, 0(a0)
-    sw       a2, 4(a0)
-    sw       a2, 8(a0)
-    sw       a2, 12(a0)
-    sw       a2, 16(a0)
-    sw       a2, 20(a0)
-    sw       a2, 24(a0)
-    sw       a2, 28(a0)
-    addiu    a0, a0, 32
-2:
-    blez     a1, 3f
-     addiu   a1, a1, -4
-    sw       a2, 0(a0)
-    b        2b
-     addiu   a0, a0, 4
-3:
-    jr       ra
-     nop
-
-END(pixman_fill_buff32_mips)
-
-LEAF_MIPS_DSPR2(pixman_composite_src_8888_0565_asm_mips)
+LEAF_MIPS_DSPR2(pixman_composite_src_8888_0565_asm)
 /*
  * a0 - dst (r5g6b5)
  * a1 - src (a8r8g8b8)
@@ -178,9 +129,9 @@ LEAF_MIPS_DSPR2(pixman_composite_src_8888_0565_asm_mips)
     j        ra
      nop
 
-END(pixman_composite_src_8888_0565_asm_mips)
+END_MIPS_DSPR2(pixman_composite_src_8888_0565_asm)
 
-LEAF_MIPS_DSPR2(pixman_composite_src_0565_8888_asm_mips)
+LEAF_MIPS_DSPR2(pixman_composite_src_0565_8888_asm)
 /*
  * a0 - dst (a8r8g8b8)
  * a1 - src (r5g6b5)
@@ -220,98 +171,10 @@ LEAF_MIPS_DSPR2(pixman_composite_src_0565_8888_asm_mips)
     j        ra
      nop
 
-END(pixman_composite_src_0565_8888_asm_mips)
-
-LEAF_MIPS_DSPR2(pixman_composite_src_x888_8888_asm_mips)
-/*
- * a0 - dst (a8r8g8b8)
- * a1 - src (x8r8g8b8)
- * a2 - w
- */
-
-    beqz     a2, 4f
-     nop
-    li       t9, 0xff000000
-    srl      t8, a2, 3    /* t1 = how many multiples of 8 src pixels */
-    beqz     t8, 3f       /* branch if less than 8 src pixels */
-     nop
-1:
-    addiu    t8, t8, -1
-    beqz     t8, 2f
-     addiu   a2, a2, -8
-    pref     0, 32(a1)
-    lw       t0, 0(a1)
-    lw       t1, 4(a1)
-    lw       t2, 8(a1)
-    lw       t3, 12(a1)
-    lw       t4, 16(a1)
-    lw       t5, 20(a1)
-    lw       t6, 24(a1)
-    lw       t7, 28(a1)
-    addiu    a1, a1, 32
-    or       t0, t0, t9
-    or       t1, t1, t9
-    or       t2, t2, t9
-    or       t3, t3, t9
-    or       t4, t4, t9
-    or       t5, t5, t9
-    or       t6, t6, t9
-    or       t7, t7, t9
-    pref     30, 32(a0)
-    sw       t0, 0(a0)
-    sw       t1, 4(a0)
-    sw       t2, 8(a0)
-    sw       t3, 12(a0)
-    sw       t4, 16(a0)
-    sw       t5, 20(a0)
-    sw       t6, 24(a0)
-    sw       t7, 28(a0)
-    b        1b
-     addiu   a0, a0, 32
-2:
-    lw       t0, 0(a1)
-    lw       t1, 4(a1)
-    lw       t2, 8(a1)
-    lw       t3, 12(a1)
-    lw       t4, 16(a1)
-    lw       t5, 20(a1)
-    lw       t6, 24(a1)
-    lw       t7, 28(a1)
-    addiu    a1, a1, 32
-    or       t0, t0, t9
-    or       t1, t1, t9
-    or       t2, t2, t9
-    or       t3, t3, t9
-    or       t4, t4, t9
-    or       t5, t5, t9
-    or       t6, t6, t9
-    or       t7, t7, t9
-    sw       t0, 0(a0)
-    sw       t1, 4(a0)
-    sw       t2, 8(a0)
-    sw       t3, 12(a0)
-    sw       t4, 16(a0)
-    sw       t5, 20(a0)
-    sw       t6, 24(a0)
-    sw       t7, 28(a0)
-    beqz     a2, 4f
-     addiu   a0, a0, 32
-3:
-    lw       t0, 0(a1)
-    addiu    a1, a1, 4
-    addiu    a2, a2, -1
-    or       t1, t0, t9
-    sw       t1, 0(a0)
-    bnez     a2, 3b
-     addiu   a0, a0, 4
-4:
-    jr       ra
-     nop
-
-END(pixman_composite_src_x888_8888_asm_mips)
+END_MIPS_DSPR2(pixman_composite_src_0565_8888_asm)
 
 #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL)
-LEAF_MIPS_DSPR2(pixman_composite_src_0888_8888_rev_asm_mips)
+LEAF_MIPS_DSPR2(pixman_composite_src_0888_8888_rev_asm)
 /*
  * a0 - dst (a8r8g8b8)
  * a1 - src (b8g8r8)
@@ -502,9 +365,9 @@ LEAF_MIPS_DSPR2(pixman_composite_src_0888_8888_rev_asm_mips)
     j                 ra
      nop
 
-END(pixman_composite_src_0888_8888_rev_asm_mips)
+END_MIPS_DSPR2(pixman_composite_src_0888_8888_rev_asm)
 
-LEAF_MIPS_DSPR2(pixman_composite_src_0888_0565_rev_asm_mips)
+LEAF_MIPS_DSPR2(pixman_composite_src_0888_0565_rev_asm)
 /*
  * a0 - dst (r5g6b5)
  * a1 - src (b8g8r8)
@@ -696,10 +559,10 @@ LEAF_MIPS_DSPR2(pixman_composite_src_0888_0565_rev_asm_mips)
     j                 ra
      nop
 
-END(pixman_composite_src_0888_0565_rev_asm_mips)
+END_MIPS_DSPR2(pixman_composite_src_0888_0565_rev_asm)
 #endif
 
-LEAF_MIPS_DSPR2(pixman_composite_src_pixbuf_8888_asm_mips)
+LEAF_MIPS_DSPR2(pixman_composite_src_pixbuf_8888_asm)
 /*
  * a0 - dst  (a8b8g8r8)
  * a1 - src  (a8r8g8b8)
@@ -759,9 +622,9 @@ LEAF_MIPS_DSPR2(pixman_composite_src_pixbuf_8888_asm_mips)
     j        ra
      nop
 
-END(pixman_composite_src_pixbuf_8888_asm_mips)
+END_MIPS_DSPR2(pixman_composite_src_pixbuf_8888_asm)
 
-LEAF_MIPS_DSPR2(pixman_composite_src_rpixbuf_8888_asm_mips)
+LEAF_MIPS_DSPR2(pixman_composite_src_rpixbuf_8888_asm)
 /*
  * a0 - dst  (a8r8g8b8)
  * a1 - src  (a8r8g8b8)
@@ -818,9 +681,9 @@ LEAF_MIPS_DSPR2(pixman_composite_src_rpixbuf_8888_asm_mips)
     j        ra
      nop
 
-END(pixman_composite_src_rpixbuf_8888_asm_mips)
+END_MIPS_DSPR2(pixman_composite_src_rpixbuf_8888_asm)
 
-LEAF_MIPS_DSPR2(pixman_composite_src_n_8_8888_asm_mips)
+LEAF_MIPS_DSPR2(pixman_composite_src_n_8_8888_asm)
 /*
  * a0 - dst  (a8r8g8b8)
  * a1 - src  (32bit constant)
@@ -871,9 +734,9 @@ LEAF_MIPS_DSPR2(pixman_composite_src_n_8_8888_asm_mips)
     j        ra
      nop
 
-END(pixman_composite_src_n_8_8888_asm_mips)
+END_MIPS_DSPR2(pixman_composite_src_n_8_8888_asm)
 
-LEAF_MIPS_DSPR2(pixman_composite_src_n_8_8_asm_mips)
+LEAF_MIPS_DSPR2(pixman_composite_src_n_8_8_asm)
 /*
  * a0 - dst  (a8)
  * a1 - src  (32bit constant)
@@ -951,9 +814,9 @@ LEAF_MIPS_DSPR2(pixman_composite_src_n_8_8_asm_mips)
     j                 ra
      nop
 
-END(pixman_composite_src_n_8_8_asm_mips)
+END_MIPS_DSPR2(pixman_composite_src_n_8_8_asm)
 
-LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_8888_ca_asm_mips)
+LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_8888_ca_asm)
 /*
  * a0 - dst  (a8r8g8b8)
  * a1 - src  (32bit constant)
@@ -1071,9 +934,9 @@ LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_8888_ca_asm_mips)
     j            ra
      nop
 
-END(pixman_composite_over_n_8888_8888_ca_asm_mips)
+END_MIPS_DSPR2(pixman_composite_over_n_8888_8888_ca_asm)
 
-LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_0565_ca_asm_mips)
+LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_0565_ca_asm)
 /*
  * a0 - dst  (r5g6b5)
  * a1 - src  (32bit constant)
@@ -1204,9 +1067,9 @@ LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_0565_ca_asm_mips)
     j            ra
      nop
 
-END(pixman_composite_over_n_8888_0565_ca_asm_mips)
+END_MIPS_DSPR2(pixman_composite_over_n_8888_0565_ca_asm)
 
-LEAF_MIPS_DSPR2(pixman_composite_over_n_8_8_asm_mips)
+LEAF_MIPS_DSPR2(pixman_composite_over_n_8_8_asm)
 /*
  * a0 - dst  (a8)
  * a1 - src  (32bit constant)
@@ -1324,9 +1187,9 @@ LEAF_MIPS_DSPR2(pixman_composite_over_n_8_8_asm_mips)
     j                 ra
      nop
 
-END(pixman_composite_over_n_8_8_asm_mips)
+END_MIPS_DSPR2(pixman_composite_over_n_8_8_asm)
 
-LEAF_MIPS_DSPR2(pixman_composite_over_n_8_8888_asm_mips)
+LEAF_MIPS_DSPR2(pixman_composite_over_n_8_8888_asm)
 /*
  * a0 - dst  (a8r8g8b8)
  * a1 - src  (32bit constant)
@@ -1432,9 +1295,9 @@ LEAF_MIPS_DSPR2(pixman_composite_over_n_8_8888_asm_mips)
     j         ra
      nop
 
-END(pixman_composite_over_n_8_8888_asm_mips)
+END_MIPS_DSPR2(pixman_composite_over_n_8_8888_asm)
 
-LEAF_MIPS_DSPR2(pixman_composite_over_n_8_0565_asm_mips)
+LEAF_MIPS_DSPR2(pixman_composite_over_n_8_0565_asm)
 /*
  * a0 - dst  (r5g6b5)
  * a1 - src  (32bit constant)
@@ -1548,9 +1411,9 @@ LEAF_MIPS_DSPR2(pixman_composite_over_n_8_0565_asm_mips)
     j        ra
      nop
 
-END(pixman_composite_over_n_8_0565_asm_mips)
+END_MIPS_DSPR2(pixman_composite_over_n_8_0565_asm)
 
-LEAF_MIPS_DSPR2(pixman_composite_over_8888_n_8888_asm_mips)
+LEAF_MIPS_DSPR2(pixman_composite_over_8888_n_8888_asm)
 /*
  * a0 - dst  (a8r8g8b8)
  * a1 - src  (a8r8g8b8)
@@ -1599,9 +1462,9 @@ LEAF_MIPS_DSPR2(pixman_composite_over_8888_n_8888_asm_mips)
     j        ra
      nop
 
-END(pixman_composite_over_8888_n_8888_asm_mips)
+END_MIPS_DSPR2(pixman_composite_over_8888_n_8888_asm)
 
-LEAF_MIPS_DSPR2(pixman_composite_over_8888_n_0565_asm_mips)
+LEAF_MIPS_DSPR2(pixman_composite_over_8888_n_0565_asm)
 /*
  * a0 - dst  (r5g6b5)
  * a1 - src  (a8r8g8b8)
@@ -1656,9 +1519,9 @@ LEAF_MIPS_DSPR2(pixman_composite_over_8888_n_0565_asm_mips)
     j                 ra
      nop
 
-END(pixman_composite_over_8888_n_0565_asm_mips)
+END_MIPS_DSPR2(pixman_composite_over_8888_n_0565_asm)
 
-LEAF_MIPS_DSPR2(pixman_composite_over_0565_n_0565_asm_mips)
+LEAF_MIPS_DSPR2(pixman_composite_over_0565_n_0565_asm)
 /*
  * a0 - dst  (r5g6b5)
  * a1 - src  (r5g6b5)
@@ -1715,9 +1578,9 @@ LEAF_MIPS_DSPR2(pixman_composite_over_0565_n_0565_asm_mips)
     j        ra
      nop
 
-END(pixman_composite_over_0565_n_0565_asm_mips)
+END_MIPS_DSPR2(pixman_composite_over_0565_n_0565_asm)
 
-LEAF_MIPS_DSPR2(pixman_composite_over_8888_8_8888_asm_mips)
+LEAF_MIPS_DSPR2(pixman_composite_over_8888_8_8888_asm)
 /*
  * a0 - dst  (a8r8g8b8)
  * a1 - src  (a8r8g8b8)
@@ -1766,9 +1629,9 @@ LEAF_MIPS_DSPR2(pixman_composite_over_8888_8_8888_asm_mips)
     j        ra
      nop
 
-END(pixman_composite_over_8888_8_8888_asm_mips)
+END_MIPS_DSPR2(pixman_composite_over_8888_8_8888_asm)
 
-LEAF_MIPS_DSPR2(pixman_composite_over_8888_8_0565_asm_mips)
+LEAF_MIPS_DSPR2(pixman_composite_over_8888_8_0565_asm)
 /*
  * a0 - dst  (r5g6b5)
  * a1 - src  (a8r8g8b8)
@@ -1824,9 +1687,9 @@ LEAF_MIPS_DSPR2(pixman_composite_over_8888_8_0565_asm_mips)
     j        ra
      nop
 
-END(pixman_composite_over_8888_8_0565_asm_mips)
+END_MIPS_DSPR2(pixman_composite_over_8888_8_0565_asm)
 
-LEAF_MIPS_DSPR2(pixman_composite_over_0565_8_0565_asm_mips)
+LEAF_MIPS_DSPR2(pixman_composite_over_0565_8_0565_asm)
 /*
  * a0 - dst  (r5g6b5)
  * a1 - src  (r5g6b5)
@@ -1884,9 +1747,9 @@ LEAF_MIPS_DSPR2(pixman_composite_over_0565_8_0565_asm_mips)
     j        ra
      nop
 
-END(pixman_composite_over_0565_8_0565_asm_mips)
+END_MIPS_DSPR2(pixman_composite_over_0565_8_0565_asm)
 
-LEAF_MIPS_DSPR2(pixman_composite_over_8888_8888_8888_asm_mips)
+LEAF_MIPS_DSPR2(pixman_composite_over_8888_8888_8888_asm)
 /*
  * a0 - dst  (a8r8g8b8)
  * a1 - src  (a8r8g8b8)
@@ -1937,9 +1800,9 @@ LEAF_MIPS_DSPR2(pixman_composite_over_8888_8888_8888_asm_mips)
     j        ra
      nop
 
-END(pixman_composite_over_8888_8888_8888_asm_mips)
+END_MIPS_DSPR2(pixman_composite_over_8888_8888_8888_asm)
 
-LEAF_MIPS_DSPR2(pixman_composite_over_8888_8888_asm_mips)
+LEAF_MIPS_DSPR2(pixman_composite_over_8888_8888_asm)
 /*
  * a0 - dst  (a8r8g8b8)
  * a1 - src  (a8r8g8b8)
@@ -2008,9 +1871,9 @@ LEAF_MIPS_DSPR2(pixman_composite_over_8888_8888_asm_mips)
     j            ra
      nop
 
-END(pixman_composite_over_8888_8888_asm_mips)
+END_MIPS_DSPR2(pixman_composite_over_8888_8888_asm)
 
-LEAF_MIPS_DSPR2(pixman_composite_over_8888_0565_asm_mips)
+LEAF_MIPS_DSPR2(pixman_composite_over_8888_0565_asm)
 /*
  * a0 - dst  (r5g6b5)
  * a1 - src  (a8r8g8b8)
@@ -2086,9 +1949,9 @@ LEAF_MIPS_DSPR2(pixman_composite_over_8888_0565_asm_mips)
     j            ra
      nop
 
-END(pixman_composite_over_8888_0565_asm_mips)
+END_MIPS_DSPR2(pixman_composite_over_8888_0565_asm)
 
-LEAF_MIPS_DSPR2(pixman_composite_over_n_0565_asm_mips)
+LEAF_MIPS_DSPR2(pixman_composite_over_n_0565_asm)
 /*
  * a0 - dst  (r5g6b5)
  * a1 - src  (32bit constant)
@@ -2156,9 +2019,9 @@ LEAF_MIPS_DSPR2(pixman_composite_over_n_0565_asm_mips)
     j            ra
      nop
 
-END(pixman_composite_over_n_0565_asm_mips)
+END_MIPS_DSPR2(pixman_composite_over_n_0565_asm)
 
-LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_asm_mips)
+LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_asm)
 /*
  * a0 - dst  (a8r8g8b8)
  * a1 - src  (32bit constant)
@@ -2220,9 +2083,9 @@ LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_asm_mips)
     j            ra
      nop
 
-END(pixman_composite_over_n_8888_asm_mips)
+END_MIPS_DSPR2(pixman_composite_over_n_8888_asm)
 
-LEAF_MIPS_DSPR2(pixman_composite_add_8_8_8_asm_mips)
+LEAF_MIPS_DSPR2(pixman_composite_add_8_8_8_asm)
 /*
  * a0 - dst  (a8)
  * a1 - src  (a8)
@@ -2324,9 +2187,9 @@ LEAF_MIPS_DSPR2(pixman_composite_add_8_8_8_asm_mips)
     j                 ra
      nop
 
-END(pixman_composite_add_8_8_8_asm_mips)
+END_MIPS_DSPR2(pixman_composite_add_8_8_8_asm)
 
-LEAF_MIPS_DSPR2(pixman_composite_add_n_8_8_asm_mips)
+LEAF_MIPS_DSPR2(pixman_composite_add_n_8_8_asm)
 /*
  * a0 - dst  (a8)
  * a1 - src  (32bit constant)
@@ -2420,9 +2283,9 @@ LEAF_MIPS_DSPR2(pixman_composite_add_n_8_8_asm_mips)
     j                 ra
      nop
 
-END(pixman_composite_add_n_8_8_asm_mips)
+END_MIPS_DSPR2(pixman_composite_add_n_8_8_asm)
 
-LEAF_MIPS_DSPR2(pixman_composite_add_n_8_8888_asm_mips)
+LEAF_MIPS_DSPR2(pixman_composite_add_n_8_8888_asm)
 /*
  * a0 - dst  (a8r8g8b8)
  * a1 - src  (32bit constant)
@@ -2472,9 +2335,9 @@ LEAF_MIPS_DSPR2(pixman_composite_add_n_8_8888_asm_mips)
     j        ra
      nop
 
-END(pixman_composite_add_n_8_8888_asm_mips)
+END_MIPS_DSPR2(pixman_composite_add_n_8_8888_asm)
 
-LEAF_MIPS_DSPR2(pixman_composite_add_0565_8_0565_asm_mips)
+LEAF_MIPS_DSPR2(pixman_composite_add_0565_8_0565_asm)
 /*
  * a0 - dst  (r5g6b5)
  * a1 - src  (r5g6b5)
@@ -2535,9 +2398,9 @@ LEAF_MIPS_DSPR2(pixman_composite_add_0565_8_0565_asm_mips)
     j        ra
      nop
 
-END(pixman_composite_add_0565_8_0565_asm_mips)
+END_MIPS_DSPR2(pixman_composite_add_0565_8_0565_asm)
 
-LEAF_MIPS_DSPR2(pixman_composite_add_8888_8_8888_asm_mips)
+LEAF_MIPS_DSPR2(pixman_composite_add_8888_8_8888_asm)
 /*
  * a0 - dst  (a8r8g8b8)
  * a1 - src  (a8r8g8b8)
@@ -2589,9 +2452,9 @@ LEAF_MIPS_DSPR2(pixman_composite_add_8888_8_8888_asm_mips)
     j        ra
      nop
 
-END(pixman_composite_add_8888_8_8888_asm_mips)
+END_MIPS_DSPR2(pixman_composite_add_8888_8_8888_asm)
 
-LEAF_MIPS_DSPR2(pixman_composite_add_8888_n_8888_asm_mips)
+LEAF_MIPS_DSPR2(pixman_composite_add_8888_n_8888_asm)
 /*
  * a0 - dst  (a8r8g8b8)
  * a1 - src  (a8r8g8b8)
@@ -2642,9 +2505,9 @@ LEAF_MIPS_DSPR2(pixman_composite_add_8888_n_8888_asm_mips)
     j        ra
      nop
 
-END(pixman_composite_add_8888_n_8888_asm_mips)
+END_MIPS_DSPR2(pixman_composite_add_8888_n_8888_asm)
 
-LEAF_MIPS_DSPR2(pixman_composite_add_8888_8888_8888_asm_mips)
+LEAF_MIPS_DSPR2(pixman_composite_add_8888_8888_8888_asm)
 /*
  * a0 - dst  (a8r8g8b8)
  * a1 - src  (a8r8g8b8)
@@ -2699,9 +2562,9 @@ LEAF_MIPS_DSPR2(pixman_composite_add_8888_8888_8888_asm_mips)
     j        ra
      nop
 
-END(pixman_composite_add_8888_8888_8888_asm_mips)
+END_MIPS_DSPR2(pixman_composite_add_8888_8888_8888_asm)
 
-LEAF_MIPS_DSPR2(pixman_composite_add_8_8_asm_mips)
+LEAF_MIPS_DSPR2(pixman_composite_add_8_8_asm)
 /*
  * a0 - dst  (a8)
  * a1 - src  (a8)
@@ -2767,9 +2630,9 @@ LEAF_MIPS_DSPR2(pixman_composite_add_8_8_asm_mips)
     j                 ra
      nop
 
-END(pixman_composite_add_8_8_asm_mips)
+END_MIPS_DSPR2(pixman_composite_add_8_8_asm)
 
-LEAF_MIPS_DSPR2(pixman_composite_add_8888_8888_asm_mips)
+LEAF_MIPS_DSPR2(pixman_composite_add_8888_8888_asm)
 /*
  * a0 - dst (a8r8g8b8)
  * a1 - src (a8r8g8b8)
@@ -2844,9 +2707,9 @@ LEAF_MIPS_DSPR2(pixman_composite_add_8888_8888_asm_mips)
     jr           ra
      nop
 
-END(pixman_composite_add_8888_8888_asm_mips)
+END_MIPS_DSPR2(pixman_composite_add_8888_8888_asm)
 
-LEAF_MIPS_DSPR2(pixman_composite_out_reverse_8_0565_asm_mips)
+LEAF_MIPS_DSPR2(pixman_composite_out_reverse_8_0565_asm)
 /*
  * a0 - dst  (r5g6b5)
  * a1 - src  (a8)
@@ -2905,9 +2768,9 @@ LEAF_MIPS_DSPR2(pixman_composite_out_reverse_8_0565_asm_mips)
     j        ra
      nop
 
-END(pixman_composite_out_reverse_8_0565_asm_mips)
+END_MIPS_DSPR2(pixman_composite_out_reverse_8_0565_asm)
 
-LEAF_MIPS_DSPR2(pixman_composite_out_reverse_8_8888_asm_mips)
+LEAF_MIPS_DSPR2(pixman_composite_out_reverse_8_8888_asm)
 /*
  * a0 - dst  (a8r8g8b8)
  * a1 - src  (a8)
@@ -2954,9 +2817,9 @@ LEAF_MIPS_DSPR2(pixman_composite_out_reverse_8_8888_asm_mips)
     j        ra
      nop
 
-END(pixman_composite_out_reverse_8_8888_asm_mips)
+END_MIPS_DSPR2(pixman_composite_out_reverse_8_8888_asm)
 
-LEAF_MIPS_DSPR2(pixman_composite_over_reverse_n_8888_asm_mips)
+LEAF_MIPS_DSPR2(pixman_composite_over_reverse_n_8888_asm)
 /*
  * a0 - dst  (a8r8g8b8)
  * a1 - src  (32bit constant)
@@ -3090,9 +2953,9 @@ LEAF_MIPS_DSPR2(pixman_composite_over_reverse_n_8888_asm_mips)
     j                 ra
      nop
 
-END(pixman_composite_over_reverse_n_8888_asm_mips)
+END_MIPS_DSPR2(pixman_composite_over_reverse_n_8888_asm)
 
-LEAF_MIPS_DSPR2(pixman_composite_in_n_8_asm_mips)
+LEAF_MIPS_DSPR2(pixman_composite_in_n_8_asm)
 /*
  * a0 - dst  (a8)
  * a1 - src  (32bit constant)
@@ -3166,9 +3029,9 @@ LEAF_MIPS_DSPR2(pixman_composite_in_n_8_asm_mips)
     j                 ra
      nop
 
-END(pixman_composite_in_n_8_asm_mips)
+END_MIPS_DSPR2(pixman_composite_in_n_8_asm)
 
-LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_8888_8888_OVER_asm_mips)
+LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_8888_8888_OVER_asm)
 /*
  * a0     - dst  (a8r8g8b8)
  * a1     - src  (a8r8g8b8)
@@ -3227,9 +3090,9 @@ LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_8888_8888_OVER_asm_mips)
     j        ra
      nop
 
-END(pixman_scaled_nearest_scanline_8888_8888_OVER_asm_mips)
+END_MIPS_DSPR2(pixman_scaled_nearest_scanline_8888_8888_OVER_asm)
 
-LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_8888_0565_OVER_asm_mips)
+LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_8888_0565_OVER_asm)
 /*
  * a0     - dst  (r5g6b5)
  * a1     - src  (a8r8g8b8)
@@ -3293,9 +3156,9 @@ LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_8888_0565_OVER_asm_mips)
     j        ra
      nop
 
-END(pixman_scaled_nearest_scanline_8888_0565_OVER_asm_mips)
+END_MIPS_DSPR2(pixman_scaled_nearest_scanline_8888_0565_OVER_asm)
 
-LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_0565_8888_SRC_asm_mips)
+LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_0565_8888_SRC_asm)
 /*
  * a0     - dst (a8r8g8b8)
  * a1     - src (r5g6b5)
@@ -3352,9 +3215,9 @@ LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_0565_8888_SRC_asm_mips)
     j        ra
      nop
 
-END(pixman_scaled_nearest_scanline_0565_8888_SRC_asm_mips)
+END_MIPS_DSPR2(pixman_scaled_nearest_scanline_0565_8888_SRC_asm)
 
-LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_8888_8_0565_OVER_asm_mips)
+LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_8888_8_0565_OVER_asm)
 /*
  * a0     - dst  (r5g6b5)
  * a1     - src  (a8r8g8b8)
@@ -3429,9 +3292,9 @@ LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_8888_8_0565_OVER_asm_mips)
     j        ra
      nop
 
-END(pixman_scaled_nearest_scanline_8888_8_0565_OVER_asm_mips)
+END_MIPS_DSPR2(pixman_scaled_nearest_scanline_8888_8_0565_OVER_asm)
 
-LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_0565_8_0565_OVER_asm_mips)
+LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_0565_8_0565_OVER_asm)
 /*
  * a0     - dst  (r5g6b5)
  * a1     - src  (r5g6b5)
@@ -3509,9 +3372,9 @@ LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_0565_8_0565_OVER_asm_mips)
     j        ra
      nop
 
-END(pixman_scaled_nearest_scanline_0565_8_0565_OVER_asm_mips)
+END_MIPS_DSPR2(pixman_scaled_nearest_scanline_0565_8_0565_OVER_asm)
 
-LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_mips)
+LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8888_SRC_asm)
 /*
  * a0     - *dst
  * a1     - *src_top
@@ -3567,9 +3430,9 @@ LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_mips)
     j        ra
      nop
 
-END(pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_mips)
+END_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8888_SRC_asm)
 
-LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_0565_SRC_asm_mips)
+LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_0565_SRC_asm)
 /*
  * a0     - *dst
  * a1     - *src_top
@@ -3626,9 +3489,9 @@ LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_0565_SRC_asm_mips)
     j        ra
      nop
 
-END(pixman_scaled_bilinear_scanline_8888_0565_SRC_asm_mips)
+END_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_0565_SRC_asm)
 
-LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_0565_8888_SRC_asm_mips)
+LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_0565_8888_SRC_asm)
 /*
  * a0     - *dst
  * a1     - *src_top
@@ -3690,9 +3553,9 @@ LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_0565_8888_SRC_asm_mips)
     j        ra
      nop
 
-END(pixman_scaled_bilinear_scanline_0565_8888_SRC_asm_mips)
+END_MIPS_DSPR2(pixman_scaled_bilinear_scanline_0565_8888_SRC_asm)
 
-LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_0565_0565_SRC_asm_mips)
+LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_0565_0565_SRC_asm)
 /*
  * a0     - *dst
  * a1     - *src_top
@@ -3755,9 +3618,9 @@ LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_0565_0565_SRC_asm_mips)
     j        ra
      nop
 
-END(pixman_scaled_bilinear_scanline_0565_0565_SRC_asm_mips)
+END_MIPS_DSPR2(pixman_scaled_bilinear_scanline_0565_0565_SRC_asm)
 
-LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8888_OVER_asm_mips)
+LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8888_OVER_asm)
 /*
  * a0     - *dst
  * a1     - *src_top
@@ -3816,9 +3679,9 @@ LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8888_OVER_asm_mips)
     j        ra
      nop
 
-END(pixman_scaled_bilinear_scanline_8888_8888_OVER_asm_mips)
+END_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8888_OVER_asm)
 
-LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8888_ADD_asm_mips)
+LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8888_ADD_asm)
 /*
  * a0     - *dst
  * a1     - *src_top
@@ -3876,9 +3739,9 @@ LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8888_ADD_asm_mips)
     j            ra
      nop
 
-END(pixman_scaled_bilinear_scanline_8888_8888_ADD_asm_mips)
+END_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8888_ADD_asm)
 
-LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_8888_SRC_asm_mips)
+LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_8888_SRC_asm)
 /*
  * a0     - *dst
  * a1     - *mask
@@ -3940,9 +3803,9 @@ LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_8888_SRC_asm_mips)
     j        ra
      nop
 
-END(pixman_scaled_bilinear_scanline_8888_8_8888_SRC_asm_mips)
+END_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_8888_SRC_asm)
 
-LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_0565_SRC_asm_mips)
+LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_0565_SRC_asm)
 /*
  * a0     - *dst
  * a1     - *mask
@@ -4005,9 +3868,9 @@ LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_0565_SRC_asm_mips)
     j        ra
      nop
 
-END(pixman_scaled_bilinear_scanline_8888_8_0565_SRC_asm_mips)
+END_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_0565_SRC_asm)
 
-LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_0565_8_x888_SRC_asm_mips)
+LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_0565_8_x888_SRC_asm)
 /*
  * a0     - *dst
  * a1     - *mask
@@ -4076,9 +3939,9 @@ LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_0565_8_x888_SRC_asm_mips)
     j        ra
      nop
 
-END(pixman_scaled_bilinear_scanline_0565_8_x888_SRC_asm_mips)
+END_MIPS_DSPR2(pixman_scaled_bilinear_scanline_0565_8_x888_SRC_asm)
 
-LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_0565_8_0565_SRC_asm_mips)
+LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_0565_8_0565_SRC_asm)
 /*
  * a0     - *dst
  * a1     - *mask
@@ -4148,9 +4011,9 @@ LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_0565_8_0565_SRC_asm_mips)
     j        ra
      nop
 
-END(pixman_scaled_bilinear_scanline_0565_8_0565_SRC_asm_mips)
+END_MIPS_DSPR2(pixman_scaled_bilinear_scanline_0565_8_0565_SRC_asm)
 
-LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_8888_OVER_asm_mips)
+LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_8888_OVER_asm)
 /*
  * a0     - dst        (a8r8g8b8)
  * a1     - mask       (a8)
@@ -4215,9 +4078,9 @@ LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_8888_OVER_asm_mips)
     j        ra
      nop
 
-END(pixman_scaled_bilinear_scanline_8888_8_8888_OVER_asm_mips)
+END_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_8888_OVER_asm)
 
-LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_8888_ADD_asm_mips)
+LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_8888_ADD_asm)
 /*
  * a0     - *dst
  * a1     - *mask
@@ -4280,4 +4143,4 @@ LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_8888_ADD_asm_mips)
     j        ra
      nop
 
-END(pixman_scaled_bilinear_scanline_8888_8_8888_ADD_asm_mips)
+END_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_8888_ADD_asm)
diff --git a/pixman/pixman-mips-dspr2-asm.h b/pixman/pixman-mips-dspr2-asm.h
index cab122d..b43d008 100644
--- a/pixman/pixman-mips-dspr2-asm.h
+++ b/pixman/pixman-mips-dspr2-asm.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012
+ * Copyright (c) 2012-2013
  *      MIPS Technologies, Inc., California.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -26,268 +26,13 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * Author:  Nemanja Lukic (nlukic at mips.com)
+ * Author:  Nemanja Lukic (nemanja.lukic at rt-rk.com)
  */
 
 #ifndef PIXMAN_MIPS_DSPR2_ASM_H
 #define PIXMAN_MIPS_DSPR2_ASM_H
 
-#define zero $0
-#define AT   $1
-#define v0   $2
-#define v1   $3
-#define a0   $4
-#define a1   $5
-#define a2   $6
-#define a3   $7
-#define t0   $8
-#define t1   $9
-#define t2   $10
-#define t3   $11
-#define t4   $12
-#define t5   $13
-#define t6   $14
-#define t7   $15
-#define s0   $16
-#define s1   $17
-#define s2   $18
-#define s3   $19
-#define s4   $20
-#define s5   $21
-#define s6   $22
-#define s7   $23
-#define t8   $24
-#define t9   $25
-#define k0   $26
-#define k1   $27
-#define gp   $28
-#define sp   $29
-#define fp   $30
-#define s8   $30
-#define ra   $31
-
-/*
- * LEAF_MIPS32R2 - declare leaf routine for MIPS32r2
- */
-#define LEAF_MIPS32R2(symbol)                           \
-                .globl  symbol;                         \
-                .align  2;                              \
-                .type   symbol, @function;              \
-                .ent    symbol, 0;                      \
-symbol:         .frame  sp, 0, ra;                      \
-                .set    push;                           \
-                .set    arch=mips32r2;                  \
-                .set    noreorder;                      \
-                .set    noat;
-
-/*
- * LEAF_MIPS32R2 - declare leaf routine for MIPS DSPr2
- */
-#define LEAF_MIPS_DSPR2(symbol)                         \
-LEAF_MIPS32R2(symbol)                                   \
-                .set    dspr2;
-
-/*
- * END - mark end of function
- */
-#define END(function)                                   \
-                .set    pop;                            \
-                .end    function;                       \
-                .size   function,.-function
-
-/*
- * Checks if stack offset is big enough for storing/restoring regs_num
- * number of register to/from stack. Stack offset must be greater than
- * or equal to the number of bytes needed for storing registers (regs_num*4).
- * Since MIPS ABI allows usage of first 16 bytes of stack frame (this is
- * preserved for input arguments of the functions, already stored in a0-a3),
- * stack size can be further optimized by utilizing this space.
- */
-.macro CHECK_STACK_OFFSET regs_num, stack_offset
-.if \stack_offset < \regs_num * 4 - 16
-.error "Stack offset too small."
-.endif
-.endm
-
-/*
- * Saves set of registers on stack. Maximum number of registers that
- * can be saved on stack is limitted to 14 (a0-a3, v0-v1 and s0-s7).
- * Stack offset is number of bytes that are added to stack pointer (sp)
- * before registers are pushed in order to provide enough space on stack
- * (offset must be multiple of 4, and must be big enough, as described by
- * CHECK_STACK_OFFSET macro). This macro is intended to be used in
- * combination with RESTORE_REGS_FROM_STACK macro. Example:
- *  SAVE_REGS_ON_STACK      4, v0, v1, s0, s1
- *  RESTORE_REGS_FROM_STACK 4, v0, v1, s0, s1
- */
-.macro SAVE_REGS_ON_STACK stack_offset = 0, r1, \
-                          r2  = 0, r3  = 0, r4  = 0, \
-                          r5  = 0, r6  = 0, r7  = 0, \
-                          r8  = 0, r9  = 0, r10 = 0, \
-                          r11 = 0, r12 = 0, r13 = 0, \
-                          r14 = 0
-    .if (\stack_offset < 0) || (\stack_offset - (\stack_offset / 4) * 4)
-    .error "Stack offset must be pozitive and multiple of 4."
-    .endif
-    .if \stack_offset != 0
-    addiu           sp, sp, -\stack_offset
-    .endif
-    sw              \r1, 0(sp)
-    .if \r2 != 0
-    sw              \r2, 4(sp)
-    .endif
-    .if \r3 != 0
-    sw              \r3, 8(sp)
-    .endif
-    .if \r4 != 0
-    sw              \r4, 12(sp)
-    .endif
-    .if \r5 != 0
-    CHECK_STACK_OFFSET 5, \stack_offset
-    sw              \r5, 16(sp)
-    .endif
-    .if \r6 != 0
-    CHECK_STACK_OFFSET 6, \stack_offset
-    sw              \r6, 20(sp)
-    .endif
-    .if \r7 != 0
-    CHECK_STACK_OFFSET 7, \stack_offset
-    sw              \r7, 24(sp)
-    .endif
-    .if \r8 != 0
-    CHECK_STACK_OFFSET 8, \stack_offset
-    sw              \r8, 28(sp)
-    .endif
-    .if \r9 != 0
-    CHECK_STACK_OFFSET 9, \stack_offset
-    sw              \r9, 32(sp)
-    .endif
-    .if \r10 != 0
-    CHECK_STACK_OFFSET 10, \stack_offset
-    sw              \r10, 36(sp)
-    .endif
-    .if \r11 != 0
-    CHECK_STACK_OFFSET 11, \stack_offset
-    sw              \r11, 40(sp)
-    .endif
-    .if \r12 != 0
-    CHECK_STACK_OFFSET 12, \stack_offset
-    sw              \r12, 44(sp)
-    .endif
-    .if \r13 != 0
-    CHECK_STACK_OFFSET 13, \stack_offset
-    sw              \r13, 48(sp)
-    .endif
-    .if \r14 != 0
-    CHECK_STACK_OFFSET 14, \stack_offset
-    sw              \r14, 52(sp)
-    .endif
-.endm
-
-/*
- * Restores set of registers from stack. Maximum number of registers that
- * can be restored from stack is limitted to 14 (a0-a3, v0-v1 and s0-s7).
- * Stack offset is number of bytes that are added to stack pointer (sp)
- * after registers are restored (offset must be multiple of 4, and must
- * be big enough, as described by CHECK_STACK_OFFSET macro). This macro is
- * intended to be used in combination with RESTORE_REGS_FROM_STACK macro.
- * Example:
- *  SAVE_REGS_ON_STACK      4, v0, v1, s0, s1
- *  RESTORE_REGS_FROM_STACK 4, v0, v1, s0, s1
- */
-.macro RESTORE_REGS_FROM_STACK stack_offset = 0, r1, \
-                               r2  = 0, r3  = 0, r4  = 0, \
-                               r5  = 0, r6  = 0, r7  = 0, \
-                               r8  = 0, r9  = 0, r10 = 0, \
-                               r11 = 0, r12 = 0, r13 = 0, \
-                               r14 = 0
-    .if (\stack_offset < 0) || (\stack_offset - (\stack_offset/4)*4)
-    .error "Stack offset must be pozitive and multiple of 4."
-    .endif
-    lw              \r1, 0(sp)
-    .if \r2 != 0
-    lw              \r2, 4(sp)
-    .endif
-    .if \r3 != 0
-    lw              \r3, 8(sp)
-    .endif
-    .if \r4 != 0
-    lw              \r4, 12(sp)
-    .endif
-    .if \r5 != 0
-    CHECK_STACK_OFFSET 5, \stack_offset
-    lw              \r5, 16(sp)
-    .endif
-    .if \r6 != 0
-    CHECK_STACK_OFFSET 6, \stack_offset
-    lw              \r6, 20(sp)
-    .endif
-    .if \r7 != 0
-    CHECK_STACK_OFFSET 7, \stack_offset
-    lw              \r7, 24(sp)
-    .endif
-    .if \r8 != 0
-    CHECK_STACK_OFFSET 8, \stack_offset
-    lw              \r8, 28(sp)
-    .endif
-    .if \r9 != 0
-    CHECK_STACK_OFFSET 9, \stack_offset
-    lw              \r9, 32(sp)
-    .endif
-    .if \r10 != 0
-    CHECK_STACK_OFFSET 10, \stack_offset
-    lw              \r10, 36(sp)
-    .endif
-    .if \r11 != 0
-    CHECK_STACK_OFFSET 11, \stack_offset
-    lw              \r11, 40(sp)
-    .endif
-    .if \r12 != 0
-    CHECK_STACK_OFFSET 12, \stack_offset
-    lw              \r12, 44(sp)
-    .endif
-    .if \r13 != 0
-    CHECK_STACK_OFFSET 13, \stack_offset
-    lw              \r13, 48(sp)
-    .endif
-    .if \r14 != 0
-    CHECK_STACK_OFFSET 14, \stack_offset
-    lw              \r14, 52(sp)
-    .endif
-    .if \stack_offset != 0
-    addiu           sp, sp, \stack_offset
-    .endif
-.endm
-
-/*
- * Conversion of single r5g6b5 pixel (in_565) to single a8r8g8b8 pixel
- * returned in (out_8888) register. Requires two temporary registers
- * (scratch1 and scratch2).
- */
-.macro CONVERT_1x0565_TO_1x8888 in_565,   \
-                                out_8888, \
-                                scratch1, scratch2
-    lui     \out_8888, 0xff00
-    sll     \scratch1, \in_565,   0x3
-    andi    \scratch2, \scratch1, 0xff
-    ext     \scratch1, \in_565,   0x2, 0x3
-    or      \scratch1, \scratch2, \scratch1
-    or      \out_8888, \out_8888, \scratch1
-
-    sll     \scratch1, \in_565,   0x5
-    andi    \scratch1, \scratch1, 0xfc00
-    srl     \scratch2, \in_565,   0x1
-    andi    \scratch2, \scratch2, 0x300
-    or      \scratch2, \scratch1, \scratch2
-    or      \out_8888, \out_8888, \scratch2
-
-    andi    \scratch1, \in_565,   0xf800
-    srl     \scratch2, \scratch1, 0x5
-    andi    \scratch2, \scratch2, 0xff00
-    or      \scratch1, \scratch1, \scratch2
-    sll     \scratch1, \scratch1, 0x8
-    or      \out_8888, \out_8888, \scratch1
-.endm
+#include "pixman-mips32r2-asm.h"
 
 /*
  * Conversion of two r5g6b5 pixels (in1_565 and in2_565) to two a8r8g8b8 pixels
@@ -324,23 +69,6 @@ LEAF_MIPS32R2(symbol)                                   \
 .endm
 
 /*
- * Conversion of single a8r8g8b8 pixel (in_8888) to single r5g6b5 pixel
- * returned in (out_565) register. Requires two temporary registers
- * (scratch1 and scratch2).
- */
-.macro CONVERT_1x8888_TO_1x0565 in_8888, \
-                                out_565, \
-                                scratch1, scratch2
-    ext     \out_565,  \in_8888,  0x3, 0x5
-    srl     \scratch1, \in_8888,  0x5
-    andi    \scratch1, \scratch1, 0x07e0
-    srl     \scratch2, \in_8888,  0x8
-    andi    \scratch2, \scratch2, 0xf800
-    or      \out_565,  \out_565,  \scratch1
-    or      \out_565,  \out_565,  \scratch2
-.endm
-
-/*
  * Conversion of two a8r8g8b8 pixels (in1_8888 and in2_8888) to two r5g6b5
  * pixels returned in (out1_565 and out2_565) registers. Requires two temporary
  * registers (scratch1 and scratch2). It also requires maskR, maskG and maskB
@@ -707,4 +435,4 @@ LEAF_MIPS32R2(symbol)                                   \
     precrq.qb.ph    \tl,       \alpha, \scratch1
 .endm
 
-#endif //PIXMAN_MIPS_DSPR2_ASM_H
+#endif /* PIXMAN_MIPS_DSPR2_ASM_H */
diff --git a/pixman/pixman-mips-dspr2.c b/pixman/pixman-mips-dspr2.c
index e10c9df..742c5e8 100644
--- a/pixman/pixman-mips-dspr2.c
+++ b/pixman/pixman-mips-dspr2.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012
+ * Copyright (c) 2012-2013
  *      MIPS Technologies, Inc., California.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -26,7 +26,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * Author:  Nemanja Lukic (nlukic at mips.com)
+ * Author:  Nemanja Lukic (nemanja.lukic at rt-rk.com)
  */
 
 #ifdef HAVE_CONFIG_H
@@ -34,134 +34,126 @@
 #endif
 
 #include "pixman-private.h"
-#include "pixman-mips-dspr2.h"
+#include "pixman-mips-common.h"
 
-PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, src_x888_8888,
-                                    uint32_t, 1, uint32_t, 1)
 PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, src_8888_0565,
-                                    uint32_t, 1, uint16_t, 1)
+                                    uint32_t, 1, uint16_t, 1, _mips_dspr2)
 PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, src_0565_8888,
-                                    uint16_t, 1, uint32_t, 1)
-PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (DO_FAST_MEMCPY, src_0565_0565,
-                                    uint16_t, 1, uint16_t, 1)
-PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (DO_FAST_MEMCPY, src_8888_8888,
-                                    uint32_t, 1, uint32_t, 1)
-PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (DO_FAST_MEMCPY, src_0888_0888,
-                                    uint8_t, 3, uint8_t, 3)
+                                    uint16_t, 1, uint32_t, 1, _mips_dspr2)
 #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL)
 PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, src_0888_8888_rev,
-                                    uint8_t, 3, uint32_t, 1)
+                                    uint8_t, 3, uint32_t, 1, _mips_dspr2)
 PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, src_0888_0565_rev,
-                                    uint8_t, 3, uint16_t, 1)
+                                    uint8_t, 3, uint16_t, 1, _mips_dspr2)
 #endif
 PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, src_pixbuf_8888,
-                                    uint32_t, 1, uint32_t, 1)
+                                    uint32_t, 1, uint32_t, 1, _mips_dspr2)
 PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, src_rpixbuf_8888,
-                                    uint32_t, 1, uint32_t, 1)
+                                    uint32_t, 1, uint32_t, 1, _mips_dspr2)
 PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, over_8888_8888,
-                                    uint32_t, 1, uint32_t, 1)
+                                    uint32_t, 1, uint32_t, 1, _mips_dspr2)
 PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, over_8888_0565,
-                                    uint32_t, 1, uint16_t, 1)
+                                    uint32_t, 1, uint16_t, 1, _mips_dspr2)
 PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, add_8_8,
-                                    uint8_t, 1, uint8_t, 1)
+                                    uint8_t, 1, uint8_t, 1, _mips_dspr2)
 PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, add_8888_8888,
-                                    uint32_t, 1, uint32_t, 1)
+                                    uint32_t, 1, uint32_t, 1, _mips_dspr2)
 PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, out_reverse_8_0565,
-                                    uint8_t, 1, uint16_t, 1)
+                                    uint8_t, 1, uint16_t, 1, _mips_dspr2)
 PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, out_reverse_8_8888,
-                                    uint8_t,  1, uint32_t, 1)
+                                    uint8_t,  1, uint32_t, 1, _mips_dspr2)
 
 PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (0, src_n_8_8888,
-                                       uint8_t, 1, uint32_t, 1)
+                                       uint8_t, 1, uint32_t, 1, _mips_dspr2)
 PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (0, src_n_8_8,
-                                       uint8_t, 1, uint8_t, 1)
+                                       uint8_t, 1, uint8_t, 1, _mips_dspr2)
 PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8888_8888_ca,
-                                       uint32_t, 1, uint32_t, 1)
+                                       uint32_t, 1, uint32_t, 1, _mips_dspr2)
 PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8888_0565_ca,
-                                       uint32_t, 1, uint16_t, 1)
+                                       uint32_t, 1, uint16_t, 1, _mips_dspr2)
 PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8_8,
-                                       uint8_t, 1, uint8_t, 1)
+                                       uint8_t, 1, uint8_t, 1, _mips_dspr2)
 PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8_8888,
-                                       uint8_t, 1, uint32_t, 1)
+                                       uint8_t, 1, uint32_t, 1, _mips_dspr2)
 PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8_0565,
-                                       uint8_t, 1, uint16_t, 1)
+                                       uint8_t, 1, uint16_t, 1, _mips_dspr2)
 PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, add_n_8_8,
-                                       uint8_t, 1, uint8_t, 1)
+                                       uint8_t, 1, uint8_t, 1, _mips_dspr2)
 PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, add_n_8_8888,
-                                       uint8_t, 1, uint32_t, 1)
+                                       uint8_t, 1, uint32_t, 1, _mips_dspr2)
 
 PIXMAN_MIPS_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, over_8888_n_8888,
-                                      uint32_t, 1, uint32_t, 1)
+                                      uint32_t, 1, uint32_t, 1, _mips_dspr2)
 PIXMAN_MIPS_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, over_8888_n_0565,
-                                      uint32_t, 1, uint16_t, 1)
+                                      uint32_t, 1, uint16_t, 1, _mips_dspr2)
 PIXMAN_MIPS_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, over_0565_n_0565,
-                                      uint16_t, 1, uint16_t, 1)
+                                      uint16_t, 1, uint16_t, 1, _mips_dspr2)
 PIXMAN_MIPS_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, add_8888_n_8888,
-                                      uint32_t, 1, uint32_t, 1)
+                                      uint32_t, 1, uint32_t, 1, _mips_dspr2)
 
 PIXMAN_MIPS_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, over_n_0565,
-                                  uint16_t, 1)
+                                  uint16_t, 1, _mips_dspr2)
 PIXMAN_MIPS_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, over_n_8888,
-                                  uint32_t, 1)
+                                  uint32_t, 1, _mips_dspr2)
 PIXMAN_MIPS_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, over_reverse_n_8888,
-                                  uint32_t, 1)
+                                  uint32_t, 1, _mips_dspr2)
 PIXMAN_MIPS_BIND_FAST_PATH_N_DST (0, in_n_8,
-                                  uint8_t, 1)
+                                  uint8_t, 1, _mips_dspr2)
 
 PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (add_8_8_8, uint8_t,  1,
-                                         uint8_t,  1, uint8_t,  1)
+                                         uint8_t,  1, uint8_t,  1, _mips_dspr2)
 PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (add_8888_8_8888, uint32_t, 1,
-                                         uint8_t, 1, uint32_t, 1)
+                                         uint8_t, 1, uint32_t, 1, _mips_dspr2)
 PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (add_8888_8888_8888, uint32_t, 1,
-                                         uint32_t, 1, uint32_t, 1)
+                                         uint32_t, 1, uint32_t, 1, _mips_dspr2)
 PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (add_0565_8_0565, uint16_t, 1,
-                                         uint8_t,  1, uint16_t, 1)
+                                         uint8_t,  1, uint16_t, 1, _mips_dspr2)
 PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (over_8888_8_8888, uint32_t, 1,
-                                         uint8_t, 1, uint32_t, 1)
+                                         uint8_t, 1, uint32_t, 1, _mips_dspr2)
 PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (over_8888_8_0565, uint32_t, 1,
-                                         uint8_t, 1, uint16_t, 1)
+                                         uint8_t, 1, uint16_t, 1, _mips_dspr2)
 PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (over_0565_8_0565, uint16_t, 1,
-                                         uint8_t, 1, uint16_t, 1)
+                                         uint8_t, 1, uint16_t, 1, _mips_dspr2)
 PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (over_8888_8888_8888, uint32_t, 1,
-                                         uint32_t, 1, uint32_t, 1)
+                                         uint32_t, 1, uint32_t, 1, _mips_dspr2)
 
 PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_DST (8888_8888, OVER,
-                                         uint32_t, uint32_t)
+                                         uint32_t, uint32_t, _mips_dspr2)
 PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_DST (8888_0565, OVER,
-                                         uint32_t, uint16_t)
+                                         uint32_t, uint16_t, _mips_dspr2)
 PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_DST (0565_8888, SRC,
-                                         uint16_t, uint32_t)
+                                         uint16_t, uint32_t, _mips_dspr2)
 
 PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (0, 8888_8888, SRC,
-                                          uint32_t, uint32_t)
+                                          uint32_t, uint32_t, _mips_dspr2)
 PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (0, 8888_0565, SRC,
-                                          uint32_t, uint16_t)
+                                          uint32_t, uint16_t, _mips_dspr2)
 PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (0, 0565_8888, SRC,
-                                          uint16_t, uint32_t)
+                                          uint16_t, uint32_t, _mips_dspr2)
 PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (0, 0565_0565, SRC,
-                                          uint16_t, uint16_t)
+                                          uint16_t, uint16_t, _mips_dspr2)
 PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (SKIP_ZERO_SRC, 8888_8888, OVER,
-                                          uint32_t, uint32_t)
+                                          uint32_t, uint32_t, _mips_dspr2)
 PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (SKIP_ZERO_SRC, 8888_8888, ADD,
-                                          uint32_t, uint32_t)
+                                          uint32_t, uint32_t, _mips_dspr2)
 
 PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_A8_DST (SKIP_ZERO_SRC, 8888_8_0565,
-                                            OVER, uint32_t, uint16_t)
+                                            OVER, uint32_t, uint16_t, _mips_dspr2)
 PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_A8_DST (SKIP_ZERO_SRC, 0565_8_0565,
-                                            OVER, uint16_t, uint16_t)
+                                            OVER, uint16_t, uint16_t, _mips_dspr2)
 
 PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_A8_DST (0, 8888_8_8888, SRC,
-                                             uint32_t, uint32_t)
+                                             uint32_t, uint32_t, _mips_dspr2)
 PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_A8_DST (0, 8888_8_0565, SRC,
-                                             uint32_t, uint16_t)
+                                             uint32_t, uint16_t, _mips_dspr2)
 PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_A8_DST (0, 0565_8_x888, SRC,
-                                             uint16_t, uint32_t)
+                                             uint16_t, uint32_t, _mips_dspr2)
 PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_A8_DST (0, 0565_8_0565, SRC,
-                                             uint16_t, uint16_t)
+                                             uint16_t, uint16_t, _mips_dspr2)
 PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_A8_DST (SKIP_ZERO_SRC, 8888_8_8888, OVER,
-                                             uint32_t, uint32_t)
+                                             uint32_t, uint32_t, _mips_dspr2)
 PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_A8_DST (SKIP_ZERO_SRC, 8888_8_8888, ADD,
-                                             uint32_t, uint32_t)
+                                             uint32_t, uint32_t, _mips_dspr2)
 
 static pixman_bool_t
 mips_dspr2_fill (pixman_implementation_t *imp,
@@ -176,6 +168,7 @@ mips_dspr2_fill (pixman_implementation_t *imp,
 {
     uint8_t *byte_line;
     uint32_t byte_width;
+
     switch (bpp)
     {
     case 16:
@@ -188,20 +181,32 @@ mips_dspr2_fill (pixman_implementation_t *imp,
         {
             uint8_t *dst = byte_line;
             byte_line += stride;
-            pixman_fill_buff16_mips (dst, byte_width, _xor & 0xffff);
+            pixman_fill_buff16_mips_dspr2 (dst, byte_width, _xor & 0xffff);
         }
         return TRUE;
     case 32:
+#ifdef USE_MIPS32R2
         stride = stride * (int) sizeof (uint32_t) / 4;
         byte_line = (uint8_t *)(((uint32_t *)bits) + stride * y + x);
         byte_width = width * 4;
         stride *= 4;
-
+#else /* !USE_MIPS32R2: rows are filled by the plain C loop below */
+        bits = bits + y * stride + x;
+#endif /* USE_MIPS32R2 */
         while (height--)
         {
+#ifdef USE_MIPS32R2
             uint8_t *dst = byte_line;
             byte_line += stride;
-            pixman_fill_buff32_mips (dst, byte_width, _xor);
+            pixman_fill_buff32_mips32r2 (dst, byte_width, _xor);
+#else /* !USE_MIPS32R2: plain C fallback, one store per pixel */
+            int i;
+
+            for (i = 0; i < width; ++i)
+                bits[i] = _xor;
+
+            bits += stride;
+#endif /* USE_MIPS32R2 */
         }
         return TRUE;
     default:
@@ -230,58 +235,51 @@ mips_dspr2_blt (pixman_implementation_t *imp,
     uint8_t *src_bytes;
     uint8_t *dst_bytes;
     uint32_t byte_width;
+    int32_t temp = src_bpp >> 3; /* bytes per pixel; 0 when src_bpp < 8 */
 
-    switch (src_bpp)
+    if (src_bpp != 16 && src_bpp != 32)
+        return FALSE; /* unsupported depth: leave before dividing by temp */
+    src_stride = src_stride * (int) sizeof (uint32_t) / temp;
+    dst_stride = dst_stride * (int) sizeof (uint32_t) / temp;
+    if (src_bpp == 16)
     {
-    case 16:
-        src_stride = src_stride * (int) sizeof (uint32_t) / 2;
-        dst_stride = dst_stride * (int) sizeof (uint32_t) / 2;
         src_bytes =(uint8_t *)(((uint16_t *)src_bits)
                                           + src_stride * (src_y) + (src_x));
         dst_bytes = (uint8_t *)(((uint16_t *)dst_bits)
                                            + dst_stride * (dest_y) + (dest_x));
-        byte_width = width * 2;
-        src_stride *= 2;
-        dst_stride *= 2;
-
-        while (height--)
-        {
-            uint8_t *src = src_bytes;
-            uint8_t *dst = dst_bytes;
-            src_bytes += src_stride;
-            dst_bytes += dst_stride;
-            pixman_mips_fast_memcpy (dst, src, byte_width);
-        }
-        return TRUE;
-    case 32:
-        src_stride = src_stride * (int) sizeof (uint32_t) / 4;
-        dst_stride = dst_stride * (int) sizeof (uint32_t) / 4;
+    }
+    else if (src_bpp == 32)
+    {
         src_bytes = (uint8_t *)(((uint32_t *)src_bits)
                                            + src_stride * (src_y) + (src_x));
         dst_bytes = (uint8_t *)(((uint32_t *)dst_bits)
                                            + dst_stride * (dest_y) + (dest_x));
-        byte_width = width * 4;
-        src_stride *= 4;
-        dst_stride *= 4;
-
-        while (height--)
-        {
-            uint8_t *src = src_bytes;
-            uint8_t *dst = dst_bytes;
-            src_bytes += src_stride;
-            dst_bytes += dst_stride;
-            pixman_mips_fast_memcpy (dst, src, byte_width);
-        }
-        return TRUE;
-    default:
+    }
+    else
         return FALSE;
+
+    byte_width = width * temp;
+    src_stride *= temp;
+    dst_stride *= temp;
+
+    while (height--)
+    {
+        uint8_t *src = src_bytes;
+        uint8_t *dst = dst_bytes;
+        src_bytes += src_stride;
+        dst_bytes += dst_stride;
+#ifdef USE_MIPS32R2
+        pixman_fast_memcpy_mips32r2 (dst, src, byte_width);
+#else
+        memcpy (dst, src, byte_width);
+#endif
     }
+
+    return TRUE;
 }
 
 static const pixman_fast_path_t mips_dspr2_fast_paths[] =
 {
-    PIXMAN_STD_FAST_PATH (SRC, r5g6b5,   null, r5g6b5,   mips_composite_src_0565_0565),
-    PIXMAN_STD_FAST_PATH (SRC, b5g6r5,   null, b5g6r5,   mips_composite_src_0565_0565),
     PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, r5g6b5,   mips_composite_src_8888_0565),
     PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, r5g6b5,   mips_composite_src_8888_0565),
     PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, b5g6r5,   mips_composite_src_8888_0565),
@@ -290,15 +288,6 @@ static const pixman_fast_path_t mips_dspr2_fast_paths[] =
     PIXMAN_STD_FAST_PATH (SRC, r5g6b5,   null, x8r8g8b8, mips_composite_src_0565_8888),
     PIXMAN_STD_FAST_PATH (SRC, b5g6r5,   null, a8b8g8r8, mips_composite_src_0565_8888),
     PIXMAN_STD_FAST_PATH (SRC, b5g6r5,   null, x8b8g8r8, mips_composite_src_0565_8888),
-    PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, mips_composite_src_8888_8888),
-    PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, mips_composite_src_8888_8888),
-    PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, mips_composite_src_8888_8888),
-    PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, mips_composite_src_8888_8888),
-    PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, mips_composite_src_8888_8888),
-    PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, mips_composite_src_8888_8888),
-    PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, mips_composite_src_x888_8888),
-    PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, mips_composite_src_x888_8888),
-    PIXMAN_STD_FAST_PATH (SRC, r8g8b8,   null, r8g8b8,   mips_composite_src_0888_0888),
 #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL)
     PIXMAN_STD_FAST_PATH (SRC, b8g8r8,   null, x8r8g8b8, mips_composite_src_0888_8888_rev),
     PIXMAN_STD_FAST_PATH (SRC, b8g8r8,   null, r5g6b5,   mips_composite_src_0888_0565_rev),
@@ -437,10 +426,10 @@ mips_dspr2_combine_over_u (pixman_implementation_t *imp,
                            int                      width)
 {
     if (mask)
-        pixman_composite_over_8888_8888_8888_asm_mips (
+        pixman_composite_over_8888_8888_8888_asm_mips_dspr2 (
             dest, (uint32_t *)src, (uint32_t *)mask, width);
     else
-        pixman_composite_over_8888_8888_asm_mips (
+        pixman_composite_over_8888_8888_asm_mips_dspr2 (
 		    dest, (uint32_t *)src, width);
 }
 
diff --git a/pixman/pixman-mips-dspr2.h b/pixman/pixman-mips-dspr2.h
deleted file mode 100644
index 955ed70..0000000
--- a/pixman/pixman-mips-dspr2.h
+++ /dev/null
@@ -1,438 +0,0 @@
-/*
- * Copyright (c) 2012
- *      MIPS Technologies, Inc., California.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
- *    contributors may be used to endorse or promote products derived from
- *    this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * Author:  Nemanja Lukic (nlukic at mips.com)
- */
-
-#ifndef PIXMAN_MIPS_DSPR2_H
-#define PIXMAN_MIPS_DSPR2_H
-
-#include "pixman-private.h"
-#include "pixman-inlines.h"
-
-#define SKIP_ZERO_SRC  1
-#define SKIP_ZERO_MASK 2
-#define DO_FAST_MEMCPY 3
-
-void
-pixman_mips_fast_memcpy (void *dst, void *src, uint32_t n_bytes);
-void
-pixman_fill_buff16_mips (void *dst, uint32_t n_bytes, uint16_t value);
-void
-pixman_fill_buff32_mips (void *dst, uint32_t n_bytes, uint32_t value);
-
-/****************************************************************/
-
-#define PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST(flags, name,          \
-                                           src_type, src_cnt,    \
-                                           dst_type, dst_cnt)    \
-void                                                             \
-pixman_composite_##name##_asm_mips (dst_type *dst,               \
-                                    src_type *src,               \
-                                    int32_t   w);                \
-                                                                 \
-static void                                                      \
-mips_composite_##name (pixman_implementation_t *imp,             \
-                       pixman_composite_info_t *info)            \
-{                                                                \
-    PIXMAN_COMPOSITE_ARGS (info);                                \
-    dst_type *dst_line, *dst;                                    \
-    src_type *src_line, *src;                                    \
-    int32_t dst_stride, src_stride;                              \
-    int bpp = PIXMAN_FORMAT_BPP (dest_image->bits.format) / 8;   \
-                                                                 \
-    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type,    \
-                           src_stride, src_line, src_cnt);       \
-    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type, \
-                           dst_stride, dst_line, dst_cnt);       \
-                                                                 \
-    while (height--)                                             \
-    {                                                            \
-      dst = dst_line;                                            \
-      dst_line += dst_stride;                                    \
-      src = src_line;                                            \
-      src_line += src_stride;                                    \
-                                                                 \
-      if (flags == DO_FAST_MEMCPY)                               \
-        pixman_mips_fast_memcpy (dst, src, width * bpp);         \
-      else                                                       \
-        pixman_composite_##name##_asm_mips (dst, src, width);    \
-    }                                                            \
-}
-
-/****************************************************************/
-
-#define PIXMAN_MIPS_BIND_FAST_PATH_N_DST(flags, name,            \
-                                         dst_type, dst_cnt)      \
-void                                                             \
-pixman_composite_##name##_asm_mips (dst_type *dst,               \
-                                    uint32_t  src,               \
-                                    int32_t   w);                \
-                                                                 \
-static void                                                      \
-mips_composite_##name (pixman_implementation_t *imp,             \
-                       pixman_composite_info_t *info)            \
-{                                                                \
-    PIXMAN_COMPOSITE_ARGS (info);                                \
-    dst_type  *dst_line, *dst;                                   \
-    int32_t    dst_stride;                                       \
-    uint32_t   src;                                              \
-                                                                 \
-    src = _pixman_image_get_solid (                              \
-    imp, src_image, dest_image->bits.format);                    \
-                                                                 \
-    if ((flags & SKIP_ZERO_SRC) && src == 0)                     \
-        return;                                                  \
-                                                                 \
-    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type, \
-                           dst_stride, dst_line, dst_cnt);       \
-                                                                 \
-    while (height--)                                             \
-    {                                                            \
-        dst = dst_line;                                          \
-        dst_line += dst_stride;                                  \
-                                                                 \
-        pixman_composite_##name##_asm_mips (dst, src, width);    \
-    }                                                            \
-}
-
-/*******************************************************************/
-
-#define PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST(flags, name,          \
-                                              mask_type, mask_cnt,  \
-                                              dst_type, dst_cnt)    \
-void                                                                \
-pixman_composite_##name##_asm_mips (dst_type  *dst,                 \
-                                    uint32_t  src,                  \
-                                    mask_type *mask,                \
-                                    int32_t   w);                   \
-                                                                    \
-static void                                                         \
-mips_composite_##name (pixman_implementation_t *imp,                \
-                       pixman_composite_info_t *info)               \
-{                                                                   \
-    PIXMAN_COMPOSITE_ARGS (info);                                   \
-    dst_type  *dst_line, *dst;                                      \
-    mask_type *mask_line, *mask;                                    \
-    int32_t    dst_stride, mask_stride;                             \
-    uint32_t   src;                                                 \
-                                                                    \
-    src = _pixman_image_get_solid (                                 \
-        imp, src_image, dest_image->bits.format);                   \
-                                                                    \
-    if ((flags & SKIP_ZERO_SRC) && src == 0)                        \
-        return;                                                     \
-                                                                    \
-    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type,    \
-                           dst_stride, dst_line, dst_cnt);          \
-    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type,   \
-                           mask_stride, mask_line, mask_cnt);       \
-                                                                    \
-    while (height--)                                                \
-    {                                                               \
-        dst = dst_line;                                             \
-        dst_line += dst_stride;                                     \
-        mask = mask_line;                                           \
-        mask_line += mask_stride;                                   \
-        pixman_composite_##name##_asm_mips (dst, src, mask, width); \
-    }                                                               \
-}
-
-/*******************************************************************/
-
-#define PIXMAN_MIPS_BIND_FAST_PATH_SRC_N_DST(flags, name,           \
-                                            src_type, src_cnt,      \
-                                            dst_type, dst_cnt)      \
-void                                                                \
-pixman_composite_##name##_asm_mips (dst_type  *dst,                 \
-                                    src_type  *src,                 \
-                                    uint32_t   mask,                \
-                                    int32_t    w);                  \
-                                                                    \
-static void                                                         \
-mips_composite_##name (pixman_implementation_t *imp,                \
-                       pixman_composite_info_t *info)               \
-{                                                                   \
-    PIXMAN_COMPOSITE_ARGS (info);                                   \
-    dst_type  *dst_line, *dst;                                      \
-    src_type  *src_line, *src;                                      \
-    int32_t    dst_stride, src_stride;                              \
-    uint32_t   mask;                                                \
-                                                                    \
-    mask = _pixman_image_get_solid (                                \
-        imp, mask_image, dest_image->bits.format);                  \
-                                                                    \
-    if ((flags & SKIP_ZERO_MASK) && mask == 0)                      \
-        return;                                                     \
-                                                                    \
-    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type,    \
-                           dst_stride, dst_line, dst_cnt);          \
-    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type,       \
-                           src_stride, src_line, src_cnt);          \
-                                                                    \
-    while (height--)                                                \
-    {                                                               \
-        dst = dst_line;                                             \
-        dst_line += dst_stride;                                     \
-        src = src_line;                                             \
-        src_line += src_stride;                                     \
-                                                                    \
-        pixman_composite_##name##_asm_mips (dst, src, mask, width); \
-    }                                                               \
-}
-
-/************************************************************************/
-
-#define PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST(name, src_type, src_cnt, \
-                                                mask_type, mask_cnt,     \
-                                                dst_type, dst_cnt)       \
-void                                                                     \
-pixman_composite_##name##_asm_mips (dst_type  *dst,                      \
-                                    src_type  *src,                      \
-                                    mask_type *mask,                     \
-                                    int32_t   w);                        \
-                                                                         \
-static void                                                              \
-mips_composite_##name (pixman_implementation_t *imp,                     \
-                       pixman_composite_info_t *info)                    \
-{                                                                        \
-    PIXMAN_COMPOSITE_ARGS (info);                                        \
-    dst_type  *dst_line, *dst;                                           \
-    src_type  *src_line, *src;                                           \
-    mask_type *mask_line, *mask;                                         \
-    int32_t    dst_stride, src_stride, mask_stride;                      \
-                                                                         \
-    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type,         \
-                           dst_stride, dst_line, dst_cnt);               \
-    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type,            \
-                           src_stride, src_line, src_cnt);               \
-    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type,        \
-                           mask_stride, mask_line, mask_cnt);            \
-                                                                         \
-    while (height--)                                                     \
-    {                                                                    \
-        dst = dst_line;                                                  \
-        dst_line += dst_stride;                                          \
-        mask = mask_line;                                                \
-        mask_line += mask_stride;                                        \
-        src = src_line;                                                  \
-        src_line += src_stride;                                          \
-        pixman_composite_##name##_asm_mips (dst, src, mask, width);      \
-    }                                                                    \
-}
-
-/****************************************************************************/
-
-#define PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_DST(name, op,                    \
-                                                src_type, dst_type)          \
-void                                                                         \
-pixman_scaled_nearest_scanline_##name##_##op##_asm_mips (                    \
-                                                   dst_type *       dst,     \
-                                                   const src_type * src,     \
-                                                   int32_t          w,       \
-                                                   pixman_fixed_t   vx,      \
-                                                   pixman_fixed_t   unit_x); \
-                                                                             \
-static force_inline void                                                     \
-scaled_nearest_scanline_mips_##name##_##op (dst_type *       pd,             \
-                                            const src_type * ps,             \
-                                            int32_t          w,              \
-                                            pixman_fixed_t   vx,             \
-                                            pixman_fixed_t   unit_x,         \
-                                            pixman_fixed_t   max_vx,         \
-                                            pixman_bool_t    zero_src)       \
-{                                                                            \
-    pixman_scaled_nearest_scanline_##name##_##op##_asm_mips (pd, ps, w,      \
-                                                             vx, unit_x);    \
-}                                                                            \
-                                                                             \
-FAST_NEAREST_MAINLOOP (mips_##name##_cover_##op,                             \
-                       scaled_nearest_scanline_mips_##name##_##op,           \
-                       src_type, dst_type, COVER)                            \
-FAST_NEAREST_MAINLOOP (mips_##name##_none_##op,                              \
-                       scaled_nearest_scanline_mips_##name##_##op,           \
-                       src_type, dst_type, NONE)                             \
-FAST_NEAREST_MAINLOOP (mips_##name##_pad_##op,                               \
-                       scaled_nearest_scanline_mips_##name##_##op,           \
-                       src_type, dst_type, PAD)
-
-/* Provide entries for the fast path table */
-#define PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH(op,s,d,func)                    \
-    SIMPLE_NEAREST_FAST_PATH_COVER (op,s,d,func),                            \
-    SIMPLE_NEAREST_FAST_PATH_NONE (op,s,d,func),                             \
-    SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func)
-
-
-/*****************************************************************************/
-
-#define PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_A8_DST(flags, name, op,           \
-                                                  src_type, dst_type)         \
-void                                                                          \
-pixman_scaled_nearest_scanline_##name##_##op##_asm_mips (                     \
-                                                   dst_type *       dst,      \
-                                                   const src_type * src,      \
-                                                   const uint8_t *  mask,     \
-                                                   int32_t          w,        \
-                                                   pixman_fixed_t   vx,       \
-                                                   pixman_fixed_t   unit_x);  \
-                                                                              \
-static force_inline void                                                      \
-scaled_nearest_scanline_mips_##name##_##op (const uint8_t *  mask,            \
-                                            dst_type *       pd,              \
-                                            const src_type * ps,              \
-                                            int32_t          w,               \
-                                            pixman_fixed_t   vx,              \
-                                            pixman_fixed_t   unit_x,          \
-                                            pixman_fixed_t   max_vx,          \
-                                            pixman_bool_t    zero_src)        \
-{                                                                             \
-    if ((flags & SKIP_ZERO_SRC) && zero_src)                                  \
-        return;                                                               \
-    pixman_scaled_nearest_scanline_##name##_##op##_asm_mips (pd, ps,          \
-                                                             mask, w,         \
-                                                             vx, unit_x);     \
-}                                                                             \
-                                                                              \
-FAST_NEAREST_MAINLOOP_COMMON (mips_##name##_cover_##op,                       \
-                              scaled_nearest_scanline_mips_##name##_##op,     \
-                              src_type, uint8_t, dst_type, COVER, TRUE, FALSE)\
-FAST_NEAREST_MAINLOOP_COMMON (mips_##name##_none_##op,                        \
-                              scaled_nearest_scanline_mips_##name##_##op,     \
-                              src_type, uint8_t, dst_type, NONE, TRUE, FALSE) \
-FAST_NEAREST_MAINLOOP_COMMON (mips_##name##_pad_##op,                         \
-                              scaled_nearest_scanline_mips_##name##_##op,     \
-                              src_type, uint8_t, dst_type, PAD, TRUE, FALSE)
-
-/* Provide entries for the fast path table */
-#define PIXMAN_MIPS_SIMPLE_NEAREST_A8_MASK_FAST_PATH(op,s,d,func)             \
-    SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER (op,s,d,func),                     \
-    SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE (op,s,d,func),                      \
-    SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD (op,s,d,func)
-
-/****************************************************************************/
-
-#define PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST(flags, name, op,            \
-                                                 src_type, dst_type)         \
-void                                                                         \
-pixman_scaled_bilinear_scanline_##name##_##op##_asm_mips(                    \
-                                             dst_type *       dst,           \
-                                             const src_type * src_top,       \
-                                             const src_type * src_bottom,    \
-                                             int32_t          w,             \
-                                             int              wt,            \
-                                             int              wb,            \
-                                             pixman_fixed_t   vx,            \
-                                             pixman_fixed_t   unit_x);       \
-static force_inline void                                                     \
-scaled_bilinear_scanline_mips_##name##_##op (dst_type *       dst,           \
-                                             const uint32_t * mask,          \
-                                             const src_type * src_top,       \
-                                             const src_type * src_bottom,    \
-                                             int32_t          w,             \
-                                             int              wt,            \
-                                             int              wb,            \
-                                             pixman_fixed_t   vx,            \
-                                             pixman_fixed_t   unit_x,        \
-                                             pixman_fixed_t   max_vx,        \
-                                             pixman_bool_t    zero_src)      \
-{                                                                            \
-    if ((flags & SKIP_ZERO_SRC) && zero_src)                                 \
-        return;                                                              \
-    pixman_scaled_bilinear_scanline_##name##_##op##_asm_mips (dst, src_top,  \
-                                                              src_bottom, w, \
-                                                              wt, wb,        \
-                                                              vx, unit_x);   \
-}                                                                            \
-                                                                             \
-FAST_BILINEAR_MAINLOOP_COMMON (mips_##name##_cover_##op,                     \
-                       scaled_bilinear_scanline_mips_##name##_##op,          \
-                       src_type, uint32_t, dst_type, COVER, FLAG_NONE)       \
-FAST_BILINEAR_MAINLOOP_COMMON (mips_##name##_none_##op,                      \
-                       scaled_bilinear_scanline_mips_##name##_##op,          \
-                       src_type, uint32_t, dst_type, NONE, FLAG_NONE)        \
-FAST_BILINEAR_MAINLOOP_COMMON (mips_##name##_pad_##op,                       \
-                       scaled_bilinear_scanline_mips_##name##_##op,          \
-                       src_type, uint32_t, dst_type, PAD, FLAG_NONE)         \
-FAST_BILINEAR_MAINLOOP_COMMON (mips_##name##_normal_##op,                    \
-                       scaled_bilinear_scanline_mips_##name##_##op,          \
-                       src_type, uint32_t, dst_type, NORMAL,                 \
-                       FLAG_NONE)
-
-/*****************************************************************************/
-
-#define PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_A8_DST(flags, name, op,          \
-                                                src_type, dst_type)           \
-void                                                                          \
-pixman_scaled_bilinear_scanline_##name##_##op##_asm_mips (                    \
-                                             dst_type *       dst,            \
-                                             const uint8_t *  mask,           \
-                                             const src_type * top,            \
-                                             const src_type * bottom,         \
-                                             int              wt,             \
-                                             int              wb,             \
-                                             pixman_fixed_t   x,              \
-                                             pixman_fixed_t   ux,             \
-                                             int              width);         \
-                                                                              \
-static force_inline void                                                      \
-scaled_bilinear_scanline_mips_##name##_##op (dst_type *       dst,            \
-                                             const uint8_t *  mask,           \
-                                             const src_type * src_top,        \
-                                             const src_type * src_bottom,     \
-                                             int32_t          w,              \
-                                             int              wt,             \
-                                             int              wb,             \
-                                             pixman_fixed_t   vx,             \
-                                             pixman_fixed_t   unit_x,         \
-                                             pixman_fixed_t   max_vx,         \
-                                             pixman_bool_t    zero_src)       \
-{                                                                             \
-    if ((flags & SKIP_ZERO_SRC) && zero_src)                                  \
-        return;                                                               \
-    pixman_scaled_bilinear_scanline_##name##_##op##_asm_mips (                \
-                      dst, mask, src_top, src_bottom, wt, wb, vx, unit_x, w); \
-}                                                                             \
-                                                                              \
-FAST_BILINEAR_MAINLOOP_COMMON (mips_##name##_cover_##op,                      \
-                       scaled_bilinear_scanline_mips_##name##_##op,           \
-                       src_type, uint8_t, dst_type, COVER,                    \
-                       FLAG_HAVE_NON_SOLID_MASK)                              \
-FAST_BILINEAR_MAINLOOP_COMMON (mips_##name##_none_##op,                       \
-                       scaled_bilinear_scanline_mips_##name##_##op,           \
-                       src_type, uint8_t, dst_type, NONE,                     \
-                       FLAG_HAVE_NON_SOLID_MASK)                              \
-FAST_BILINEAR_MAINLOOP_COMMON (mips_##name##_pad_##op,                        \
-                       scaled_bilinear_scanline_mips_##name##_##op,           \
-                       src_type, uint8_t, dst_type, PAD,                      \
-                       FLAG_HAVE_NON_SOLID_MASK)                              \
-FAST_BILINEAR_MAINLOOP_COMMON (mips_##name##_normal_##op,                     \
-                       scaled_bilinear_scanline_mips_##name##_##op,           \
-                       src_type, uint8_t, dst_type, NORMAL,                   \
-                       FLAG_HAVE_NON_SOLID_MASK)
-
-#endif //PIXMAN_MIPS_DSPR2_H
diff --git a/pixman/pixman-mips-memcpy-asm.S b/pixman/pixman-mips-memcpy-asm.S
deleted file mode 100644
index 9ad6da5..0000000
--- a/pixman/pixman-mips-memcpy-asm.S
+++ /dev/null
@@ -1,382 +0,0 @@
-/*
- * Copyright (c) 2012
- *      MIPS Technologies, Inc., California.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
- *    contributors may be used to endorse or promote products derived from
- *    this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include "pixman-mips-dspr2-asm.h"
-
-/*
- * This routine could be optimized for MIPS64. The current code only
- * uses MIPS32 instructions.
- */
-
-#ifdef EB
-#  define LWHI	lwl		/* high part is left in big-endian */
-#  define SWHI	swl		/* high part is left in big-endian */
-#  define LWLO	lwr		/* low part is right in big-endian */
-#  define SWLO	swr		/* low part is right in big-endian */
-#else
-#  define LWHI	lwr		/* high part is right in little-endian */
-#  define SWHI	swr		/* high part is right in little-endian */
-#  define LWLO	lwl		/* low part is left in big-endian */
-#  define SWLO	swl		/* low part is left in big-endian */
-#endif
-
-LEAF_MIPS32R2(pixman_mips_fast_memcpy)
-
-	slti	AT, a2, 8
-	bne	AT, zero, $last8
-	move	v0, a0	/* memcpy returns the dst pointer */
-
-/* Test if the src and dst are word-aligned, or can be made word-aligned */
-	xor	t8, a1, a0
-	andi	t8, t8, 0x3		/* t8 is a0/a1 word-displacement */
-
-	bne	t8, zero, $unaligned
-	negu	a3, a0
-
-	andi	a3, a3, 0x3	/* we need to copy a3 bytes to make a0/a1 aligned */
-	beq	a3, zero, $chk16w	/* when a3=0 then the dst (a0) is word-aligned */
-	subu	a2, a2, a3	/* now a2 is the remining bytes count */
-
-	LWHI	t8, 0(a1)
-	addu	a1, a1, a3
-	SWHI	t8, 0(a0)
-	addu	a0, a0, a3
-
-/* Now the dst/src are mutually word-aligned with word-aligned addresses */
-$chk16w:	andi	t8, a2, 0x3f	/* any whole 64-byte chunks? */
-				/* t8 is the byte count after 64-byte chunks */
-
-	beq	a2, t8, $chk8w	/* if a2==t8, no 64-byte chunks */
-				/* There will be at most 1 32-byte chunk after it */
-	subu	a3, a2, t8	/* subtract from a2 the reminder */
-                                /* Here a3 counts bytes in 16w chunks */
-	addu	a3, a0, a3	/* Now a3 is the final dst after 64-byte chunks */
-
-	addu	t0, a0, a2	/* t0 is the "past the end" address */
-
-/*
- * When in the loop we exercise "pref 30, x(a0)", the a0+x should not be past
- * the "t0-32" address
- * This means: for x=128 the last "safe" a0 address is "t0-160"
- * Alternatively, for x=64 the last "safe" a0 address is "t0-96"
- * In the current version we use "pref 30, 128(a0)", so "t0-160" is the limit
- */
-	subu	t9, t0, 160	/* t9 is the "last safe pref 30, 128(a0)" address */
-
-	pref    0, 0(a1)		/* bring the first line of src, addr 0 */
-	pref    0, 32(a1)	/* bring the second line of src, addr 32 */
-	pref    0, 64(a1)	/* bring the third line of src, addr 64 */
-	pref	30, 32(a0)	/* safe, as we have at least 64 bytes ahead */
-/* In case the a0 > t9 don't use "pref 30" at all */
-	sgtu	v1, a0, t9
-	bgtz	v1, $loop16w	/* skip "pref 30, 64(a0)" for too short arrays */
-	nop
-/* otherwise, start with using pref30 */
-	pref	30, 64(a0)
-$loop16w:
-	pref	0, 96(a1)
-	lw	t0, 0(a1)
-	bgtz	v1, $skip_pref30_96	/* skip "pref 30, 96(a0)" */
-	lw	t1, 4(a1)
-	pref    30, 96(a0)   /* continue setting up the dest, addr 96 */
-$skip_pref30_96:
-	lw	t2, 8(a1)
-	lw	t3, 12(a1)
-	lw	t4, 16(a1)
-	lw	t5, 20(a1)
-	lw	t6, 24(a1)
-	lw	t7, 28(a1)
-        pref    0, 128(a1)    /* bring the next lines of src, addr 128 */
-
-	sw	t0, 0(a0)
-	sw	t1, 4(a0)
-	sw	t2, 8(a0)
-	sw	t3, 12(a0)
-	sw	t4, 16(a0)
-	sw	t5, 20(a0)
-	sw	t6, 24(a0)
-	sw	t7, 28(a0)
-
-	lw	t0, 32(a1)
-	bgtz	v1, $skip_pref30_128	/* skip "pref 30, 128(a0)" */
-	lw	t1, 36(a1)
-	pref    30, 128(a0)   /* continue setting up the dest, addr 128 */
-$skip_pref30_128:
-	lw	t2, 40(a1)
-	lw	t3, 44(a1)
-	lw	t4, 48(a1)
-	lw	t5, 52(a1)
-	lw	t6, 56(a1)
-	lw	t7, 60(a1)
-        pref    0, 160(a1)    /* bring the next lines of src, addr 160 */
-
-	sw	t0, 32(a0)
-	sw	t1, 36(a0)
-	sw	t2, 40(a0)
-	sw	t3, 44(a0)
-	sw	t4, 48(a0)
-	sw	t5, 52(a0)
-	sw	t6, 56(a0)
-	sw	t7, 60(a0)
-
-	addiu	a0, a0, 64	/* adding 64 to dest */
-	sgtu	v1, a0, t9
-	bne	a0, a3, $loop16w
-	addiu	a1, a1, 64	/* adding 64 to src */
-	move	a2, t8
-
-/* Here we have src and dest word-aligned but less than 64-bytes to go */
-
-$chk8w:
-	pref 0, 0x0(a1)
-	andi	t8, a2, 0x1f	/* is there a 32-byte chunk? */
-				/* the t8 is the reminder count past 32-bytes */
-	beq	a2, t8, $chk1w	/* when a2=t8, no 32-byte chunk */
-	 nop
-
-	lw	t0, 0(a1)
-	lw	t1, 4(a1)
-	lw	t2, 8(a1)
-	lw	t3, 12(a1)
-	lw	t4, 16(a1)
-	lw	t5, 20(a1)
-	lw	t6, 24(a1)
-	lw	t7, 28(a1)
-	addiu	a1, a1, 32
-
-	sw	t0, 0(a0)
-	sw	t1, 4(a0)
-	sw	t2, 8(a0)
-	sw	t3, 12(a0)
-	sw	t4, 16(a0)
-	sw	t5, 20(a0)
-	sw	t6, 24(a0)
-	sw	t7, 28(a0)
-	addiu	a0, a0, 32
-
-$chk1w:
-	andi	a2, t8, 0x3	/* now a2 is the reminder past 1w chunks */
-	beq	a2, t8, $last8
-	subu	a3, t8, a2	/* a3 is count of bytes in 1w chunks */
-	addu	a3, a0, a3	/* now a3 is the dst address past the 1w chunks */
-
-/* copying in words (4-byte chunks) */
-$wordCopy_loop:
-	lw	t3, 0(a1)	/* the first t3 may be equal t0 ... optimize? */
-	addiu	a1, a1, 4
-	addiu	a0, a0, 4
-	bne	a0, a3, $wordCopy_loop
-	sw	t3, -4(a0)
-
-/* For the last (<8) bytes */
-$last8:
-	blez	a2, leave
-	addu	a3, a0, a2	/* a3 is the last dst address */
-$last8loop:
-	lb	v1, 0(a1)
-	addiu	a1, a1, 1
-	addiu	a0, a0, 1
-	bne	a0, a3, $last8loop
-	sb	v1, -1(a0)
-
-leave:	j	ra
-	nop
-
-/*
- * UNALIGNED case
- */
-
-$unaligned:
-	/* got here with a3="negu a0" */
-	andi	a3, a3, 0x3	/* test if the a0 is word aligned */
-	beqz	a3, $ua_chk16w
-	subu	a2, a2, a3	/* bytes left after initial a3 bytes */
-
-	LWHI	v1, 0(a1)
-	LWLO	v1, 3(a1)
-	addu	a1, a1, a3	/* a3 may be here 1, 2 or 3 */
-	SWHI	v1, 0(a0)
-	addu	a0, a0, a3	/* below the dst will be word aligned (NOTE1) */
-
-$ua_chk16w:	andi	t8, a2, 0x3f	/* any whole 64-byte chunks? */
-				/* t8 is the byte count after 64-byte chunks */
-	beq	a2, t8, $ua_chk8w	/* if a2==t8, no 64-byte chunks */
-				/* There will be at most 1 32-byte chunk after it */
-	subu	a3, a2, t8	/* subtract from a2 the reminder */
-                                /* Here a3 counts bytes in 16w chunks */
-	addu	a3, a0, a3	/* Now a3 is the final dst after 64-byte chunks */
-
-	addu	t0, a0, a2	/* t0 is the "past the end" address */
-
-	subu	t9, t0, 160	/* t9 is the "last safe pref 30, 128(a0)" address */
-
-	pref    0, 0(a1)		/* bring the first line of src, addr 0 */
-	pref    0, 32(a1)	/* bring the second line of src, addr 32 */
-	pref    0, 64(a1)	/* bring the third line of src, addr 64 */
-	pref	30, 32(a0)	/* safe, as we have at least 64 bytes ahead */
-/* In case the a0 > t9 don't use "pref 30" at all */
-	sgtu	v1, a0, t9
-	bgtz	v1, $ua_loop16w	/* skip "pref 30, 64(a0)" for too short arrays */
-	nop
-/* otherwise,  start with using pref30 */
-	pref	30, 64(a0)
-$ua_loop16w:
-	pref	0, 96(a1)
-	LWHI	t0, 0(a1)
-	LWLO	t0, 3(a1)
-	LWHI	t1, 4(a1)
-	bgtz	v1, $ua_skip_pref30_96
-	LWLO	t1, 7(a1)
-	pref    30, 96(a0)   /* continue setting up the dest, addr 96 */
-$ua_skip_pref30_96:
-	LWHI	t2, 8(a1)
-	LWLO	t2, 11(a1)
-	LWHI	t3, 12(a1)
-	LWLO	t3, 15(a1)
-	LWHI	t4, 16(a1)
-	LWLO	t4, 19(a1)
-	LWHI	t5, 20(a1)
-	LWLO	t5, 23(a1)
-	LWHI	t6, 24(a1)
-	LWLO	t6, 27(a1)
-	LWHI	t7, 28(a1)
-	LWLO	t7, 31(a1)
-        pref    0, 128(a1)    /* bring the next lines of src, addr 128 */
-
-	sw	t0, 0(a0)
-	sw	t1, 4(a0)
-	sw	t2, 8(a0)
-	sw	t3, 12(a0)
-	sw	t4, 16(a0)
-	sw	t5, 20(a0)
-	sw	t6, 24(a0)
-	sw	t7, 28(a0)
-
-	LWHI	t0, 32(a1)
-	LWLO	t0, 35(a1)
-	LWHI	t1, 36(a1)
-	bgtz	v1, $ua_skip_pref30_128
-	LWLO	t1, 39(a1)
-	pref    30, 128(a0)   /* continue setting up the dest, addr 128 */
-$ua_skip_pref30_128:
-	LWHI	t2, 40(a1)
-	LWLO	t2, 43(a1)
-	LWHI	t3, 44(a1)
-	LWLO	t3, 47(a1)
-	LWHI	t4, 48(a1)
-	LWLO	t4, 51(a1)
-	LWHI	t5, 52(a1)
-	LWLO	t5, 55(a1)
-	LWHI	t6, 56(a1)
-	LWLO	t6, 59(a1)
-	LWHI	t7, 60(a1)
-	LWLO	t7, 63(a1)
-        pref    0, 160(a1)    /* bring the next lines of src, addr 160 */
-
-	sw	t0, 32(a0)
-	sw	t1, 36(a0)
-	sw	t2, 40(a0)
-	sw	t3, 44(a0)
-	sw	t4, 48(a0)
-	sw	t5, 52(a0)
-	sw	t6, 56(a0)
-	sw	t7, 60(a0)
-
-	addiu	a0, a0, 64	/* adding 64 to dest */
-	sgtu	v1, a0, t9
-	bne	a0, a3, $ua_loop16w
-	addiu	a1, a1, 64	/* adding 64 to src */
-	move	a2, t8
-
-/* Here we have src and dest word-aligned but less than 64-bytes to go */
-
-$ua_chk8w:
-	pref 0, 0x0(a1)
-	andi	t8, a2, 0x1f	/* is there a 32-byte chunk? */
-				/* the t8 is the reminder count */
-	beq	a2, t8, $ua_chk1w	/* when a2=t8, no 32-byte chunk */
-
-	LWHI	t0, 0(a1)
-	LWLO	t0, 3(a1)
-	LWHI	t1, 4(a1)
-	LWLO	t1, 7(a1)
-	LWHI	t2, 8(a1)
-	LWLO	t2, 11(a1)
-	LWHI	t3, 12(a1)
-	LWLO	t3, 15(a1)
-	LWHI	t4, 16(a1)
-	LWLO	t4, 19(a1)
-	LWHI	t5, 20(a1)
-	LWLO	t5, 23(a1)
-	LWHI	t6, 24(a1)
-	LWLO	t6, 27(a1)
-	LWHI	t7, 28(a1)
-	LWLO	t7, 31(a1)
-	addiu	a1, a1, 32
-
-	sw	t0, 0(a0)
-	sw	t1, 4(a0)
-	sw	t2, 8(a0)
-	sw	t3, 12(a0)
-	sw	t4, 16(a0)
-	sw	t5, 20(a0)
-	sw	t6, 24(a0)
-	sw	t7, 28(a0)
-	addiu	a0, a0, 32
-
-$ua_chk1w:
-	andi	a2, t8, 0x3	/* now a2 is the reminder past 1w chunks */
-	beq	a2, t8, $ua_smallCopy
-	subu	a3, t8, a2	/* a3 is count of bytes in 1w chunks */
-	addu	a3, a0, a3	/* now a3 is the dst address past the 1w chunks */
-
-/* copying in words (4-byte chunks) */
-$ua_wordCopy_loop:
-	LWHI	v1, 0(a1)
-	LWLO	v1, 3(a1)
-	addiu	a1, a1, 4
-	addiu	a0, a0, 4		/* note: dst=a0 is word aligned here, see NOTE1 */
-	bne	a0, a3, $ua_wordCopy_loop
-	sw	v1, -4(a0)
-
-/* Now less than 4 bytes (value in a2) left to copy */
-$ua_smallCopy:
-	beqz	a2, leave
-	addu	a3, a0, a2	/* a3 is the last dst address */
-$ua_smallCopy_loop:
-	lb	v1, 0(a1)
-	addiu	a1, a1, 1
-	addiu	a0, a0, 1
-	bne	a0, a3, $ua_smallCopy_loop
-	sb	v1, -1(a0)
-
-	j	ra
-	nop
-
-END(pixman_mips_fast_memcpy)
diff --git a/pixman/pixman-mips.c b/pixman/pixman-mips.c
index 3048813..e9cf2e9 100644
--- a/pixman/pixman-mips.c
+++ b/pixman/pixman-mips.c
@@ -24,14 +24,30 @@
 #endif
 
 #include "pixman-private.h"
-
-#if defined(USE_MIPS_DSPR2) || defined(USE_LOONGSON_MMI)
-
 #include <string.h>
 #include <stdlib.h>
 
+#ifdef USE_MIPS_DSPR2
+static const char *mips_dspr2_cores[] = {"MIPS 74K", NULL};
+#endif
+
+#ifdef USE_MIPS32R2
+static const char *mips32r2_cores[] = {"MIPS 1004K", "MIPS 74K", "MIPS 34K",
+                                       "MIPS 24K", "MIPS 4Kc", "MIPS 4Km",
+                                       "MIPS 4Kp", "MIPS 4KEc", "MIPS 4KEm",
+                                       "MIPS 4KEp", "MIPS 4KSc", "MIPS 4KSd",
+                                        NULL};
+#endif
+
+#ifdef USE_LOONGSON_MMI
+static const char *mips_loongson_cores[] = {"Loongson", NULL};
+#endif
+
+#if defined(USE_MIPS_DSPR2) || defined(USE_MIPS32R2) || \
+    defined(USE_LOONGSON_MMI)
+
 static pixman_bool_t
-have_feature (const char *search_string)
+have_feature (const char **cores)
 {
 #if defined (__linux__) /* linux ELF */
     /* Simple detection of MIPS features at runtime for Linux.
@@ -47,13 +63,18 @@ have_feature (const char *search_string)
     if ((f = fopen (file_name, "r")) == NULL)
         return FALSE;
 
-    while (fgets (cpuinfo_line, sizeof (cpuinfo_line), f) != NULL)
+    while (*cores)
     {
-        if (strstr (cpuinfo_line, search_string) != NULL)
+        while (fgets (cpuinfo_line, sizeof (cpuinfo_line), f) != NULL)
         {
-            fclose (f);
-            return TRUE;
+            if (strstr (cpuinfo_line, *cores) != NULL)
+            {
+                fclose (f);
+                return TRUE;
+            }
         }
+        rewind (f);
+        cores++;
     }
 
     fclose (f);
@@ -70,10 +91,26 @@ _pixman_mips_get_implementations (pixman_implementation_t *imp)
 {
 #ifdef USE_LOONGSON_MMI
     /* I really don't know if some Loongson CPUs don't have MMI. */
-    if (!_pixman_disabled ("loongson-mmi") && have_feature ("Loongson"))
+    if (!_pixman_disabled ("loongson-mmi") &&
+        have_feature (mips_loongson_cores))
 	imp = _pixman_implementation_create_mmx (imp);
 #endif
 
+#ifdef USE_MIPS32R2
+    if (!_pixman_disabled ("mips32r2"))
+    {
+        int already_compiling_everything_for_mips32r2 = 0;
+#if defined(__mips__) && (__mips_isa_rev >= 2)
+        already_compiling_everything_for_mips32r2 = 1;
+#endif
+        if (already_compiling_everything_for_mips32r2 ||
+            have_feature (mips32r2_cores))
+        {
+            imp = _pixman_implementation_create_mips32r2 (imp);
+        }
+    }
+#endif
+
 #ifdef USE_MIPS_DSPR2
     if (!_pixman_disabled ("mips-dspr2"))
     {
@@ -82,8 +119,7 @@ _pixman_mips_get_implementations (pixman_implementation_t *imp)
 	already_compiling_everything_for_dspr2 = 1;
 #endif
 	if (already_compiling_everything_for_dspr2 ||
-	    /* Only currently available MIPS core that supports DSPr2 is 74K. */
-	    have_feature ("MIPS 74K"))
+	    have_feature (mips_dspr2_cores))
 	{
 	    imp = _pixman_implementation_create_mips_dspr2 (imp);
 	}
diff --git a/pixman/pixman-mips32r2-asm.S b/pixman/pixman-mips32r2-asm.S
new file mode 100644
index 0000000..3f73e41
--- /dev/null
+++ b/pixman/pixman-mips32r2-asm.S
@@ -0,0 +1,519 @@
+/*
+ * Copyright (c) 2012-2013
+ *      MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "pixman-mips32r2-asm.h"
+
+/*
+ * This routine could be optimized for MIPS64. The current code only
+ * uses MIPS32 instructions.
+ */
+
+#ifdef EB
+#  define LWHI	lwl		/* high part is left in big-endian */
+#  define SWHI	swl		/* high part is left in big-endian */
+#  define LWLO	lwr		/* low part is right in big-endian */
+#  define SWLO	swr		/* low part is right in big-endian */
+#else
+#  define LWHI	lwr		/* high part is right in little-endian */
+#  define SWHI	swr		/* high part is right in little-endian */
+#  define LWLO	lwl		/* low part is left in little-endian */
+#  define SWLO	swl		/* low part is left in little-endian */
+#endif
+
+LEAF_MIPS32R2(pixman_fast_memcpy)
+
+	slti	AT, a2, 8
+	bne	AT, zero, $last8
+	move	v0, a0	/* memcpy returns the dst pointer */
+
+/* Test if the src and dst are word-aligned, or can be made word-aligned */
+	xor	t8, a1, a0
+	andi	t8, t8, 0x3		/* t8 is a0/a1 word-displacement */
+
+	bne	t8, zero, $unaligned
+	negu	a3, a0
+
+	andi	a3, a3, 0x3	/* we need to copy a3 bytes to make a0/a1 aligned */
+	beq	a3, zero, $chk16w	/* when a3=0 then the dst (a0) is word-aligned */
+	subu	a2, a2, a3	/* now a2 is the remaining byte count */
+
+	LWHI	t8, 0(a1)
+	addu	a1, a1, a3
+	SWHI	t8, 0(a0)
+	addu	a0, a0, a3
+
+/* Now the dst/src are mutually word-aligned with word-aligned addresses */
+$chk16w:	andi	t8, a2, 0x3f	/* any whole 64-byte chunks? */
+				/* t8 is the byte count after 64-byte chunks */
+
+	beq	a2, t8, $chk8w	/* if a2==t8, no 64-byte chunks */
+				/* There will be at most 1 32-byte chunk after it */
+	subu	a3, a2, t8	/* subtract the remainder from a2 */
+                                /* Here a3 counts bytes in 16w chunks */
+	addu	a3, a0, a3	/* Now a3 is the final dst after 64-byte chunks */
+
+	addu	t0, a0, a2	/* t0 is the "past the end" address */
+
+/*
+ * When in the loop we exercise "pref 30, x(a0)", the a0+x should not be past
+ * the "t0-32" address
+ * This means: for x=128 the last "safe" a0 address is "t0-160"
+ * Alternatively, for x=64 the last "safe" a0 address is "t0-96"
+ * In the current version we use "pref 30, 128(a0)", so "t0-160" is the limit
+ */
+	subu	t9, t0, 160	/* t9 is the "last safe pref 30, 128(a0)" address */
+
+	pref    0, 0(a1)		/* bring the first line of src, addr 0 */
+	pref    0, 32(a1)	/* bring the second line of src, addr 32 */
+	pref    0, 64(a1)	/* bring the third line of src, addr 64 */
+	pref	30, 32(a0)	/* safe, as we have at least 64 bytes ahead */
+/* In case the a0 > t9 don't use "pref 30" at all */
+	sgtu	v1, a0, t9
+	bgtz	v1, $loop16w	/* skip "pref 30, 64(a0)" for too short arrays */
+	nop
+/* otherwise, start with using pref30 */
+	pref	30, 64(a0)
+$loop16w:
+	pref	0, 96(a1)
+	lw	t0, 0(a1)
+	bgtz	v1, $skip_pref30_96	/* skip "pref 30, 96(a0)" */
+	lw	t1, 4(a1)
+	pref    30, 96(a0)   /* continue setting up the dest, addr 96 */
+$skip_pref30_96:
+	lw	t2, 8(a1)
+	lw	t3, 12(a1)
+	lw	t4, 16(a1)
+	lw	t5, 20(a1)
+	lw	t6, 24(a1)
+	lw	t7, 28(a1)
+        pref    0, 128(a1)    /* bring the next lines of src, addr 128 */
+
+	sw	t0, 0(a0)
+	sw	t1, 4(a0)
+	sw	t2, 8(a0)
+	sw	t3, 12(a0)
+	sw	t4, 16(a0)
+	sw	t5, 20(a0)
+	sw	t6, 24(a0)
+	sw	t7, 28(a0)
+
+	lw	t0, 32(a1)
+	bgtz	v1, $skip_pref30_128	/* skip "pref 30, 128(a0)" */
+	lw	t1, 36(a1)
+	pref    30, 128(a0)   /* continue setting up the dest, addr 128 */
+$skip_pref30_128:
+	lw	t2, 40(a1)
+	lw	t3, 44(a1)
+	lw	t4, 48(a1)
+	lw	t5, 52(a1)
+	lw	t6, 56(a1)
+	lw	t7, 60(a1)
+        pref    0, 160(a1)    /* bring the next lines of src, addr 160 */
+
+	sw	t0, 32(a0)
+	sw	t1, 36(a0)
+	sw	t2, 40(a0)
+	sw	t3, 44(a0)
+	sw	t4, 48(a0)
+	sw	t5, 52(a0)
+	sw	t6, 56(a0)
+	sw	t7, 60(a0)
+
+	addiu	a0, a0, 64	/* adding 64 to dest */
+	sgtu	v1, a0, t9
+	bne	a0, a3, $loop16w
+	addiu	a1, a1, 64	/* adding 64 to src */
+	move	a2, t8
+
+/* Here we have src and dest word-aligned but less than 64-bytes to go */
+
+$chk8w:
+	pref 0, 0x0(a1)
+	andi	t8, a2, 0x1f	/* is there a 32-byte chunk? */
+				/* t8 is the remainder count past 32 bytes */
+	beq	a2, t8, $chk1w	/* when a2=t8, no 32-byte chunk */
+	 nop
+
+	lw	t0, 0(a1)
+	lw	t1, 4(a1)
+	lw	t2, 8(a1)
+	lw	t3, 12(a1)
+	lw	t4, 16(a1)
+	lw	t5, 20(a1)
+	lw	t6, 24(a1)
+	lw	t7, 28(a1)
+	addiu	a1, a1, 32
+
+	sw	t0, 0(a0)
+	sw	t1, 4(a0)
+	sw	t2, 8(a0)
+	sw	t3, 12(a0)
+	sw	t4, 16(a0)
+	sw	t5, 20(a0)
+	sw	t6, 24(a0)
+	sw	t7, 28(a0)
+	addiu	a0, a0, 32
+
+$chk1w:
+	andi	a2, t8, 0x3	/* now a2 is the remainder past 1w chunks */
+	beq	a2, t8, $last8
+	subu	a3, t8, a2	/* a3 is count of bytes in 1w chunks */
+	addu	a3, a0, a3	/* now a3 is the dst address past the 1w chunks */
+
+/* copying in words (4-byte chunks) */
+$wordCopy_loop:
+	lw	t3, 0(a1)	/* the first t3 may be equal t0 ... optimize? */
+	addiu	a1, a1, 4
+	addiu	a0, a0, 4
+	bne	a0, a3, $wordCopy_loop
+	sw	t3, -4(a0)
+
+/* For the last (<8) bytes */
+$last8:
+	blez	a2, leave
+	addu	a3, a0, a2	/* a3 is the dst end address (one past last byte) */
+$last8loop:
+	lb	v1, 0(a1)
+	addiu	a1, a1, 1
+	addiu	a0, a0, 1
+	bne	a0, a3, $last8loop
+	sb	v1, -1(a0)
+
+leave:	j	ra
+	nop
+
+/*
+ * UNALIGNED case
+ */
+
+$unaligned:
+	/* got here with a3="negu a0" */
+	andi	a3, a3, 0x3	/* test if the a0 is word aligned */
+	beqz	a3, $ua_chk16w
+	subu	a2, a2, a3	/* bytes left after initial a3 bytes */
+
+	LWHI	v1, 0(a1)
+	LWLO	v1, 3(a1)
+	addu	a1, a1, a3	/* a3 may be here 1, 2 or 3 */
+	SWHI	v1, 0(a0)
+	addu	a0, a0, a3	/* below the dst will be word aligned (NOTE1) */
+
+$ua_chk16w:	andi	t8, a2, 0x3f	/* any whole 64-byte chunks? */
+				/* t8 is the byte count after 64-byte chunks */
+	beq	a2, t8, $ua_chk8w	/* if a2==t8, no 64-byte chunks */
+				/* There will be at most 1 32-byte chunk after it */
+	subu	a3, a2, t8	/* subtract the remainder from a2 */
+                                /* Here a3 counts bytes in 16w chunks */
+	addu	a3, a0, a3	/* Now a3 is the final dst after 64-byte chunks */
+
+	addu	t0, a0, a2	/* t0 is the "past the end" address */
+
+	subu	t9, t0, 160	/* t9 is the "last safe pref 30, 128(a0)" address */
+
+	pref    0, 0(a1)		/* bring the first line of src, addr 0 */
+	pref    0, 32(a1)	/* bring the second line of src, addr 32 */
+	pref    0, 64(a1)	/* bring the third line of src, addr 64 */
+	pref	30, 32(a0)	/* safe, as we have at least 64 bytes ahead */
+/* In case the a0 > t9 don't use "pref 30" at all */
+	sgtu	v1, a0, t9
+	bgtz	v1, $ua_loop16w	/* skip "pref 30, 64(a0)" for too short arrays */
+	nop
+/* otherwise,  start with using pref30 */
+	pref	30, 64(a0)
+$ua_loop16w:
+	pref	0, 96(a1)
+	LWHI	t0, 0(a1)
+	LWLO	t0, 3(a1)
+	LWHI	t1, 4(a1)
+	bgtz	v1, $ua_skip_pref30_96
+	LWLO	t1, 7(a1)
+	pref    30, 96(a0)   /* continue setting up the dest, addr 96 */
+$ua_skip_pref30_96:
+	LWHI	t2, 8(a1)
+	LWLO	t2, 11(a1)
+	LWHI	t3, 12(a1)
+	LWLO	t3, 15(a1)
+	LWHI	t4, 16(a1)
+	LWLO	t4, 19(a1)
+	LWHI	t5, 20(a1)
+	LWLO	t5, 23(a1)
+	LWHI	t6, 24(a1)
+	LWLO	t6, 27(a1)
+	LWHI	t7, 28(a1)
+	LWLO	t7, 31(a1)
+        pref    0, 128(a1)    /* bring the next lines of src, addr 128 */
+
+	sw	t0, 0(a0)
+	sw	t1, 4(a0)
+	sw	t2, 8(a0)
+	sw	t3, 12(a0)
+	sw	t4, 16(a0)
+	sw	t5, 20(a0)
+	sw	t6, 24(a0)
+	sw	t7, 28(a0)
+
+	LWHI	t0, 32(a1)
+	LWLO	t0, 35(a1)
+	LWHI	t1, 36(a1)
+	bgtz	v1, $ua_skip_pref30_128
+	LWLO	t1, 39(a1)
+	pref    30, 128(a0)   /* continue setting up the dest, addr 128 */
+$ua_skip_pref30_128:
+	LWHI	t2, 40(a1)
+	LWLO	t2, 43(a1)
+	LWHI	t3, 44(a1)
+	LWLO	t3, 47(a1)
+	LWHI	t4, 48(a1)
+	LWLO	t4, 51(a1)
+	LWHI	t5, 52(a1)
+	LWLO	t5, 55(a1)
+	LWHI	t6, 56(a1)
+	LWLO	t6, 59(a1)
+	LWHI	t7, 60(a1)
+	LWLO	t7, 63(a1)
+        pref    0, 160(a1)    /* bring the next lines of src, addr 160 */
+
+	sw	t0, 32(a0)
+	sw	t1, 36(a0)
+	sw	t2, 40(a0)
+	sw	t3, 44(a0)
+	sw	t4, 48(a0)
+	sw	t5, 52(a0)
+	sw	t6, 56(a0)
+	sw	t7, 60(a0)
+
+	addiu	a0, a0, 64	/* adding 64 to dest */
+	sgtu	v1, a0, t9
+	bne	a0, a3, $ua_loop16w
+	addiu	a1, a1, 64	/* adding 64 to src */
+	move	a2, t8
+
+/* Here dest is word-aligned but src is not, and less than 64 bytes to go */
+
+$ua_chk8w:
+	pref 0, 0x0(a1)
+	andi	t8, a2, 0x1f	/* is there a 32-byte chunk? */
+				/* t8 is the remainder count */
+	beq	a2, t8, $ua_chk1w	/* when a2=t8, no 32-byte chunk */
+
+	LWHI	t0, 0(a1)
+	LWLO	t0, 3(a1)
+	LWHI	t1, 4(a1)
+	LWLO	t1, 7(a1)
+	LWHI	t2, 8(a1)
+	LWLO	t2, 11(a1)
+	LWHI	t3, 12(a1)
+	LWLO	t3, 15(a1)
+	LWHI	t4, 16(a1)
+	LWLO	t4, 19(a1)
+	LWHI	t5, 20(a1)
+	LWLO	t5, 23(a1)
+	LWHI	t6, 24(a1)
+	LWLO	t6, 27(a1)
+	LWHI	t7, 28(a1)
+	LWLO	t7, 31(a1)
+	addiu	a1, a1, 32
+
+	sw	t0, 0(a0)
+	sw	t1, 4(a0)
+	sw	t2, 8(a0)
+	sw	t3, 12(a0)
+	sw	t4, 16(a0)
+	sw	t5, 20(a0)
+	sw	t6, 24(a0)
+	sw	t7, 28(a0)
+	addiu	a0, a0, 32
+
+$ua_chk1w:
+	andi	a2, t8, 0x3	/* now a2 is the remainder past 1w chunks */
+	beq	a2, t8, $ua_smallCopy
+	subu	a3, t8, a2	/* a3 is count of bytes in 1w chunks */
+	addu	a3, a0, a3	/* now a3 is the dst address past the 1w chunks */
+
+/* copying in words (4-byte chunks) */
+$ua_wordCopy_loop:
+	LWHI	v1, 0(a1)
+	LWLO	v1, 3(a1)
+	addiu	a1, a1, 4
+	addiu	a0, a0, 4		/* note: dst=a0 is word aligned here, see NOTE1 */
+	bne	a0, a3, $ua_wordCopy_loop
+	sw	v1, -4(a0)
+
+/* Now less than 4 bytes (value in a2) left to copy */
+$ua_smallCopy:
+	beqz	a2, leave
+	addu	a3, a0, a2	/* a3 is the dst end address (one past last byte) */
+$ua_smallCopy_loop:
+	lb	v1, 0(a1)
+	addiu	a1, a1, 1
+	addiu	a0, a0, 1
+	bne	a0, a3, $ua_smallCopy_loop
+	sb	v1, -1(a0)
+
+	j	ra
+	nop
+
+END_MIPS32R2(pixman_fast_memcpy)
+
+LEAF_MIPS32R2(pixman_fill_buff32)
+/*
+ * a0 - *dest
+ * a1 - count (bytes)
+ * a2 - value to fill buffer with
+ */
+
+    beqz     a1, 3f
+     nop
+    srl      t1, a1, 5 /* t1 how many multiples of 32 bytes */
+    beqz     t1, 2f
+     nop
+1:
+    addiu    t1, t1, -1
+    beqz     t1, 11f
+     addiu   a1, a1, -32
+    pref     30, 32(a0)
+    sw       a2, 0(a0)
+    sw       a2, 4(a0)
+    sw       a2, 8(a0)
+    sw       a2, 12(a0)
+    sw       a2, 16(a0)
+    sw       a2, 20(a0)
+    sw       a2, 24(a0)
+    sw       a2, 28(a0)
+    b        1b
+     addiu   a0, a0, 32
+11:
+    sw       a2, 0(a0)
+    sw       a2, 4(a0)
+    sw       a2, 8(a0)
+    sw       a2, 12(a0)
+    sw       a2, 16(a0)
+    sw       a2, 20(a0)
+    sw       a2, 24(a0)
+    sw       a2, 28(a0)
+    addiu    a0, a0, 32
+2:
+    blez     a1, 3f
+     addiu   a1, a1, -4
+    sw       a2, 0(a0)
+    b        2b
+     addiu   a0, a0, 4
+3:
+    jr       ra
+     nop
+
+END_MIPS32R2(pixman_fill_buff32)
+
+LEAF_MIPS32R2(pixman_composite_src_x888_8888_asm)
+/*
+ * a0 - dst (a8r8g8b8)
+ * a1 - src (x8r8g8b8)
+ * a2 - w
+ */
+
+    beqz     a2, 4f
+     nop
+    li       t9, 0xff000000
+    srl      t8, a2, 3    /* t8 = how many multiples of 8 src pixels */
+    beqz     t8, 3f       /* branch if less than 8 src pixels */
+     nop
+1:
+    addiu    t8, t8, -1
+    beqz     t8, 2f
+     addiu   a2, a2, -8
+    pref     0, 32(a1)
+    lw       t0, 0(a1)
+    lw       t1, 4(a1)
+    lw       t2, 8(a1)
+    lw       t3, 12(a1)
+    lw       t4, 16(a1)
+    lw       t5, 20(a1)
+    lw       t6, 24(a1)
+    lw       t7, 28(a1)
+    addiu    a1, a1, 32
+    or       t0, t0, t9
+    or       t1, t1, t9
+    or       t2, t2, t9
+    or       t3, t3, t9
+    or       t4, t4, t9
+    or       t5, t5, t9
+    or       t6, t6, t9
+    or       t7, t7, t9
+    pref     30, 32(a0)
+    sw       t0, 0(a0)
+    sw       t1, 4(a0)
+    sw       t2, 8(a0)
+    sw       t3, 12(a0)
+    sw       t4, 16(a0)
+    sw       t5, 20(a0)
+    sw       t6, 24(a0)
+    sw       t7, 28(a0)
+    b        1b
+     addiu   a0, a0, 32
+2:
+    lw       t0, 0(a1)
+    lw       t1, 4(a1)
+    lw       t2, 8(a1)
+    lw       t3, 12(a1)
+    lw       t4, 16(a1)
+    lw       t5, 20(a1)
+    lw       t6, 24(a1)
+    lw       t7, 28(a1)
+    addiu    a1, a1, 32
+    or       t0, t0, t9
+    or       t1, t1, t9
+    or       t2, t2, t9
+    or       t3, t3, t9
+    or       t4, t4, t9
+    or       t5, t5, t9
+    or       t6, t6, t9
+    or       t7, t7, t9
+    sw       t0, 0(a0)
+    sw       t1, 4(a0)
+    sw       t2, 8(a0)
+    sw       t3, 12(a0)
+    sw       t4, 16(a0)
+    sw       t5, 20(a0)
+    sw       t6, 24(a0)
+    sw       t7, 28(a0)
+    beqz     a2, 4f
+     addiu   a0, a0, 32
+3:
+    lw       t0, 0(a1)
+    addiu    a1, a1, 4
+    addiu    a2, a2, -1
+    or       t1, t0, t9
+    sw       t1, 0(a0)
+    bnez     a2, 3b
+     addiu   a0, a0, 4
+4:
+    jr       ra
+     nop
+
+END_MIPS32R2(pixman_composite_src_x888_8888_asm)
diff --git a/pixman/pixman-mips32r2-asm.h b/pixman/pixman-mips32r2-asm.h
new file mode 100644
index 0000000..2b88c00
--- /dev/null
+++ b/pixman/pixman-mips32r2-asm.h
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2012-2013
+ *      MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Author:  Nemanja Lukic (nemanja.lukic at rt-rk.com)
+ */
+
+#ifndef PIXMAN_MIPS32R2_ASM_H
+#define PIXMAN_MIPS32R2_ASM_H
+
+#include "pixman-mips-common-asm.h"
+
+/*
+ * Conversion of single r5g6b5 pixel (in_565) to single a8r8g8b8 pixel
+ * returned in (out_8888) register. Requires two temporary registers
+ * (scratch1 and scratch2).
+ */
+.macro CONVERT_1x0565_TO_1x8888 in_565,   \
+                                out_8888, \
+                                scratch1, scratch2
+    lui     \out_8888, 0xff00
+    sll     \scratch1, \in_565,   0x3
+    andi    \scratch2, \scratch1, 0xff
+    ext     \scratch1, \in_565,   0x2, 0x3
+    or      \scratch1, \scratch2, \scratch1
+    or      \out_8888, \out_8888, \scratch1
+
+    sll     \scratch1, \in_565,   0x5
+    andi    \scratch1, \scratch1, 0xfc00
+    srl     \scratch2, \in_565,   0x1
+    andi    \scratch2, \scratch2, 0x300
+    or      \scratch2, \scratch1, \scratch2
+    or      \out_8888, \out_8888, \scratch2
+
+    andi    \scratch1, \in_565,   0xf800
+    srl     \scratch2, \scratch1, 0x5
+    andi    \scratch2, \scratch2, 0xff00
+    or      \scratch1, \scratch1, \scratch2
+    sll     \scratch1, \scratch1, 0x8
+    or      \out_8888, \out_8888, \scratch1
+.endm
+
+/*
+ * Conversion of single a8r8g8b8 pixel (in_8888) to single r5g6b5 pixel
+ * returned in (out_565) register. Requires two temporary registers
+ * (scratch1 and scratch2).
+ */
+.macro CONVERT_1x8888_TO_1x0565 in_8888, \
+                                out_565, \
+                                scratch1, scratch2
+    ext     \out_565,  \in_8888,  0x3, 0x5
+    srl     \scratch1, \in_8888,  0x5
+    andi    \scratch1, \scratch1, 0x07e0
+    srl     \scratch2, \in_8888,  0x8
+    andi    \scratch2, \scratch2, 0xf800
+    or      \out_565,  \out_565,  \scratch1
+    or      \out_565,  \out_565,  \scratch2
+.endm
+
+#endif /* PIXMAN_MIPS32R2_ASM_H */
diff --git a/pixman/pixman-mips32r2.c b/pixman/pixman-mips32r2.c
new file mode 100644
index 0000000..4a6d29e
--- /dev/null
+++ b/pixman/pixman-mips32r2.c
@@ -0,0 +1,185 @@
+/*
+ * Copyright (c) 2012-2013
+ *      MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Author:  Nemanja Lukic (nemanja.lukic at rt-rk.com)
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include "pixman-private.h"
+#include "pixman-mips-common.h"
+
+/*
+ * Instantiate the composite entry points for the MIPS32r2 SRC copies.
+ *
+ * PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST is defined outside this file
+ * (presumably in pixman-mips-common.h -- confirm the exact argument
+ * meaning there).  As used here the arguments look like: flags (0 or
+ * DO_FAST_MEMCPY), operation name, source element type and element count
+ * per pixel, destination element type and element count per pixel, and
+ * the implementation suffix.  The mips_composite_<name> functions
+ * referenced from mips32r2_fast_paths are expected to come from these
+ * expansions.
+ */
+PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, src_x888_8888,
+                                    uint32_t, 1, uint32_t, 1, _mips32r2)
+PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (DO_FAST_MEMCPY, src_0565_0565,
+                                    uint16_t, 1, uint16_t, 1, _mips32r2)
+PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (DO_FAST_MEMCPY, src_8888_8888,
+                                    uint32_t, 1, uint32_t, 1, _mips32r2)
+PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (DO_FAST_MEMCPY, src_0888_0888,
+                                    uint8_t, 3, uint8_t, 3, _mips32r2)
+
+/*
+ * Solid fill of a width x height pixel rectangle at (x, y) in 'bits'.
+ *
+ * 'stride' is the row pitch expressed in uint32_t units; x, y, width and
+ * height are in pixels.  Only 16 and 32 bits per pixel are handled:
+ *   - 16 bpp stores the low 16 bits of _xor with a plain C loop;
+ *   - 32 bpp hands each row to pixman_fill_buff32_mips32r2 together with
+ *     the row length in bytes.
+ *
+ * Returns TRUE when the fill was performed and FALSE for any other depth.
+ * 'imp' is not used.
+ */
+static pixman_bool_t
+mips32r2_fill (pixman_implementation_t *imp,
+                 uint32_t *               bits,
+                 int                      stride,
+                 int                      bpp,
+                 int                      x,
+                 int                      y,
+                 int                      width,
+                 int                      height,
+                 uint32_t                 _xor)
+{
+    if (bpp == 16)
+    {
+        /* Row pitch converted from uint32_t units to uint16_t pixels. */
+        int       pixels_per_row =
+            (stride * (int)sizeof (uint32_t)) / (int)sizeof (uint16_t);
+        uint16_t  value = _xor & 0xffff;
+        uint16_t *row = (uint16_t *)bits + y * pixels_per_row + x;
+        int       col;
+
+        while (height--)
+        {
+            for (col = 0; col < width; ++col)
+                row[col] = value;
+
+            row += pixels_per_row;
+        }
+        return TRUE;
+    }
+
+    if (bpp == 32)
+    {
+        /* For 32 bpp one pixel is one uint32_t, so the pitch is unchanged. */
+        int      words_per_row = stride * (int) sizeof (uint32_t) / 4;
+        uint8_t *row = (uint8_t *)(((uint32_t *)bits) + words_per_row * y + x);
+        uint32_t row_bytes = width * 4;
+        int      pitch_bytes = words_per_row * 4;
+
+        while (height--)
+        {
+            pixman_fill_buff32_mips32r2 (row, row_bytes, _xor);
+            row += pitch_bytes;
+        }
+        return TRUE;
+    }
+
+    /* Any other depth is left to the next implementation in the chain. */
+    return FALSE;
+}
+
+/*
+ * Copy a width x height pixel rectangle from (src_x, src_y) in src_bits to
+ * (dest_x, dest_y) in dst_bits, one row at a time, using
+ * pixman_fast_memcpy_mips32r2.
+ *
+ * src_stride and dst_stride are row pitches in uint32_t units; all
+ * coordinates and sizes are in pixels.  Only same-depth copies at 16 or
+ * 32 bits per pixel are handled.
+ *
+ * Returns TRUE when the copy was performed, FALSE when the depths differ
+ * or are not supported.  'imp' is not used.
+ */
+static pixman_bool_t
+mips32r2_blt (pixman_implementation_t *imp,
+                uint32_t *               src_bits,
+                uint32_t *               dst_bits,
+                int                      src_stride,
+                int                      dst_stride,
+                int                      src_bpp,
+                int                      dst_bpp,
+                int                      src_x,
+                int                      src_y,
+                int                      dest_x,
+                int                      dest_y,
+                int                      width,
+                int                      height)
+{
+    uint8_t *src_bytes;
+    uint8_t *dst_bytes;
+    uint32_t byte_width;
+    int      bytes_per_pixel;
+
+    if (src_bpp != dst_bpp)
+        return FALSE;
+
+    /*
+     * Reject unsupported depths before doing any arithmetic: for depths
+     * below 8 bpp (src_bpp >> 3) is zero, and it used to be used as a
+     * divisor ahead of this check.
+     */
+    if (src_bpp != 16 && src_bpp != 32)
+        return FALSE;
+
+    bytes_per_pixel = src_bpp >> 3;
+
+    /* Convert the row pitches from uint32_t units to bytes. */
+    src_stride *= (int) sizeof (uint32_t);
+    dst_stride *= (int) sizeof (uint32_t);
+
+    src_bytes = (uint8_t *)src_bits
+                + src_stride * src_y + src_x * bytes_per_pixel;
+    dst_bytes = (uint8_t *)dst_bits
+                + dst_stride * dest_y + dest_x * bytes_per_pixel;
+    byte_width = width * bytes_per_pixel;
+
+    while (height--)
+    {
+        uint8_t *src = src_bytes;
+        uint8_t *dst = dst_bytes;
+        src_bytes += src_stride;
+        dst_bytes += dst_stride;
+        pixman_fast_memcpy_mips32r2 (dst, src, byte_width);
+    }
+    return TRUE;
+}
+
+/*
+ * Fast paths offered by the MIPS32r2 implementation.  Every entry uses the
+ * SRC operator with no mask:
+ *   - same-format 16 bpp copies (r5g6b5, b5g6r5)   -> src_0565_0565
+ *   - 32 bpp copies that need no alpha fix-up       -> src_8888_8888
+ *     (a888/x888 -> x888 and a888 -> a888)
+ *   - same-format 24 bpp copy (r8g8b8)              -> src_0888_0888
+ *   - x888 -> a888 conversions                      -> src_x888_8888
+ * The table is terminated by the PIXMAN_OP_NONE entry.
+ */
+static const pixman_fast_path_t mips32r2_fast_paths[] =
+{
+    PIXMAN_STD_FAST_PATH (SRC, r5g6b5,   null, r5g6b5,   mips_composite_src_0565_0565),
+    PIXMAN_STD_FAST_PATH (SRC, b5g6r5,   null, b5g6r5,   mips_composite_src_0565_0565),
+    PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, mips_composite_src_8888_8888),
+    PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, mips_composite_src_8888_8888),
+    PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, mips_composite_src_8888_8888),
+    PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, mips_composite_src_8888_8888),
+    PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, mips_composite_src_8888_8888),
+    PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, mips_composite_src_8888_8888),
+    PIXMAN_STD_FAST_PATH (SRC, r8g8b8,   null, r8g8b8,   mips_composite_src_0888_0888),
+    PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, mips_composite_src_x888_8888),
+    PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, mips_composite_src_x888_8888),
+    { PIXMAN_OP_NONE },
+};
+
+/*
+ * Build the MIPS32r2 implementation on top of 'fallback': it is created
+ * with the mips32r2_fast_paths table and then has its fill and blt hooks
+ * pointed at the MIPS32r2 routines of this file.
+ */
+pixman_implementation_t *
+_pixman_implementation_create_mips32r2 (pixman_implementation_t *fallback)
+{
+    pixman_implementation_t *mips32r2_imp;
+
+    mips32r2_imp = _pixman_implementation_create (fallback,
+                                                  mips32r2_fast_paths);
+
+    mips32r2_imp->fill = mips32r2_fill;
+    mips32r2_imp->blt = mips32r2_blt;
+
+    return mips32r2_imp;
+}
diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
index 9646605..1aacb8d 100644
--- a/pixman/pixman-private.h
+++ b/pixman/pixman-private.h
@@ -608,6 +608,11 @@ pixman_implementation_t *
 _pixman_implementation_create_mips_dspr2 (pixman_implementation_t *fallback);
 #endif
 
+#ifdef USE_MIPS32R2
+pixman_implementation_t *
+_pixman_implementation_create_mips32r2 (pixman_implementation_t *fallback);
+#endif
+
 #ifdef USE_VMX
 pixman_implementation_t *
 _pixman_implementation_create_vmx (pixman_implementation_t *fallback);
-- 
1.7.3



More information about the Pixman mailing list