[Pixman] [PATCH 3/3] MIPS: DSPr1: Basic infrastructure for DSPr1 optimizations

Nemanja Lukic nemanja.lukic at rt-rk.com
Tue Jul 23 08:42:14 PDT 2013


As with the previous patch, some of the optimizations introduced in earlier
DSPr2 commits were not DSPr2-specific and used only DSPr1 instructions.
Because Pixman's run-time CPU detection enables the DSPr2 fast paths only on
MIPS 74K cores, these optimizations could not be used on cores that support
DSPr1 but not DSPr2 (these are newer MIPS CPU cores such as 24KE, 34K, 1004K,
etc.).
This patch moves those DSPr1-only optimizations into a new dspr1 set of
fast paths, and adds the build-time and run-time infrastructure needed for
future DSPr1-only optimizations.
The DSPr1 optimizations listed below were introduced in earlier DSPr2
patches and have been tested on a MIPS 1004Kc core:

Performance numbers before/after on MIPS-1004kc @ 800 MHz

Referent (before):

add_8888_8888 =  L1:  26.47  L2:  19.70  M: 14.41 ( 42.65%)  HT: 13.73  VT: 13.45  R: 12.98  RT:  9.84 ( 105Kops/s)
   src_n_0565 =  L1: 341.86  L2: 194.28  M:135.85 (100.40%)  HT:115.85  VT:109.87  R:100.97  RT: 45.76 ( 252Kops/s)

Optimized (with these optimizations):

add_8888_8888 =  L1: 223.49  L2:  64.56  M: 29.42 ( 86.84%)  HT: 26.95  VT: 23.67  R: 23.73  RT: 15.20 ( 145Kops/s)
   src_n_0565 =  L1:1041.38  L2: 695.28  M:443.87 (327.47%)  HT:144.80  VT:131.38  R:122.05  RT: 45.08 ( 248Kops/s)
---
 configure.ac                    |   44 ++++++++
 pixman/Makefile.am              |   14 +++
 pixman/pixman-mips-common-asm.h |   13 +++
 pixman/pixman-mips-common.h     |    4 +-
 pixman/pixman-mips-dspr1-asm.S  |  166 ++++++++++++++++++++++++++++++
 pixman/pixman-mips-dspr1-asm.h  |   37 +++++++
 pixman/pixman-mips-dspr1.c      |  216 +++++++++++++++++++++++++++++++++++++++
 pixman/pixman-mips-dspr2-asm.S  |  133 ------------------------
 pixman/pixman-mips-dspr2-asm.h  |    2 +-
 pixman/pixman-mips-dspr2.c      |   26 ++++-
 pixman/pixman-mips.c            |   21 ++++-
 pixman/pixman-mips32r2.c        |    2 +-
 pixman/pixman-private.h         |    5 +
 13 files changed, 539 insertions(+), 144 deletions(-)
 create mode 100644 pixman/pixman-mips-dspr1-asm.S
 create mode 100644 pixman/pixman-mips-dspr1-asm.h
 create mode 100644 pixman/pixman-mips-dspr1.c

diff --git a/configure.ac b/configure.ac
index 42510bc..ab179ca 100644
--- a/configure.ac
+++ b/configure.ac
@@ -704,6 +704,50 @@ if test $enable_mips32r2 = yes && test $have_mips32r2 = no ; then
 fi
 
 dnl ==========================================================================
+dnl Check if assembler is gas compatible and supports MIPS DSPr1 instructions
+
+have_mips_dspr1=no
+AC_MSG_CHECKING(whether to use MIPS DSPr1 assembler)
+xserver_save_CFLAGS=$CFLAGS
+CFLAGS="-mdsp $CFLAGS"
+
+AC_COMPILE_IFELSE([[
+#if !(defined(__mips__) && __mips_isa_rev >= 2)
+#error MIPS DSPr1 is currently only available on MIPS32r2 platforms.
+#endif
+int
+main () {
+    int c = 0, a = 0, b = 0;
+    __asm__ __volatile__ (
+      "packrl.ph %[c], %[a], %[b]          \n\t"
+      : [c] "=r" (c)
+      : [a] "r" (a), [b] "r" (b)
+  );
+  return c;
+}]], have_mips_dspr1=yes)
+CFLAGS=$xserver_save_CFLAGS
+
+AC_ARG_ENABLE(mips-dspr1,
+   [AC_HELP_STRING([--disable-mips-dspr1],
+                   [disable MIPS DSPr1 fast paths])],
+   [enable_mips_dspr1=$enableval], [enable_mips_dspr1=auto])
+
+if test $enable_mips_dspr1 = no ; then
+   have_mips_dspr1=disabled
+fi
+
+if test $have_mips_dspr1 = yes ; then
+   AC_DEFINE(USE_MIPS_DSPR1, 1, [use MIPS DSPr1 assembly optimizations])
+fi
+
+AM_CONDITIONAL(USE_MIPS_DSPR1, test $have_mips_dspr1 = yes)
+
+AC_MSG_RESULT($have_mips_dspr1)
+if test $enable_mips_dspr1 = yes && test $have_mips_dspr1 = no ; then
+   AC_MSG_ERROR([MIPS DSPr1 instructions not detected])
+fi
+
+dnl ==========================================================================
 dnl Check if assembler is gas compatible and supports MIPS DSPr2 instructions
 
 have_mips_dspr2=no
diff --git a/pixman/Makefile.am b/pixman/Makefile.am
index 894d111..d7d020a 100644
--- a/pixman/Makefile.am
+++ b/pixman/Makefile.am
@@ -114,6 +114,20 @@ libpixman_1_la_LIBADD += libpixman-mips32r2.la
 ASM_CFLAGS_mips32r2=
 endif
 
+# mips dspr1 code
+if USE_MIPS_DSPR1
+noinst_LTLIBRARIES += libpixman-mips-dspr1.la
+libpixman_mips_dspr1_la_SOURCES = \
+        pixman-mips-dspr1.c \
+        pixman-mips-common.h \
+        pixman-mips-common-asm.h \
+        pixman-mips-dspr1-asm.S \
+        pixman-mips-dspr1-asm.h
+libpixman_1_la_LIBADD += libpixman-mips-dspr1.la
+
+ASM_CFLAGS_mips_dspr1=
+endif
+
 # mips dspr2 code
 if USE_MIPS_DSPR2
 noinst_LTLIBRARIES += libpixman-mips-dspr2.la
diff --git a/pixman/pixman-mips-common-asm.h b/pixman/pixman-mips-common-asm.h
index 3432aa7..a2d183d 100644
--- a/pixman/pixman-mips-common-asm.h
+++ b/pixman/pixman-mips-common-asm.h
@@ -88,6 +88,13 @@ symbol##suffix: .frame  sp, 0, ra;                      \
 LEAF_MIPS(symbol, _mips32r2)
 
 /*
+ * LEAF_MIPS_DSPR1 - declare leaf routine <symbol>_mips_dspr1 with ".set dsp"
+ */
+#define LEAF_MIPS_DSPR1(symbol)                         \
+LEAF_MIPS(symbol, _mips_dspr1)                          \
+                .set    dsp;
+
+/*
  * LEAF_MIPS_DSPR2 - declare leaf routine for MIPS DSPr2
  */
 #define LEAF_MIPS_DSPR2(symbol)                         \
@@ -109,6 +116,12 @@ LEAF_MIPS(symbol, _mips_dspr2)                          \
 END(function, _mips32r2)
 
 /*
+ * END_MIPS_DSPR1 - mark end of a routine opened with LEAF_MIPS_DSPR1
+ */
+#define END_MIPS_DSPR1(function)                        \
+END(function, _mips_dspr1)
+
+/*
  * END_MIPS_DSPR2 - mark end of mips_dspr2 function
  */
 #define END_MIPS_DSPR2(function)                        \
diff --git a/pixman/pixman-mips-common.h b/pixman/pixman-mips-common.h
index 36bd98b..e49657a 100644
--- a/pixman/pixman-mips-common.h
+++ b/pixman/pixman-mips-common.h
@@ -46,9 +46,9 @@ pixman_fill_buff32_mips32r2 (void *dst, uint32_t n_bytes, uint32_t value);
 void
 pixman_fill_buff16_mips32r2 (void *dst, uint32_t n_bytes, uint16_t value);
 
-#ifdef USE_MIPS_DSPR2
+#ifdef USE_MIPS_DSPR1
 void
-pixman_fill_buff16_mips_dspr2 (void *dst, uint32_t n_bytes, uint16_t value);
+pixman_fill_buff16_mips_dspr1 (void *dst, uint32_t n_bytes, uint16_t value);
 #endif
 
 /****************************************************************/
diff --git a/pixman/pixman-mips-dspr1-asm.S b/pixman/pixman-mips-dspr1-asm.S
new file mode 100644
index 0000000..7cde799
--- /dev/null
+++ b/pixman/pixman-mips-dspr1-asm.S
@@ -0,0 +1,166 @@
+/*
+ * Copyright (c) 2013
+ *      MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Author:  Nemanja Lukic (nemanja.lukic at rt-rk.com)
+ */
+
+#include "pixman-private.h"
+#include "pixman-mips-dspr1-asm.h"
+
+LEAF_MIPS_DSPR1(pixman_fill_buff16)
+/*
+ * a0 - *dest
+ * a1 - count (bytes)
+ * a2 - value to fill buffer with
+ */
+
+    beqz     a1, 3f          /* nothing to do for a zero byte count */
+     andi    t1, a0, 0x0002  /* (delay slot) t1 = bit 1 of dest address */
+    beqz     t1, 0f          /* check if address is 4-byte aligned */
+     nop
+    sh       a2, 0(a0)       /* store one halfword to reach 4-byte alignment */
+    addiu    a0, a0, 2
+    addiu    a1, a1, -2
+0:
+    srl      t1, a1, 5       /* t1 how many multiples of 32 bytes */
+    replv.ph a2, a2          /* replicate fill value (16bit) in a2 */
+    beqz     t1, 2f          /* fewer than 32 bytes: halfword loop only */
+     nop
+1:                           /* 32-byte blocks, all but the last one */
+    addiu    t1, t1, -1
+    beqz     t1, 11f         /* last 32-byte block is handled at 11: */
+     addiu   a1, a1, -32     /* (delay slot) runs on both paths */
+    pref     30, 32(a0)      /* hint 30 on the next block, which is in range */
+    sw       a2, 0(a0)
+    sw       a2, 4(a0)
+    sw       a2, 8(a0)
+    sw       a2, 12(a0)
+    sw       a2, 16(a0)
+    sw       a2, 20(a0)
+    sw       a2, 24(a0)
+    sw       a2, 28(a0)
+    b        1b
+     addiu   a0, a0, 32      /* (delay slot) advance dest */
+11:                          /* final 32-byte block, no pref past it */
+    sw       a2, 0(a0)
+    sw       a2, 4(a0)
+    sw       a2, 8(a0)
+    sw       a2, 12(a0)
+    sw       a2, 16(a0)
+    sw       a2, 20(a0)
+    sw       a2, 24(a0)
+    sw       a2, 28(a0)
+    addiu    a0, a0, 32
+2:                           /* remaining bytes, one halfword per pass */
+    blez     a1, 3f
+     addiu   a1, a1, -2      /* (delay slot) also runs when exiting */
+    sh       a2, 0(a0)
+    b        2b
+     addiu   a0, a0, 2       /* (delay slot) advance dest */
+3:
+    jr       ra
+     nop
+
+END_MIPS_DSPR1(pixman_fill_buff16)
+
+LEAF_MIPS_DSPR1(pixman_composite_add_8888_8888_asm)
+/*
+ * a0 - dst (a8r8g8b8)
+ * a1 - src (a8r8g8b8)
+ * a2 - w
+ */
+
+    beqz         a2, 4f         /* nothing to do for w == 0 */
+     nop
+
+    srl          t9, a2, 2      /* t9 = how many multiples of 4 src pixels */
+    beqz         t9, 3f         /* branch if less than 4 src pixels */
+     nop
+1:                              /* groups of 4 pixels, all but the last */
+    addiu        t9, t9, -1
+    beqz         t9, 2f         /* last group of 4 is handled at 2: */
+     addiu       a2, a2, -4     /* (delay slot) runs on both paths */
+
+    lw           t0, 0(a1)      /* t0-t3 = 4 src pixels */
+    lw           t1, 4(a1)
+    lw           t2, 8(a1)
+    lw           t3, 12(a1)
+    lw           t4, 0(a0)      /* t4-t7 = 4 dst pixels */
+    lw           t5, 4(a0)
+    lw           t6, 8(a0)
+    lw           t7, 12(a0)
+    addiu        a1, a1, 16
+
+    addu_s.qb    t4, t4, t0     /* per-byte saturating add: dst += src */
+    addu_s.qb    t5, t5, t1
+    addu_s.qb    t6, t6, t2
+    addu_s.qb    t7, t7, t3
+
+    sw           t4, 0(a0)
+    sw           t5, 4(a0)
+    sw           t6, 8(a0)
+    sw           t7, 12(a0)
+    b            1b
+     addiu       a0, a0, 16     /* (delay slot) advance dst */
+2:                              /* final group of 4 pixels */
+    lw           t0, 0(a1)
+    lw           t1, 4(a1)
+    lw           t2, 8(a1)
+    lw           t3, 12(a1)
+    lw           t4, 0(a0)
+    lw           t5, 4(a0)
+    lw           t6, 8(a0)
+    lw           t7, 12(a0)
+    addiu        a1, a1, 16
+
+    addu_s.qb    t4, t4, t0
+    addu_s.qb    t5, t5, t1
+    addu_s.qb    t6, t6, t2
+    addu_s.qb    t7, t7, t3
+
+    sw           t4, 0(a0)
+    sw           t5, 4(a0)
+    sw           t6, 8(a0)
+    sw           t7, 12(a0)
+
+    beqz         a2, 4f         /* done when w was a multiple of 4 */
+     addiu       a0, a0, 16     /* (delay slot) advance dst */
+3:                              /* leftover pixels, one per pass */
+    lw           t0, 0(a1)
+    lw           t1, 0(a0)
+    addiu        a1, a1, 4
+    addiu        a2, a2, -1
+    addu_s.qb    t1, t1, t0
+    sw           t1, 0(a0)
+    bnez         a2, 3b
+     addiu       a0, a0, 4      /* (delay slot) advance dst */
+4:
+    jr           ra
+     nop
+
+END_MIPS_DSPR1(pixman_composite_add_8888_8888_asm)
diff --git a/pixman/pixman-mips-dspr1-asm.h b/pixman/pixman-mips-dspr1-asm.h
new file mode 100644
index 0000000..0490c38
--- /dev/null
+++ b/pixman/pixman-mips-dspr1-asm.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2013
+ *      MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Author:  Nemanja Lukic (nemanja.lukic at rt-rk.com)
+ */
+
+#ifndef PIXMAN_MIPS_DSPR1_ASM_H
+#define PIXMAN_MIPS_DSPR1_ASM_H
+
+#include "pixman-mips32r2-asm.h"
+
+#endif /* PIXMAN_MIPS_DSPR1_ASM_H */
diff --git a/pixman/pixman-mips-dspr1.c b/pixman/pixman-mips-dspr1.c
new file mode 100644
index 0000000..389d5de
--- /dev/null
+++ b/pixman/pixman-mips-dspr1.c
@@ -0,0 +1,216 @@
+/*
+ * Copyright (c) 2013
+ *      MIPS Technologies, Inc., California.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Author:  Nemanja Lukic (nemanja.lukic at rt-rk.com)
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include "pixman-private.h"
+#include "pixman-mips-common.h"
+
+PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, add_8888_8888,
+                                    uint32_t, 1, uint32_t, 1, _mips_dspr1)
+
+static pixman_bool_t
+mips_dspr1_fill (pixman_implementation_t *imp,    /* not used here */
+                 uint32_t *               bits,   /* start of the image */
+                 int                      stride, /* row pitch in uint32_t */
+                 int                      bpp,    /* 16 and 32 are handled */
+                 int                      x,      /* left edge, in pixels */
+                 int                      y,      /* top edge, in rows */
+                 int                      width,  /* pixels per row */
+                 int                      height, /* rows to fill */
+                 uint32_t                 _xor)   /* fill value */
+{
+    uint8_t *byte_line;
+    uint32_t byte_width;
+    int i;
+    /* Fill the width x height rect at (x, y); FALSE for unsupported bpp. */
+    switch (bpp)
+    {
+    case 16:
+#if defined(USE_MIPS_DSPR1) || defined(USE_MIPS32R2)
+        stride = stride * (int) sizeof (uint32_t) / 2;
+        byte_line = (uint8_t *)(((uint16_t *)bits) + stride * y + x);
+        byte_width = width * 2;
+        stride *= 2;
+
+        while (height--)
+        {
+            uint8_t *dst = byte_line;
+            byte_line += stride;
+#ifdef USE_MIPS_DSPR1
+            pixman_fill_buff16_mips_dspr1 (dst, byte_width, _xor & 0xffff);
+#else
+            pixman_fill_buff16_mips32r2 (dst, byte_width, _xor & 0xffff);
+#endif
+        }
+#else
+        {
+            /* Plain C fallback; its locals are declared in this block. */
+            int short_stride = stride * (int) sizeof (uint32_t) / 2;
+            uint16_t *dst = (uint16_t *)bits + y * short_stride + x;
+            uint16_t v = _xor & 0xffff;
+
+            while (height--)
+            {
+                for (i = 0; i < width; ++i)
+                    dst[i] = v;
+                dst += short_stride;
+            }
+        }
+#endif
+        return TRUE;
+    case 32:
+#ifdef USE_MIPS32R2
+        stride = stride * (int) sizeof (uint32_t) / 4;
+        byte_line = (uint8_t *)(((uint32_t *)bits) + stride * y + x);
+        byte_width = width * 4;
+        stride *= 4;
+
+        while (height--)
+        {
+            uint8_t *dst = byte_line;
+            byte_line += stride;
+            pixman_fill_buff32_mips32r2 (dst, byte_width, _xor);
+        }
+#else
+        bits = bits + y * stride + x;
+
+        while (height--)
+        {
+            for (i = 0; i < width; ++i)
+                bits[i] = _xor;
+
+            bits += stride;
+        }
+#endif
+        return TRUE;
+    default:
+        return FALSE;
+    }
+}
+
+static pixman_bool_t
+mips_dspr1_blt (pixman_implementation_t *imp,        /* not used here */
+                uint32_t *               src_bits,   /* start of source */
+                uint32_t *               dst_bits,   /* start of dest */
+                int                      src_stride, /* pitch in uint32_t */
+                int                      dst_stride, /* pitch in uint32_t */
+                int                      src_bpp,    /* must equal dst_bpp */
+                int                      dst_bpp,    /* 16 and 32 handled */
+                int                      src_x,      /* source left, pixels */
+                int                      src_y,      /* source top, rows */
+                int                      dest_x,     /* dest left, pixels */
+                int                      dest_y,     /* dest top, rows */
+                int                      width,      /* pixels per row */
+                int                      height)     /* rows to copy */
+{
+    uint8_t *src_bytes;
+    uint8_t *dst_bytes;
+    uint32_t byte_width;
+    /* Copy the rect row by row; FALSE if the depths differ or are unhandled. */
+    if (src_bpp != dst_bpp)
+        return FALSE;
+
+    switch (src_bpp)
+    {
+    case 16:
+        src_stride = src_stride * (int) sizeof (uint32_t) / 2;
+        dst_stride = dst_stride * (int) sizeof (uint32_t) / 2;
+        src_bytes = (uint8_t *)(((uint16_t *)src_bits)
+                                          + src_stride * (src_y) + (src_x));
+        dst_bytes = (uint8_t *)(((uint16_t *)dst_bits)
+                                           + dst_stride * (dest_y) + (dest_x));
+        byte_width = width * 2;
+        src_stride *= 2;
+        dst_stride *= 2;
+
+        while (height--)
+        {
+            uint8_t *src = src_bytes;
+            uint8_t *dst = dst_bytes;
+            src_bytes += src_stride;
+            dst_bytes += dst_stride;
+#ifdef USE_MIPS32R2
+            pixman_fast_memcpy_mips32r2 (dst, src, byte_width);
+#else
+            memcpy (dst, src, byte_width);
+#endif
+        }
+        return TRUE;
+    case 32:
+        src_stride = src_stride * (int) sizeof (uint32_t) / 4;
+        dst_stride = dst_stride * (int) sizeof (uint32_t) / 4;
+        src_bytes = (uint8_t *)(((uint32_t *)src_bits)
+                                           + src_stride * (src_y) + (src_x));
+        dst_bytes = (uint8_t *)(((uint32_t *)dst_bits)
+                                           + dst_stride * (dest_y) + (dest_x));
+        byte_width = width * 4;
+        src_stride *= 4;
+        dst_stride *= 4;
+
+        while (height--)
+        {
+            uint8_t *src = src_bytes;
+            uint8_t *dst = dst_bytes;
+            src_bytes += src_stride;
+            dst_bytes += dst_stride;
+#ifdef USE_MIPS32R2
+            pixman_fast_memcpy_mips32r2 (dst, src, byte_width);
+#else
+            memcpy (dst, src, byte_width);
+#endif
+        }
+        return TRUE;
+    default:
+        return FALSE;
+    }
+}
+
+static const pixman_fast_path_t mips_dspr1_fast_paths[] =
+{   /* Both entries use the same ADD handler; src and dst formats match. */
+    PIXMAN_STD_FAST_PATH (ADD,  a8r8g8b8, null, a8r8g8b8, mips_composite_add_8888_8888),
+    PIXMAN_STD_FAST_PATH (ADD,  a8b8g8r8, null, a8b8g8r8, mips_composite_add_8888_8888),
+    { PIXMAN_OP_NONE },  /* presumably the end-of-table marker -- confirm */
+};
+
+pixman_implementation_t *
+_pixman_implementation_create_mips_dspr1 (pixman_implementation_t *fallback)
+{
+    /* Build from fallback + DSPr1 table, then install the fill/blt hooks. */
+    pixman_implementation_t *dspr1 =
+        _pixman_implementation_create (fallback, mips_dspr1_fast_paths);
+
+    dspr1->fill = mips_dspr1_fill;
+    dspr1->blt = mips_dspr1_blt;
+    return dspr1;
+}
diff --git a/pixman/pixman-mips-dspr2-asm.S b/pixman/pixman-mips-dspr2-asm.S
index 8ddde0f..8acfadd 100644
--- a/pixman/pixman-mips-dspr2-asm.S
+++ b/pixman/pixman-mips-dspr2-asm.S
@@ -32,62 +32,6 @@
 #include "pixman-private.h"
 #include "pixman-mips-dspr2-asm.h"
 
-LEAF_MIPS_DSPR2(pixman_fill_buff16)
-/*
- * a0 - *dest
- * a1 - count (bytes)
- * a2 - value to fill buffer with
- */
-
-    beqz     a1, 3f
-     andi    t1, a0, 0x0002
-    beqz     t1, 0f          /* check if address is 4-byte aligned */
-     nop
-    sh       a2, 0(a0)
-    addiu    a0, a0, 2
-    addiu    a1, a1, -2
-0:
-    srl      t1, a1, 5       /* t1 how many multiples of 32 bytes */
-    replv.ph a2, a2          /* replicate fill value (16bit) in a2 */
-    beqz     t1, 2f
-     nop
-1:
-    addiu    t1, t1, -1
-    beqz     t1, 11f
-     addiu   a1, a1, -32
-    pref     30, 32(a0)
-    sw       a2, 0(a0)
-    sw       a2, 4(a0)
-    sw       a2, 8(a0)
-    sw       a2, 12(a0)
-    sw       a2, 16(a0)
-    sw       a2, 20(a0)
-    sw       a2, 24(a0)
-    sw       a2, 28(a0)
-    b        1b
-     addiu   a0, a0, 32
-11:
-    sw       a2, 0(a0)
-    sw       a2, 4(a0)
-    sw       a2, 8(a0)
-    sw       a2, 12(a0)
-    sw       a2, 16(a0)
-    sw       a2, 20(a0)
-    sw       a2, 24(a0)
-    sw       a2, 28(a0)
-    addiu    a0, a0, 32
-2:
-    blez     a1, 3f
-     addiu   a1, a1, -2
-    sh       a2, 0(a0)
-    b        2b
-     addiu   a0, a0, 2
-3:
-    jr       ra
-     nop
-
-END_MIPS_DSPR2(pixman_fill_buff16)
-
 LEAF_MIPS_DSPR2(pixman_composite_src_8888_0565_asm)
 /*
  * a0 - dst (r5g6b5)
@@ -2632,83 +2576,6 @@ LEAF_MIPS_DSPR2(pixman_composite_add_8_8_asm)
 
 END_MIPS_DSPR2(pixman_composite_add_8_8_asm)
 
-LEAF_MIPS_DSPR2(pixman_composite_add_8888_8888_asm)
-/*
- * a0 - dst (a8r8g8b8)
- * a1 - src (a8r8g8b8)
- * a2 - w
- */
-
-    beqz         a2, 4f
-     nop
-
-    srl          t9, a2, 2      /* t1 = how many multiples of 4 src pixels */
-    beqz         t9, 3f         /* branch if less than 4 src pixels */
-     nop
-1:
-    addiu        t9, t9, -1
-    beqz         t9, 2f
-     addiu       a2, a2, -4
-
-    lw           t0, 0(a1)
-    lw           t1, 4(a1)
-    lw           t2, 8(a1)
-    lw           t3, 12(a1)
-    lw           t4, 0(a0)
-    lw           t5, 4(a0)
-    lw           t6, 8(a0)
-    lw           t7, 12(a0)
-    addiu        a1, a1, 16
-
-    addu_s.qb    t4, t4, t0
-    addu_s.qb    t5, t5, t1
-    addu_s.qb    t6, t6, t2
-    addu_s.qb    t7, t7, t3
-
-    sw           t4, 0(a0)
-    sw           t5, 4(a0)
-    sw           t6, 8(a0)
-    sw           t7, 12(a0)
-    b            1b
-     addiu       a0, a0, 16
-2:
-    lw           t0, 0(a1)
-    lw           t1, 4(a1)
-    lw           t2, 8(a1)
-    lw           t3, 12(a1)
-    lw           t4, 0(a0)
-    lw           t5, 4(a0)
-    lw           t6, 8(a0)
-    lw           t7, 12(a0)
-    addiu        a1, a1, 16
-
-    addu_s.qb    t4, t4, t0
-    addu_s.qb    t5, t5, t1
-    addu_s.qb    t6, t6, t2
-    addu_s.qb    t7, t7, t3
-
-    sw           t4, 0(a0)
-    sw           t5, 4(a0)
-    sw           t6, 8(a0)
-    sw           t7, 12(a0)
-
-    beqz         a2, 4f
-     addiu       a0, a0, 16
-3:
-    lw           t0, 0(a1)
-    lw           t1, 0(a0)
-    addiu        a1, a1, 4
-    addiu        a2, a2, -1
-    addu_s.qb    t1, t1, t0
-    sw           t1, 0(a0)
-    bnez         a2, 3b
-     addiu       a0, a0, 4
-4:
-    jr           ra
-     nop
-
-END_MIPS_DSPR2(pixman_composite_add_8888_8888_asm)
-
 LEAF_MIPS_DSPR2(pixman_composite_out_reverse_8_0565_asm)
 /*
  * a0 - dst  (r5g6b5)
diff --git a/pixman/pixman-mips-dspr2-asm.h b/pixman/pixman-mips-dspr2-asm.h
index 24812c2..266767a 100644
--- a/pixman/pixman-mips-dspr2-asm.h
+++ b/pixman/pixman-mips-dspr2-asm.h
@@ -32,7 +32,7 @@
 #ifndef PIXMAN_MIPS_DSPR2_ASM_H
 #define PIXMAN_MIPS_DSPR2_ASM_H
 
-#include "pixman-mips32r2-asm.h"
+#include "pixman-mips-dspr1-asm.h"
 
 /*
  * Conversion of two r5g6b5 pixels (in1_565 and in2_565) to two a8r8g8b8 pixels
diff --git a/pixman/pixman-mips-dspr2.c b/pixman/pixman-mips-dspr2.c
index 7828907..b8035d5 100644
--- a/pixman/pixman-mips-dspr2.c
+++ b/pixman/pixman-mips-dspr2.c
@@ -56,8 +56,6 @@ PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, over_8888_0565,
                                     uint32_t, 1, uint16_t, 1, _mips_dspr2)
 PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, add_8_8,
                                     uint8_t, 1, uint8_t, 1, _mips_dspr2)
-PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, add_8888_8888,
-                                    uint32_t, 1, uint32_t, 1, _mips_dspr2)
 PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, out_reverse_8_0565,
                                     uint8_t, 1, uint16_t, 1, _mips_dspr2)
 PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, out_reverse_8_8888,
@@ -175,6 +173,7 @@ mips_dspr2_fill (pixman_implementation_t *imp,
     switch (bpp)
     {
     case 16:
+#if defined(USE_MIPS_DSPR1) || defined(USE_MIPS32R2) /* asm fill available */
         stride = stride * (int) sizeof (uint32_t) / 2;
         byte_line = (uint8_t *)(((uint16_t *)bits) + stride * y + x);
         byte_width = width * 2;
@@ -184,8 +183,27 @@ mips_dspr2_fill (pixman_implementation_t *imp,
         {
             uint8_t *dst = byte_line;
             byte_line += stride;
-            pixman_fill_buff16_mips_dspr2 (dst, byte_width, _xor & 0xffff);
+#ifdef USE_MIPS_DSPR1
+            pixman_fill_buff16_mips_dspr1 (dst, byte_width, _xor & 0xffff);
+#else
+            pixman_fill_buff16_mips32r2 (dst, byte_width, _xor & 0xffff);
+#endif
+        }
+#else /* NOTE(review): needs short_stride, dst, v, i declared -- confirm */
+        short_stride = (stride * (int)sizeof (uint32_t)) / (int)sizeof (uint16_t);
+        dst = (uint16_t *)bits;
+        v = _xor & 0xffff;
+
+        dst = dst + y * short_stride + x;
+
+        while (height--)
+        {
+            for (i = 0; i < width; ++i)
+                dst[i] = v;
+
+            dst += short_stride;
         }
+#endif
         return TRUE;
     case 32:
 #ifdef USE_MIPS32R2
@@ -367,8 +385,6 @@ static const pixman_fast_path_t mips_dspr2_fast_paths[] =
     PIXMAN_STD_FAST_PATH (ADD,  a8r8g8b8, solid,    a8r8g8b8, mips_composite_add_8888_n_8888),
     PIXMAN_STD_FAST_PATH (ADD,  a8b8g8r8, solid,    a8b8g8r8, mips_composite_add_8888_n_8888),
     PIXMAN_STD_FAST_PATH (ADD,  a8,       null,     a8,       mips_composite_add_8_8),
-    PIXMAN_STD_FAST_PATH (ADD,  a8r8g8b8, null,     a8r8g8b8, mips_composite_add_8888_8888),
-    PIXMAN_STD_FAST_PATH (ADD,  a8b8g8r8, null,     a8b8g8r8, mips_composite_add_8888_8888),
     PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8,    null, r5g6b5,   mips_composite_out_reverse_8_0565),
     PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8,    null, b5g6r5,   mips_composite_out_reverse_8_0565),
     PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8,    null, a8r8g8b8, mips_composite_out_reverse_8_8888),
diff --git a/pixman/pixman-mips.c b/pixman/pixman-mips.c
index ff792de..fb2672e 100644
--- a/pixman/pixman-mips.c
+++ b/pixman/pixman-mips.c
@@ -25,13 +25,15 @@
 
 #include "pixman-private.h"
 
-#if defined(USE_MIPS_DSPR2) || defined(USE_MIPS32R2) || \
-    defined(USE_LOONGSON_MMI)
+#if defined(USE_MIPS_DSPR2) || defined(USE_MIPS_DSPR1) || \
+    defined(USE_MIPS32R2) || defined(USE_LOONGSON_MMI)
 
 #include <string.h>
 #include <stdlib.h>
 
 static const char *mips_dspr2_cores[] = {"MIPS 74K", NULL};
+static const char *mips_dspr1_cores[] = {"MIPS 1004K", "MIPS 74K", "MIPS 34K",
+                                         "MIPS 24KE", NULL};
 static const char *mips32r2_cores[] = {"MIPS 1004K", "MIPS 74K", "MIPS 34K",
                                        "MIPS 24K", "MIPS 4Kc", "MIPS 4Km",
                                        "MIPS 4Kp", "MIPS 4KEc", "MIPS 4KEm",
@@ -104,6 +106,21 @@ _pixman_mips_get_implementations (pixman_implementation_t *imp)
     }
 #endif
 
+#ifdef USE_MIPS_DSPR1
+    if (!_pixman_disabled ("mips-dspr1"))
+    {
+        int already_compiling_everything_for_dspr1 = 0;
+#if defined(__mips_dsp) && (__mips_dsp_rev >= 1)
+        already_compiling_everything_for_dspr1 = 1;
+#endif
+        if (already_compiling_everything_for_dspr1 ||
+            have_feature (mips_dspr1_cores))
+        {
+            imp = _pixman_implementation_create_mips_dspr1 (imp);
+        }
+    }
+#endif
+
 #ifdef USE_MIPS_DSPR2
     if (!_pixman_disabled ("mips-dspr2"))
     {
diff --git a/pixman/pixman-mips32r2.c b/pixman/pixman-mips32r2.c
index 5f1b826..227e56b 100644
--- a/pixman/pixman-mips32r2.c
+++ b/pixman/pixman-mips32r2.c
@@ -73,7 +73,7 @@ mips32r2_fill (pixman_implementation_t *imp,
             byte_line += stride;
             pixman_fill_buff16_mips32r2 (dst, byte_width, _xor & 0xffff);
         }
-        return TRUE; 
+        return TRUE;
     case 32:
         stride = stride * (int) sizeof (uint32_t) / 4;
         byte_line = (uint8_t *)(((uint32_t *)bits) + stride * y + x);
diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
index ffece0f..696118b 100644
--- a/pixman/pixman-private.h
+++ b/pixman/pixman-private.h
@@ -606,6 +606,11 @@ pixman_implementation_t *
 _pixman_implementation_create_mips_dspr2 (pixman_implementation_t *fallback);
 #endif
 
+#ifdef USE_MIPS_DSPR1
+pixman_implementation_t *
+_pixman_implementation_create_mips_dspr1 (pixman_implementation_t *fallback);
+#endif
+
 #ifdef USE_MIPS32R2
 pixman_implementation_t *
 _pixman_implementation_create_mips32r2 (pixman_implementation_t *fallback);
-- 
1.7.3



More information about the Pixman mailing list