[Pixman] [PATCH 06/11] MIPS: mips32r2: Move fast paths implementation from dspr2 to mips32r2
Nemanja Lukic
nemanja.lukic at rt-rk.com
Thu Mar 13 06:13:18 PDT 2014
Some of the optimizations introduced in previous DSPr2 commits were not DSPr2
specific. Some of the fast paths didn't use DSPr2 instructions at all, and
instead relied on the more generic MIPS32r2 instruction set. Since Pixman's
run-time CPU detection only added DSPr2 fast paths on 74K MIPS cores, these
optimizations couldn't be used on cores that don't support DSPr2 but do
support MIPS32r2 instructions (these are almost all newer MIPS CPU cores like
4K, 24K, 34K, 1004K, etc.).
This patch extracts those MIPS32r2-specific optimizations into a new mips32r2
set of fast paths, and adds infrastructure for future MIPS32r2-only
optimizations, with appropriate build-time and run-time support.
The following is the list of MIPS32r2 optimizations, introduced in previous
DSPr2 patches, tested on a MIPS 24Kc core:
Performance numbers before/after on MIPS-24kc @ 500 MHz
Reference (before):
src_x888_8888 = L1: 62.41 L2: 25.76 M: 26.77 ( 89.31%) HT: 23.82 VT: 24.27 R: 20.11 RT: 11.37 ( 109Kops/s)
src_0565_0565 = L1: 253.40 L2: 60.85 M: 60.00 (100.03%) HT: 31.04 VT: 30.59 R: 29.73 RT: 13.50 ( 122Kops/s)
src_8888_8888 = L1: 190.67 L2: 30.14 M: 30.43 (101.24%) HT: 20.07 VT: 19.03 R: 19.04 RT: 10.20 ( 100Kops/s)
src_0888_0888 = L1: 180.32 L2: 40.88 M: 40.77 (101.87%) HT: 23.84 VT: 23.56 R: 24.04 RT: 11.36 ( 108Kops/s)
src_n_8888 = L1: 94.93 L2: 56.78 M: 58.23 ( 96.88%) HT: 49.54 VT: 47.02 R: 42.94 RT: 21.58 ( 154Kops/s)
Optimized (with these optimizations):
src_x888_8888 = L1: 124.93 L2: 42.84 M: 44.22 (146.85%) HT: 26.36 VT: 27.24 R: 24.26 RT: 11.77 ( 111Kops/s)
src_0565_0565 = L1: 205.31 L2: 90.44 M:105.47 (175.71%) HT: 37.51 VT: 34.80 R: 31.44 RT: 13.42 ( 121Kops/s)
src_8888_8888 = L1: 180.13 L2: 50.91 M: 53.58 (178.16%) HT: 24.70 VT: 22.78 R: 21.63 RT: 11.53 ( 109Kops/s)
src_0888_0888 = L1: 142.02 L2: 67.91 M: 71.48 (178.40%) HT: 29.96 VT: 26.80 R: 25.44 RT: 11.94 ( 112Kops/s)
src_n_8888 = L1: 250.50 L2: 143.04 M:181.86 (302.13%) HT: 69.00 VT: 61.73 R: 55.14 RT: 20.77 ( 148Kops/s)
---
pixman/pixman-mips-common-asm.h | 247 ++++++++++++++++++++
pixman/pixman-mips-common.h | 415 +++++++++++++++++++++++++++++++++
pixman/pixman-mips-dspr2-asm.S | 357 +++++++++--------------------
pixman/pixman-mips-dspr2-asm.h | 276 +----------------------
pixman/pixman-mips-dspr2.c | 215 ++++++++---------
pixman/pixman-mips-dspr2.h | 438 -----------------------------------
pixman/pixman-mips-memcpy-asm.S | 382 ------------------------------
pixman/pixman-mips32r2-asm.S | 489 +++++++++++++++++++++++++++++++++++++++
pixman/pixman-mips32r2-asm.h | 49 ++++
pixman/pixman-mips32r2.c | 134 +++++++++++
10 files changed, 1548 insertions(+), 1454 deletions(-)
delete mode 100644 pixman/pixman-mips-dspr2.h
delete mode 100644 pixman/pixman-mips-memcpy-asm.S
diff --git a/pixman/pixman-mips-common-asm.h b/pixman/pixman-mips-common-asm.h
index 1e97cc7..186f17a 100644
--- a/pixman/pixman-mips-common-asm.h
+++ b/pixman/pixman-mips-common-asm.h
@@ -32,4 +32,251 @@
#ifndef PIXMAN_MIPS_COMMON_ASM_H
#define PIXMAN_MIPS_COMMON_ASM_H
+#define zero $0 /* symbolic aliases for the numbered registers $0..$31 follow */
+#define AT $1
+#define v0 $2
+#define v1 $3
+#define a0 $4
+#define a1 $5
+#define a2 $6
+#define a3 $7
+#define t0 $8
+#define t1 $9
+#define t2 $10
+#define t3 $11
+#define t4 $12
+#define t5 $13
+#define t6 $14
+#define t7 $15
+#define s0 $16
+#define s1 $17
+#define s2 $18
+#define s3 $19
+#define s4 $20
+#define s5 $21
+#define s6 $22
+#define s7 $23
+#define t8 $24
+#define t9 $25
+#define k0 $26
+#define k1 $27
+#define gp $28
+#define sp $29
+#define fp $30 /* fp and s8 below are two names for the same register */
+#define s8 $30
+#define ra $31
+
+/*
+ * LEAF_MIPS - declare common leaf routine for MIPS
+ *
+ * Emits the opening directives for a global function whose symbol name
+ * is symbol##suffix: .globl, .align 2, .type (@function), .ent, the
+ * entry label itself and a zero-sized .frame based on sp with ra as the
+ * return register. The current assembler options are then saved with
+ * ".set push" before arch=mips32r2, noreorder and noat are selected;
+ * the END macro issues the matching ".set pop".
+ */
+
+#define LEAF_MIPS(symbol, suffix) \
+ .globl symbol##suffix; \
+ .align 2; \
+ .type symbol##suffix, @function; \
+ .ent symbol##suffix, 0; \
+symbol##suffix: .frame sp, 0, ra; \
+ .set push; \
+ .set arch=mips32r2; \
+ .set noreorder; \
+ .set noat;
+
+/*
+ * LEAF_MIPS32R2 - declare leaf routine for MIPS32r2
+ *
+ * Thin wrapper around LEAF_MIPS: the resulting symbol is the given name
+ * with "_mips32r2" appended.
+ */
+#define LEAF_MIPS32R2(symbol) \
+LEAF_MIPS(symbol, _mips32r2)
+
+/*
+ * LEAF_MIPS_DSPR2 - declare leaf routine for MIPS DSPr2
+ *
+ * Wrapper around LEAF_MIPS: the resulting symbol is the given name with
+ * "_mips_dspr2" appended, and ".set dspr2" is issued after the common
+ * prologue directives.
+ */
+#define LEAF_MIPS_DSPR2(symbol) \
+LEAF_MIPS(symbol, _mips_dspr2) \
+ .set dspr2;
+
+/*
+ * END - mark end of function
+ *
+ * Counterpart of LEAF_MIPS for the symbol function##suffix: restores the
+ * assembler options saved by ".set push" (".set pop"), closes the .ent
+ * region with .end and records the symbol size as the distance from the
+ * entry label to the current location.
+ */
+#define END(function, suffix) \
+ .set pop; \
+ .end function##suffix; \
+ .size function##suffix,.-function##suffix
+
+/*
+ * END_MIPS32R2 - mark end of mips32r2 function
+ *
+ * Expands to END with the "_mips32r2" suffix, i.e. the suffix that
+ * LEAF_MIPS32R2 uses for the same base name.
+ */
+#define END_MIPS32R2(function) \
+END(function, _mips32r2)
+
+/*
+ * END_MIPS_DSPR2 - mark end of mips_dspr2 function
+ *
+ * Expands to END with the "_mips_dspr2" suffix, i.e. the suffix that
+ * LEAF_MIPS_DSPR2 uses for the same base name.
+ */
+#define END_MIPS_DSPR2(function) \
+END(function, _mips_dspr2)
+
+
+/*
+ * Checks if stack offset is big enough for storing/restoring regs_num
+ * number of registers to/from stack. Nominally the stack offset must be
+ * greater than or equal to the number of bytes needed for storing the
+ * registers (regs_num*4). Since MIPS ABI allows usage of first 16 bytes of
+ * stack frame (this is reserved for input arguments of the functions,
+ * already stored in a0-a3), stack size can be further optimized by
+ * utilizing this space, so the value actually enforced is
+ * regs_num*4 - 16. A smaller stack_offset stops the build with an
+ * assemble-time .error.
+ */
+.macro CHECK_STACK_OFFSET regs_num, stack_offset
+.if \stack_offset < \regs_num * 4 - 16
+.error "Stack offset too small."
+.endif
+.endm
+
+/*
+ * Saves set of registers on stack. Maximum number of registers that
+ * can be saved on stack is limited to 14 (a0-a3, v0-v1 and s0-s7).
+ * Stack offset is number of bytes that are subtracted from stack pointer
+ * (sp) before registers are pushed in order to provide enough space on
+ * stack (offset must be non-negative, a multiple of 4, and big enough, as
+ * described by CHECK_STACK_OFFSET macro; a zero offset leaves sp
+ * untouched).
+ * r1 is mandatory and is stored at 0(sp); every further register that is
+ * passed goes to the next 4-byte slot (r2 at 4(sp), ... r14 at 52(sp)).
+ * Arguments left at their default of 0 are skipped. From the fifth
+ * register on, CHECK_STACK_OFFSET is invoked before each store.
+ * This macro is intended to be used in combination with
+ * RESTORE_REGS_FROM_STACK macro. Example:
+ * SAVE_REGS_ON_STACK 4, v0, v1, s0, s1
+ * RESTORE_REGS_FROM_STACK 4, v0, v1, s0, s1
+ */
+.macro SAVE_REGS_ON_STACK stack_offset = 0, r1, \
+ r2 = 0, r3 = 0, r4 = 0, \
+ r5 = 0, r6 = 0, r7 = 0, \
+ r8 = 0, r9 = 0, r10 = 0, \
+ r11 = 0, r12 = 0, r13 = 0, \
+ r14 = 0
+ .if (\stack_offset < 0) || (\stack_offset - (\stack_offset / 4) * 4)
+ .error "Stack offset must be pozitive and multiple of 4."
+ .endif
+ .if \stack_offset != 0
+ addiu sp, sp, -\stack_offset
+ .endif
+ sw \r1, 0(sp)
+ .if \r2 != 0
+ sw \r2, 4(sp)
+ .endif
+ .if \r3 != 0
+ sw \r3, 8(sp)
+ .endif
+ .if \r4 != 0
+ sw \r4, 12(sp)
+ .endif
+ .if \r5 != 0
+ CHECK_STACK_OFFSET 5, \stack_offset
+ sw \r5, 16(sp)
+ .endif
+ .if \r6 != 0
+ CHECK_STACK_OFFSET 6, \stack_offset
+ sw \r6, 20(sp)
+ .endif
+ .if \r7 != 0
+ CHECK_STACK_OFFSET 7, \stack_offset
+ sw \r7, 24(sp)
+ .endif
+ .if \r8 != 0
+ CHECK_STACK_OFFSET 8, \stack_offset
+ sw \r8, 28(sp)
+ .endif
+ .if \r9 != 0
+ CHECK_STACK_OFFSET 9, \stack_offset
+ sw \r9, 32(sp)
+ .endif
+ .if \r10 != 0
+ CHECK_STACK_OFFSET 10, \stack_offset
+ sw \r10, 36(sp)
+ .endif
+ .if \r11 != 0
+ CHECK_STACK_OFFSET 11, \stack_offset
+ sw \r11, 40(sp)
+ .endif
+ .if \r12 != 0
+ CHECK_STACK_OFFSET 12, \stack_offset
+ sw \r12, 44(sp)
+ .endif
+ .if \r13 != 0
+ CHECK_STACK_OFFSET 13, \stack_offset
+ sw \r13, 48(sp)
+ .endif
+ .if \r14 != 0
+ CHECK_STACK_OFFSET 14, \stack_offset
+ sw \r14, 52(sp)
+ .endif
+.endm
+
+/*
+ * Restores set of registers from stack. Maximum number of registers that
+ * can be restored from stack is limited to 14 (a0-a3, v0-v1 and s0-s7).
+ * Stack offset is number of bytes that are added to stack pointer (sp)
+ * after registers are restored (offset must be non-negative, a multiple
+ * of 4, and big enough, as described by CHECK_STACK_OFFSET macro; a zero
+ * offset leaves sp untouched).
+ * r1 is mandatory and is loaded from 0(sp); every further register that
+ * is passed is loaded from the next 4-byte slot (r2 from 4(sp), ... r14
+ * from 52(sp)). Arguments left at their default of 0 are skipped. From
+ * the fifth register on, CHECK_STACK_OFFSET is invoked before each load.
+ * This macro is intended to be used in combination with
+ * SAVE_REGS_ON_STACK macro, with the same offset and register list.
+ * Example:
+ * SAVE_REGS_ON_STACK 4, v0, v1, s0, s1
+ * RESTORE_REGS_FROM_STACK 4, v0, v1, s0, s1
+ */
+.macro RESTORE_REGS_FROM_STACK stack_offset = 0, r1, \
+ r2 = 0, r3 = 0, r4 = 0, \
+ r5 = 0, r6 = 0, r7 = 0, \
+ r8 = 0, r9 = 0, r10 = 0, \
+ r11 = 0, r12 = 0, r13 = 0, \
+ r14 = 0
+ .if (\stack_offset < 0) || (\stack_offset - (\stack_offset/4)*4)
+ .error "Stack offset must be pozitive and multiple of 4."
+ .endif
+ lw \r1, 0(sp)
+ .if \r2 != 0
+ lw \r2, 4(sp)
+ .endif
+ .if \r3 != 0
+ lw \r3, 8(sp)
+ .endif
+ .if \r4 != 0
+ lw \r4, 12(sp)
+ .endif
+ .if \r5 != 0
+ CHECK_STACK_OFFSET 5, \stack_offset
+ lw \r5, 16(sp)
+ .endif
+ .if \r6 != 0
+ CHECK_STACK_OFFSET 6, \stack_offset
+ lw \r6, 20(sp)
+ .endif
+ .if \r7 != 0
+ CHECK_STACK_OFFSET 7, \stack_offset
+ lw \r7, 24(sp)
+ .endif
+ .if \r8 != 0
+ CHECK_STACK_OFFSET 8, \stack_offset
+ lw \r8, 28(sp)
+ .endif
+ .if \r9 != 0
+ CHECK_STACK_OFFSET 9, \stack_offset
+ lw \r9, 32(sp)
+ .endif
+ .if \r10 != 0
+ CHECK_STACK_OFFSET 10, \stack_offset
+ lw \r10, 36(sp)
+ .endif
+ .if \r11 != 0
+ CHECK_STACK_OFFSET 11, \stack_offset
+ lw \r11, 40(sp)
+ .endif
+ .if \r12 != 0
+ CHECK_STACK_OFFSET 12, \stack_offset
+ lw \r12, 44(sp)
+ .endif
+ .if \r13 != 0
+ CHECK_STACK_OFFSET 13, \stack_offset
+ lw \r13, 48(sp)
+ .endif
+ .if \r14 != 0
+ CHECK_STACK_OFFSET 14, \stack_offset
+ lw \r14, 52(sp)
+ .endif
+ .if \stack_offset != 0
+ addiu sp, sp, \stack_offset
+ .endif
+.endm
+
#endif /* PIXMAN_MIPS_COMMON_ASM_H */
diff --git a/pixman/pixman-mips-common.h b/pixman/pixman-mips-common.h
index fc46ed8..1c64964 100644
--- a/pixman/pixman-mips-common.h
+++ b/pixman/pixman-mips-common.h
@@ -32,4 +32,419 @@
#ifndef PIXMAN_MIPS_COMMON_H
#define PIXMAN_MIPS_COMMON_H
+#include "pixman-private.h"
+#include "pixman-inlines.h"
+
+#define SKIP_ZERO_SRC 1 /* tested as a bit: binders skip the work when the source is zero */
+#define SKIP_ZERO_MASK 2 /* tested as a bit: binders skip the work when the solid mask is zero */
+#define DO_FAST_MEMCPY 3 /* tested with ==; NOTE(review): 3 == SKIP_ZERO_SRC | SKIP_ZERO_MASK, so a bit test cannot tell them apart */
+
+void
+pixman_fast_memcpy_mips32r2 (void *dst, void *src, uint32_t n_bytes); /* defined outside this header; SRC_DST binder calls it with width * bpp as n_bytes */
+void
+pixman_fill_buff32_mips32r2 (void *dst, uint32_t n_bytes, uint32_t value); /* defined outside this header; presumably fills n_bytes at dst with value - confirm */
+
+#ifdef USE_MIPS_DSPR2 /* the 16-bit fill prototype is only visible in DSPr2 builds */
+void
+pixman_fill_buff16_mips_dspr2 (void *dst, uint32_t n_bytes, uint16_t value);
+#endif
+
+/*
+ * PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST
+ *
+ * Declares the scanline routine pixman_composite_<name>_asm<suffix>
+ * (dst, src, w) and defines the static compositing entry point
+ * mips_composite_<name>. The entry point obtains the first source and
+ * destination line pointers and strides with PIXMAN_IMAGE_GET_LINE and
+ * then walks `height` rows. When flags is exactly DO_FAST_MEMCPY each row
+ * is copied with pixman_fast_memcpy_mips32r2 (width * bpp bytes, bpp being
+ * the destination format's bits per pixel divided by 8); otherwise the
+ * declared scanline routine is called with `width`.
+ * src_cnt / dst_cnt are passed straight through to PIXMAN_IMAGE_GET_LINE.
+ * NOTE(review): width, height and the image/coordinate variables are
+ * presumably introduced by PIXMAN_COMPOSITE_ARGS - defined elsewhere.
+ */
+
+#define PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST(flags, name, \
+ src_type, src_cnt, \
+ dst_type, dst_cnt, \
+ suffix) \
+void \
+pixman_composite_##name##_asm##suffix (dst_type *dst, \
+ src_type *src, \
+ int32_t w); \
+ \
+static void \
+mips_composite_##name (pixman_implementation_t *imp, \
+ pixman_composite_info_t *info) \
+{ \
+ PIXMAN_COMPOSITE_ARGS (info); \
+ dst_type *dst_line, *dst; \
+ src_type *src_line, *src; \
+ int32_t dst_stride, src_stride; \
+ int bpp = PIXMAN_FORMAT_BPP (dest_image->bits.format) / 8; \
+ \
+ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type, \
+ src_stride, src_line, src_cnt); \
+ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type, \
+ dst_stride, dst_line, dst_cnt); \
+ \
+ while (height--) \
+ { \
+ dst = dst_line; \
+ dst_line += dst_stride; \
+ src = src_line; \
+ src_line += src_stride; \
+ \
+ if (flags == DO_FAST_MEMCPY) \
+ pixman_fast_memcpy_mips32r2 (dst, src, width * bpp); \
+ else \
+ pixman_composite_##name##_asm##suffix (dst, src, width); \
+ } \
+}
+
+/*
+ * PIXMAN_MIPS_BIND_FAST_PATH_N_DST
+ *
+ * Binder for operations with a solid source and no mask. Declares
+ * pixman_composite_<name>_asm<suffix> (dst, src, w), where src is a
+ * 32-bit value, and defines static mips_composite_<name>. The solid value
+ * is fetched with _pixman_image_get_solid for the destination format; if
+ * flags has the SKIP_ZERO_SRC bit and that value is 0 the function returns
+ * without touching the destination. Otherwise the scanline routine is
+ * called once per row for `height` rows.
+ */
+
+#define PIXMAN_MIPS_BIND_FAST_PATH_N_DST(flags, name, \
+ dst_type, dst_cnt, \
+ suffix) \
+void \
+pixman_composite_##name##_asm##suffix (dst_type *dst, \
+ uint32_t src, \
+ int32_t w); \
+ \
+static void \
+mips_composite_##name (pixman_implementation_t *imp, \
+ pixman_composite_info_t *info) \
+{ \
+ PIXMAN_COMPOSITE_ARGS (info); \
+ dst_type *dst_line, *dst; \
+ int32_t dst_stride; \
+ uint32_t src; \
+ \
+ src = _pixman_image_get_solid ( \
+ imp, src_image, dest_image->bits.format); \
+ \
+ if ((flags & SKIP_ZERO_SRC) && src == 0) \
+ return; \
+ \
+ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type, \
+ dst_stride, dst_line, dst_cnt); \
+ \
+ while (height--) \
+ { \
+ dst = dst_line; \
+ dst_line += dst_stride; \
+ \
+ pixman_composite_##name##_asm##suffix (dst, src, width); \
+ } \
+}
+
+/*
+ * PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST
+ *
+ * Binder for operations with a solid source and a per-pixel mask image.
+ * Declares pixman_composite_<name>_asm<suffix> (dst, src, mask, w) and
+ * defines static mips_composite_<name>. The solid source is fetched with
+ * _pixman_image_get_solid; if flags has the SKIP_ZERO_SRC bit and the
+ * value is 0 the function returns early. Destination and mask line
+ * pointers are advanced by their own strides and the scanline routine is
+ * called once per row for `height` rows.
+ */
+
+#define PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST(flags, name, \
+ mask_type, mask_cnt, \
+ dst_type, dst_cnt, \
+ suffix) \
+void \
+pixman_composite_##name##_asm##suffix (dst_type *dst, \
+ uint32_t src, \
+ mask_type *mask, \
+ int32_t w); \
+ \
+static void \
+mips_composite_##name (pixman_implementation_t *imp, \
+ pixman_composite_info_t *info) \
+{ \
+ PIXMAN_COMPOSITE_ARGS (info); \
+ dst_type *dst_line, *dst; \
+ mask_type *mask_line, *mask; \
+ int32_t dst_stride, mask_stride; \
+ uint32_t src; \
+ \
+ src = _pixman_image_get_solid ( \
+ imp, src_image, dest_image->bits.format); \
+ \
+ if ((flags & SKIP_ZERO_SRC) && src == 0) \
+ return; \
+ \
+ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type, \
+ dst_stride, dst_line, dst_cnt); \
+ PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type, \
+ mask_stride, mask_line, mask_cnt); \
+ \
+ while (height--) \
+ { \
+ dst = dst_line; \
+ dst_line += dst_stride; \
+ mask = mask_line; \
+ mask_line += mask_stride; \
+ pixman_composite_##name##_asm##suffix (dst, src, mask, width); \
+ } \
+}
+
+/*
+ * PIXMAN_MIPS_BIND_FAST_PATH_SRC_N_DST
+ *
+ * Binder for operations with a source image and a solid mask. Declares
+ * pixman_composite_<name>_asm<suffix> (dst, src, mask, w), where mask is
+ * a 32-bit value, and defines static mips_composite_<name>. The solid
+ * mask is fetched with _pixman_image_get_solid from mask_image; if flags
+ * has the SKIP_ZERO_MASK bit and the value is 0 the function returns
+ * early. Source and destination line pointers are advanced by their own
+ * strides and the scanline routine is called once per row.
+ */
+
+#define PIXMAN_MIPS_BIND_FAST_PATH_SRC_N_DST(flags, name, \
+ src_type, src_cnt, \
+ dst_type, dst_cnt, \
+ suffix) \
+void \
+pixman_composite_##name##_asm##suffix (dst_type *dst, \
+ src_type *src, \
+ uint32_t mask, \
+ int32_t w); \
+ \
+static void \
+mips_composite_##name (pixman_implementation_t *imp, \
+ pixman_composite_info_t *info) \
+{ \
+ PIXMAN_COMPOSITE_ARGS (info); \
+ dst_type *dst_line, *dst; \
+ src_type *src_line, *src; \
+ int32_t dst_stride, src_stride; \
+ uint32_t mask; \
+ \
+ mask = _pixman_image_get_solid ( \
+ imp, mask_image, dest_image->bits.format); \
+ \
+ if ((flags & SKIP_ZERO_MASK) && mask == 0) \
+ return; \
+ \
+ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type, \
+ dst_stride, dst_line, dst_cnt); \
+ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type, \
+ src_stride, src_line, src_cnt); \
+ \
+ while (height--) \
+ { \
+ dst = dst_line; \
+ dst_line += dst_stride; \
+ src = src_line; \
+ src_line += src_stride; \
+ \
+ pixman_composite_##name##_asm##suffix (dst, src, mask, width); \
+ } \
+}
+
+/*
+ * PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST
+ *
+ * Binder for operations with a source image and a mask image. Unlike the
+ * other composite binders it takes no flags argument and has no
+ * early-return check. Declares pixman_composite_<name>_asm<suffix>
+ * (dst, src, mask, w) and defines static mips_composite_<name>, which
+ * advances destination, mask and source line pointers by their own
+ * strides and calls the scanline routine once per row for `height` rows.
+ */
+
+#define PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST(name, src_type, src_cnt, \
+ mask_type, mask_cnt, \
+ dst_type, dst_cnt, \
+ suffix) \
+void \
+pixman_composite_##name##_asm##suffix (dst_type *dst, \
+ src_type *src, \
+ mask_type *mask, \
+ int32_t w); \
+ \
+static void \
+mips_composite_##name (pixman_implementation_t *imp, \
+ pixman_composite_info_t *info) \
+{ \
+ PIXMAN_COMPOSITE_ARGS (info); \
+ dst_type *dst_line, *dst; \
+ src_type *src_line, *src; \
+ mask_type *mask_line, *mask; \
+ int32_t dst_stride, src_stride, mask_stride; \
+ \
+ PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type, \
+ dst_stride, dst_line, dst_cnt); \
+ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type, \
+ src_stride, src_line, src_cnt); \
+ PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type, \
+ mask_stride, mask_line, mask_cnt); \
+ \
+ while (height--) \
+ { \
+ dst = dst_line; \
+ dst_line += dst_stride; \
+ mask = mask_line; \
+ mask_line += mask_stride; \
+ src = src_line; \
+ src_line += src_stride; \
+ pixman_composite_##name##_asm##suffix (dst, src, mask, width); \
+ } \
+}
+
+/*
+ * PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_DST
+ *
+ * Binder for unmasked nearest-neighbour scaling. Declares
+ * pixman_scaled_nearest_scanline_<name>_<op>_asm<suffix>
+ * (dst, src, w, vx, unit_x) and defines the force_inline adapter
+ * scaled_nearest_scanline_mips_<name>_<op>, which forwards its first five
+ * arguments and ignores max_vx and zero_src. The adapter is then used to
+ * instantiate FAST_NEAREST_MAINLOOP three times (COVER, NONE and PAD),
+ * producing mips_<name>_cover_<op>, mips_<name>_none_<op> and
+ * mips_<name>_pad_<op>. FAST_NEAREST_MAINLOOP is not defined in this
+ * header.
+ */
+
+#define PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_DST(name, op, \
+ src_type, dst_type, \
+ suffix) \
+void \
+pixman_scaled_nearest_scanline_##name##_##op##_asm##suffix ( \
+ dst_type * dst, \
+ const src_type * src, \
+ int32_t w, \
+ pixman_fixed_t vx, \
+ pixman_fixed_t unit_x); \
+ \
+static force_inline void \
+scaled_nearest_scanline_mips_##name##_##op (dst_type * pd, \
+ const src_type * ps, \
+ int32_t w, \
+ pixman_fixed_t vx, \
+ pixman_fixed_t unit_x, \
+ pixman_fixed_t max_vx, \
+ pixman_bool_t zero_src) \
+{ \
+ pixman_scaled_nearest_scanline_##name##_##op##_asm##suffix (pd, ps, w, \
+ vx, unit_x); \
+} \
+ \
+FAST_NEAREST_MAINLOOP (mips_##name##_cover_##op, \
+ scaled_nearest_scanline_mips_##name##_##op, \
+ src_type, dst_type, COVER) \
+FAST_NEAREST_MAINLOOP (mips_##name##_none_##op, \
+ scaled_nearest_scanline_mips_##name##_##op, \
+ src_type, dst_type, NONE) \
+FAST_NEAREST_MAINLOOP (mips_##name##_pad_##op, \
+ scaled_nearest_scanline_mips_##name##_##op, \
+ src_type, dst_type, PAD)
+
+/* Provide entries for the fast path table: expands to the COVER, NONE and PAD entries, in that order, for one (op, s, d, func) combination */
+#define PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH(op,s,d,func) \
+ SIMPLE_NEAREST_FAST_PATH_COVER (op,s,d,func), \
+ SIMPLE_NEAREST_FAST_PATH_NONE (op,s,d,func), \
+ SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func)
+
+
+/*
+ * PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_A8_DST
+ *
+ * Binder for nearest-neighbour scaling with an 8-bit mask. Declares
+ * pixman_scaled_nearest_scanline_<name>_<op>_asm<suffix>
+ * (dst, src, mask, w, vx, unit_x) and defines the force_inline adapter
+ * scaled_nearest_scanline_mips_<name>_<op>. The adapter receives the mask
+ * as its FIRST argument but passes it to the assembly routine as the
+ * THIRD one; max_vx is ignored. When flags has the SKIP_ZERO_SRC bit and
+ * zero_src is set the adapter returns without calling the routine.
+ * FAST_NEAREST_MAINLOOP_COMMON (not defined in this header) is
+ * instantiated for COVER, NONE and PAD with uint8_t as the mask type and
+ * the trailing arguments TRUE, FALSE.
+ */
+
+#define PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_A8_DST(flags, name, op, \
+ src_type, dst_type, \
+ suffix) \
+void \
+pixman_scaled_nearest_scanline_##name##_##op##_asm##suffix ( \
+ dst_type * dst, \
+ const src_type * src, \
+ const uint8_t * mask, \
+ int32_t w, \
+ pixman_fixed_t vx, \
+ pixman_fixed_t unit_x); \
+ \
+static force_inline void \
+scaled_nearest_scanline_mips_##name##_##op (const uint8_t * mask, \
+ dst_type * pd, \
+ const src_type * ps, \
+ int32_t w, \
+ pixman_fixed_t vx, \
+ pixman_fixed_t unit_x, \
+ pixman_fixed_t max_vx, \
+ pixman_bool_t zero_src) \
+{ \
+ if ((flags & SKIP_ZERO_SRC) && zero_src) \
+ return; \
+ pixman_scaled_nearest_scanline_##name##_##op##_asm##suffix (pd, ps, \
+ mask, w, \
+ vx, unit_x); \
+} \
+ \
+FAST_NEAREST_MAINLOOP_COMMON (mips_##name##_cover_##op, \
+ scaled_nearest_scanline_mips_##name##_##op, \
+ src_type, uint8_t, dst_type, COVER, TRUE, FALSE)\
+FAST_NEAREST_MAINLOOP_COMMON (mips_##name##_none_##op, \
+ scaled_nearest_scanline_mips_##name##_##op, \
+ src_type, uint8_t, dst_type, NONE, TRUE, FALSE) \
+FAST_NEAREST_MAINLOOP_COMMON (mips_##name##_pad_##op, \
+ scaled_nearest_scanline_mips_##name##_##op, \
+ src_type, uint8_t, dst_type, PAD, TRUE, FALSE)
+
+/* Provide entries for the fast path table: expands to the A8-mask COVER, NONE and PAD entries, in that order, for one (op, s, d, func) combination */
+#define PIXMAN_MIPS_SIMPLE_NEAREST_A8_MASK_FAST_PATH(op,s,d,func) \
+ SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER (op,s,d,func), \
+ SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE (op,s,d,func), \
+ SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD (op,s,d,func)
+
+/*
+ * PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST
+ *
+ * Binder for unmasked bilinear scaling. Declares
+ * pixman_scaled_bilinear_scanline_<name>_<op>_asm<suffix>
+ * (dst, src_top, src_bottom, w, wt, wb, vx, unit_x) and defines the
+ * force_inline adapter scaled_bilinear_scanline_mips_<name>_<op>. The
+ * adapter's signature includes a uint32_t mask pointer and max_vx, neither
+ * of which is forwarded to the assembly routine. When flags has the
+ * SKIP_ZERO_SRC bit and zero_src is set the adapter returns without
+ * calling the routine. FAST_BILINEAR_MAINLOOP_COMMON (not defined in this
+ * header) is instantiated for COVER, NONE, PAD and NORMAL with FLAG_NONE.
+ */
+
+#define PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST(flags, name, op, \
+ src_type, dst_type, \
+ suffix) \
+void \
+pixman_scaled_bilinear_scanline_##name##_##op##_asm##suffix( \
+ dst_type * dst, \
+ const src_type * src_top, \
+ const src_type * src_bottom, \
+ int32_t w, \
+ int wt, \
+ int wb, \
+ pixman_fixed_t vx, \
+ pixman_fixed_t unit_x); \
+static force_inline void \
+scaled_bilinear_scanline_mips_##name##_##op (dst_type * dst, \
+ const uint32_t * mask, \
+ const src_type * src_top, \
+ const src_type * src_bottom, \
+ int32_t w, \
+ int wt, \
+ int wb, \
+ pixman_fixed_t vx, \
+ pixman_fixed_t unit_x, \
+ pixman_fixed_t max_vx, \
+ pixman_bool_t zero_src) \
+{ \
+ if ((flags & SKIP_ZERO_SRC) && zero_src) \
+ return; \
+ pixman_scaled_bilinear_scanline_##name##_##op##_asm##suffix (dst, src_top, \
+ src_bottom, w, \
+ wt, wb, \
+ vx, unit_x); \
+} \
+ \
+FAST_BILINEAR_MAINLOOP_COMMON (mips_##name##_cover_##op, \
+ scaled_bilinear_scanline_mips_##name##_##op, \
+ src_type, uint32_t, dst_type, COVER, FLAG_NONE) \
+FAST_BILINEAR_MAINLOOP_COMMON (mips_##name##_none_##op, \
+ scaled_bilinear_scanline_mips_##name##_##op, \
+ src_type, uint32_t, dst_type, NONE, FLAG_NONE) \
+FAST_BILINEAR_MAINLOOP_COMMON (mips_##name##_pad_##op, \
+ scaled_bilinear_scanline_mips_##name##_##op, \
+ src_type, uint32_t, dst_type, PAD, FLAG_NONE) \
+FAST_BILINEAR_MAINLOOP_COMMON (mips_##name##_normal_##op, \
+ scaled_bilinear_scanline_mips_##name##_##op, \
+ src_type, uint32_t, dst_type, NORMAL, \
+ FLAG_NONE)
+
+/*
+ * PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_A8_DST
+ *
+ * Binder for bilinear scaling with an 8-bit mask. Declares
+ * pixman_scaled_bilinear_scanline_<name>_<op>_asm<suffix>
+ * (dst, mask, top, bottom, wt, wb, x, ux, width) - note that the pixel
+ * count is the LAST argument here, unlike the unmasked bilinear binder -
+ * and defines the force_inline adapter
+ * scaled_bilinear_scanline_mips_<name>_<op>, which reorders its arguments
+ * accordingly and ignores max_vx. When flags has the SKIP_ZERO_SRC bit and
+ * zero_src is set the adapter returns without calling the routine.
+ * FAST_BILINEAR_MAINLOOP_COMMON (not defined in this header) is
+ * instantiated for COVER, NONE, PAD and NORMAL with uint8_t as the mask
+ * type and FLAG_HAVE_NON_SOLID_MASK.
+ */
+
+#define PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_A8_DST(flags, name, op, \
+ src_type, dst_type, \
+ suffix) \
+void \
+pixman_scaled_bilinear_scanline_##name##_##op##_asm##suffix ( \
+ dst_type * dst, \
+ const uint8_t * mask, \
+ const src_type * top, \
+ const src_type * bottom, \
+ int wt, \
+ int wb, \
+ pixman_fixed_t x, \
+ pixman_fixed_t ux, \
+ int width); \
+ \
+static force_inline void \
+scaled_bilinear_scanline_mips_##name##_##op (dst_type * dst, \
+ const uint8_t * mask, \
+ const src_type * src_top, \
+ const src_type * src_bottom, \
+ int32_t w, \
+ int wt, \
+ int wb, \
+ pixman_fixed_t vx, \
+ pixman_fixed_t unit_x, \
+ pixman_fixed_t max_vx, \
+ pixman_bool_t zero_src) \
+{ \
+ if ((flags & SKIP_ZERO_SRC) && zero_src) \
+ return; \
+ pixman_scaled_bilinear_scanline_##name##_##op##_asm##suffix ( \
+ dst, mask, src_top, src_bottom, wt, wb, vx, unit_x, w); \
+} \
+ \
+FAST_BILINEAR_MAINLOOP_COMMON (mips_##name##_cover_##op, \
+ scaled_bilinear_scanline_mips_##name##_##op, \
+ src_type, uint8_t, dst_type, COVER, \
+ FLAG_HAVE_NON_SOLID_MASK) \
+FAST_BILINEAR_MAINLOOP_COMMON (mips_##name##_none_##op, \
+ scaled_bilinear_scanline_mips_##name##_##op, \
+ src_type, uint8_t, dst_type, NONE, \
+ FLAG_HAVE_NON_SOLID_MASK) \
+FAST_BILINEAR_MAINLOOP_COMMON (mips_##name##_pad_##op, \
+ scaled_bilinear_scanline_mips_##name##_##op, \
+ src_type, uint8_t, dst_type, PAD, \
+ FLAG_HAVE_NON_SOLID_MASK) \
+FAST_BILINEAR_MAINLOOP_COMMON (mips_##name##_normal_##op, \
+ scaled_bilinear_scanline_mips_##name##_##op, \
+ src_type, uint8_t, dst_type, NORMAL, \
+ FLAG_HAVE_NON_SOLID_MASK)
+
#endif /* PIXMAN_MIPS_COMMON_H */
diff --git a/pixman/pixman-mips-dspr2-asm.S b/pixman/pixman-mips-dspr2-asm.S
index 15e7fa3..f8eadf1 100644
--- a/pixman/pixman-mips-dspr2-asm.S
+++ b/pixman/pixman-mips-dspr2-asm.S
@@ -32,7 +32,7 @@
#include "pixman-private.h"
#include "pixman-mips-dspr2-asm.h"
-LEAF_MIPS_DSPR2(pixman_fill_buff16_mips)
+LEAF_MIPS_DSPR2(pixman_fill_buff16)
/*
* a0 - *dest
* a1 - count (bytes)
@@ -86,58 +86,9 @@ LEAF_MIPS_DSPR2(pixman_fill_buff16_mips)
jr ra
nop
-END(pixman_fill_buff16_mips)
+END_MIPS_DSPR2(pixman_fill_buff16)
-LEAF_MIPS32R2(pixman_fill_buff32_mips)
-/*
- * a0 - *dest
- * a1 - count (bytes)
- * a2 - value to fill buffer with
- */
-
- beqz a1, 3f
- nop
- srl t1, a1, 5 /* t1 how many multiples of 32 bytes */
- beqz t1, 2f
- nop
-1:
- addiu t1, t1, -1
- beqz t1, 11f
- addiu a1, a1, -32
- pref 30, 32(a0)
- sw a2, 0(a0)
- sw a2, 4(a0)
- sw a2, 8(a0)
- sw a2, 12(a0)
- sw a2, 16(a0)
- sw a2, 20(a0)
- sw a2, 24(a0)
- sw a2, 28(a0)
- b 1b
- addiu a0, a0, 32
-11:
- sw a2, 0(a0)
- sw a2, 4(a0)
- sw a2, 8(a0)
- sw a2, 12(a0)
- sw a2, 16(a0)
- sw a2, 20(a0)
- sw a2, 24(a0)
- sw a2, 28(a0)
- addiu a0, a0, 32
-2:
- blez a1, 3f
- addiu a1, a1, -4
- sw a2, 0(a0)
- b 2b
- addiu a0, a0, 4
-3:
- jr ra
- nop
-
-END(pixman_fill_buff32_mips)
-
-LEAF_MIPS_DSPR2(pixman_composite_src_8888_0565_asm_mips)
+LEAF_MIPS_DSPR2(pixman_composite_src_8888_0565_asm)
/*
* a0 - dst (r5g6b5)
* a1 - src (a8r8g8b8)
@@ -178,9 +129,9 @@ LEAF_MIPS_DSPR2(pixman_composite_src_8888_0565_asm_mips)
j ra
nop
-END(pixman_composite_src_8888_0565_asm_mips)
+END_MIPS_DSPR2(pixman_composite_src_8888_0565_asm)
-LEAF_MIPS_DSPR2(pixman_composite_src_0565_8888_asm_mips)
+LEAF_MIPS_DSPR2(pixman_composite_src_0565_8888_asm)
/*
* a0 - dst (a8r8g8b8)
* a1 - src (r5g6b5)
@@ -220,98 +171,10 @@ LEAF_MIPS_DSPR2(pixman_composite_src_0565_8888_asm_mips)
j ra
nop
-END(pixman_composite_src_0565_8888_asm_mips)
-
-LEAF_MIPS_DSPR2(pixman_composite_src_x888_8888_asm_mips)
-/*
- * a0 - dst (a8r8g8b8)
- * a1 - src (x8r8g8b8)
- * a2 - w
- */
-
- beqz a2, 4f
- nop
- li t9, 0xff000000
- srl t8, a2, 3 /* t1 = how many multiples of 8 src pixels */
- beqz t8, 3f /* branch if less than 8 src pixels */
- nop
-1:
- addiu t8, t8, -1
- beqz t8, 2f
- addiu a2, a2, -8
- pref 0, 32(a1)
- lw t0, 0(a1)
- lw t1, 4(a1)
- lw t2, 8(a1)
- lw t3, 12(a1)
- lw t4, 16(a1)
- lw t5, 20(a1)
- lw t6, 24(a1)
- lw t7, 28(a1)
- addiu a1, a1, 32
- or t0, t0, t9
- or t1, t1, t9
- or t2, t2, t9
- or t3, t3, t9
- or t4, t4, t9
- or t5, t5, t9
- or t6, t6, t9
- or t7, t7, t9
- pref 30, 32(a0)
- sw t0, 0(a0)
- sw t1, 4(a0)
- sw t2, 8(a0)
- sw t3, 12(a0)
- sw t4, 16(a0)
- sw t5, 20(a0)
- sw t6, 24(a0)
- sw t7, 28(a0)
- b 1b
- addiu a0, a0, 32
-2:
- lw t0, 0(a1)
- lw t1, 4(a1)
- lw t2, 8(a1)
- lw t3, 12(a1)
- lw t4, 16(a1)
- lw t5, 20(a1)
- lw t6, 24(a1)
- lw t7, 28(a1)
- addiu a1, a1, 32
- or t0, t0, t9
- or t1, t1, t9
- or t2, t2, t9
- or t3, t3, t9
- or t4, t4, t9
- or t5, t5, t9
- or t6, t6, t9
- or t7, t7, t9
- sw t0, 0(a0)
- sw t1, 4(a0)
- sw t2, 8(a0)
- sw t3, 12(a0)
- sw t4, 16(a0)
- sw t5, 20(a0)
- sw t6, 24(a0)
- sw t7, 28(a0)
- beqz a2, 4f
- addiu a0, a0, 32
-3:
- lw t0, 0(a1)
- addiu a1, a1, 4
- addiu a2, a2, -1
- or t1, t0, t9
- sw t1, 0(a0)
- bnez a2, 3b
- addiu a0, a0, 4
-4:
- jr ra
- nop
-
-END(pixman_composite_src_x888_8888_asm_mips)
+END_MIPS_DSPR2(pixman_composite_src_0565_8888_asm)
#if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL)
-LEAF_MIPS_DSPR2(pixman_composite_src_0888_8888_rev_asm_mips)
+LEAF_MIPS_DSPR2(pixman_composite_src_0888_8888_rev_asm)
/*
* a0 - dst (a8r8g8b8)
* a1 - src (b8g8r8)
@@ -502,9 +365,9 @@ LEAF_MIPS_DSPR2(pixman_composite_src_0888_8888_rev_asm_mips)
j ra
nop
-END(pixman_composite_src_0888_8888_rev_asm_mips)
+END_MIPS_DSPR2(pixman_composite_src_0888_8888_rev_asm)
-LEAF_MIPS_DSPR2(pixman_composite_src_0888_0565_rev_asm_mips)
+LEAF_MIPS_DSPR2(pixman_composite_src_0888_0565_rev_asm)
/*
* a0 - dst (r5g6b5)
* a1 - src (b8g8r8)
@@ -696,10 +559,10 @@ LEAF_MIPS_DSPR2(pixman_composite_src_0888_0565_rev_asm_mips)
j ra
nop
-END(pixman_composite_src_0888_0565_rev_asm_mips)
+END_MIPS_DSPR2(pixman_composite_src_0888_0565_rev_asm)
#endif
-LEAF_MIPS_DSPR2(pixman_composite_src_pixbuf_8888_asm_mips)
+LEAF_MIPS_DSPR2(pixman_composite_src_pixbuf_8888_asm)
/*
* a0 - dst (a8b8g8r8)
* a1 - src (a8r8g8b8)
@@ -759,9 +622,9 @@ LEAF_MIPS_DSPR2(pixman_composite_src_pixbuf_8888_asm_mips)
j ra
nop
-END(pixman_composite_src_pixbuf_8888_asm_mips)
+END_MIPS_DSPR2(pixman_composite_src_pixbuf_8888_asm)
-LEAF_MIPS_DSPR2(pixman_composite_src_rpixbuf_8888_asm_mips)
+LEAF_MIPS_DSPR2(pixman_composite_src_rpixbuf_8888_asm)
/*
* a0 - dst (a8r8g8b8)
* a1 - src (a8r8g8b8)
@@ -818,9 +681,9 @@ LEAF_MIPS_DSPR2(pixman_composite_src_rpixbuf_8888_asm_mips)
j ra
nop
-END(pixman_composite_src_rpixbuf_8888_asm_mips)
+END_MIPS_DSPR2(pixman_composite_src_rpixbuf_8888_asm)
-LEAF_MIPS_DSPR2(pixman_composite_src_n_8_8888_asm_mips)
+LEAF_MIPS_DSPR2(pixman_composite_src_n_8_8888_asm)
/*
* a0 - dst (a8r8g8b8)
* a1 - src (32bit constant)
@@ -871,9 +734,9 @@ LEAF_MIPS_DSPR2(pixman_composite_src_n_8_8888_asm_mips)
j ra
nop
-END(pixman_composite_src_n_8_8888_asm_mips)
+END_MIPS_DSPR2(pixman_composite_src_n_8_8888_asm)
-LEAF_MIPS_DSPR2(pixman_composite_src_n_8_8_asm_mips)
+LEAF_MIPS_DSPR2(pixman_composite_src_n_8_8_asm)
/*
* a0 - dst (a8)
* a1 - src (32bit constant)
@@ -951,9 +814,9 @@ LEAF_MIPS_DSPR2(pixman_composite_src_n_8_8_asm_mips)
j ra
nop
-END(pixman_composite_src_n_8_8_asm_mips)
+END_MIPS_DSPR2(pixman_composite_src_n_8_8_asm)
-LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_8888_ca_asm_mips)
+LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_8888_ca_asm)
/*
* a0 - dst (a8r8g8b8)
* a1 - src (32bit constant)
@@ -1071,9 +934,9 @@ LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_8888_ca_asm_mips)
j ra
nop
-END(pixman_composite_over_n_8888_8888_ca_asm_mips)
+END_MIPS_DSPR2(pixman_composite_over_n_8888_8888_ca_asm)
-LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_0565_ca_asm_mips)
+LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_0565_ca_asm)
/*
* a0 - dst (r5g6b5)
* a1 - src (32bit constant)
@@ -1204,9 +1067,9 @@ LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_0565_ca_asm_mips)
j ra
nop
-END(pixman_composite_over_n_8888_0565_ca_asm_mips)
+END_MIPS_DSPR2(pixman_composite_over_n_8888_0565_ca_asm)
-LEAF_MIPS_DSPR2(pixman_composite_over_n_8_8_asm_mips)
+LEAF_MIPS_DSPR2(pixman_composite_over_n_8_8_asm)
/*
* a0 - dst (a8)
* a1 - src (32bit constant)
@@ -1324,9 +1187,9 @@ LEAF_MIPS_DSPR2(pixman_composite_over_n_8_8_asm_mips)
j ra
nop
-END(pixman_composite_over_n_8_8_asm_mips)
+END_MIPS_DSPR2(pixman_composite_over_n_8_8_asm)
-LEAF_MIPS_DSPR2(pixman_composite_over_n_8_8888_asm_mips)
+LEAF_MIPS_DSPR2(pixman_composite_over_n_8_8888_asm)
/*
* a0 - dst (a8r8g8b8)
* a1 - src (32bit constant)
@@ -1432,9 +1295,9 @@ LEAF_MIPS_DSPR2(pixman_composite_over_n_8_8888_asm_mips)
j ra
nop
-END(pixman_composite_over_n_8_8888_asm_mips)
+END_MIPS_DSPR2(pixman_composite_over_n_8_8888_asm)
-LEAF_MIPS_DSPR2(pixman_composite_over_n_8_0565_asm_mips)
+LEAF_MIPS_DSPR2(pixman_composite_over_n_8_0565_asm)
/*
* a0 - dst (r5g6b5)
* a1 - src (32bit constant)
@@ -1548,9 +1411,9 @@ LEAF_MIPS_DSPR2(pixman_composite_over_n_8_0565_asm_mips)
j ra
nop
-END(pixman_composite_over_n_8_0565_asm_mips)
+END_MIPS_DSPR2(pixman_composite_over_n_8_0565_asm)
-LEAF_MIPS_DSPR2(pixman_composite_over_8888_n_8888_asm_mips)
+LEAF_MIPS_DSPR2(pixman_composite_over_8888_n_8888_asm)
/*
* a0 - dst (a8r8g8b8)
* a1 - src (a8r8g8b8)
@@ -1599,9 +1462,9 @@ LEAF_MIPS_DSPR2(pixman_composite_over_8888_n_8888_asm_mips)
j ra
nop
-END(pixman_composite_over_8888_n_8888_asm_mips)
+END_MIPS_DSPR2(pixman_composite_over_8888_n_8888_asm)
-LEAF_MIPS_DSPR2(pixman_composite_over_8888_n_0565_asm_mips)
+LEAF_MIPS_DSPR2(pixman_composite_over_8888_n_0565_asm)
/*
* a0 - dst (r5g6b5)
* a1 - src (a8r8g8b8)
@@ -1656,9 +1519,9 @@ LEAF_MIPS_DSPR2(pixman_composite_over_8888_n_0565_asm_mips)
j ra
nop
-END(pixman_composite_over_8888_n_0565_asm_mips)
+END_MIPS_DSPR2(pixman_composite_over_8888_n_0565_asm)
-LEAF_MIPS_DSPR2(pixman_composite_over_0565_n_0565_asm_mips)
+LEAF_MIPS_DSPR2(pixman_composite_over_0565_n_0565_asm)
/*
* a0 - dst (r5g6b5)
* a1 - src (r5g6b5)
@@ -1715,9 +1578,9 @@ LEAF_MIPS_DSPR2(pixman_composite_over_0565_n_0565_asm_mips)
j ra
nop
-END(pixman_composite_over_0565_n_0565_asm_mips)
+END_MIPS_DSPR2(pixman_composite_over_0565_n_0565_asm)
-LEAF_MIPS_DSPR2(pixman_composite_over_8888_8_8888_asm_mips)
+LEAF_MIPS_DSPR2(pixman_composite_over_8888_8_8888_asm)
/*
* a0 - dst (a8r8g8b8)
* a1 - src (a8r8g8b8)
@@ -1766,9 +1629,9 @@ LEAF_MIPS_DSPR2(pixman_composite_over_8888_8_8888_asm_mips)
j ra
nop
-END(pixman_composite_over_8888_8_8888_asm_mips)
+END_MIPS_DSPR2(pixman_composite_over_8888_8_8888_asm)
-LEAF_MIPS_DSPR2(pixman_composite_over_8888_8_0565_asm_mips)
+LEAF_MIPS_DSPR2(pixman_composite_over_8888_8_0565_asm)
/*
* a0 - dst (r5g6b5)
* a1 - src (a8r8g8b8)
@@ -1824,9 +1687,9 @@ LEAF_MIPS_DSPR2(pixman_composite_over_8888_8_0565_asm_mips)
j ra
nop
-END(pixman_composite_over_8888_8_0565_asm_mips)
+END_MIPS_DSPR2(pixman_composite_over_8888_8_0565_asm)
-LEAF_MIPS_DSPR2(pixman_composite_over_0565_8_0565_asm_mips)
+LEAF_MIPS_DSPR2(pixman_composite_over_0565_8_0565_asm)
/*
* a0 - dst (r5g6b5)
* a1 - src (r5g6b5)
@@ -1884,9 +1747,9 @@ LEAF_MIPS_DSPR2(pixman_composite_over_0565_8_0565_asm_mips)
j ra
nop
-END(pixman_composite_over_0565_8_0565_asm_mips)
+END_MIPS_DSPR2(pixman_composite_over_0565_8_0565_asm)
-LEAF_MIPS_DSPR2(pixman_composite_over_8888_8888_8888_asm_mips)
+LEAF_MIPS_DSPR2(pixman_composite_over_8888_8888_8888_asm)
/*
* a0 - dst (a8r8g8b8)
* a1 - src (a8r8g8b8)
@@ -1937,9 +1800,9 @@ LEAF_MIPS_DSPR2(pixman_composite_over_8888_8888_8888_asm_mips)
j ra
nop
-END(pixman_composite_over_8888_8888_8888_asm_mips)
+END_MIPS_DSPR2(pixman_composite_over_8888_8888_8888_asm)
-LEAF_MIPS_DSPR2(pixman_composite_over_8888_8888_asm_mips)
+LEAF_MIPS_DSPR2(pixman_composite_over_8888_8888_asm)
/*
* a0 - dst (a8r8g8b8)
* a1 - src (a8r8g8b8)
@@ -2008,9 +1871,9 @@ LEAF_MIPS_DSPR2(pixman_composite_over_8888_8888_asm_mips)
j ra
nop
-END(pixman_composite_over_8888_8888_asm_mips)
+END_MIPS_DSPR2(pixman_composite_over_8888_8888_asm)
-LEAF_MIPS_DSPR2(pixman_composite_over_8888_0565_asm_mips)
+LEAF_MIPS_DSPR2(pixman_composite_over_8888_0565_asm)
/*
* a0 - dst (r5g6b5)
* a1 - src (a8r8g8b8)
@@ -2086,9 +1949,9 @@ LEAF_MIPS_DSPR2(pixman_composite_over_8888_0565_asm_mips)
j ra
nop
-END(pixman_composite_over_8888_0565_asm_mips)
+END_MIPS_DSPR2(pixman_composite_over_8888_0565_asm)
-LEAF_MIPS_DSPR2(pixman_composite_over_n_0565_asm_mips)
+LEAF_MIPS_DSPR2(pixman_composite_over_n_0565_asm)
/*
* a0 - dst (r5g6b5)
* a1 - src (32bit constant)
@@ -2156,9 +2019,9 @@ LEAF_MIPS_DSPR2(pixman_composite_over_n_0565_asm_mips)
j ra
nop
-END(pixman_composite_over_n_0565_asm_mips)
+END_MIPS_DSPR2(pixman_composite_over_n_0565_asm)
-LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_asm_mips)
+LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_asm)
/*
* a0 - dst (a8r8g8b8)
* a1 - src (32bit constant)
@@ -2220,9 +2083,9 @@ LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_asm_mips)
j ra
nop
-END(pixman_composite_over_n_8888_asm_mips)
+END_MIPS_DSPR2(pixman_composite_over_n_8888_asm)
-LEAF_MIPS_DSPR2(pixman_composite_add_8_8_8_asm_mips)
+LEAF_MIPS_DSPR2(pixman_composite_add_8_8_8_asm)
/*
* a0 - dst (a8)
* a1 - src (a8)
@@ -2324,9 +2187,9 @@ LEAF_MIPS_DSPR2(pixman_composite_add_8_8_8_asm_mips)
j ra
nop
-END(pixman_composite_add_8_8_8_asm_mips)
+END_MIPS_DSPR2(pixman_composite_add_8_8_8_asm)
-LEAF_MIPS_DSPR2(pixman_composite_add_n_8_8_asm_mips)
+LEAF_MIPS_DSPR2(pixman_composite_add_n_8_8_asm)
/*
* a0 - dst (a8)
* a1 - src (32bit constant)
@@ -2420,9 +2283,9 @@ LEAF_MIPS_DSPR2(pixman_composite_add_n_8_8_asm_mips)
j ra
nop
-END(pixman_composite_add_n_8_8_asm_mips)
+END_MIPS_DSPR2(pixman_composite_add_n_8_8_asm)
-LEAF_MIPS_DSPR2(pixman_composite_add_n_8_8888_asm_mips)
+LEAF_MIPS_DSPR2(pixman_composite_add_n_8_8888_asm)
/*
* a0 - dst (a8r8g8b8)
* a1 - src (32bit constant)
@@ -2472,9 +2335,9 @@ LEAF_MIPS_DSPR2(pixman_composite_add_n_8_8888_asm_mips)
j ra
nop
-END(pixman_composite_add_n_8_8888_asm_mips)
+END_MIPS_DSPR2(pixman_composite_add_n_8_8888_asm)
-LEAF_MIPS_DSPR2(pixman_composite_add_0565_8_0565_asm_mips)
+LEAF_MIPS_DSPR2(pixman_composite_add_0565_8_0565_asm)
/*
* a0 - dst (r5g6b5)
* a1 - src (r5g6b5)
@@ -2535,9 +2398,9 @@ LEAF_MIPS_DSPR2(pixman_composite_add_0565_8_0565_asm_mips)
j ra
nop
-END(pixman_composite_add_0565_8_0565_asm_mips)
+END_MIPS_DSPR2(pixman_composite_add_0565_8_0565_asm)
-LEAF_MIPS_DSPR2(pixman_composite_add_8888_8_8888_asm_mips)
+LEAF_MIPS_DSPR2(pixman_composite_add_8888_8_8888_asm)
/*
* a0 - dst (a8r8g8b8)
* a1 - src (a8r8g8b8)
@@ -2589,9 +2452,9 @@ LEAF_MIPS_DSPR2(pixman_composite_add_8888_8_8888_asm_mips)
j ra
nop
-END(pixman_composite_add_8888_8_8888_asm_mips)
+END_MIPS_DSPR2(pixman_composite_add_8888_8_8888_asm)
-LEAF_MIPS_DSPR2(pixman_composite_add_8888_n_8888_asm_mips)
+LEAF_MIPS_DSPR2(pixman_composite_add_8888_n_8888_asm)
/*
* a0 - dst (a8r8g8b8)
* a1 - src (a8r8g8b8)
@@ -2642,9 +2505,9 @@ LEAF_MIPS_DSPR2(pixman_composite_add_8888_n_8888_asm_mips)
j ra
nop
-END(pixman_composite_add_8888_n_8888_asm_mips)
+END_MIPS_DSPR2(pixman_composite_add_8888_n_8888_asm)
-LEAF_MIPS_DSPR2(pixman_composite_add_8888_8888_8888_asm_mips)
+LEAF_MIPS_DSPR2(pixman_composite_add_8888_8888_8888_asm)
/*
* a0 - dst (a8r8g8b8)
* a1 - src (a8r8g8b8)
@@ -2699,9 +2562,9 @@ LEAF_MIPS_DSPR2(pixman_composite_add_8888_8888_8888_asm_mips)
j ra
nop
-END(pixman_composite_add_8888_8888_8888_asm_mips)
+END_MIPS_DSPR2(pixman_composite_add_8888_8888_8888_asm)
-LEAF_MIPS_DSPR2(pixman_composite_add_8_8_asm_mips)
+LEAF_MIPS_DSPR2(pixman_composite_add_8_8_asm)
/*
* a0 - dst (a8)
* a1 - src (a8)
@@ -2767,9 +2630,9 @@ LEAF_MIPS_DSPR2(pixman_composite_add_8_8_asm_mips)
j ra
nop
-END(pixman_composite_add_8_8_asm_mips)
+END_MIPS_DSPR2(pixman_composite_add_8_8_asm)
-LEAF_MIPS_DSPR2(pixman_composite_add_8888_8888_asm_mips)
+LEAF_MIPS_DSPR2(pixman_composite_add_8888_8888_asm)
/*
* a0 - dst (a8r8g8b8)
* a1 - src (a8r8g8b8)
@@ -2844,9 +2707,9 @@ LEAF_MIPS_DSPR2(pixman_composite_add_8888_8888_asm_mips)
jr ra
nop
-END(pixman_composite_add_8888_8888_asm_mips)
+END_MIPS_DSPR2(pixman_composite_add_8888_8888_asm)
-LEAF_MIPS_DSPR2(pixman_composite_out_reverse_8_0565_asm_mips)
+LEAF_MIPS_DSPR2(pixman_composite_out_reverse_8_0565_asm)
/*
* a0 - dst (r5g6b5)
* a1 - src (a8)
@@ -2905,9 +2768,9 @@ LEAF_MIPS_DSPR2(pixman_composite_out_reverse_8_0565_asm_mips)
j ra
nop
-END(pixman_composite_out_reverse_8_0565_asm_mips)
+END_MIPS_DSPR2(pixman_composite_out_reverse_8_0565_asm)
-LEAF_MIPS_DSPR2(pixman_composite_out_reverse_8_8888_asm_mips)
+LEAF_MIPS_DSPR2(pixman_composite_out_reverse_8_8888_asm)
/*
* a0 - dst (a8r8g8b8)
* a1 - src (a8)
@@ -2954,9 +2817,9 @@ LEAF_MIPS_DSPR2(pixman_composite_out_reverse_8_8888_asm_mips)
j ra
nop
-END(pixman_composite_out_reverse_8_8888_asm_mips)
+END_MIPS_DSPR2(pixman_composite_out_reverse_8_8888_asm)
-LEAF_MIPS_DSPR2(pixman_composite_over_reverse_n_8888_asm_mips)
+LEAF_MIPS_DSPR2(pixman_composite_over_reverse_n_8888_asm)
/*
* a0 - dst (a8r8g8b8)
* a1 - src (32bit constant)
@@ -3090,9 +2953,9 @@ LEAF_MIPS_DSPR2(pixman_composite_over_reverse_n_8888_asm_mips)
j ra
nop
-END(pixman_composite_over_reverse_n_8888_asm_mips)
+END_MIPS_DSPR2(pixman_composite_over_reverse_n_8888_asm)
-LEAF_MIPS_DSPR2(pixman_composite_in_n_8_asm_mips)
+LEAF_MIPS_DSPR2(pixman_composite_in_n_8_asm)
/*
* a0 - dst (a8)
* a1 - src (32bit constant)
@@ -3166,9 +3029,9 @@ LEAF_MIPS_DSPR2(pixman_composite_in_n_8_asm_mips)
j ra
nop
-END(pixman_composite_in_n_8_asm_mips)
+END_MIPS_DSPR2(pixman_composite_in_n_8_asm)
-LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_8888_8888_OVER_asm_mips)
+LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_8888_8888_OVER_asm)
/*
* a0 - dst (a8r8g8b8)
* a1 - src (a8r8g8b8)
@@ -3227,9 +3090,9 @@ LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_8888_8888_OVER_asm_mips)
j ra
nop
-END(pixman_scaled_nearest_scanline_8888_8888_OVER_asm_mips)
+END_MIPS_DSPR2(pixman_scaled_nearest_scanline_8888_8888_OVER_asm)
-LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_8888_0565_OVER_asm_mips)
+LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_8888_0565_OVER_asm)
/*
* a0 - dst (r5g6b5)
* a1 - src (a8r8g8b8)
@@ -3293,9 +3156,9 @@ LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_8888_0565_OVER_asm_mips)
j ra
nop
-END(pixman_scaled_nearest_scanline_8888_0565_OVER_asm_mips)
+END_MIPS_DSPR2(pixman_scaled_nearest_scanline_8888_0565_OVER_asm)
-LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_0565_8888_SRC_asm_mips)
+LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_0565_8888_SRC_asm)
/*
* a0 - dst (a8r8g8b8)
* a1 - src (r5g6b5)
@@ -3352,9 +3215,9 @@ LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_0565_8888_SRC_asm_mips)
j ra
nop
-END(pixman_scaled_nearest_scanline_0565_8888_SRC_asm_mips)
+END_MIPS_DSPR2(pixman_scaled_nearest_scanline_0565_8888_SRC_asm)
-LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_8888_8_0565_OVER_asm_mips)
+LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_8888_8_0565_OVER_asm)
/*
* a0 - dst (r5g6b5)
* a1 - src (a8r8g8b8)
@@ -3429,9 +3292,9 @@ LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_8888_8_0565_OVER_asm_mips)
j ra
nop
-END(pixman_scaled_nearest_scanline_8888_8_0565_OVER_asm_mips)
+END_MIPS_DSPR2(pixman_scaled_nearest_scanline_8888_8_0565_OVER_asm)
-LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_0565_8_0565_OVER_asm_mips)
+LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_0565_8_0565_OVER_asm)
/*
* a0 - dst (r5g6b5)
* a1 - src (r5g6b5)
@@ -3509,9 +3372,9 @@ LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_0565_8_0565_OVER_asm_mips)
j ra
nop
-END(pixman_scaled_nearest_scanline_0565_8_0565_OVER_asm_mips)
+END_MIPS_DSPR2(pixman_scaled_nearest_scanline_0565_8_0565_OVER_asm)
-LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_mips)
+LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8888_SRC_asm)
/*
* a0 - *dst
* a1 - *src_top
@@ -3567,9 +3430,9 @@ LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_mips)
j ra
nop
-END(pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_mips)
+END_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8888_SRC_asm)
-LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_0565_SRC_asm_mips)
+LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_0565_SRC_asm)
/*
* a0 - *dst
* a1 - *src_top
@@ -3626,9 +3489,9 @@ LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_0565_SRC_asm_mips)
j ra
nop
-END(pixman_scaled_bilinear_scanline_8888_0565_SRC_asm_mips)
+END_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_0565_SRC_asm)
-LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_0565_8888_SRC_asm_mips)
+LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_0565_8888_SRC_asm)
/*
* a0 - *dst
* a1 - *src_top
@@ -3690,9 +3553,9 @@ LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_0565_8888_SRC_asm_mips)
j ra
nop
-END(pixman_scaled_bilinear_scanline_0565_8888_SRC_asm_mips)
+END_MIPS_DSPR2(pixman_scaled_bilinear_scanline_0565_8888_SRC_asm)
-LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_0565_0565_SRC_asm_mips)
+LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_0565_0565_SRC_asm)
/*
* a0 - *dst
* a1 - *src_top
@@ -3755,9 +3618,9 @@ LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_0565_0565_SRC_asm_mips)
j ra
nop
-END(pixman_scaled_bilinear_scanline_0565_0565_SRC_asm_mips)
+END_MIPS_DSPR2(pixman_scaled_bilinear_scanline_0565_0565_SRC_asm)
-LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8888_OVER_asm_mips)
+LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8888_OVER_asm)
/*
* a0 - *dst
* a1 - *src_top
@@ -3816,9 +3679,9 @@ LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8888_OVER_asm_mips)
j ra
nop
-END(pixman_scaled_bilinear_scanline_8888_8888_OVER_asm_mips)
+END_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8888_OVER_asm)
-LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8888_ADD_asm_mips)
+LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8888_ADD_asm)
/*
* a0 - *dst
* a1 - *src_top
@@ -3876,9 +3739,9 @@ LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8888_ADD_asm_mips)
j ra
nop
-END(pixman_scaled_bilinear_scanline_8888_8888_ADD_asm_mips)
+END_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8888_ADD_asm)
-LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_8888_SRC_asm_mips)
+LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_8888_SRC_asm)
/*
* a0 - *dst
* a1 - *mask
@@ -3940,9 +3803,9 @@ LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_8888_SRC_asm_mips)
j ra
nop
-END(pixman_scaled_bilinear_scanline_8888_8_8888_SRC_asm_mips)
+END_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_8888_SRC_asm)
-LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_0565_SRC_asm_mips)
+LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_0565_SRC_asm)
/*
* a0 - *dst
* a1 - *mask
@@ -4005,9 +3868,9 @@ LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_0565_SRC_asm_mips)
j ra
nop
-END(pixman_scaled_bilinear_scanline_8888_8_0565_SRC_asm_mips)
+END_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_0565_SRC_asm)
-LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_0565_8_x888_SRC_asm_mips)
+LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_0565_8_x888_SRC_asm)
/*
* a0 - *dst
* a1 - *mask
@@ -4076,9 +3939,9 @@ LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_0565_8_x888_SRC_asm_mips)
j ra
nop
-END(pixman_scaled_bilinear_scanline_0565_8_x888_SRC_asm_mips)
+END_MIPS_DSPR2(pixman_scaled_bilinear_scanline_0565_8_x888_SRC_asm)
-LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_0565_8_0565_SRC_asm_mips)
+LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_0565_8_0565_SRC_asm)
/*
* a0 - *dst
* a1 - *mask
@@ -4148,9 +4011,9 @@ LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_0565_8_0565_SRC_asm_mips)
j ra
nop
-END(pixman_scaled_bilinear_scanline_0565_8_0565_SRC_asm_mips)
+END_MIPS_DSPR2(pixman_scaled_bilinear_scanline_0565_8_0565_SRC_asm)
-LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_8888_OVER_asm_mips)
+LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_8888_OVER_asm)
/*
* a0 - dst (a8r8g8b8)
* a1 - mask (a8)
@@ -4215,9 +4078,9 @@ LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_8888_OVER_asm_mips)
j ra
nop
-END(pixman_scaled_bilinear_scanline_8888_8_8888_OVER_asm_mips)
+END_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_8888_OVER_asm)
-LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_8888_ADD_asm_mips)
+LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_8888_ADD_asm)
/*
* a0 - *dst
* a1 - *mask
@@ -4280,4 +4143,4 @@ LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_8888_ADD_asm_mips)
j ra
nop
-END(pixman_scaled_bilinear_scanline_8888_8_8888_ADD_asm_mips)
+END_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_8888_ADD_asm)
diff --git a/pixman/pixman-mips-dspr2-asm.h b/pixman/pixman-mips-dspr2-asm.h
index ec46715..b43d008 100644
--- a/pixman/pixman-mips-dspr2-asm.h
+++ b/pixman/pixman-mips-dspr2-asm.h
@@ -32,262 +32,7 @@
#ifndef PIXMAN_MIPS_DSPR2_ASM_H
#define PIXMAN_MIPS_DSPR2_ASM_H
-#define zero $0
-#define AT $1
-#define v0 $2
-#define v1 $3
-#define a0 $4
-#define a1 $5
-#define a2 $6
-#define a3 $7
-#define t0 $8
-#define t1 $9
-#define t2 $10
-#define t3 $11
-#define t4 $12
-#define t5 $13
-#define t6 $14
-#define t7 $15
-#define s0 $16
-#define s1 $17
-#define s2 $18
-#define s3 $19
-#define s4 $20
-#define s5 $21
-#define s6 $22
-#define s7 $23
-#define t8 $24
-#define t9 $25
-#define k0 $26
-#define k1 $27
-#define gp $28
-#define sp $29
-#define fp $30
-#define s8 $30
-#define ra $31
-
-/*
- * LEAF_MIPS32R2 - declare leaf routine for MIPS32r2
- */
-#define LEAF_MIPS32R2(symbol) \
- .globl symbol; \
- .align 2; \
- .type symbol, @function; \
- .ent symbol, 0; \
-symbol: .frame sp, 0, ra; \
- .set push; \
- .set arch=mips32r2; \
- .set noreorder; \
- .set noat;
-
-/*
- * LEAF_MIPS32R2 - declare leaf routine for MIPS DSPr2
- */
-#define LEAF_MIPS_DSPR2(symbol) \
-LEAF_MIPS32R2(symbol) \
- .set dspr2;
-
-/*
- * END - mark end of function
- */
-#define END(function) \
- .set pop; \
- .end function; \
- .size function,.-function
-
-/*
- * Checks if stack offset is big enough for storing/restoring regs_num
- * number of register to/from stack. Stack offset must be greater than
- * or equal to the number of bytes needed for storing registers (regs_num*4).
- * Since MIPS ABI allows usage of first 16 bytes of stack frame (this is
- * preserved for input arguments of the functions, already stored in a0-a3),
- * stack size can be further optimized by utilizing this space.
- */
-.macro CHECK_STACK_OFFSET regs_num, stack_offset
-.if \stack_offset < \regs_num * 4 - 16
-.error "Stack offset too small."
-.endif
-.endm
-
-/*
- * Saves set of registers on stack. Maximum number of registers that
- * can be saved on stack is limitted to 14 (a0-a3, v0-v1 and s0-s7).
- * Stack offset is number of bytes that are added to stack pointer (sp)
- * before registers are pushed in order to provide enough space on stack
- * (offset must be multiple of 4, and must be big enough, as described by
- * CHECK_STACK_OFFSET macro). This macro is intended to be used in
- * combination with RESTORE_REGS_FROM_STACK macro. Example:
- * SAVE_REGS_ON_STACK 4, v0, v1, s0, s1
- * RESTORE_REGS_FROM_STACK 4, v0, v1, s0, s1
- */
-.macro SAVE_REGS_ON_STACK stack_offset = 0, r1, \
- r2 = 0, r3 = 0, r4 = 0, \
- r5 = 0, r6 = 0, r7 = 0, \
- r8 = 0, r9 = 0, r10 = 0, \
- r11 = 0, r12 = 0, r13 = 0, \
- r14 = 0
- .if (\stack_offset < 0) || (\stack_offset - (\stack_offset / 4) * 4)
- .error "Stack offset must be pozitive and multiple of 4."
- .endif
- .if \stack_offset != 0
- addiu sp, sp, -\stack_offset
- .endif
- sw \r1, 0(sp)
- .if \r2 != 0
- sw \r2, 4(sp)
- .endif
- .if \r3 != 0
- sw \r3, 8(sp)
- .endif
- .if \r4 != 0
- sw \r4, 12(sp)
- .endif
- .if \r5 != 0
- CHECK_STACK_OFFSET 5, \stack_offset
- sw \r5, 16(sp)
- .endif
- .if \r6 != 0
- CHECK_STACK_OFFSET 6, \stack_offset
- sw \r6, 20(sp)
- .endif
- .if \r7 != 0
- CHECK_STACK_OFFSET 7, \stack_offset
- sw \r7, 24(sp)
- .endif
- .if \r8 != 0
- CHECK_STACK_OFFSET 8, \stack_offset
- sw \r8, 28(sp)
- .endif
- .if \r9 != 0
- CHECK_STACK_OFFSET 9, \stack_offset
- sw \r9, 32(sp)
- .endif
- .if \r10 != 0
- CHECK_STACK_OFFSET 10, \stack_offset
- sw \r10, 36(sp)
- .endif
- .if \r11 != 0
- CHECK_STACK_OFFSET 11, \stack_offset
- sw \r11, 40(sp)
- .endif
- .if \r12 != 0
- CHECK_STACK_OFFSET 12, \stack_offset
- sw \r12, 44(sp)
- .endif
- .if \r13 != 0
- CHECK_STACK_OFFSET 13, \stack_offset
- sw \r13, 48(sp)
- .endif
- .if \r14 != 0
- CHECK_STACK_OFFSET 14, \stack_offset
- sw \r14, 52(sp)
- .endif
-.endm
-
-/*
- * Restores set of registers from stack. Maximum number of registers that
- * can be restored from stack is limitted to 14 (a0-a3, v0-v1 and s0-s7).
- * Stack offset is number of bytes that are added to stack pointer (sp)
- * after registers are restored (offset must be multiple of 4, and must
- * be big enough, as described by CHECK_STACK_OFFSET macro). This macro is
- * intended to be used in combination with RESTORE_REGS_FROM_STACK macro.
- * Example:
- * SAVE_REGS_ON_STACK 4, v0, v1, s0, s1
- * RESTORE_REGS_FROM_STACK 4, v0, v1, s0, s1
- */
-.macro RESTORE_REGS_FROM_STACK stack_offset = 0, r1, \
- r2 = 0, r3 = 0, r4 = 0, \
- r5 = 0, r6 = 0, r7 = 0, \
- r8 = 0, r9 = 0, r10 = 0, \
- r11 = 0, r12 = 0, r13 = 0, \
- r14 = 0
- .if (\stack_offset < 0) || (\stack_offset - (\stack_offset/4)*4)
- .error "Stack offset must be pozitive and multiple of 4."
- .endif
- lw \r1, 0(sp)
- .if \r2 != 0
- lw \r2, 4(sp)
- .endif
- .if \r3 != 0
- lw \r3, 8(sp)
- .endif
- .if \r4 != 0
- lw \r4, 12(sp)
- .endif
- .if \r5 != 0
- CHECK_STACK_OFFSET 5, \stack_offset
- lw \r5, 16(sp)
- .endif
- .if \r6 != 0
- CHECK_STACK_OFFSET 6, \stack_offset
- lw \r6, 20(sp)
- .endif
- .if \r7 != 0
- CHECK_STACK_OFFSET 7, \stack_offset
- lw \r7, 24(sp)
- .endif
- .if \r8 != 0
- CHECK_STACK_OFFSET 8, \stack_offset
- lw \r8, 28(sp)
- .endif
- .if \r9 != 0
- CHECK_STACK_OFFSET 9, \stack_offset
- lw \r9, 32(sp)
- .endif
- .if \r10 != 0
- CHECK_STACK_OFFSET 10, \stack_offset
- lw \r10, 36(sp)
- .endif
- .if \r11 != 0
- CHECK_STACK_OFFSET 11, \stack_offset
- lw \r11, 40(sp)
- .endif
- .if \r12 != 0
- CHECK_STACK_OFFSET 12, \stack_offset
- lw \r12, 44(sp)
- .endif
- .if \r13 != 0
- CHECK_STACK_OFFSET 13, \stack_offset
- lw \r13, 48(sp)
- .endif
- .if \r14 != 0
- CHECK_STACK_OFFSET 14, \stack_offset
- lw \r14, 52(sp)
- .endif
- .if \stack_offset != 0
- addiu sp, sp, \stack_offset
- .endif
-.endm
-
-/*
- * Conversion of single r5g6b5 pixel (in_565) to single a8r8g8b8 pixel
- * returned in (out_8888) register. Requires two temporary registers
- * (scratch1 and scratch2).
- */
-.macro CONVERT_1x0565_TO_1x8888 in_565, \
- out_8888, \
- scratch1, scratch2
- lui \out_8888, 0xff00
- sll \scratch1, \in_565, 0x3
- andi \scratch2, \scratch1, 0xff
- ext \scratch1, \in_565, 0x2, 0x3
- or \scratch1, \scratch2, \scratch1
- or \out_8888, \out_8888, \scratch1
-
- sll \scratch1, \in_565, 0x5
- andi \scratch1, \scratch1, 0xfc00
- srl \scratch2, \in_565, 0x1
- andi \scratch2, \scratch2, 0x300
- or \scratch2, \scratch1, \scratch2
- or \out_8888, \out_8888, \scratch2
-
- andi \scratch1, \in_565, 0xf800
- srl \scratch2, \scratch1, 0x5
- andi \scratch2, \scratch2, 0xff00
- or \scratch1, \scratch1, \scratch2
- sll \scratch1, \scratch1, 0x8
- or \out_8888, \out_8888, \scratch1
-.endm
+#include "pixman-mips32r2-asm.h"
/*
* Conversion of two r5g6b5 pixels (in1_565 and in2_565) to two a8r8g8b8 pixels
@@ -324,23 +69,6 @@ LEAF_MIPS32R2(symbol) \
.endm
/*
- * Conversion of single a8r8g8b8 pixel (in_8888) to single r5g6b5 pixel
- * returned in (out_565) register. Requires two temporary registers
- * (scratch1 and scratch2).
- */
-.macro CONVERT_1x8888_TO_1x0565 in_8888, \
- out_565, \
- scratch1, scratch2
- ext \out_565, \in_8888, 0x3, 0x5
- srl \scratch1, \in_8888, 0x5
- andi \scratch1, \scratch1, 0x07e0
- srl \scratch2, \in_8888, 0x8
- andi \scratch2, \scratch2, 0xf800
- or \out_565, \out_565, \scratch1
- or \out_565, \out_565, \scratch2
-.endm
-
-/*
* Conversion of two a8r8g8b8 pixels (in1_8888 and in2_8888) to two r5g6b5
* pixels returned in (out1_565 and out2_565) registers. Requires two temporary
* registers (scratch1 and scratch2). It also requires maskR, maskG and maskB
@@ -707,4 +435,4 @@ LEAF_MIPS32R2(symbol) \
precrq.qb.ph \tl, \alpha, \scratch1
.endm
-#endif //PIXMAN_MIPS_DSPR2_ASM_H
+#endif /* PIXMAN_MIPS_DSPR2_ASM_H */
diff --git a/pixman/pixman-mips-dspr2.c b/pixman/pixman-mips-dspr2.c
index 8e90c97..742c5e8 100644
--- a/pixman/pixman-mips-dspr2.c
+++ b/pixman/pixman-mips-dspr2.c
@@ -34,134 +34,126 @@
#endif
#include "pixman-private.h"
-#include "pixman-mips-dspr2.h"
+#include "pixman-mips-common.h"
-PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, src_x888_8888,
- uint32_t, 1, uint32_t, 1)
PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, src_8888_0565,
- uint32_t, 1, uint16_t, 1)
+ uint32_t, 1, uint16_t, 1, _mips_dspr2)
PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, src_0565_8888,
- uint16_t, 1, uint32_t, 1)
-PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (DO_FAST_MEMCPY, src_0565_0565,
- uint16_t, 1, uint16_t, 1)
-PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (DO_FAST_MEMCPY, src_8888_8888,
- uint32_t, 1, uint32_t, 1)
-PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (DO_FAST_MEMCPY, src_0888_0888,
- uint8_t, 3, uint8_t, 3)
+ uint16_t, 1, uint32_t, 1, _mips_dspr2)
#if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL)
PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, src_0888_8888_rev,
- uint8_t, 3, uint32_t, 1)
+ uint8_t, 3, uint32_t, 1, _mips_dspr2)
PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, src_0888_0565_rev,
- uint8_t, 3, uint16_t, 1)
+ uint8_t, 3, uint16_t, 1, _mips_dspr2)
#endif
PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, src_pixbuf_8888,
- uint32_t, 1, uint32_t, 1)
+ uint32_t, 1, uint32_t, 1, _mips_dspr2)
PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, src_rpixbuf_8888,
- uint32_t, 1, uint32_t, 1)
+ uint32_t, 1, uint32_t, 1, _mips_dspr2)
PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, over_8888_8888,
- uint32_t, 1, uint32_t, 1)
+ uint32_t, 1, uint32_t, 1, _mips_dspr2)
PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, over_8888_0565,
- uint32_t, 1, uint16_t, 1)
+ uint32_t, 1, uint16_t, 1, _mips_dspr2)
PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, add_8_8,
- uint8_t, 1, uint8_t, 1)
+ uint8_t, 1, uint8_t, 1, _mips_dspr2)
PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, add_8888_8888,
- uint32_t, 1, uint32_t, 1)
+ uint32_t, 1, uint32_t, 1, _mips_dspr2)
PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, out_reverse_8_0565,
- uint8_t, 1, uint16_t, 1)
+ uint8_t, 1, uint16_t, 1, _mips_dspr2)
PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, out_reverse_8_8888,
- uint8_t, 1, uint32_t, 1)
+ uint8_t, 1, uint32_t, 1, _mips_dspr2)
PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (0, src_n_8_8888,
- uint8_t, 1, uint32_t, 1)
+ uint8_t, 1, uint32_t, 1, _mips_dspr2)
PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (0, src_n_8_8,
- uint8_t, 1, uint8_t, 1)
+ uint8_t, 1, uint8_t, 1, _mips_dspr2)
PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8888_8888_ca,
- uint32_t, 1, uint32_t, 1)
+ uint32_t, 1, uint32_t, 1, _mips_dspr2)
PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8888_0565_ca,
- uint32_t, 1, uint16_t, 1)
+ uint32_t, 1, uint16_t, 1, _mips_dspr2)
PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8_8,
- uint8_t, 1, uint8_t, 1)
+ uint8_t, 1, uint8_t, 1, _mips_dspr2)
PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8_8888,
- uint8_t, 1, uint32_t, 1)
+ uint8_t, 1, uint32_t, 1, _mips_dspr2)
PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8_0565,
- uint8_t, 1, uint16_t, 1)
+ uint8_t, 1, uint16_t, 1, _mips_dspr2)
PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, add_n_8_8,
- uint8_t, 1, uint8_t, 1)
+ uint8_t, 1, uint8_t, 1, _mips_dspr2)
PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, add_n_8_8888,
- uint8_t, 1, uint32_t, 1)
+ uint8_t, 1, uint32_t, 1, _mips_dspr2)
PIXMAN_MIPS_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, over_8888_n_8888,
- uint32_t, 1, uint32_t, 1)
+ uint32_t, 1, uint32_t, 1, _mips_dspr2)
PIXMAN_MIPS_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, over_8888_n_0565,
- uint32_t, 1, uint16_t, 1)
+ uint32_t, 1, uint16_t, 1, _mips_dspr2)
PIXMAN_MIPS_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, over_0565_n_0565,
- uint16_t, 1, uint16_t, 1)
+ uint16_t, 1, uint16_t, 1, _mips_dspr2)
PIXMAN_MIPS_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, add_8888_n_8888,
- uint32_t, 1, uint32_t, 1)
+ uint32_t, 1, uint32_t, 1, _mips_dspr2)
PIXMAN_MIPS_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, over_n_0565,
- uint16_t, 1)
+ uint16_t, 1, _mips_dspr2)
PIXMAN_MIPS_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, over_n_8888,
- uint32_t, 1)
+ uint32_t, 1, _mips_dspr2)
PIXMAN_MIPS_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, over_reverse_n_8888,
- uint32_t, 1)
+ uint32_t, 1, _mips_dspr2)
PIXMAN_MIPS_BIND_FAST_PATH_N_DST (0, in_n_8,
- uint8_t, 1)
+ uint8_t, 1, _mips_dspr2)
PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (add_8_8_8, uint8_t, 1,
- uint8_t, 1, uint8_t, 1)
+ uint8_t, 1, uint8_t, 1, _mips_dspr2)
PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (add_8888_8_8888, uint32_t, 1,
- uint8_t, 1, uint32_t, 1)
+ uint8_t, 1, uint32_t, 1, _mips_dspr2)
PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (add_8888_8888_8888, uint32_t, 1,
- uint32_t, 1, uint32_t, 1)
+ uint32_t, 1, uint32_t, 1, _mips_dspr2)
PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (add_0565_8_0565, uint16_t, 1,
- uint8_t, 1, uint16_t, 1)
+ uint8_t, 1, uint16_t, 1, _mips_dspr2)
PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (over_8888_8_8888, uint32_t, 1,
- uint8_t, 1, uint32_t, 1)
+ uint8_t, 1, uint32_t, 1, _mips_dspr2)
PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (over_8888_8_0565, uint32_t, 1,
- uint8_t, 1, uint16_t, 1)
+ uint8_t, 1, uint16_t, 1, _mips_dspr2)
PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (over_0565_8_0565, uint16_t, 1,
- uint8_t, 1, uint16_t, 1)
+ uint8_t, 1, uint16_t, 1, _mips_dspr2)
PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (over_8888_8888_8888, uint32_t, 1,
- uint32_t, 1, uint32_t, 1)
+ uint32_t, 1, uint32_t, 1, _mips_dspr2)
PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_DST (8888_8888, OVER,
- uint32_t, uint32_t)
+ uint32_t, uint32_t, _mips_dspr2)
PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_DST (8888_0565, OVER,
- uint32_t, uint16_t)
+ uint32_t, uint16_t, _mips_dspr2)
PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_DST (0565_8888, SRC,
- uint16_t, uint32_t)
+ uint16_t, uint32_t, _mips_dspr2)
PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (0, 8888_8888, SRC,
- uint32_t, uint32_t)
+ uint32_t, uint32_t, _mips_dspr2)
PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (0, 8888_0565, SRC,
- uint32_t, uint16_t)
+ uint32_t, uint16_t, _mips_dspr2)
PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (0, 0565_8888, SRC,
- uint16_t, uint32_t)
+ uint16_t, uint32_t, _mips_dspr2)
PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (0, 0565_0565, SRC,
- uint16_t, uint16_t)
+ uint16_t, uint16_t, _mips_dspr2)
PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (SKIP_ZERO_SRC, 8888_8888, OVER,
- uint32_t, uint32_t)
+ uint32_t, uint32_t, _mips_dspr2)
PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (SKIP_ZERO_SRC, 8888_8888, ADD,
- uint32_t, uint32_t)
+ uint32_t, uint32_t, _mips_dspr2)
PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_A8_DST (SKIP_ZERO_SRC, 8888_8_0565,
- OVER, uint32_t, uint16_t)
+ OVER, uint32_t, uint16_t, _mips_dspr2)
PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_A8_DST (SKIP_ZERO_SRC, 0565_8_0565,
- OVER, uint16_t, uint16_t)
+ OVER, uint16_t, uint16_t, _mips_dspr2)
PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_A8_DST (0, 8888_8_8888, SRC,
- uint32_t, uint32_t)
+ uint32_t, uint32_t, _mips_dspr2)
PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_A8_DST (0, 8888_8_0565, SRC,
- uint32_t, uint16_t)
+ uint32_t, uint16_t, _mips_dspr2)
PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_A8_DST (0, 0565_8_x888, SRC,
- uint16_t, uint32_t)
+ uint16_t, uint32_t, _mips_dspr2)
PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_A8_DST (0, 0565_8_0565, SRC,
- uint16_t, uint16_t)
+ uint16_t, uint16_t, _mips_dspr2)
PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_A8_DST (SKIP_ZERO_SRC, 8888_8_8888, OVER,
- uint32_t, uint32_t)
+ uint32_t, uint32_t, _mips_dspr2)
PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_A8_DST (SKIP_ZERO_SRC, 8888_8_8888, ADD,
- uint32_t, uint32_t)
+ uint32_t, uint32_t, _mips_dspr2)
static pixman_bool_t
mips_dspr2_fill (pixman_implementation_t *imp,
@@ -176,6 +168,7 @@ mips_dspr2_fill (pixman_implementation_t *imp,
{
uint8_t *byte_line;
uint32_t byte_width;
+
switch (bpp)
{
case 16:
@@ -188,20 +181,32 @@ mips_dspr2_fill (pixman_implementation_t *imp,
{
uint8_t *dst = byte_line;
byte_line += stride;
- pixman_fill_buff16_mips (dst, byte_width, _xor & 0xffff);
+ pixman_fill_buff16_mips_dspr2 (dst, byte_width, _xor & 0xffff);
}
return TRUE;
case 32:
+#ifdef USE_MIPS32R2
stride = stride * (int) sizeof (uint32_t) / 4;
byte_line = (uint8_t *)(((uint32_t *)bits) + stride * y + x);
byte_width = width * 4;
stride *= 4;
-
+#else
+ bits = bits + y * stride + x;
+#endif
while (height--)
{
+#ifdef USE_MIPS32R2
uint8_t *dst = byte_line;
byte_line += stride;
- pixman_fill_buff32_mips (dst, byte_width, _xor);
+ pixman_fill_buff32_mips32r2 (dst, byte_width, _xor);
+#else
+ int i;
+
+ for (i = 0; i < width; ++i)
+ bits[i] = _xor;
+
+ bits += stride;
+#endif
}
return TRUE;
default:
@@ -230,58 +235,59 @@ mips_dspr2_blt (pixman_implementation_t *imp,
uint8_t *src_bytes;
uint8_t *dst_bytes;
uint32_t byte_width;
+ int32_t temp;
- switch (src_bpp)
+ /* Only 16 and 32 bpp are handled. Reject everything else before the
+ * divisions below: for src_bpp < 8, (src_bpp >> 3) is 0. */
+ if (src_bpp != 16 && src_bpp != 32)
+ return FALSE;
+
+ /* Bytes per pixel. */
+ temp = src_bpp >> 3;
+
+ /* Rescale the strides from uint32_t units to pixel units. */
+ src_stride = src_stride * (int) sizeof (uint32_t) / temp;
+ dst_stride = dst_stride * (int) sizeof (uint32_t) / temp;
+ if (src_bpp == 16)
{
- case 16:
- src_stride = src_stride * (int) sizeof (uint32_t) / 2;
- dst_stride = dst_stride * (int) sizeof (uint32_t) / 2;
src_bytes =(uint8_t *)(((uint16_t *)src_bits)
+ src_stride * (src_y) + (src_x));
dst_bytes = (uint8_t *)(((uint16_t *)dst_bits)
+ dst_stride * (dest_y) + (dest_x));
- byte_width = width * 2;
- src_stride *= 2;
- dst_stride *= 2;
-
- while (height--)
- {
- uint8_t *src = src_bytes;
- uint8_t *dst = dst_bytes;
- src_bytes += src_stride;
- dst_bytes += dst_stride;
- pixman_mips_fast_memcpy (dst, src, byte_width);
- }
- return TRUE;
- case 32:
- src_stride = src_stride * (int) sizeof (uint32_t) / 4;
- dst_stride = dst_stride * (int) sizeof (uint32_t) / 4;
+ }
+ else if (src_bpp == 32)
+ {
src_bytes = (uint8_t *)(((uint32_t *)src_bits)
+ src_stride * (src_y) + (src_x));
dst_bytes = (uint8_t *)(((uint32_t *)dst_bits)
+ dst_stride * (dest_y) + (dest_x));
- byte_width = width * 4;
- src_stride *= 4;
- dst_stride *= 4;
-
- while (height--)
- {
- uint8_t *src = src_bytes;
- uint8_t *dst = dst_bytes;
- src_bytes += src_stride;
- dst_bytes += dst_stride;
- pixman_mips_fast_memcpy (dst, src, byte_width);
- }
- return TRUE;
- default:
+ }
+ else
return FALSE;
+
+ /* Switch the row width and both strides to bytes for the copy loop. */
+ byte_width = width * temp;
+ src_stride *= temp;
+ dst_stride *= temp;
+
+ while (height--)
+ {
+ uint8_t *src = src_bytes;
+ uint8_t *dst = dst_bytes;
+ src_bytes += src_stride;
+ dst_bytes += dst_stride;
+#ifdef USE_MIPS32R2
+ pixman_fast_memcpy_mips32r2 (dst, src, byte_width);
+#else
+ memcpy (dst, src, byte_width);
+#endif
}
+
+ return TRUE;
}
static const pixman_fast_path_t mips_dspr2_fast_paths[] =
{
- PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, mips_composite_src_0565_0565),
- PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, mips_composite_src_0565_0565),
PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, r5g6b5, mips_composite_src_8888_0565),
PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, r5g6b5, mips_composite_src_8888_0565),
PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, b5g6r5, mips_composite_src_8888_0565),
@@ -290,15 +288,6 @@ static const pixman_fast_path_t mips_dspr2_fast_paths[] =
PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, x8r8g8b8, mips_composite_src_0565_8888),
PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, a8b8g8r8, mips_composite_src_0565_8888),
PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, x8b8g8r8, mips_composite_src_0565_8888),
- PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, mips_composite_src_8888_8888),
- PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, mips_composite_src_8888_8888),
- PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, mips_composite_src_8888_8888),
- PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, mips_composite_src_8888_8888),
- PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, mips_composite_src_8888_8888),
- PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, mips_composite_src_8888_8888),
- PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, mips_composite_src_x888_8888),
- PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, mips_composite_src_x888_8888),
- PIXMAN_STD_FAST_PATH (SRC, r8g8b8, null, r8g8b8, mips_composite_src_0888_0888),
#if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL)
PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, x8r8g8b8, mips_composite_src_0888_8888_rev),
PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, r5g6b5, mips_composite_src_0888_0565_rev),
@@ -437,10 +426,10 @@ mips_dspr2_combine_over_u (pixman_implementation_t *imp,
int width)
{
if (mask)
- pixman_composite_over_8888_8888_8888_asm_mips (
+ pixman_composite_over_8888_8888_8888_asm_mips_dspr2 (
dest, (uint32_t *)src, (uint32_t *)mask, width);
else
- pixman_composite_over_8888_8888_asm_mips (
+ pixman_composite_over_8888_8888_asm_mips_dspr2 (
dest, (uint32_t *)src, width);
}
diff --git a/pixman/pixman-mips-dspr2.h b/pixman/pixman-mips-dspr2.h
deleted file mode 100644
index 7a7fd39..0000000
--- a/pixman/pixman-mips-dspr2.h
+++ /dev/null
@@ -1,438 +0,0 @@
-/*
- * Copyright (c) 2012
- * MIPS Technologies, Inc., California.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * Author: Nemanja Lukic (nemanja.lukic at rt-rk.com)
- */
-
-#ifndef PIXMAN_MIPS_DSPR2_H
-#define PIXMAN_MIPS_DSPR2_H
-
-#include "pixman-private.h"
-#include "pixman-inlines.h"
-
-#define SKIP_ZERO_SRC 1
-#define SKIP_ZERO_MASK 2
-#define DO_FAST_MEMCPY 3
-
-void
-pixman_mips_fast_memcpy (void *dst, void *src, uint32_t n_bytes);
-void
-pixman_fill_buff16_mips (void *dst, uint32_t n_bytes, uint16_t value);
-void
-pixman_fill_buff32_mips (void *dst, uint32_t n_bytes, uint32_t value);
-
-/****************************************************************/
-
-#define PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST(flags, name, \
- src_type, src_cnt, \
- dst_type, dst_cnt) \
-void \
-pixman_composite_##name##_asm_mips (dst_type *dst, \
- src_type *src, \
- int32_t w); \
- \
-static void \
-mips_composite_##name (pixman_implementation_t *imp, \
- pixman_composite_info_t *info) \
-{ \
- PIXMAN_COMPOSITE_ARGS (info); \
- dst_type *dst_line, *dst; \
- src_type *src_line, *src; \
- int32_t dst_stride, src_stride; \
- int bpp = PIXMAN_FORMAT_BPP (dest_image->bits.format) / 8; \
- \
- PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type, \
- src_stride, src_line, src_cnt); \
- PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type, \
- dst_stride, dst_line, dst_cnt); \
- \
- while (height--) \
- { \
- dst = dst_line; \
- dst_line += dst_stride; \
- src = src_line; \
- src_line += src_stride; \
- \
- if (flags == DO_FAST_MEMCPY) \
- pixman_mips_fast_memcpy (dst, src, width * bpp); \
- else \
- pixman_composite_##name##_asm_mips (dst, src, width); \
- } \
-}
-
-/****************************************************************/
-
-#define PIXMAN_MIPS_BIND_FAST_PATH_N_DST(flags, name, \
- dst_type, dst_cnt) \
-void \
-pixman_composite_##name##_asm_mips (dst_type *dst, \
- uint32_t src, \
- int32_t w); \
- \
-static void \
-mips_composite_##name (pixman_implementation_t *imp, \
- pixman_composite_info_t *info) \
-{ \
- PIXMAN_COMPOSITE_ARGS (info); \
- dst_type *dst_line, *dst; \
- int32_t dst_stride; \
- uint32_t src; \
- \
- src = _pixman_image_get_solid ( \
- imp, src_image, dest_image->bits.format); \
- \
- if ((flags & SKIP_ZERO_SRC) && src == 0) \
- return; \
- \
- PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type, \
- dst_stride, dst_line, dst_cnt); \
- \
- while (height--) \
- { \
- dst = dst_line; \
- dst_line += dst_stride; \
- \
- pixman_composite_##name##_asm_mips (dst, src, width); \
- } \
-}
-
-/*******************************************************************/
-
-#define PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST(flags, name, \
- mask_type, mask_cnt, \
- dst_type, dst_cnt) \
-void \
-pixman_composite_##name##_asm_mips (dst_type *dst, \
- uint32_t src, \
- mask_type *mask, \
- int32_t w); \
- \
-static void \
-mips_composite_##name (pixman_implementation_t *imp, \
- pixman_composite_info_t *info) \
-{ \
- PIXMAN_COMPOSITE_ARGS (info); \
- dst_type *dst_line, *dst; \
- mask_type *mask_line, *mask; \
- int32_t dst_stride, mask_stride; \
- uint32_t src; \
- \
- src = _pixman_image_get_solid ( \
- imp, src_image, dest_image->bits.format); \
- \
- if ((flags & SKIP_ZERO_SRC) && src == 0) \
- return; \
- \
- PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type, \
- dst_stride, dst_line, dst_cnt); \
- PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type, \
- mask_stride, mask_line, mask_cnt); \
- \
- while (height--) \
- { \
- dst = dst_line; \
- dst_line += dst_stride; \
- mask = mask_line; \
- mask_line += mask_stride; \
- pixman_composite_##name##_asm_mips (dst, src, mask, width); \
- } \
-}
-
-/*******************************************************************/
-
-#define PIXMAN_MIPS_BIND_FAST_PATH_SRC_N_DST(flags, name, \
- src_type, src_cnt, \
- dst_type, dst_cnt) \
-void \
-pixman_composite_##name##_asm_mips (dst_type *dst, \
- src_type *src, \
- uint32_t mask, \
- int32_t w); \
- \
-static void \
-mips_composite_##name (pixman_implementation_t *imp, \
- pixman_composite_info_t *info) \
-{ \
- PIXMAN_COMPOSITE_ARGS (info); \
- dst_type *dst_line, *dst; \
- src_type *src_line, *src; \
- int32_t dst_stride, src_stride; \
- uint32_t mask; \
- \
- mask = _pixman_image_get_solid ( \
- imp, mask_image, dest_image->bits.format); \
- \
- if ((flags & SKIP_ZERO_MASK) && mask == 0) \
- return; \
- \
- PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type, \
- dst_stride, dst_line, dst_cnt); \
- PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type, \
- src_stride, src_line, src_cnt); \
- \
- while (height--) \
- { \
- dst = dst_line; \
- dst_line += dst_stride; \
- src = src_line; \
- src_line += src_stride; \
- \
- pixman_composite_##name##_asm_mips (dst, src, mask, width); \
- } \
-}
-
-/************************************************************************/
-
-#define PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST(name, src_type, src_cnt, \
- mask_type, mask_cnt, \
- dst_type, dst_cnt) \
-void \
-pixman_composite_##name##_asm_mips (dst_type *dst, \
- src_type *src, \
- mask_type *mask, \
- int32_t w); \
- \
-static void \
-mips_composite_##name (pixman_implementation_t *imp, \
- pixman_composite_info_t *info) \
-{ \
- PIXMAN_COMPOSITE_ARGS (info); \
- dst_type *dst_line, *dst; \
- src_type *src_line, *src; \
- mask_type *mask_line, *mask; \
- int32_t dst_stride, src_stride, mask_stride; \
- \
- PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type, \
- dst_stride, dst_line, dst_cnt); \
- PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type, \
- src_stride, src_line, src_cnt); \
- PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type, \
- mask_stride, mask_line, mask_cnt); \
- \
- while (height--) \
- { \
- dst = dst_line; \
- dst_line += dst_stride; \
- mask = mask_line; \
- mask_line += mask_stride; \
- src = src_line; \
- src_line += src_stride; \
- pixman_composite_##name##_asm_mips (dst, src, mask, width); \
- } \
-}
-
-/****************************************************************************/
-
-#define PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_DST(name, op, \
- src_type, dst_type) \
-void \
-pixman_scaled_nearest_scanline_##name##_##op##_asm_mips ( \
- dst_type * dst, \
- const src_type * src, \
- int32_t w, \
- pixman_fixed_t vx, \
- pixman_fixed_t unit_x); \
- \
-static force_inline void \
-scaled_nearest_scanline_mips_##name##_##op (dst_type * pd, \
- const src_type * ps, \
- int32_t w, \
- pixman_fixed_t vx, \
- pixman_fixed_t unit_x, \
- pixman_fixed_t max_vx, \
- pixman_bool_t zero_src) \
-{ \
- pixman_scaled_nearest_scanline_##name##_##op##_asm_mips (pd, ps, w, \
- vx, unit_x); \
-} \
- \
-FAST_NEAREST_MAINLOOP (mips_##name##_cover_##op, \
- scaled_nearest_scanline_mips_##name##_##op, \
- src_type, dst_type, COVER) \
-FAST_NEAREST_MAINLOOP (mips_##name##_none_##op, \
- scaled_nearest_scanline_mips_##name##_##op, \
- src_type, dst_type, NONE) \
-FAST_NEAREST_MAINLOOP (mips_##name##_pad_##op, \
- scaled_nearest_scanline_mips_##name##_##op, \
- src_type, dst_type, PAD)
-
-/* Provide entries for the fast path table */
-#define PIXMAN_MIPS_SIMPLE_NEAREST_FAST_PATH(op,s,d,func) \
- SIMPLE_NEAREST_FAST_PATH_COVER (op,s,d,func), \
- SIMPLE_NEAREST_FAST_PATH_NONE (op,s,d,func), \
- SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func)
-
-
-/*****************************************************************************/
-
-#define PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_A8_DST(flags, name, op, \
- src_type, dst_type) \
-void \
-pixman_scaled_nearest_scanline_##name##_##op##_asm_mips ( \
- dst_type * dst, \
- const src_type * src, \
- const uint8_t * mask, \
- int32_t w, \
- pixman_fixed_t vx, \
- pixman_fixed_t unit_x); \
- \
-static force_inline void \
-scaled_nearest_scanline_mips_##name##_##op (const uint8_t * mask, \
- dst_type * pd, \
- const src_type * ps, \
- int32_t w, \
- pixman_fixed_t vx, \
- pixman_fixed_t unit_x, \
- pixman_fixed_t max_vx, \
- pixman_bool_t zero_src) \
-{ \
- if ((flags & SKIP_ZERO_SRC) && zero_src) \
- return; \
- pixman_scaled_nearest_scanline_##name##_##op##_asm_mips (pd, ps, \
- mask, w, \
- vx, unit_x); \
-} \
- \
-FAST_NEAREST_MAINLOOP_COMMON (mips_##name##_cover_##op, \
- scaled_nearest_scanline_mips_##name##_##op, \
- src_type, uint8_t, dst_type, COVER, TRUE, FALSE)\
-FAST_NEAREST_MAINLOOP_COMMON (mips_##name##_none_##op, \
- scaled_nearest_scanline_mips_##name##_##op, \
- src_type, uint8_t, dst_type, NONE, TRUE, FALSE) \
-FAST_NEAREST_MAINLOOP_COMMON (mips_##name##_pad_##op, \
- scaled_nearest_scanline_mips_##name##_##op, \
- src_type, uint8_t, dst_type, PAD, TRUE, FALSE)
-
-/* Provide entries for the fast path table */
-#define PIXMAN_MIPS_SIMPLE_NEAREST_A8_MASK_FAST_PATH(op,s,d,func) \
- SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER (op,s,d,func), \
- SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE (op,s,d,func), \
- SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD (op,s,d,func)
-
-/****************************************************************************/
-
-#define PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST(flags, name, op, \
- src_type, dst_type) \
-void \
-pixman_scaled_bilinear_scanline_##name##_##op##_asm_mips( \
- dst_type * dst, \
- const src_type * src_top, \
- const src_type * src_bottom, \
- int32_t w, \
- int wt, \
- int wb, \
- pixman_fixed_t vx, \
- pixman_fixed_t unit_x); \
-static force_inline void \
-scaled_bilinear_scanline_mips_##name##_##op (dst_type * dst, \
- const uint32_t * mask, \
- const src_type * src_top, \
- const src_type * src_bottom, \
- int32_t w, \
- int wt, \
- int wb, \
- pixman_fixed_t vx, \
- pixman_fixed_t unit_x, \
- pixman_fixed_t max_vx, \
- pixman_bool_t zero_src) \
-{ \
- if ((flags & SKIP_ZERO_SRC) && zero_src) \
- return; \
- pixman_scaled_bilinear_scanline_##name##_##op##_asm_mips (dst, src_top, \
- src_bottom, w, \
- wt, wb, \
- vx, unit_x); \
-} \
- \
-FAST_BILINEAR_MAINLOOP_COMMON (mips_##name##_cover_##op, \
- scaled_bilinear_scanline_mips_##name##_##op, \
- src_type, uint32_t, dst_type, COVER, FLAG_NONE) \
-FAST_BILINEAR_MAINLOOP_COMMON (mips_##name##_none_##op, \
- scaled_bilinear_scanline_mips_##name##_##op, \
- src_type, uint32_t, dst_type, NONE, FLAG_NONE) \
-FAST_BILINEAR_MAINLOOP_COMMON (mips_##name##_pad_##op, \
- scaled_bilinear_scanline_mips_##name##_##op, \
- src_type, uint32_t, dst_type, PAD, FLAG_NONE) \
-FAST_BILINEAR_MAINLOOP_COMMON (mips_##name##_normal_##op, \
- scaled_bilinear_scanline_mips_##name##_##op, \
- src_type, uint32_t, dst_type, NORMAL, \
- FLAG_NONE)
-
-/*****************************************************************************/
-
-#define PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_A8_DST(flags, name, op, \
- src_type, dst_type) \
-void \
-pixman_scaled_bilinear_scanline_##name##_##op##_asm_mips ( \
- dst_type * dst, \
- const uint8_t * mask, \
- const src_type * top, \
- const src_type * bottom, \
- int wt, \
- int wb, \
- pixman_fixed_t x, \
- pixman_fixed_t ux, \
- int width); \
- \
-static force_inline void \
-scaled_bilinear_scanline_mips_##name##_##op (dst_type * dst, \
- const uint8_t * mask, \
- const src_type * src_top, \
- const src_type * src_bottom, \
- int32_t w, \
- int wt, \
- int wb, \
- pixman_fixed_t vx, \
- pixman_fixed_t unit_x, \
- pixman_fixed_t max_vx, \
- pixman_bool_t zero_src) \
-{ \
- if ((flags & SKIP_ZERO_SRC) && zero_src) \
- return; \
- pixman_scaled_bilinear_scanline_##name##_##op##_asm_mips ( \
- dst, mask, src_top, src_bottom, wt, wb, vx, unit_x, w); \
-} \
- \
-FAST_BILINEAR_MAINLOOP_COMMON (mips_##name##_cover_##op, \
- scaled_bilinear_scanline_mips_##name##_##op, \
- src_type, uint8_t, dst_type, COVER, \
- FLAG_HAVE_NON_SOLID_MASK) \
-FAST_BILINEAR_MAINLOOP_COMMON (mips_##name##_none_##op, \
- scaled_bilinear_scanline_mips_##name##_##op, \
- src_type, uint8_t, dst_type, NONE, \
- FLAG_HAVE_NON_SOLID_MASK) \
-FAST_BILINEAR_MAINLOOP_COMMON (mips_##name##_pad_##op, \
- scaled_bilinear_scanline_mips_##name##_##op, \
- src_type, uint8_t, dst_type, PAD, \
- FLAG_HAVE_NON_SOLID_MASK) \
-FAST_BILINEAR_MAINLOOP_COMMON (mips_##name##_normal_##op, \
- scaled_bilinear_scanline_mips_##name##_##op, \
- src_type, uint8_t, dst_type, NORMAL, \
- FLAG_HAVE_NON_SOLID_MASK)
-
-#endif //PIXMAN_MIPS_DSPR2_H
diff --git a/pixman/pixman-mips-memcpy-asm.S b/pixman/pixman-mips-memcpy-asm.S
deleted file mode 100644
index 9ad6da5..0000000
--- a/pixman/pixman-mips-memcpy-asm.S
+++ /dev/null
@@ -1,382 +0,0 @@
-/*
- * Copyright (c) 2012
- * MIPS Technologies, Inc., California.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include "pixman-mips-dspr2-asm.h"
-
-/*
- * This routine could be optimized for MIPS64. The current code only
- * uses MIPS32 instructions.
- */
-
-#ifdef EB
-# define LWHI lwl /* high part is left in big-endian */
-# define SWHI swl /* high part is left in big-endian */
-# define LWLO lwr /* low part is right in big-endian */
-# define SWLO swr /* low part is right in big-endian */
-#else
-# define LWHI lwr /* high part is right in little-endian */
-# define SWHI swr /* high part is right in little-endian */
-# define LWLO lwl /* low part is left in big-endian */
-# define SWLO swl /* low part is left in big-endian */
-#endif
-
-LEAF_MIPS32R2(pixman_mips_fast_memcpy)
-
- slti AT, a2, 8
- bne AT, zero, $last8
- move v0, a0 /* memcpy returns the dst pointer */
-
-/* Test if the src and dst are word-aligned, or can be made word-aligned */
- xor t8, a1, a0
- andi t8, t8, 0x3 /* t8 is a0/a1 word-displacement */
-
- bne t8, zero, $unaligned
- negu a3, a0
-
- andi a3, a3, 0x3 /* we need to copy a3 bytes to make a0/a1 aligned */
- beq a3, zero, $chk16w /* when a3=0 then the dst (a0) is word-aligned */
- subu a2, a2, a3 /* now a2 is the remining bytes count */
-
- LWHI t8, 0(a1)
- addu a1, a1, a3
- SWHI t8, 0(a0)
- addu a0, a0, a3
-
-/* Now the dst/src are mutually word-aligned with word-aligned addresses */
-$chk16w: andi t8, a2, 0x3f /* any whole 64-byte chunks? */
- /* t8 is the byte count after 64-byte chunks */
-
- beq a2, t8, $chk8w /* if a2==t8, no 64-byte chunks */
- /* There will be at most 1 32-byte chunk after it */
- subu a3, a2, t8 /* subtract from a2 the reminder */
- /* Here a3 counts bytes in 16w chunks */
- addu a3, a0, a3 /* Now a3 is the final dst after 64-byte chunks */
-
- addu t0, a0, a2 /* t0 is the "past the end" address */
-
-/*
- * When in the loop we exercise "pref 30, x(a0)", the a0+x should not be past
- * the "t0-32" address
- * This means: for x=128 the last "safe" a0 address is "t0-160"
- * Alternatively, for x=64 the last "safe" a0 address is "t0-96"
- * In the current version we use "pref 30, 128(a0)", so "t0-160" is the limit
- */
- subu t9, t0, 160 /* t9 is the "last safe pref 30, 128(a0)" address */
-
- pref 0, 0(a1) /* bring the first line of src, addr 0 */
- pref 0, 32(a1) /* bring the second line of src, addr 32 */
- pref 0, 64(a1) /* bring the third line of src, addr 64 */
- pref 30, 32(a0) /* safe, as we have at least 64 bytes ahead */
-/* In case the a0 > t9 don't use "pref 30" at all */
- sgtu v1, a0, t9
- bgtz v1, $loop16w /* skip "pref 30, 64(a0)" for too short arrays */
- nop
-/* otherwise, start with using pref30 */
- pref 30, 64(a0)
-$loop16w:
- pref 0, 96(a1)
- lw t0, 0(a1)
- bgtz v1, $skip_pref30_96 /* skip "pref 30, 96(a0)" */
- lw t1, 4(a1)
- pref 30, 96(a0) /* continue setting up the dest, addr 96 */
-$skip_pref30_96:
- lw t2, 8(a1)
- lw t3, 12(a1)
- lw t4, 16(a1)
- lw t5, 20(a1)
- lw t6, 24(a1)
- lw t7, 28(a1)
- pref 0, 128(a1) /* bring the next lines of src, addr 128 */
-
- sw t0, 0(a0)
- sw t1, 4(a0)
- sw t2, 8(a0)
- sw t3, 12(a0)
- sw t4, 16(a0)
- sw t5, 20(a0)
- sw t6, 24(a0)
- sw t7, 28(a0)
-
- lw t0, 32(a1)
- bgtz v1, $skip_pref30_128 /* skip "pref 30, 128(a0)" */
- lw t1, 36(a1)
- pref 30, 128(a0) /* continue setting up the dest, addr 128 */
-$skip_pref30_128:
- lw t2, 40(a1)
- lw t3, 44(a1)
- lw t4, 48(a1)
- lw t5, 52(a1)
- lw t6, 56(a1)
- lw t7, 60(a1)
- pref 0, 160(a1) /* bring the next lines of src, addr 160 */
-
- sw t0, 32(a0)
- sw t1, 36(a0)
- sw t2, 40(a0)
- sw t3, 44(a0)
- sw t4, 48(a0)
- sw t5, 52(a0)
- sw t6, 56(a0)
- sw t7, 60(a0)
-
- addiu a0, a0, 64 /* adding 64 to dest */
- sgtu v1, a0, t9
- bne a0, a3, $loop16w
- addiu a1, a1, 64 /* adding 64 to src */
- move a2, t8
-
-/* Here we have src and dest word-aligned but less than 64-bytes to go */
-
-$chk8w:
- pref 0, 0x0(a1)
- andi t8, a2, 0x1f /* is there a 32-byte chunk? */
- /* the t8 is the reminder count past 32-bytes */
- beq a2, t8, $chk1w /* when a2=t8, no 32-byte chunk */
- nop
-
- lw t0, 0(a1)
- lw t1, 4(a1)
- lw t2, 8(a1)
- lw t3, 12(a1)
- lw t4, 16(a1)
- lw t5, 20(a1)
- lw t6, 24(a1)
- lw t7, 28(a1)
- addiu a1, a1, 32
-
- sw t0, 0(a0)
- sw t1, 4(a0)
- sw t2, 8(a0)
- sw t3, 12(a0)
- sw t4, 16(a0)
- sw t5, 20(a0)
- sw t6, 24(a0)
- sw t7, 28(a0)
- addiu a0, a0, 32
-
-$chk1w:
- andi a2, t8, 0x3 /* now a2 is the reminder past 1w chunks */
- beq a2, t8, $last8
- subu a3, t8, a2 /* a3 is count of bytes in 1w chunks */
- addu a3, a0, a3 /* now a3 is the dst address past the 1w chunks */
-
-/* copying in words (4-byte chunks) */
-$wordCopy_loop:
- lw t3, 0(a1) /* the first t3 may be equal t0 ... optimize? */
- addiu a1, a1, 4
- addiu a0, a0, 4
- bne a0, a3, $wordCopy_loop
- sw t3, -4(a0)
-
-/* For the last (<8) bytes */
-$last8:
- blez a2, leave
- addu a3, a0, a2 /* a3 is the last dst address */
-$last8loop:
- lb v1, 0(a1)
- addiu a1, a1, 1
- addiu a0, a0, 1
- bne a0, a3, $last8loop
- sb v1, -1(a0)
-
-leave: j ra
- nop
-
-/*
- * UNALIGNED case
- */
-
-$unaligned:
- /* got here with a3="negu a0" */
- andi a3, a3, 0x3 /* test if the a0 is word aligned */
- beqz a3, $ua_chk16w
- subu a2, a2, a3 /* bytes left after initial a3 bytes */
-
- LWHI v1, 0(a1)
- LWLO v1, 3(a1)
- addu a1, a1, a3 /* a3 may be here 1, 2 or 3 */
- SWHI v1, 0(a0)
- addu a0, a0, a3 /* below the dst will be word aligned (NOTE1) */
-
-$ua_chk16w: andi t8, a2, 0x3f /* any whole 64-byte chunks? */
- /* t8 is the byte count after 64-byte chunks */
- beq a2, t8, $ua_chk8w /* if a2==t8, no 64-byte chunks */
- /* There will be at most 1 32-byte chunk after it */
- subu a3, a2, t8 /* subtract from a2 the reminder */
- /* Here a3 counts bytes in 16w chunks */
- addu a3, a0, a3 /* Now a3 is the final dst after 64-byte chunks */
-
- addu t0, a0, a2 /* t0 is the "past the end" address */
-
- subu t9, t0, 160 /* t9 is the "last safe pref 30, 128(a0)" address */
-
- pref 0, 0(a1) /* bring the first line of src, addr 0 */
- pref 0, 32(a1) /* bring the second line of src, addr 32 */
- pref 0, 64(a1) /* bring the third line of src, addr 64 */
- pref 30, 32(a0) /* safe, as we have at least 64 bytes ahead */
-/* In case the a0 > t9 don't use "pref 30" at all */
- sgtu v1, a0, t9
- bgtz v1, $ua_loop16w /* skip "pref 30, 64(a0)" for too short arrays */
- nop
-/* otherwise, start with using pref30 */
- pref 30, 64(a0)
-$ua_loop16w:
- pref 0, 96(a1)
- LWHI t0, 0(a1)
- LWLO t0, 3(a1)
- LWHI t1, 4(a1)
- bgtz v1, $ua_skip_pref30_96
- LWLO t1, 7(a1)
- pref 30, 96(a0) /* continue setting up the dest, addr 96 */
-$ua_skip_pref30_96:
- LWHI t2, 8(a1)
- LWLO t2, 11(a1)
- LWHI t3, 12(a1)
- LWLO t3, 15(a1)
- LWHI t4, 16(a1)
- LWLO t4, 19(a1)
- LWHI t5, 20(a1)
- LWLO t5, 23(a1)
- LWHI t6, 24(a1)
- LWLO t6, 27(a1)
- LWHI t7, 28(a1)
- LWLO t7, 31(a1)
- pref 0, 128(a1) /* bring the next lines of src, addr 128 */
-
- sw t0, 0(a0)
- sw t1, 4(a0)
- sw t2, 8(a0)
- sw t3, 12(a0)
- sw t4, 16(a0)
- sw t5, 20(a0)
- sw t6, 24(a0)
- sw t7, 28(a0)
-
- LWHI t0, 32(a1)
- LWLO t0, 35(a1)
- LWHI t1, 36(a1)
- bgtz v1, $ua_skip_pref30_128
- LWLO t1, 39(a1)
- pref 30, 128(a0) /* continue setting up the dest, addr 128 */
-$ua_skip_pref30_128:
- LWHI t2, 40(a1)
- LWLO t2, 43(a1)
- LWHI t3, 44(a1)
- LWLO t3, 47(a1)
- LWHI t4, 48(a1)
- LWLO t4, 51(a1)
- LWHI t5, 52(a1)
- LWLO t5, 55(a1)
- LWHI t6, 56(a1)
- LWLO t6, 59(a1)
- LWHI t7, 60(a1)
- LWLO t7, 63(a1)
- pref 0, 160(a1) /* bring the next lines of src, addr 160 */
-
- sw t0, 32(a0)
- sw t1, 36(a0)
- sw t2, 40(a0)
- sw t3, 44(a0)
- sw t4, 48(a0)
- sw t5, 52(a0)
- sw t6, 56(a0)
- sw t7, 60(a0)
-
- addiu a0, a0, 64 /* adding 64 to dest */
- sgtu v1, a0, t9
- bne a0, a3, $ua_loop16w
- addiu a1, a1, 64 /* adding 64 to src */
- move a2, t8
-
-/* Here we have src and dest word-aligned but less than 64-bytes to go */
-
-$ua_chk8w:
- pref 0, 0x0(a1)
- andi t8, a2, 0x1f /* is there a 32-byte chunk? */
- /* the t8 is the reminder count */
- beq a2, t8, $ua_chk1w /* when a2=t8, no 32-byte chunk */
-
- LWHI t0, 0(a1)
- LWLO t0, 3(a1)
- LWHI t1, 4(a1)
- LWLO t1, 7(a1)
- LWHI t2, 8(a1)
- LWLO t2, 11(a1)
- LWHI t3, 12(a1)
- LWLO t3, 15(a1)
- LWHI t4, 16(a1)
- LWLO t4, 19(a1)
- LWHI t5, 20(a1)
- LWLO t5, 23(a1)
- LWHI t6, 24(a1)
- LWLO t6, 27(a1)
- LWHI t7, 28(a1)
- LWLO t7, 31(a1)
- addiu a1, a1, 32
-
- sw t0, 0(a0)
- sw t1, 4(a0)
- sw t2, 8(a0)
- sw t3, 12(a0)
- sw t4, 16(a0)
- sw t5, 20(a0)
- sw t6, 24(a0)
- sw t7, 28(a0)
- addiu a0, a0, 32
-
-$ua_chk1w:
- andi a2, t8, 0x3 /* now a2 is the reminder past 1w chunks */
- beq a2, t8, $ua_smallCopy
- subu a3, t8, a2 /* a3 is count of bytes in 1w chunks */
- addu a3, a0, a3 /* now a3 is the dst address past the 1w chunks */
-
-/* copying in words (4-byte chunks) */
-$ua_wordCopy_loop:
- LWHI v1, 0(a1)
- LWLO v1, 3(a1)
- addiu a1, a1, 4
- addiu a0, a0, 4 /* note: dst=a0 is word aligned here, see NOTE1 */
- bne a0, a3, $ua_wordCopy_loop
- sw v1, -4(a0)
-
-/* Now less than 4 bytes (value in a2) left to copy */
-$ua_smallCopy:
- beqz a2, leave
- addu a3, a0, a2 /* a3 is the last dst address */
-$ua_smallCopy_loop:
- lb v1, 0(a1)
- addiu a1, a1, 1
- addiu a0, a0, 1
- bne a0, a3, $ua_smallCopy_loop
- sb v1, -1(a0)
-
- j ra
- nop
-
-END(pixman_mips_fast_memcpy)
diff --git a/pixman/pixman-mips32r2-asm.S b/pixman/pixman-mips32r2-asm.S
index 468937c..3f73e41 100644
--- a/pixman/pixman-mips32r2-asm.S
+++ b/pixman/pixman-mips32r2-asm.S
@@ -28,3 +28,492 @@
*/
#include "pixman-mips32r2-asm.h"
+
+/*
+ * This routine could be optimized for MIPS64. The current code only
+ * uses MIPS32 instructions.
+ */
+
+#ifdef EB
+# define LWHI lwl /* high part is left in big-endian */
+# define SWHI swl /* high part is left in big-endian */
+# define LWLO lwr /* low part is right in big-endian */
+# define SWLO swr /* low part is right in big-endian */
+#else
+# define LWHI lwr /* high part is right in little-endian */
+# define SWHI swr /* high part is right in little-endian */
+# define LWLO lwl /* low part is left in little-endian */
+# define SWLO swl /* low part is left in little-endian */
+#endif
+
+LEAF_MIPS32R2(pixman_fast_memcpy)
+
+ slti AT, a2, 8
+ bne AT, zero, $last8
+ move v0, a0 /* memcpy returns the dst pointer */
+
+/* Test if the src and dst are word-aligned, or can be made word-aligned */
+ xor t8, a1, a0
+ andi t8, t8, 0x3 /* t8 is a0/a1 word-displacement */
+
+ bne t8, zero, $unaligned
+ negu a3, a0
+
+ andi a3, a3, 0x3 /* we need to copy a3 bytes to make a0/a1 aligned */
+ beq a3, zero, $chk16w /* when a3=0 then the dst (a0) is word-aligned */
+ subu a2, a2, a3 /* now a2 is the remining bytes count */
+
+ LWHI t8, 0(a1)
+ addu a1, a1, a3
+ SWHI t8, 0(a0)
+ addu a0, a0, a3
+
+/* Now the dst/src are mutually word-aligned with word-aligned addresses */
+$chk16w: andi t8, a2, 0x3f /* any whole 64-byte chunks? */
+ /* t8 is the byte count after 64-byte chunks */
+
+ beq a2, t8, $chk8w /* if a2==t8, no 64-byte chunks */
+ /* There will be at most 1 32-byte chunk after it */
+ subu a3, a2, t8 /* subtract from a2 the reminder */
+ /* Here a3 counts bytes in 16w chunks */
+ addu a3, a0, a3 /* Now a3 is the final dst after 64-byte chunks */
+
+ addu t0, a0, a2 /* t0 is the "past the end" address */
+
+/*
+ * When in the loop we exercise "pref 30, x(a0)", the a0+x should not be past
+ * the "t0-32" address
+ * This means: for x=128 the last "safe" a0 address is "t0-160"
+ * Alternatively, for x=64 the last "safe" a0 address is "t0-96"
+ * In the current version we use "pref 30, 128(a0)", so "t0-160" is the limit
+ */
+ subu t9, t0, 160 /* t9 is the "last safe pref 30, 128(a0)" address */
+
+ pref 0, 0(a1) /* bring the first line of src, addr 0 */
+ pref 0, 32(a1) /* bring the second line of src, addr 32 */
+ pref 0, 64(a1) /* bring the third line of src, addr 64 */
+ pref 30, 32(a0) /* safe, as we have at least 64 bytes ahead */
+/* In case the a0 > t9 don't use "pref 30" at all */
+ sgtu v1, a0, t9
+ bgtz v1, $loop16w /* skip "pref 30, 64(a0)" for too short arrays */
+ nop
+/* otherwise, start with using pref30 */
+ pref 30, 64(a0)
+$loop16w:
+ pref 0, 96(a1)
+ lw t0, 0(a1)
+ bgtz v1, $skip_pref30_96 /* skip "pref 30, 96(a0)" */
+ lw t1, 4(a1)
+ pref 30, 96(a0) /* continue setting up the dest, addr 96 */
+$skip_pref30_96:
+ lw t2, 8(a1)
+ lw t3, 12(a1)
+ lw t4, 16(a1)
+ lw t5, 20(a1)
+ lw t6, 24(a1)
+ lw t7, 28(a1)
+ pref 0, 128(a1) /* bring the next lines of src, addr 128 */
+
+ sw t0, 0(a0)
+ sw t1, 4(a0)
+ sw t2, 8(a0)
+ sw t3, 12(a0)
+ sw t4, 16(a0)
+ sw t5, 20(a0)
+ sw t6, 24(a0)
+ sw t7, 28(a0)
+
+ lw t0, 32(a1)
+ bgtz v1, $skip_pref30_128 /* skip "pref 30, 128(a0)" */
+ lw t1, 36(a1)
+ pref 30, 128(a0) /* continue setting up the dest, addr 128 */
+$skip_pref30_128:
+ lw t2, 40(a1)
+ lw t3, 44(a1)
+ lw t4, 48(a1)
+ lw t5, 52(a1)
+ lw t6, 56(a1)
+ lw t7, 60(a1)
+ pref 0, 160(a1) /* bring the next lines of src, addr 160 */
+
+ sw t0, 32(a0)
+ sw t1, 36(a0)
+ sw t2, 40(a0)
+ sw t3, 44(a0)
+ sw t4, 48(a0)
+ sw t5, 52(a0)
+ sw t6, 56(a0)
+ sw t7, 60(a0)
+
+ addiu a0, a0, 64 /* adding 64 to dest */
+ sgtu v1, a0, t9
+ bne a0, a3, $loop16w
+ addiu a1, a1, 64 /* adding 64 to src */
+ move a2, t8
+
+/* Here we have src and dest word-aligned but less than 64-bytes to go */
+
+$chk8w:
+ pref 0, 0x0(a1)
+ andi t8, a2, 0x1f /* is there a 32-byte chunk? */
+ /* the t8 is the reminder count past 32-bytes */
+ beq a2, t8, $chk1w /* when a2=t8, no 32-byte chunk */
+ nop
+
+ lw t0, 0(a1)
+ lw t1, 4(a1)
+ lw t2, 8(a1)
+ lw t3, 12(a1)
+ lw t4, 16(a1)
+ lw t5, 20(a1)
+ lw t6, 24(a1)
+ lw t7, 28(a1)
+ addiu a1, a1, 32
+
+ sw t0, 0(a0)
+ sw t1, 4(a0)
+ sw t2, 8(a0)
+ sw t3, 12(a0)
+ sw t4, 16(a0)
+ sw t5, 20(a0)
+ sw t6, 24(a0)
+ sw t7, 28(a0)
+ addiu a0, a0, 32
+
+$chk1w:
+ andi a2, t8, 0x3 /* now a2 is the reminder past 1w chunks */
+ beq a2, t8, $last8
+ subu a3, t8, a2 /* a3 is count of bytes in 1w chunks */
+ addu a3, a0, a3 /* now a3 is the dst address past the 1w chunks */
+
+/* copying in words (4-byte chunks) */
+$wordCopy_loop:
+ lw t3, 0(a1) /* the first t3 may be equal t0 ... optimize? */
+ addiu a1, a1, 4
+ addiu a0, a0, 4
+ bne a0, a3, $wordCopy_loop
+ sw t3, -4(a0)
+
+/* For the last (<8) bytes */
+$last8:
+ blez a2, leave
+ addu a3, a0, a2 /* a3 is the last dst address */
+$last8loop:
+ lb v1, 0(a1)
+ addiu a1, a1, 1
+ addiu a0, a0, 1
+ bne a0, a3, $last8loop
+ sb v1, -1(a0)
+
+leave: j ra
+ nop
+
+/*
+ * UNALIGNED case
+ */
+
+$unaligned:
+ /* got here with a3="negu a0" */
+ andi a3, a3, 0x3 /* test if the a0 is word aligned */
+ beqz a3, $ua_chk16w
+ subu a2, a2, a3 /* bytes left after initial a3 bytes */
+
+ LWHI v1, 0(a1)
+ LWLO v1, 3(a1)
+ addu a1, a1, a3 /* a3 may be here 1, 2 or 3 */
+ SWHI v1, 0(a0)
+ addu a0, a0, a3 /* below the dst will be word aligned (NOTE1) */
+
+$ua_chk16w: andi t8, a2, 0x3f /* any whole 64-byte chunks? */
+ /* t8 is the byte count after 64-byte chunks */
+ beq a2, t8, $ua_chk8w /* if a2==t8, no 64-byte chunks */
+ /* There will be at most 1 32-byte chunk after it */
+ subu a3, a2, t8 /* subtract from a2 the reminder */
+ /* Here a3 counts bytes in 16w chunks */
+ addu a3, a0, a3 /* Now a3 is the final dst after 64-byte chunks */
+
+ addu t0, a0, a2 /* t0 is the "past the end" address */
+
+ subu t9, t0, 160 /* t9 is the "last safe pref 30, 128(a0)" address */
+
+ pref 0, 0(a1) /* bring the first line of src, addr 0 */
+ pref 0, 32(a1) /* bring the second line of src, addr 32 */
+ pref 0, 64(a1) /* bring the third line of src, addr 64 */
+ pref 30, 32(a0) /* safe, as we have at least 64 bytes ahead */
+/* In case the a0 > t9 don't use "pref 30" at all */
+ sgtu v1, a0, t9
+ bgtz v1, $ua_loop16w /* skip "pref 30, 64(a0)" for too short arrays */
+ nop
+/* otherwise, start with using pref30 */
+ pref 30, 64(a0)
+$ua_loop16w:
+ pref 0, 96(a1)
+ LWHI t0, 0(a1)
+ LWLO t0, 3(a1)
+ LWHI t1, 4(a1)
+ bgtz v1, $ua_skip_pref30_96
+ LWLO t1, 7(a1)
+ pref 30, 96(a0) /* continue setting up the dest, addr 96 */
+$ua_skip_pref30_96:
+ LWHI t2, 8(a1)
+ LWLO t2, 11(a1)
+ LWHI t3, 12(a1)
+ LWLO t3, 15(a1)
+ LWHI t4, 16(a1)
+ LWLO t4, 19(a1)
+ LWHI t5, 20(a1)
+ LWLO t5, 23(a1)
+ LWHI t6, 24(a1)
+ LWLO t6, 27(a1)
+ LWHI t7, 28(a1)
+ LWLO t7, 31(a1)
+ pref 0, 128(a1) /* bring the next lines of src, addr 128 */
+
+ sw t0, 0(a0)
+ sw t1, 4(a0)
+ sw t2, 8(a0)
+ sw t3, 12(a0)
+ sw t4, 16(a0)
+ sw t5, 20(a0)
+ sw t6, 24(a0)
+ sw t7, 28(a0)
+
+ LWHI t0, 32(a1)
+ LWLO t0, 35(a1)
+ LWHI t1, 36(a1)
+ bgtz v1, $ua_skip_pref30_128
+ LWLO t1, 39(a1)
+ pref 30, 128(a0) /* continue setting up the dest, addr 128 */
+$ua_skip_pref30_128:
+ LWHI t2, 40(a1)
+ LWLO t2, 43(a1)
+ LWHI t3, 44(a1)
+ LWLO t3, 47(a1)
+ LWHI t4, 48(a1)
+ LWLO t4, 51(a1)
+ LWHI t5, 52(a1)
+ LWLO t5, 55(a1)
+ LWHI t6, 56(a1)
+ LWLO t6, 59(a1)
+ LWHI t7, 60(a1)
+ LWLO t7, 63(a1)
+ pref 0, 160(a1) /* bring the next lines of src, addr 160 */
+
+ sw t0, 32(a0)
+ sw t1, 36(a0)
+ sw t2, 40(a0)
+ sw t3, 44(a0)
+ sw t4, 48(a0)
+ sw t5, 52(a0)
+ sw t6, 56(a0)
+ sw t7, 60(a0)
+
+ addiu a0, a0, 64 /* adding 64 to dest */
+ sgtu v1, a0, t9
+ bne a0, a3, $ua_loop16w
+ addiu a1, a1, 64 /* adding 64 to src */
+ move a2, t8
+
+/* Here we have src and dest word-aligned but less than 64-bytes to go */
+
+$ua_chk8w:
+ pref 0, 0x0(a1)
+ andi t8, a2, 0x1f /* is there a 32-byte chunk? */
+ /* t8 is the remainder count */
+ beq a2, t8, $ua_chk1w /* when a2=t8, no 32-byte chunk */
+
+ LWHI t0, 0(a1)
+ LWLO t0, 3(a1)
+ LWHI t1, 4(a1)
+ LWLO t1, 7(a1)
+ LWHI t2, 8(a1)
+ LWLO t2, 11(a1)
+ LWHI t3, 12(a1)
+ LWLO t3, 15(a1)
+ LWHI t4, 16(a1)
+ LWLO t4, 19(a1)
+ LWHI t5, 20(a1)
+ LWLO t5, 23(a1)
+ LWHI t6, 24(a1)
+ LWLO t6, 27(a1)
+ LWHI t7, 28(a1)
+ LWLO t7, 31(a1)
+ addiu a1, a1, 32
+
+ sw t0, 0(a0)
+ sw t1, 4(a0)
+ sw t2, 8(a0)
+ sw t3, 12(a0)
+ sw t4, 16(a0)
+ sw t5, 20(a0)
+ sw t6, 24(a0)
+ sw t7, 28(a0)
+ addiu a0, a0, 32
+
+$ua_chk1w:
+ andi a2, t8, 0x3 /* now a2 is the reminder past 1w chunks */
+ beq a2, t8, $ua_smallCopy
+ subu a3, t8, a2 /* a3 is count of bytes in 1w chunks */
+ addu a3, a0, a3 /* now a3 is the dst address past the 1w chunks */
+
+/* copying in words (4-byte chunks) */
+$ua_wordCopy_loop:
+ LWHI v1, 0(a1)
+ LWLO v1, 3(a1)
+ addiu a1, a1, 4
+ addiu a0, a0, 4 /* note: dst=a0 is word aligned here, see NOTE1 */
+ bne a0, a3, $ua_wordCopy_loop
+ sw v1, -4(a0)
+
+/* Now less than 4 bytes (value in a2) left to copy */
+$ua_smallCopy:
+ beqz a2, leave
+ addu a3, a0, a2 /* a3 is the last dst address */
+$ua_smallCopy_loop:
+ lb v1, 0(a1)
+ addiu a1, a1, 1
+ addiu a0, a0, 1
+ bne a0, a3, $ua_smallCopy_loop
+ sb v1, -1(a0)
+
+ j ra
+ nop
+
+END_MIPS32R2(pixman_fast_memcpy)
+
+LEAF_MIPS32R2(pixman_fill_buff32)
+/*
+ * Fill a buffer with a repeated 32-bit value using word stores.
+ *
+ * a0 - *dest
+ * a1 - count (bytes)
+ * a2 - value to fill buffer with
+ *
+ * Clobbers t1, a0 and a1. Only "sw" is used, so dest is presumably
+ * expected to be word aligned (TODO confirm against callers).
+ * The tail loop stores one whole word for every started group of 4
+ * bytes, so a count that is not a multiple of 4 is rounded up.
+ *
+ * The instruction following each branch is its delay slot; the explicit
+ * nops suggest the code is assembled in noreorder mode (the LEAF macro
+ * is not visible here - verify).
+ */
+
+ beqz a1, 3f /* zero count: nothing to do */
+ nop
+ srl t1, a1, 5 /* t1 how many multiples of 32 bytes */
+ beqz t1, 2f /* fewer than 32 bytes: go straight to the word loop */
+ nop
+1: /* 32-byte chunks, all but the last one */
+ addiu t1, t1, -1
+ beqz t1, 11f /* last 32-byte chunk is handled at 11: without pref */
+ addiu a1, a1, -32 /* delay slot: account for this chunk either way */
+ pref 30, 32(a0) /* hint 30 on the next 32 bytes of dest (prepare-for-store per MIPS32 ISA) */
+ sw a2, 0(a0)
+ sw a2, 4(a0)
+ sw a2, 8(a0)
+ sw a2, 12(a0)
+ sw a2, 16(a0)
+ sw a2, 20(a0)
+ sw a2, 24(a0)
+ sw a2, 28(a0)
+ b 1b
+ addiu a0, a0, 32 /* delay slot: advance dest */
+11: /* final 32-byte chunk: same stores, but no pref beyond it (presumably so the hint never touches memory past the buffer) */
+ sw a2, 0(a0)
+ sw a2, 4(a0)
+ sw a2, 8(a0)
+ sw a2, 12(a0)
+ sw a2, 16(a0)
+ sw a2, 20(a0)
+ sw a2, 24(a0)
+ sw a2, 28(a0)
+ addiu a0, a0, 32
+2: /* remaining bytes (< 32), one word per iteration */
+ blez a1, 3f /* done once the remaining count is <= 0 */
+ addiu a1, a1, -4 /* delay slot: executed whether or not the branch is taken */
+ sw a2, 0(a0)
+ b 2b
+ addiu a0, a0, 4 /* delay slot: advance dest */
+3:
+ jr ra
+ nop
+
+END_MIPS32R2(pixman_fill_buff32)
+
+LEAF_MIPS32R2(pixman_composite_src_x888_8888_asm)
+/*
+ * Copy w 32-bit pixels from src to dst, forcing the top (alpha) byte of
+ * every pixel to 0xff.
+ *
+ * a0 - dst (a8r8g8b8)
+ * a1 - src (x8r8g8b8)
+ * a2 - w
+ *
+ * w is counted in pixels. Clobbers t0-t9, a0, a1 and a2.
+ * Pixels are processed 8 at a time (32 bytes) while at least 8 remain,
+ * then one at a time. The instruction after each branch is its delay
+ * slot.
+ */
+
+ beqz a2, 4f /* w == 0: nothing to do */
+ nop
+ li t9, 0xff000000 /* alpha mask OR-ed into every pixel */
+ srl t8, a2, 3 /* t8 = how many multiples of 8 src pixels */
+ beqz t8, 3f /* branch if less than 8 src pixels */
+ nop
+1: /* 8-pixel groups, all but the last one */
+ addiu t8, t8, -1
+ beqz t8, 2f /* last 8-pixel group is handled at 2: without prefetching */
+ addiu a2, a2, -8 /* delay slot: account for this group either way */
+ pref 0, 32(a1) /* hint 0 (load) on the next 32 bytes of src */
+ lw t0, 0(a1)
+ lw t1, 4(a1)
+ lw t2, 8(a1)
+ lw t3, 12(a1)
+ lw t4, 16(a1)
+ lw t5, 20(a1)
+ lw t6, 24(a1)
+ lw t7, 28(a1)
+ addiu a1, a1, 32
+ or t0, t0, t9
+ or t1, t1, t9
+ or t2, t2, t9
+ or t3, t3, t9
+ or t4, t4, t9
+ or t5, t5, t9
+ or t6, t6, t9
+ or t7, t7, t9
+ pref 30, 32(a0) /* hint 30 on the next 32 bytes of dst (prepare-for-store per MIPS32 ISA) */
+ sw t0, 0(a0)
+ sw t1, 4(a0)
+ sw t2, 8(a0)
+ sw t3, 12(a0)
+ sw t4, 16(a0)
+ sw t5, 20(a0)
+ sw t6, 24(a0)
+ sw t7, 28(a0)
+ b 1b
+ addiu a0, a0, 32 /* delay slot: advance dst */
+2: /* final 8-pixel group: same work, no pref past the current group */
+ lw t0, 0(a1)
+ lw t1, 4(a1)
+ lw t2, 8(a1)
+ lw t3, 12(a1)
+ lw t4, 16(a1)
+ lw t5, 20(a1)
+ lw t6, 24(a1)
+ lw t7, 28(a1)
+ addiu a1, a1, 32
+ or t0, t0, t9
+ or t1, t1, t9
+ or t2, t2, t9
+ or t3, t3, t9
+ or t4, t4, t9
+ or t5, t5, t9
+ or t6, t6, t9
+ or t7, t7, t9
+ sw t0, 0(a0)
+ sw t1, 4(a0)
+ sw t2, 8(a0)
+ sw t3, 12(a0)
+ sw t4, 16(a0)
+ sw t5, 20(a0)
+ sw t6, 24(a0)
+ sw t7, 28(a0)
+ beqz a2, 4f /* no leftover pixels: return */
+ addiu a0, a0, 32 /* delay slot: advance dst */
+3: /* leftover pixels (< 8), one per iteration */
+ lw t0, 0(a1)
+ addiu a1, a1, 4
+ addiu a2, a2, -1
+ or t1, t0, t9
+ sw t1, 0(a0)
+ bnez a2, 3b
+ addiu a0, a0, 4 /* delay slot: advance dst */
+4:
+ jr ra
+ nop
+
+END_MIPS32R2(pixman_composite_src_x888_8888_asm)
diff --git a/pixman/pixman-mips32r2-asm.h b/pixman/pixman-mips32r2-asm.h
index b8f1773..2b88c00 100644
--- a/pixman/pixman-mips32r2-asm.h
+++ b/pixman/pixman-mips32r2-asm.h
@@ -32,4 +32,53 @@
#ifndef PIXMAN_MIPS32R2_ASM_H
#define PIXMAN_MIPS32R2_ASM_H
+#include "pixman-mips-common-asm.h"
+
+/*
+ * Conversion of single r5g6b5 pixel (in_565) to single a8r8g8b8 pixel
+ * returned in (out_8888) register. Requires two temporary registers
+ * (scratch1 and scratch2).
+ *
+ * Each 5- or 6-bit channel is widened to 8 bits by copying its top bits
+ * into the vacated low bits; alpha is set to 0xff.
+ *
+ * out_8888 and both scratch registers are overwritten before the last
+ * read of in_565, so none of them may be the same register as in_565.
+ */
+.macro CONVERT_1x0565_TO_1x8888 in_565, \
+ out_8888, \
+ scratch1, scratch2
+ lui \out_8888, 0xff00 /* out = 0xff000000 (alpha) */
+ sll \scratch1, \in_565, 0x3 /* blue (in bits 4:0) -> bits 7:3 */
+ andi \scratch2, \scratch1, 0xff /* keep only the blue byte */
+ ext \scratch1, \in_565, 0x2, 0x3 /* top 3 blue bits (in bits 4:2) -> bits 2:0 */
+ or \scratch1, \scratch2, \scratch1 /* 8-bit blue in bits 7:0 */
+ or \out_8888, \out_8888, \scratch1
+
+ sll \scratch1, \in_565, 0x5 /* green (in bits 10:5) -> bits 15:10 */
+ andi \scratch1, \scratch1, 0xfc00
+ srl \scratch2, \in_565, 0x1 /* top 2 green bits (in bits 10:9) -> bits 9:8 */
+ andi \scratch2, \scratch2, 0x300
+ or \scratch2, \scratch1, \scratch2 /* 8-bit green in bits 15:8 */
+ or \out_8888, \out_8888, \scratch2
+
+ andi \scratch1, \in_565, 0xf800 /* red stays in bits 15:11 */
+ srl \scratch2, \scratch1, 0x5 /* red copy -> bits 10:6 */
+ andi \scratch2, \scratch2, 0xff00 /* keep bits 10:8 = top 3 red bits */
+ or \scratch1, \scratch1, \scratch2 /* 8-bit red in bits 15:8 */
+ sll \scratch1, \scratch1, 0x8 /* move red to bits 23:16 */
+ or \out_8888, \out_8888, \scratch1
+.endm
+
+/*
+ * Conversion of single a8r8g8b8 pixel (in_8888) to single r5g6b5 pixel
+ * returned in (out_565) register. Requires two temporary registers
+ * (scratch1 and scratch2).
+ *
+ * Each channel is truncated to its top 5 (red, blue) or 6 (green) bits;
+ * the alpha byte is discarded.
+ *
+ * out_565 and scratch1 are overwritten before the last read of in_8888,
+ * so neither may be the same register as in_8888.
+ */
+.macro CONVERT_1x8888_TO_1x0565 in_8888, \
+ out_565, \
+ scratch1, scratch2
+ ext \out_565, \in_8888, 0x3, 0x5 /* top 5 blue bits (in bits 7:3) -> bits 4:0 */
+ srl \scratch1, \in_8888, 0x5 /* green (in bits 15:8) shifted down by 5 */
+ andi \scratch1, \scratch1, 0x07e0 /* keep top 6 green bits in bits 10:5 */
+ srl \scratch2, \in_8888, 0x8 /* red (in bits 23:16) shifted down by 8 */
+ andi \scratch2, \scratch2, 0xf800 /* keep top 5 red bits in bits 15:11 */
+ or \out_565, \out_565, \scratch1
+ or \out_565, \out_565, \scratch2
+.endm
+
#endif /* PIXMAN_MIPS32R2_ASM_H */
diff --git a/pixman/pixman-mips32r2.c b/pixman/pixman-mips32r2.c
index f3500bc..4a6d29e 100644
--- a/pixman/pixman-mips32r2.c
+++ b/pixman/pixman-mips32r2.c
@@ -36,8 +36,139 @@
#include "pixman-private.h"
#include "pixman-mips-common.h"
+PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, src_x888_8888, /* NOTE(review): macro is defined outside this file; presumably emits mips_composite_src_x888_8888 used in the table below - confirm */
+ uint32_t, 1, uint32_t, 1, _mips32r2) /* presumably: src type, src elements per pixel, dst type, dst elements per pixel, symbol suffix - confirm */
+PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (DO_FAST_MEMCPY, src_0565_0565, /* DO_FAST_MEMCPY flag: presumably routes same-format copies through the fast memcpy - confirm */
+ uint16_t, 1, uint16_t, 1, _mips32r2)
+PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (DO_FAST_MEMCPY, src_8888_8888,
+ uint32_t, 1, uint32_t, 1, _mips32r2)
+PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (DO_FAST_MEMCPY, src_0888_0888,
+ uint8_t, 3, uint8_t, 3, _mips32r2) /* 24 bpp: three uint8_t per pixel */
+
+/*
+ * Solid-fill a width x height rectangle at (x, y) with the value _xor.
+ *
+ * stride is expressed in uint32_t units. Only 16 and 32 bpp are handled;
+ * any other depth returns FALSE and leaves the buffer untouched.
+ */
+static pixman_bool_t
+mips32r2_fill (pixman_implementation_t *imp,
+               uint32_t *               bits,
+               int                      stride,
+               int                      bpp,
+               int                      x,
+               int                      y,
+               int                      width,
+               int                      height,
+               uint32_t                 _xor)
+{
+    if (bpp == 16)
+    {
+        /* Row pitch converted from uint32_t units to uint16_t units. */
+        int       pitch = (stride * (int)sizeof (uint32_t)) / (int)sizeof (uint16_t);
+        uint16_t  pixel = _xor & 0xffff;
+        uint16_t *row = (uint16_t *)bits + y * pitch + x;
+        int       col;
+
+        while (height--)
+        {
+            for (col = 0; col < width; ++col)
+                row[col] = pixel;
+
+            row += pitch;
+        }
+        return TRUE;
+    }
+
+    if (bpp == 32)
+    {
+        int      words_per_row = stride * (int) sizeof (uint32_t) / 4;
+        uint8_t *row = (uint8_t *)(((uint32_t *)bits) + words_per_row * y + x);
+        uint32_t row_bytes = width * 4;
+        int      pitch = words_per_row * 4; /* row pitch in bytes */
+
+        while (height--)
+        {
+            /* One scanline per call; the assembly routine takes a byte count. */
+            pixman_fill_buff32_mips32r2 (row, row_bytes, _xor);
+            row += pitch;
+        }
+        return TRUE;
+    }
+
+    /* Unsupported depth. */
+    return FALSE;
+}
+
+/*
+ * Copy a width x height rectangle from (src_x, src_y) in src_bits to
+ * (dest_x, dest_y) in dst_bits, one scanline at a time, using
+ * pixman_fast_memcpy_mips32r2.
+ *
+ * src_stride and dst_stride are expressed in uint32_t units.
+ * Returns TRUE when the copy was performed. Returns FALSE, without
+ * touching either buffer, when the depths differ or the depth is not
+ * 16 or 32 bpp.
+ */
+static pixman_bool_t
+mips32r2_blt (pixman_implementation_t *imp,
+              uint32_t *               src_bits,
+              uint32_t *               dst_bits,
+              int                      src_stride,
+              int                      dst_stride,
+              int                      src_bpp,
+              int                      dst_bpp,
+              int                      src_x,
+              int                      src_y,
+              int                      dest_x,
+              int                      dest_y,
+              int                      width,
+              int                      height)
+{
+    uint8_t *src_bytes;
+    uint8_t *dst_bytes;
+    uint32_t byte_width;
+    int      bytes_per_pixel;
+    int      src_byte_stride;
+    int      dst_byte_stride;
+
+    if (src_bpp != dst_bpp)
+        return FALSE;
+
+    /*
+     * Reject unsupported depths before doing any arithmetic: for depths
+     * below 8 bpp (src_bpp >> 3) is 0, and the previous code divided the
+     * stride by it before checking the depth.
+     */
+    if (src_bpp != 16 && src_bpp != 32)
+        return FALSE;
+
+    bytes_per_pixel = src_bpp >> 3;
+
+    /* Strides arrive in uint32_t units; work in bytes from here on. */
+    src_byte_stride = src_stride * (int) sizeof (uint32_t);
+    dst_byte_stride = dst_stride * (int) sizeof (uint32_t);
+
+    src_bytes = (uint8_t *)src_bits
+        + src_byte_stride * src_y + src_x * bytes_per_pixel;
+    dst_bytes = (uint8_t *)dst_bits
+        + dst_byte_stride * dest_y + dest_x * bytes_per_pixel;
+
+    byte_width = width * bytes_per_pixel;
+
+    while (height--)
+    {
+        pixman_fast_memcpy_mips32r2 (dst_bytes, src_bytes, byte_width);
+        src_bytes += src_byte_stride;
+        dst_bytes += dst_byte_stride;
+    }
+    return TRUE;
+}
+
static const pixman_fast_path_t mips32r2_fast_paths[] =
{
+ PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, mips_composite_src_0565_0565), /* 16 bpp, same source and destination format */
+ PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, mips_composite_src_0565_0565),
+ PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, mips_composite_src_8888_8888), /* 32 bpp; destination format has no alpha channel */
+ PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, mips_composite_src_8888_8888),
+ PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, mips_composite_src_8888_8888),
+ PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, mips_composite_src_8888_8888),
+ PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, mips_composite_src_8888_8888), /* 32 bpp, same source and destination format */
+ PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, mips_composite_src_8888_8888),
+ PIXMAN_STD_FAST_PATH (SRC, r8g8b8, null, r8g8b8, mips_composite_src_0888_0888), /* 24 bpp, same source and destination format */
+ PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, mips_composite_src_x888_8888), /* x888 -> 8888: the x888_8888 assembly routine ORs 0xff000000 into each pixel */
+ PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, mips_composite_src_x888_8888),
{ PIXMAN_OP_NONE }, /* presumably the end-of-table marker - confirm against the table walker */
};
@@ -47,5 +178,8 @@ _pixman_implementation_create_mips32r2 (pixman_implementation_t *fallback)
pixman_implementation_t *imp =
_pixman_implementation_create (fallback, mips32r2_fast_paths);
+ imp->blt = mips32r2_blt;
+ imp->fill = mips32r2_fill;
+
return imp;
}
--
1.7.3
More information about the Pixman
mailing list