[Liboil-commit] liboil/i386 liboil/i386_amd64 liboil/liboilclasses.h liboil/liboilfuncs-04.h liboil/liboilfuncs-doc.h liboil/liboilfuncs.h liboil/liboilmarshal.c liboil/liboiltest.c liboil/liboiltrampolines.c liboil/ref
David Schleef
ds at kemper.freedesktop.org
Wed Feb 13 02:25:45 PST 2008
liboil/i386/mas.c | 113 ++++++++++++++++++++++++++++++++++++++
liboil/i386_amd64/sad8x8.c | 133 +++++++++++++++++++++++++++++++++++++++++++++
liboil/liboilclasses.h | 4 +
liboil/liboilfuncs-04.h | 4 +
liboil/liboilfuncs-doc.h | 4 +
liboil/liboilfuncs.h | 12 ++++
liboil/liboilmarshal.c | 6 ++
liboil/liboiltest.c | 8 +-
liboil/liboiltrampolines.c | 40 +++++++++++++
liboil/ref/mas.c | 20 ++++++
liboil/ref/wavelet.c | 107 ++++++++++++++++++++++++++++++++++++
11 files changed, 448 insertions(+), 3 deletions(-)
New commits:
commit 90c27e2a2ef4c51fa4d18c8d2cb0c8b0fd92454f
Author: David Schleef <ds at ginger.bigkitten.com>
Date: Wed Feb 13 02:27:30 2008 -0800
Copy more functions from schro.
diff --git a/liboil/i386/mas.c b/liboil/i386/mas.c
index ebce3f6..5153ec2 100644
--- a/liboil/i386/mas.c
+++ b/liboil/i386/mas.c
@@ -860,3 +860,116 @@ OIL_DEFINE_IMPL_FULL (mas8_addc_rshift_decim2_u8_mmx_4,
#endif
+/*
+ * mas8_across_u8_mmx_3:
+ * For each of the n output bytes, multiplies 8 source bytes spaced ss1 bytes
+ * apart (starting at s1_nx8 plus the output index) by the taps s2_8[0..7],
+ * adds s3_2[0], shifts right arithmetically by s3_2[1] and stores the result
+ * limited to 0..255 in d.
+ *
+ * The first (n & 3) outputs are computed in C with an int accumulator.  The
+ * remaining outputs are produced four at a time with MMX, where products and
+ * sums are kept in 16-bit lanes (pmullw/paddw).
+ *
+ * The asm uses pshufw and pmaxsw, which is why the implementation is
+ * registered with OIL_IMPL_FLAG_MMXEXT as well as OIL_IMPL_FLAG_MMX, and it
+ * uses "addl" on a pointer register, so it only assembles for 32-bit x86.
+ */
+void
+mas8_across_u8_mmx_3 (uint8_t *d, const uint8_t *s1_nx8, int ss1,
+ const int16_t *s2_8, const int16_t *s3_2, int n)
+{
+ int i;
+ int x;
+
+ /* Scalar path: peel outputs until the remaining count is a multiple of 4. */
+ while(n&3) {
+ x = 0;
+ for(i=0;i<8;i++){
+ x += OIL_GET(s1_nx8, i*ss1, uint8_t)*s2_8[i];
+ }
+ *d = CLAMP((x + s3_2[0])>>s3_2[1],0,255);
+ d++;
+ s1_nx8++;
+ n--;
+ }
+
+ if (n == 0) return;
+ n>>=2;
+ /*
+ * Load the loop constants: mm7 = 0, mm6 low word = s3_2[0] (offset),
+ * mm5 = s3_2[1] (shift count), mm3 = taps 0..3, mm4 = taps 4..7.
+ * The "memory" clobber tells the compiler that the asm reads s2_8/s3_2
+ * through the pointers it was given.
+ */
+ __asm__ __volatile__("\n"
+ " pxor %%mm7, %%mm7\n"
+
+ " movd (%[s3_2]), %%mm6\n"
+
+ " movzwl 2(%[s3_2]), %%ecx\n"
+ " movd %%ecx, %%mm5\n"
+
+ " movq 0(%[s2_8]), %%mm3\n"
+ " movq 8(%[s2_8]), %%mm4\n"
+ :
+ : [s2_8] "r" (s2_8),
+ [s3_2] "r" (s3_2)
+ : "ecx", "memory");
+
+ while (n > 0) {
+ const uint8_t *p = s1_nx8;
+ __asm__ __volatile__("\n"
+ "1:\n"
+ /* start all four accumulator lanes at the offset s3_2[0] */
+ " pshufw $0x00, %%mm6, %%mm2\n"
+
+ " movd 0(%[p]), %%mm0\n"
+ " addl %[ss1], %[p]\n"
+ " punpcklbw %%mm7, %%mm0\n"
+ " pshufw $0x00, %%mm3, %%mm1\n"
+ " pmullw %%mm1, %%mm0\n"
+ " paddw %%mm0, %%mm2\n"
+
+ " movd 0(%[p]), %%mm0\n"
+ " addl %[ss1], %[p]\n"
+ " punpcklbw %%mm7, %%mm0\n"
+ " pshufw $0x55*1, %%mm3, %%mm1\n"
+ " pmullw %%mm1, %%mm0\n"
+ " paddw %%mm0, %%mm2\n"
+
+ " movd 0(%[p]), %%mm0\n"
+ " addl %[ss1], %[p]\n"
+ " punpcklbw %%mm7, %%mm0\n"
+ " pshufw $0x55*2, %%mm3, %%mm1\n"
+ " pmullw %%mm1, %%mm0\n"
+ " paddw %%mm0, %%mm2\n"
+
+ " movd 0(%[p]), %%mm0\n"
+ " addl %[ss1], %[p]\n"
+ " punpcklbw %%mm7, %%mm0\n"
+ " pshufw $0x55*3, %%mm3, %%mm1\n"
+ " pmullw %%mm1, %%mm0\n"
+ " paddw %%mm0, %%mm2\n"
+
+ " movd 0(%[p]), %%mm0\n"
+ " addl %[ss1], %[p]\n"
+ " punpcklbw %%mm7, %%mm0\n"
+ " pshufw $0x00, %%mm4, %%mm1\n"
+ " pmullw %%mm1, %%mm0\n"
+ " paddw %%mm0, %%mm2\n"
+
+ " movd 0(%[p]), %%mm0\n"
+ " addl %[ss1], %[p]\n"
+ " punpcklbw %%mm7, %%mm0\n"
+ " pshufw $0x55*1, %%mm4, %%mm1\n"
+ " pmullw %%mm1, %%mm0\n"
+ " paddw %%mm0, %%mm2\n"
+
+ " movd 0(%[p]), %%mm0\n"
+ " addl %[ss1], %[p]\n"
+ " punpcklbw %%mm7, %%mm0\n"
+ " pshufw $0x55*2, %%mm4, %%mm1\n"
+ " pmullw %%mm1, %%mm0\n"
+ " paddw %%mm0, %%mm2\n"
+
+ " movd 0(%[p]), %%mm0\n"
+ " addl %[ss1], %[p]\n"
+ " punpcklbw %%mm7, %%mm0\n"
+ " pshufw $0x55*3, %%mm4, %%mm1\n"
+ " pmullw %%mm1, %%mm0\n"
+ " paddw %%mm0, %%mm2\n"
+
+ /* shift by s3_2[1], floor at 0, saturate to bytes, store 4 outputs */
+ " psraw %%mm5, %%mm2\n"
+ " pmaxsw %%mm7, %%mm2\n"
+ " packuswb %%mm2, %%mm2\n"
+ " movd %%mm2, 0(%[d])\n"
+ : [p] "+r" (p)
+ : [d] "r" (d), [ss1] "r" (ss1)
+ /* the asm loads through p and stores 4 bytes through d */
+ : "memory");
+ d+=4;
+ s1_nx8+=4;
+ n--;
+ }
+
+ asm volatile ("emms");
+}
+OIL_DEFINE_IMPL_FULL (mas8_across_u8_mmx_3, mas8_across_u8, OIL_IMPL_FLAG_MMX|OIL_IMPL_FLAG_MMXEXT);
+
diff --git a/liboil/i386_amd64/sad8x8.c b/liboil/i386_amd64/sad8x8.c
index 34cf96d..07cf9f3 100644
--- a/liboil/i386_amd64/sad8x8.c
+++ b/liboil/i386_amd64/sad8x8.c
@@ -488,3 +488,136 @@ sad16x16_u8_mmxext (uint32_t * dest, uint8_t * src1, int sstr1, uint8_t * src2,
}
OIL_DEFINE_IMPL_FULL (sad16x16_u8_mmxext, sad16x16_u8, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT);
+/*
+ * combine4_12xn_u8_mmx:
+ * Writes n rows of 12 bytes to d.  Each output byte is
+ *   (s5_6[0]*s1 + s5_6[1]*s2 + s5_6[2]*s3 + s5_6[3]*s4 + s5_6[4]) >> 4
+ * computed in 16-bit lanes and saturated to a byte by packuswb.  After each
+ * row the pointers advance by ss1..ss4 and ds1 bytes respectively.
+ *
+ * NOTE: the right shift is hard-coded to 4 (psrlw $4); s5_6[5] is not used
+ * by this implementation.
+ *
+ * pshufw is an MMX-extension instruction, so the implementation must be
+ * registered with OIL_IMPL_FLAG_MMXEXT in addition to OIL_IMPL_FLAG_MMX.
+ */
+void
+combine4_12xn_u8_mmx (uint8_t *d, int ds1,
+ uint8_t *s1, int ss1,
+ uint8_t *s2, int ss2,
+ uint8_t *s3, int ss3,
+ uint8_t *s4, int ss4,
+ int16_t *s5_6, int n)
+{
+ int j;
+
+ /* mm7 = 0, mm6 = the four weights, mm4 = s5_6[4] broadcast to all lanes */
+ asm volatile ("\n"
+ " pxor %%mm7, %%mm7\n"
+ " movq 0(%0), %%mm6\n"
+ " movd 8(%0), %%mm4\n"
+ " pshufw $0x00, %%mm4, %%mm4\n"
+ ::"r" (s5_6)
+ : "memory");
+
+ for(j=0;j<n;j++){
+ asm volatile ("\n"
+#define DO_4(offset) \
+ " movd " #offset "(%1), %%mm0\n" \
+ " punpcklbw %%mm7, %%mm0\n" \
+ " pshufw $0x00, %%mm6, %%mm5\n" \
+ " pmullw %%mm5, %%mm0\n" \
+ " movd " #offset "(%2), %%mm1\n" \
+ " punpcklbw %%mm7, %%mm1\n" \
+ " pshufw $0x55, %%mm6, %%mm5\n" \
+ " pmullw %%mm5, %%mm1\n" \
+ " movd " #offset "(%3), %%mm2\n" \
+ " punpcklbw %%mm7, %%mm2\n" \
+ " pshufw $0xaa, %%mm6, %%mm5\n" \
+ " pmullw %%mm5, %%mm2\n" \
+ " movd " #offset "(%4), %%mm3\n" \
+ " punpcklbw %%mm7, %%mm3\n" \
+ " pshufw $0xff, %%mm6, %%mm5\n" \
+ " pmullw %%mm5, %%mm3\n" \
+ " paddw %%mm1, %%mm0\n" \
+ " paddw %%mm2, %%mm0\n" \
+ " paddw %%mm3, %%mm0\n" \
+ " paddw %%mm4, %%mm0\n" \
+ " psrlw $4, %%mm0\n" \
+ " packuswb %%mm0, %%mm0\n" \
+ " movd %%mm0, " #offset "(%0)\n"
+
+ DO_4(0)
+ DO_4(4)
+ DO_4(8)
+
+ :
+ : "r" (d), "r" (s1), "r" (s2), "r" (s3), "r" (s4)
+ /* the asm loads through s1..s4 and stores through d */
+ : "memory");
+
+ s1 += ss1;
+ s2 += ss2;
+ s3 += ss3;
+ s4 += ss4;
+ d += ds1;
+ }
+ asm volatile ("emms");
+}
+OIL_DEFINE_IMPL_FULL (combine4_12xn_u8_mmx, combine4_12xn_u8, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT);
+
+/*
+ * combine4_8xn_u8_mmx:
+ * Writes n rows of 8 bytes to d.  Each output byte is
+ *   (s5_6[0]*s1 + s5_6[1]*s2 + s5_6[2]*s3 + s5_6[3]*s4 + s5_6[4]) >> 4
+ * computed in 16-bit lanes and saturated to a byte by packuswb.  After each
+ * row the pointers advance by ss1..ss4 and ds1 bytes respectively.
+ *
+ * Relies on the DO_4() asm macro defined earlier in this file.  That macro
+ * hard-codes the shift to 4 and uses pshufw, an MMX-extension instruction,
+ * so the implementation must be registered with OIL_IMPL_FLAG_MMXEXT too.
+ */
+void
+combine4_8xn_u8_mmx (uint8_t *d, int ds1,
+ uint8_t *s1, int ss1,
+ uint8_t *s2, int ss2,
+ uint8_t *s3, int ss3,
+ uint8_t *s4, int ss4,
+ int16_t *s5_6, int n)
+{
+ int j;
+
+ /* mm7 = 0, mm6 = the four weights, mm4 = s5_6[4] broadcast to all lanes */
+ asm volatile ("\n"
+ " pxor %%mm7, %%mm7\n"
+ " movq 0(%0), %%mm6\n"
+ " movd 8(%0), %%mm4\n"
+ " pshufw $0x00, %%mm4, %%mm4\n"
+ ::"r" (s5_6)
+ : "memory");
+
+ for(j=0;j<n;j++){
+ asm volatile ("\n"
+ DO_4(0)
+ DO_4(4)
+
+ :
+ : "r" (d), "r" (s1), "r" (s2), "r" (s3), "r" (s4)
+ /* the asm loads through s1..s4 and stores through d */
+ : "memory");
+
+ s1 += ss1;
+ s2 += ss2;
+ s3 += ss3;
+ s4 += ss4;
+ d += ds1;
+ }
+ asm volatile ("emms");
+}
+OIL_DEFINE_IMPL_FULL (combine4_8xn_u8_mmx, combine4_8xn_u8, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT);
+
+/*
+ * combine4_16xn_u8_mmx:
+ * Writes n rows of 16 bytes to d.  Each output byte is
+ *   (s5_6[0]*s1 + s5_6[1]*s2 + s5_6[2]*s3 + s5_6[3]*s4 + s5_6[4]) >> 4
+ * computed in 16-bit lanes and saturated to a byte by packuswb.  After each
+ * row the pointers advance by ss1..ss4 and ds1 bytes respectively.
+ *
+ * Relies on the DO_4() asm macro defined earlier in this file.  That macro
+ * hard-codes the shift to 4 and uses pshufw, an MMX-extension instruction,
+ * so the implementation must be registered with OIL_IMPL_FLAG_MMXEXT too.
+ */
+void
+combine4_16xn_u8_mmx (uint8_t *d, int ds1,
+ uint8_t *s1, int ss1,
+ uint8_t *s2, int ss2,
+ uint8_t *s3, int ss3,
+ uint8_t *s4, int ss4,
+ int16_t *s5_6, int n)
+{
+ int j;
+
+ /* mm7 = 0, mm6 = the four weights, mm4 = s5_6[4] broadcast to all lanes */
+ asm volatile ("\n"
+ " pxor %%mm7, %%mm7\n"
+ " movq 0(%0), %%mm6\n"
+ " movd 8(%0), %%mm4\n"
+ " pshufw $0x00, %%mm4, %%mm4\n"
+ ::"r" (s5_6)
+ : "memory");
+
+ for(j=0;j<n;j++){
+ asm volatile ("\n"
+ DO_4(0)
+ DO_4(4)
+ DO_4(8)
+ DO_4(12)
+
+ :
+ : "r" (d), "r" (s1), "r" (s2), "r" (s3), "r" (s4)
+ /* the asm loads through s1..s4 and stores through d */
+ : "memory");
+
+ s1 += ss1;
+ s2 += ss2;
+ s3 += ss3;
+ s4 += ss4;
+ d += ds1;
+ }
+ asm volatile ("emms");
+}
+OIL_DEFINE_IMPL_FULL (combine4_16xn_u8_mmx, combine4_16xn_u8, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT);
+
diff --git a/liboil/liboilclasses.h b/liboil/liboilclasses.h
index 2ec8640..620237b 100644
--- a/liboil/liboilclasses.h
+++ b/liboil/liboilclasses.h
@@ -119,6 +119,9 @@ OIL_DECLARE_CLASS(clipconv_u8_u16);
OIL_DECLARE_CLASS(clipconv_u8_u32);
OIL_DECLARE_CLASS(colorspace_argb);
OIL_DECLARE_CLASS(colsad8x8_u8);
+OIL_DECLARE_CLASS(combine4_12xn_u8);
+OIL_DECLARE_CLASS(combine4_16xn_u8);
+OIL_DECLARE_CLASS(combine4_8xn_u8);
OIL_DECLARE_CLASS(compare_u8);
OIL_DECLARE_CLASS(composite_add_argb);
OIL_DECLARE_CLASS(composite_add_argb_const_src);
@@ -279,6 +282,7 @@ OIL_DECLARE_CLASS(mas2_add_s16);
OIL_DECLARE_CLASS(mas4_across_add_s16);
OIL_DECLARE_CLASS(mas4_add_s16);
OIL_DECLARE_CLASS(mas8_across_add_s16);
+OIL_DECLARE_CLASS(mas8_across_u8);
OIL_DECLARE_CLASS(mas8_add_s16);
OIL_DECLARE_CLASS(mas8_addc_rshift_decim2_u8);
OIL_DECLARE_CLASS(mas8_u8);
diff --git a/liboil/liboilfuncs-04.h b/liboil/liboilfuncs-04.h
index e8512e8..d831edd 100644
--- a/liboil/liboilfuncs-04.h
+++ b/liboil/liboilfuncs-04.h
@@ -119,6 +119,9 @@ void oil_clipconv_u8_u16 (uint8_t * dest, int dstr, const uint16_t * src, int ss
void oil_clipconv_u8_u32 (uint8_t * dest, int dstr, const uint32_t * src, int sstr, int n);
void oil_colorspace_argb (uint32_t * d, const uint32_t * s, const int16_t * s2_24, int n);
void oil_colsad8x8_u8 (uint32_t * d_1, const uint8_t * s1_8x8, int ss1, const uint8_t * s2_8x8, int ss2);
+void oil_combine4_12xn_u8 (uint8_t * d_12xn, int ds1, const uint8_t * s1_12xn, int ss1, const uint8_t * s2_12xn, int ss2, const uint8_t * s3_12xn, int ss3, const uint8_t * s4_12xn, int ss4, const int16_t * s5_6, int n);
+void oil_combine4_16xn_u8 (uint8_t * d_16xn, int ds1, const uint8_t * s1_16xn, int ss1, const uint8_t * s2_16xn, int ss2, const uint8_t * s3_16xn, int ss3, const uint8_t * s4_16xn, int ss4, const int16_t * s5_6, int n);
+void oil_combine4_8xn_u8 (uint8_t * d_8xn, int ds1, const uint8_t * s1_8xn, int ss1, const uint8_t * s2_8xn, int ss2, const uint8_t * s3_8xn, int ss3, const uint8_t * s4_8xn, int ss4, const int16_t * s5_6, int n);
void oil_compare_u8 (uint32_t * d_1, const uint8_t * s1, const uint8_t * s2, int n);
void oil_composite_add_argb (uint32_t * i_n, const uint32_t * s1_n, int n);
void oil_composite_add_argb_const_src (uint32_t * i_n, const uint32_t * s1_1, int n);
@@ -279,6 +282,7 @@ void oil_mas2_add_s16 (int16_t * d, const int16_t * s1, const int16_t * s2_np1,
void oil_mas4_across_add_s16 (int16_t * d, const int16_t * s1, const int16_t * s2_nx4, int sstr2, const int16_t * s3_4, const int16_t * s4_2, int n);
void oil_mas4_add_s16 (int16_t * d, const int16_t * s1, const int16_t * s2_np3, const int16_t * s3_4, const int16_t * s4_2, int n);
void oil_mas8_across_add_s16 (int16_t * d, const int16_t * s1, const int16_t * s2_nx8, int sstr2, const int16_t * s3_8, const int16_t * s4_2, int n);
+void oil_mas8_across_u8 (uint8_t * d, const uint8_t * s1_nx8, int sstr1, const int16_t * s2_8, const int16_t * s3_2, int n);
void oil_mas8_add_s16 (int16_t * d, const int16_t * s1, const int16_t * s2_np7, const int16_t * s3_8, const int16_t * s4_2, int n);
void oil_mas8_addc_rshift_decim2_u8 (uint8_t * d, const uint8_t * s1_2xnp9, const int16_t * s2_8, const int16_t * s3_2, int n);
void oil_mas8_u8 (uint8_t * d, const uint8_t * s1_np7, const int16_t * s2_8, const int16_t * s3_2, int n);
diff --git a/liboil/liboilfuncs-doc.h b/liboil/liboilfuncs-doc.h
index 7e25d90..7b21a1d 100644
--- a/liboil/liboilfuncs-doc.h
+++ b/liboil/liboilfuncs-doc.h
@@ -83,6 +83,9 @@ void oil_clipconv_u8_u16 (uint8_t * dest, int dstr, const uint16_t * src, int ss
void oil_clipconv_u8_u32 (uint8_t * dest, int dstr, const uint32_t * src, int sstr, int n);
void oil_colorspace_argb (uint32_t * d, const uint32_t * s, const int16_t * s2_24, int n);
void oil_colsad8x8_u8 (uint32_t * d_1, const uint8_t * s1_8x8, int ss1, const uint8_t * s2_8x8, int ss2);
+void oil_combine4_12xn_u8 (uint8_t * d_12xn, int ds1, const uint8_t * s1_12xn, int ss1, const uint8_t * s2_12xn, int ss2, const uint8_t * s3_12xn, int ss3, const uint8_t * s4_12xn, int ss4, const int16_t * s5_6, int n);
+void oil_combine4_16xn_u8 (uint8_t * d_16xn, int ds1, const uint8_t * s1_16xn, int ss1, const uint8_t * s2_16xn, int ss2, const uint8_t * s3_16xn, int ss3, const uint8_t * s4_16xn, int ss4, const int16_t * s5_6, int n);
+void oil_combine4_8xn_u8 (uint8_t * d_8xn, int ds1, const uint8_t * s1_8xn, int ss1, const uint8_t * s2_8xn, int ss2, const uint8_t * s3_8xn, int ss3, const uint8_t * s4_8xn, int ss4, const int16_t * s5_6, int n);
void oil_compare_u8 (uint32_t * d_1, const uint8_t * s1, const uint8_t * s2, int n);
void oil_composite_add_argb (uint32_t * i_n, const uint32_t * s1_n, int n);
void oil_composite_add_argb_const_src (uint32_t * i_n, const uint32_t * s1_1, int n);
@@ -243,6 +246,7 @@ void oil_mas2_add_s16 (int16_t * d, const int16_t * s1, const int16_t * s2_np1,
void oil_mas4_across_add_s16 (int16_t * d, const int16_t * s1, const int16_t * s2_nx4, int sstr2, const int16_t * s3_4, const int16_t * s4_2, int n);
void oil_mas4_add_s16 (int16_t * d, const int16_t * s1, const int16_t * s2_np3, const int16_t * s3_4, const int16_t * s4_2, int n);
void oil_mas8_across_add_s16 (int16_t * d, const int16_t * s1, const int16_t * s2_nx8, int sstr2, const int16_t * s3_8, const int16_t * s4_2, int n);
+void oil_mas8_across_u8 (uint8_t * d, const uint8_t * s1_nx8, int sstr1, const int16_t * s2_8, const int16_t * s3_2, int n);
void oil_mas8_add_s16 (int16_t * d, const int16_t * s1, const int16_t * s2_np7, const int16_t * s3_8, const int16_t * s4_2, int n);
void oil_mas8_addc_rshift_decim2_u8 (uint8_t * d, const uint8_t * s1_2xnp9, const int16_t * s2_8, const int16_t * s3_2, int n);
void oil_mas8_u8 (uint8_t * d, const uint8_t * s1_np7, const int16_t * s2_8, const int16_t * s3_2, int n);
diff --git a/liboil/liboilfuncs.h b/liboil/liboilfuncs.h
index d841485..3ddc8b6 100644
--- a/liboil/liboilfuncs.h
+++ b/liboil/liboilfuncs.h
@@ -285,6 +285,15 @@ typedef void (*_oil_type_colorspace_argb)(uint32_t * d, const uint32_t * s, cons
extern OilFunctionClass *oil_function_class_ptr_colsad8x8_u8;
typedef void (*_oil_type_colsad8x8_u8)(uint32_t * d_1, const uint8_t * s1_8x8, int ss1, const uint8_t * s2_8x8, int ss2);
#define oil_colsad8x8_u8 ((_oil_type_colsad8x8_u8)(*(void **)oil_function_class_ptr_colsad8x8_u8))
+extern OilFunctionClass *oil_function_class_ptr_combine4_12xn_u8;
+typedef void (*_oil_type_combine4_12xn_u8)(uint8_t * d_12xn, int ds1, const uint8_t * s1_12xn, int ss1, const uint8_t * s2_12xn, int ss2, const uint8_t * s3_12xn, int ss3, const uint8_t * s4_12xn, int ss4, const int16_t * s5_6, int n);
+#define oil_combine4_12xn_u8 ((_oil_type_combine4_12xn_u8)(*(void **)oil_function_class_ptr_combine4_12xn_u8))
+extern OilFunctionClass *oil_function_class_ptr_combine4_16xn_u8;
+typedef void (*_oil_type_combine4_16xn_u8)(uint8_t * d_16xn, int ds1, const uint8_t * s1_16xn, int ss1, const uint8_t * s2_16xn, int ss2, const uint8_t * s3_16xn, int ss3, const uint8_t * s4_16xn, int ss4, const int16_t * s5_6, int n);
+#define oil_combine4_16xn_u8 ((_oil_type_combine4_16xn_u8)(*(void **)oil_function_class_ptr_combine4_16xn_u8))
+extern OilFunctionClass *oil_function_class_ptr_combine4_8xn_u8;
+typedef void (*_oil_type_combine4_8xn_u8)(uint8_t * d_8xn, int ds1, const uint8_t * s1_8xn, int ss1, const uint8_t * s2_8xn, int ss2, const uint8_t * s3_8xn, int ss3, const uint8_t * s4_8xn, int ss4, const int16_t * s5_6, int n);
+#define oil_combine4_8xn_u8 ((_oil_type_combine4_8xn_u8)(*(void **)oil_function_class_ptr_combine4_8xn_u8))
extern OilFunctionClass *oil_function_class_ptr_compare_u8;
typedef void (*_oil_type_compare_u8)(uint32_t * d_1, const uint8_t * s1, const uint8_t * s2, int n);
#define oil_compare_u8 ((_oil_type_compare_u8)(*(void **)oil_function_class_ptr_compare_u8))
@@ -765,6 +774,9 @@ typedef void (*_oil_type_mas4_add_s16)(int16_t * d, const int16_t * s1, const in
extern OilFunctionClass *oil_function_class_ptr_mas8_across_add_s16;
typedef void (*_oil_type_mas8_across_add_s16)(int16_t * d, const int16_t * s1, const int16_t * s2_nx8, int sstr2, const int16_t * s3_8, const int16_t * s4_2, int n);
#define oil_mas8_across_add_s16 ((_oil_type_mas8_across_add_s16)(*(void **)oil_function_class_ptr_mas8_across_add_s16))
+extern OilFunctionClass *oil_function_class_ptr_mas8_across_u8;
+typedef void (*_oil_type_mas8_across_u8)(uint8_t * d, const uint8_t * s1_nx8, int sstr1, const int16_t * s2_8, const int16_t * s3_2, int n);
+#define oil_mas8_across_u8 ((_oil_type_mas8_across_u8)(*(void **)oil_function_class_ptr_mas8_across_u8))
extern OilFunctionClass *oil_function_class_ptr_mas8_add_s16;
typedef void (*_oil_type_mas8_add_s16)(int16_t * d, const int16_t * s1, const int16_t * s2_np7, const int16_t * s3_8, const int16_t * s4_2, int n);
#define oil_mas8_add_s16 ((_oil_type_mas8_add_s16)(*(void **)oil_function_class_ptr_mas8_add_s16))
diff --git a/liboil/liboilmarshal.c b/liboil/liboilmarshal.c
index 0adc49c..4ccf671 100644
--- a/liboil/liboilmarshal.c
+++ b/liboil/liboilmarshal.c
@@ -94,6 +94,12 @@ _oil_test_marshal_function (void *func, unsigned long *args, int n_args,
((void *)args[0],(void *)args[1],(int)args[2],(void *)args[3],(int)args[4]);
oil_profile_stop (prof);
break;
+ case 0x1aaa:
+ oil_profile_start (prof);
+ ((void (*)(void *,int,void *,int,void *,int,void *,int,void *,int,void *,int))func)
+ ((void *)args[0],(int)args[1],(void *)args[2],(int)args[3],(void *)args[4],(int)args[5],(void *)args[6],(int)args[7],(void *)args[8],(int)args[9],(void *)args[10],(int)args[11]);
+ oil_profile_stop (prof);
+ break;
case 0x006a:
oil_profile_start (prof);
((void (*)(void *,int,void *,int,void *,int))func)
diff --git a/liboil/liboiltest.c b/liboil/liboiltest.c
index 3d63847..b4c6e5d 100644
--- a/liboil/liboiltest.c
+++ b/liboil/liboiltest.c
@@ -39,6 +39,8 @@
#include <stdio.h>
#include <math.h>
+#define MAX_PARAMS 20
+
/**
* SECTION:liboiltest
* @title:OilTest
@@ -222,7 +224,7 @@ oil_test_check_function (void * priv)
OilTest *test = priv;
int i;
int j;
- unsigned long args[10];
+ unsigned long args[MAX_PARAMS];
unsigned int pointer_mask;
oil_test_init (test);
@@ -287,7 +289,7 @@ oil_test_check_ref (OilTest *test)
{
int i;
- if (test->proto->n_params > 10) {
+ if (test->proto->n_params > MAX_PARAMS) {
OIL_ERROR ("function class %s has too many parameters",
test->klass->name);
return;
@@ -344,7 +346,7 @@ oil_test_check_impl (OilTest *test, OilFunctionImpl *impl)
int fail = 0;
int ret;
- if (test->proto->n_params > 10) {
+ if (test->proto->n_params > MAX_PARAMS) {
OIL_ERROR ("function has too many parameters");
return 0;
}
diff --git a/liboil/liboiltrampolines.c b/liboil/liboiltrampolines.c
index ccc7e4e..710460a 100644
--- a/liboil/liboiltrampolines.c
+++ b/liboil/liboiltrampolines.c
@@ -861,6 +861,36 @@ oil_colsad8x8_u8 (uint32_t * d_1, const uint8_t * s1_8x8, int ss1, const uint8_t
((void (*)(uint32_t * d_1, const uint8_t * s1_8x8, int ss1, const uint8_t * s2_8x8, int ss2))(_oil_function_class_colsad8x8_u8.func))(d_1, s1_8x8, ss1, s2_8x8, ss2);
}
+#undef oil_combine4_12xn_u8
+/*
+ * Out-of-line entry point for the combine4_12xn_u8 class: if the class has
+ * no implementation selected yet (.func is NULL) it calls
+ * oil_class_optimize() on it, then forwards all arguments through .func.
+ */
+void
+oil_combine4_12xn_u8 (uint8_t * d_12xn, int ds1, const uint8_t * s1_12xn, int ss1, const uint8_t * s2_12xn, int ss2, const uint8_t * s3_12xn, int ss3, const uint8_t * s4_12xn, int ss4, const int16_t * s5_6, int n)
+{
+  typedef void (*combine4_12xn_u8_fn) (uint8_t *, int, const uint8_t *, int, const uint8_t *, int, const uint8_t *, int, const uint8_t *, int, const int16_t *, int);
+  combine4_12xn_u8_fn impl;
+
+  if (!_oil_function_class_combine4_12xn_u8.func)
+    oil_class_optimize (&_oil_function_class_combine4_12xn_u8);
+
+  impl = (combine4_12xn_u8_fn) (_oil_function_class_combine4_12xn_u8.func);
+  impl (d_12xn, ds1, s1_12xn, ss1, s2_12xn, ss2, s3_12xn, ss3, s4_12xn, ss4, s5_6, n);
+}
+
+#undef oil_combine4_16xn_u8
+/*
+ * Out-of-line entry point for the combine4_16xn_u8 class: if the class has
+ * no implementation selected yet (.func is NULL) it calls
+ * oil_class_optimize() on it, then forwards all arguments through .func.
+ */
+void
+oil_combine4_16xn_u8 (uint8_t * d_16xn, int ds1, const uint8_t * s1_16xn, int ss1, const uint8_t * s2_16xn, int ss2, const uint8_t * s3_16xn, int ss3, const uint8_t * s4_16xn, int ss4, const int16_t * s5_6, int n)
+{
+  typedef void (*combine4_16xn_u8_fn) (uint8_t *, int, const uint8_t *, int, const uint8_t *, int, const uint8_t *, int, const uint8_t *, int, const int16_t *, int);
+  combine4_16xn_u8_fn impl;
+
+  if (!_oil_function_class_combine4_16xn_u8.func)
+    oil_class_optimize (&_oil_function_class_combine4_16xn_u8);
+
+  impl = (combine4_16xn_u8_fn) (_oil_function_class_combine4_16xn_u8.func);
+  impl (d_16xn, ds1, s1_16xn, ss1, s2_16xn, ss2, s3_16xn, ss3, s4_16xn, ss4, s5_6, n);
+}
+
+#undef oil_combine4_8xn_u8
+/*
+ * Out-of-line entry point for the combine4_8xn_u8 class: if the class has
+ * no implementation selected yet (.func is NULL) it calls
+ * oil_class_optimize() on it, then forwards all arguments through .func.
+ */
+void
+oil_combine4_8xn_u8 (uint8_t * d_8xn, int ds1, const uint8_t * s1_8xn, int ss1, const uint8_t * s2_8xn, int ss2, const uint8_t * s3_8xn, int ss3, const uint8_t * s4_8xn, int ss4, const int16_t * s5_6, int n)
+{
+  typedef void (*combine4_8xn_u8_fn) (uint8_t *, int, const uint8_t *, int, const uint8_t *, int, const uint8_t *, int, const uint8_t *, int, const int16_t *, int);
+  combine4_8xn_u8_fn impl;
+
+  if (!_oil_function_class_combine4_8xn_u8.func)
+    oil_class_optimize (&_oil_function_class_combine4_8xn_u8);
+
+  impl = (combine4_8xn_u8_fn) (_oil_function_class_combine4_8xn_u8.func);
+  impl (d_8xn, ds1, s1_8xn, ss1, s2_8xn, ss2, s3_8xn, ss3, s4_8xn, ss4, s5_6, n);
+}
+
#undef oil_compare_u8
void
oil_compare_u8 (uint32_t * d_1, const uint8_t * s1, const uint8_t * s2, int n)
@@ -2461,6 +2491,16 @@ oil_mas8_across_add_s16 (int16_t * d, const int16_t * s1, const int16_t * s2_nx8
((void (*)(int16_t * d, const int16_t * s1, const int16_t * s2_nx8, int sstr2, const int16_t * s3_8, const int16_t * s4_2, int n))(_oil_function_class_mas8_across_add_s16.func))(d, s1, s2_nx8, sstr2, s3_8, s4_2, n);
}
+#undef oil_mas8_across_u8
+/*
+ * Out-of-line entry point for the mas8_across_u8 class: if the class has no
+ * implementation selected yet (.func is NULL) it calls oil_class_optimize()
+ * on it, then forwards all arguments through .func.
+ */
+void
+oil_mas8_across_u8 (uint8_t * d, const uint8_t * s1_nx8, int sstr1, const int16_t * s2_8, const int16_t * s3_2, int n)
+{
+  typedef void (*mas8_across_u8_fn) (uint8_t *, const uint8_t *, int, const int16_t *, const int16_t *, int);
+  mas8_across_u8_fn impl;
+
+  if (!_oil_function_class_mas8_across_u8.func)
+    oil_class_optimize (&_oil_function_class_mas8_across_u8);
+
+  impl = (mas8_across_u8_fn) (_oil_function_class_mas8_across_u8.func);
+  impl (d, s1_nx8, sstr1, s2_8, s3_2, n);
+}
+
#undef oil_mas8_add_s16
void
oil_mas8_add_s16 (int16_t * d, const int16_t * s1, const int16_t * s2_np7, const int16_t * s3_8, const int16_t * s4_2, int n)
diff --git a/liboil/ref/mas.c b/liboil/ref/mas.c
index 1ec6234..e098bc2 100644
--- a/liboil/ref/mas.c
+++ b/liboil/ref/mas.c
@@ -250,6 +250,8 @@ OIL_DEFINE_CLASS_FULL (mas12across_addc_rshift_u8,
OIL_DEFINE_CLASS_FULL (mas8_addc_rshift_decim2_u8,
"uint8_t *d, uint8_t *s1_2xnp9, int16_t *s2_8, "
"int16_t *s3_2, int n", mas8_test);
+OIL_DEFINE_CLASS_FULL (mas8_across_u8, "uint8_t *d, uint8_t *s1_nx8, int sstr1, "
+ "int16_t *s2_8, int16_t *s3_2, int n", mas8_u8_test);
void
mas2_add_s16_ref(int16_t *d1, int16_t *s1, int16_t *s2, int16_t *s3_2,
@@ -421,6 +423,24 @@ mas8_u8_sym_l15_ref (uint8_t *d, const uint8_t *s1_np7,
}
OIL_DEFINE_IMPL_REF (mas8_u8_sym_l15_ref, mas8_u8_sym_l15);
+/*
+ * Reference implementation of mas8_across_u8.
+ *
+ * For each output index, sums the 8 bytes found sstr1 bytes apart (starting
+ * at byte offset index in s1_nx8), each multiplied by the matching tap in
+ * s2_8, adds s3_2[0], shifts right by s3_2[1] and stores the value clamped
+ * to 0..255 in d.
+ */
+void
+mas8_across_u8_ref (uint8_t *d, uint8_t *s1_nx8, int sstr1,
+    int16_t *s2_8, int16_t *s3_2, int n)
+{
+  int col;
+
+  for (col = 0; col < n; col++) {
+    int tap;
+    int acc = 0;
+
+    for (tap = 0; tap < 8; tap++) {
+      acc += OIL_GET(s1_nx8, col*sizeof(uint8_t) + tap*sstr1, uint8_t)*s2_8[tap];
+    }
+    acc = (acc + s3_2[0]) >> s3_2[1];
+    d[col] = CLAMP(acc,0,255);
+  }
+}
+OIL_DEFINE_IMPL_REF (mas8_across_u8_ref, mas8_across_u8);
+
static void
mas12_addc_rshift_decim2_u8_ref (uint8_t *dest, const uint8_t *src,
const int16_t *taps, const int16_t *offsetshift, int n)
diff --git a/liboil/ref/wavelet.c b/liboil/ref/wavelet.c
index b7e8175..bb49eca 100644
--- a/liboil/ref/wavelet.c
+++ b/liboil/ref/wavelet.c
@@ -39,6 +39,20 @@ lshift_test (OilTest *test)
data = (int16_t *)oil_test_get_source_data (test, OIL_ARG_SRC2);
data[0] = 12;
}
+
+/*
+ * Test set-up hook for the combine4_* classes: overwrites the six int16_t
+ * values of the SRC5 test array with 4, 4, 4, 4, 8, 4.
+ */
+static void
+combine4_test (OilTest *test)
+{
+  static const int16_t params[6] = { 4, 4, 4, 4, 8, 4 };
+  int16_t *dest = (int16_t *)oil_test_get_source_data (test, OIL_ARG_SRC5);
+  int k;
+
+  for (k = 0; k < 6; k++) {
+    dest[k] = params[k];
+  }
+}
OIL_DEFINE_CLASS_FULL (deinterleave,
"int16_t *d_2xn, int16_t *s_2xn, int n", wavelet_test);
@@ -82,6 +96,15 @@ OIL_DEFINE_CLASS (multiply_and_acc_16xn_s16_u8, "int16_t *i1_16xn, int is1, "
"int16_t *s1_16xn, int ss1, uint8_t *s2_16xn, int ss2, int n");
OIL_DEFINE_CLASS (multiply_and_acc_24xn_s16_u8, "int16_t *i1_24xn, int is1, "
"int16_t *s1_24xn, int ss1, uint8_t *s2_24xn, int ss2, int n");
+OIL_DEFINE_CLASS_FULL (combine4_8xn_u8, "uint8_t *d_8xn, int ds1, "
+ "uint8_t *s1_8xn, int ss1, uint8_t *s2_8xn, int ss2, uint8_t *s3_8xn, "
+ "int ss3, uint8_t *s4_8xn, int ss4, int16_t *s5_6, int n", combine4_test);
+OIL_DEFINE_CLASS_FULL (combine4_12xn_u8, "uint8_t *d_12xn, int ds1, "
+ "uint8_t *s1_12xn, int ss1, uint8_t *s2_12xn, int ss2, uint8_t *s3_12xn, "
+ "int ss3, uint8_t *s4_12xn, int ss4, int16_t *s5_6, int n", combine4_test);
+OIL_DEFINE_CLASS_FULL (combine4_16xn_u8, "uint8_t *d_16xn, int ds1, "
+ "uint8_t *s1_16xn, int ss1, uint8_t *s2_16xn, int ss2, uint8_t *s3_16xn, "
+ "int ss3, uint8_t *s4_16xn, int ss4, int16_t *s5_6, int n", combine4_test);
void
deinterleave_ref (int16_t *d_2xn, int16_t *s_2xn, int n)
@@ -629,3 +652,87 @@ multiply_and_acc_24xn_s16_u8_ref (int16_t *i1, int is1, int16_t *s1,
OIL_DEFINE_IMPL_REF (multiply_and_acc_24xn_s16_u8_ref,
multiply_and_acc_24xn_s16_u8);
+/*
+ * Reference implementation of combine4_8xn_u8.
+ *
+ * Produces n rows of 8 bytes.  Each output byte is the weighted sum of the
+ * corresponding bytes of s1..s4 (weights s5_6[0..3]) plus s5_6[4], shifted
+ * right by s5_6[5] and stored without clamping.  After every row the source
+ * pointers advance by ss1..ss4 and the destination by ds1.
+ */
+void
+combine4_8xn_u8_ref (uint8_t *d, int ds1,
+    uint8_t *s1, int ss1,
+    uint8_t *s2, int ss2,
+    uint8_t *s3, int ss3,
+    uint8_t *s4, int ss4,
+    int16_t *s5_6, int n)
+{
+  int row;
+
+  for (row = 0; row < n; row++) {
+    int col;
+
+    for (col = 0; col < 8; col++) {
+      int acc = s5_6[4];
+
+      acc += s5_6[0] * s1[col];
+      acc += s5_6[1] * s2[col];
+      acc += s5_6[2] * s3[col];
+      acc += s5_6[3] * s4[col];
+      d[col] = acc >> s5_6[5];
+    }
+    d += ds1;
+    s1 += ss1;
+    s2 += ss2;
+    s3 += ss3;
+    s4 += ss4;
+  }
+}
+OIL_DEFINE_IMPL_REF (combine4_8xn_u8_ref, combine4_8xn_u8);
+
+/*
+ * Reference implementation of combine4_12xn_u8.
+ *
+ * Produces n rows of 12 bytes.  Each output byte is the weighted sum of the
+ * corresponding bytes of s1..s4 (weights s5_6[0..3]) plus s5_6[4], shifted
+ * right by s5_6[5] and stored without clamping.  After every row the source
+ * pointers advance by ss1..ss4 and the destination by ds1.
+ */
+void
+combine4_12xn_u8_ref (uint8_t *d, int ds1,
+    uint8_t *s1, int ss1,
+    uint8_t *s2, int ss2,
+    uint8_t *s3, int ss3,
+    uint8_t *s4, int ss4,
+    int16_t *s5_6, int n)
+{
+  int row;
+
+  for (row = 0; row < n; row++) {
+    int col;
+
+    for (col = 0; col < 12; col++) {
+      int acc = s5_6[4];
+
+      acc += s5_6[0] * s1[col];
+      acc += s5_6[1] * s2[col];
+      acc += s5_6[2] * s3[col];
+      acc += s5_6[3] * s4[col];
+      d[col] = acc >> s5_6[5];
+    }
+    d += ds1;
+    s1 += ss1;
+    s2 += ss2;
+    s3 += ss3;
+    s4 += ss4;
+  }
+}
+OIL_DEFINE_IMPL_REF (combine4_12xn_u8_ref, combine4_12xn_u8);
+
+/*
+ * Reference implementation of combine4_16xn_u8.
+ *
+ * Produces n rows of 16 bytes.  Each output byte is the weighted sum of the
+ * corresponding bytes of s1..s4 (weights s5_6[0..3]) plus s5_6[4], shifted
+ * right by s5_6[5] and stored without clamping.  After every row the source
+ * pointers advance by ss1..ss4 and the destination by ds1.
+ */
+void
+combine4_16xn_u8_ref (uint8_t *d, int ds1,
+    uint8_t *s1, int ss1,
+    uint8_t *s2, int ss2,
+    uint8_t *s3, int ss3,
+    uint8_t *s4, int ss4,
+    int16_t *s5_6, int n)
+{
+  int row;
+
+  for (row = 0; row < n; row++) {
+    int col;
+
+    for (col = 0; col < 16; col++) {
+      int acc = s5_6[4];
+
+      acc += s5_6[0] * s1[col];
+      acc += s5_6[1] * s2[col];
+      acc += s5_6[2] * s3[col];
+      acc += s5_6[3] * s4[col];
+      d[col] = acc >> s5_6[5];
+    }
+    d += ds1;
+    s1 += ss1;
+    s2 += ss2;
+    s3 += ss3;
+    s4 += ss4;
+  }
+}
+OIL_DEFINE_IMPL_REF (combine4_16xn_u8_ref, combine4_16xn_u8);
+
More information about the Liboil-commit
mailing list