[Liboil-commit] liboil/i386 liboil/i386_amd64 liboil/liboilclasses.h liboil/liboilfuncs-04.h liboil/liboilfuncs-doc.h liboil/liboilfuncs.h liboil/liboilmarshal.c liboil/liboiltest.c liboil/liboiltrampolines.c liboil/ref

David Schleef ds at kemper.freedesktop.org
Wed Feb 13 02:25:45 PST 2008


 liboil/i386/mas.c          |  113 ++++++++++++++++++++++++++++++++++++++
 liboil/i386_amd64/sad8x8.c |  133 +++++++++++++++++++++++++++++++++++++++++++++
 liboil/liboilclasses.h     |    4 +
 liboil/liboilfuncs-04.h    |    4 +
 liboil/liboilfuncs-doc.h   |    4 +
 liboil/liboilfuncs.h       |   12 ++++
 liboil/liboilmarshal.c     |    6 ++
 liboil/liboiltest.c        |    8 +-
 liboil/liboiltrampolines.c |   40 +++++++++++++
 liboil/ref/mas.c           |   20 ++++++
 liboil/ref/wavelet.c       |  107 ++++++++++++++++++++++++++++++++++++
 11 files changed, 448 insertions(+), 3 deletions(-)

New commits:
commit 90c27e2a2ef4c51fa4d18c8d2cb0c8b0fd92454f
Author: David Schleef <ds at ginger.bigkitten.com>
Date:   Wed Feb 13 02:27:30 2008 -0800

    Copy more functions from schro.

diff --git a/liboil/i386/mas.c b/liboil/i386/mas.c
index ebce3f6..5153ec2 100644
--- a/liboil/i386/mas.c
+++ b/liboil/i386/mas.c
@@ -860,3 +860,116 @@ OIL_DEFINE_IMPL_FULL (mas8_addc_rshift_decim2_u8_mmx_4,
 
 #endif
 
+void
+mas8_across_u8_mmx_3 (uint8_t *d, const uint8_t *s1_nx8, int ss1,
+    const int16_t *s2_8, const int16_t *s3_2, int n)
+{
+  /* d[k] = CLAMP((sum_i s1_nx8[k + i*ss1]*s2_8[i] + s3_2[0]) >> s3_2[1], 0, 255).
+   * Scalar code peels n&3 outputs; MMX then does 4 per pass in 16-bit lanes. */
+  int i, x;
+  while(n&3) {
+    x = 0;
+    for(i=0;i<8;i++){
+      x += OIL_GET(s1_nx8, i*ss1, uint8_t)*s2_8[i];
+    }
+    *d = CLAMP((x + s3_2[0])>>s3_2[1],0,255);
+    d++;
+    s1_nx8++;
+    n--;
+  }
+
+  if (n == 0) return;
+  n>>=2;
+  __asm__ __volatile__("\n"
+      "  pxor %%mm7, %%mm7\n"
+      /* mm7 = 0 (zero half for punpcklbw); mm6 = s3_2[0..1], word 0 = offset */
+      "  movd (%[s3_2]), %%mm6\n"
+      /* mm5 = s3_2[1], the shift count */
+      "  movzwl 2(%[s3_2]), %%ecx\n"
+      "  movd %%ecx, %%mm5\n"
+      /* mm3 = taps s2_8[0..3], mm4 = taps s2_8[4..7] */
+      "  movq 0(%[s2_8]), %%mm3\n"
+      "  movq 8(%[s2_8]), %%mm4\n"
+      :
+      : [s2_8] "r" (s2_8),
+        [s3_2] "r" (s3_2)
+      : "ecx", "memory");  /* "memory": the asm reads the tap/offset arrays */
+
+  while (n > 0) {
+    const uint8_t *p = s1_nx8;
+  __asm__ __volatile__("\n"
+      "1:\n"
+      /* mm2 = s3_2[0] broadcast to all four lanes (accumulator seed) */
+      "  pshufw $0x00, %%mm6, %%mm2\n"
+
+      "  movd 0(%[p]), %%mm0\n"
+      "  addl %[ss1], %[p]\n"
+      "  punpcklbw %%mm7, %%mm0\n"
+      "  pshufw $0x00, %%mm3, %%mm1\n"
+      "  pmullw %%mm1, %%mm0\n"
+      "  paddw %%mm0, %%mm2\n"
+      /* steps 1..7: same load/unpack/multiply/accumulate with the next tap */
+      "  movd 0(%[p]), %%mm0\n"
+      "  addl %[ss1], %[p]\n"
+      "  punpcklbw %%mm7, %%mm0\n"
+      "  pshufw $0x55*1, %%mm3, %%mm1\n"
+      "  pmullw %%mm1, %%mm0\n"
+      "  paddw %%mm0, %%mm2\n"
+
+      "  movd 0(%[p]), %%mm0\n"
+      "  addl %[ss1], %[p]\n"
+      "  punpcklbw %%mm7, %%mm0\n"
+      "  pshufw $0x55*2, %%mm3, %%mm1\n"
+      "  pmullw %%mm1, %%mm0\n"
+      "  paddw %%mm0, %%mm2\n"
+
+      "  movd 0(%[p]), %%mm0\n"
+      "  addl %[ss1], %[p]\n"
+      "  punpcklbw %%mm7, %%mm0\n"
+      "  pshufw $0x55*3, %%mm3, %%mm1\n"
+      "  pmullw %%mm1, %%mm0\n"
+      "  paddw %%mm0, %%mm2\n"
+
+      "  movd 0(%[p]), %%mm0\n"
+      "  addl %[ss1], %[p]\n"
+      "  punpcklbw %%mm7, %%mm0\n"
+      "  pshufw $0x00, %%mm4, %%mm1\n"
+      "  pmullw %%mm1, %%mm0\n"
+      "  paddw %%mm0, %%mm2\n"
+
+      "  movd 0(%[p]), %%mm0\n"
+      "  addl %[ss1], %[p]\n"
+      "  punpcklbw %%mm7, %%mm0\n"
+      "  pshufw $0x55*1, %%mm4, %%mm1\n"
+      "  pmullw %%mm1, %%mm0\n"
+      "  paddw %%mm0, %%mm2\n"
+
+      "  movd 0(%[p]), %%mm0\n"
+      "  addl %[ss1], %[p]\n"
+      "  punpcklbw %%mm7, %%mm0\n"
+      "  pshufw $0x55*2, %%mm4, %%mm1\n"
+      "  pmullw %%mm1, %%mm0\n"
+      "  paddw %%mm0, %%mm2\n"
+
+      "  movd 0(%[p]), %%mm0\n"
+      "  addl %[ss1], %[p]\n"
+      "  punpcklbw %%mm7, %%mm0\n"
+      "  pshufw $0x55*3, %%mm4, %%mm1\n"
+      "  pmullw %%mm1, %%mm0\n"
+      "  paddw %%mm0, %%mm2\n"
+      /* shift right, clamp below at 0, saturate to u8, store four outputs */
+      "  psraw %%mm5, %%mm2\n"
+      "  pmaxsw %%mm7, %%mm2\n"
+      "  packuswb %%mm2, %%mm2\n"
+      "  movd %%mm2, 0(%[d])\n"
+      : [p] "+r" (p)
+      : [d] "r" (d), [ss1] "r" (ss1) : "memory");
+    d+=4;
+    s1_nx8+=4;
+    n--;
+  }
+
+  asm volatile ("emms");
+}
+OIL_DEFINE_IMPL_FULL (mas8_across_u8_mmx_3, mas8_across_u8, OIL_IMPL_FLAG_MMX|OIL_IMPL_FLAG_MMXEXT);
+
diff --git a/liboil/i386_amd64/sad8x8.c b/liboil/i386_amd64/sad8x8.c
index 34cf96d..07cf9f3 100644
--- a/liboil/i386_amd64/sad8x8.c
+++ b/liboil/i386_amd64/sad8x8.c
@@ -488,3 +488,136 @@ sad16x16_u8_mmxext (uint32_t * dest, uint8_t * src1, int sstr1, uint8_t * src2,
 }
 OIL_DEFINE_IMPL_FULL (sad16x16_u8_mmxext, sad16x16_u8, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT);
 
+void
+combine4_12xn_u8_mmx (uint8_t *d, int ds1,
+    uint8_t *s1, int ss1,
+    uint8_t *s2, int ss2,
+    uint8_t *s3, int ss3,
+    uint8_t *s4, int ss4,
+    int16_t *s5_6, int n)
+{
+  int j;
+  /* Per row, 12 columns: d = pack_u8((s5_6[0]*s1 + s5_6[1]*s2 + s5_6[2]*s3 + s5_6[3]*s4 + s5_6[4]) >> 4), 16-bit lanes. */
+  asm volatile ("\n"
+      "  pxor %%mm7, %%mm7\n"
+      "  movq 0(%0), %%mm6\n"
+      "  movd 8(%0), %%mm4\n"
+      "  pshufw $0x00, %%mm4, %%mm4\n"
+      ::"r" (s5_6) : "memory");
+  /* mm6 = weights s5_6[0..3], mm4 = s5_6[4] in every lane; the shift is fixed at 4 and s5_6[5] is not read. */
+  for(j=0;j<n;j++){
+    asm volatile ("\n"
+#define DO_4(offset) \
+        "  movd " #offset "(%1), %%mm0\n" \
+        "  punpcklbw %%mm7, %%mm0\n" \
+        "  pshufw $0x00, %%mm6, %%mm5\n" \
+        "  pmullw %%mm5, %%mm0\n" \
+        "  movd " #offset "(%2), %%mm1\n" \
+        "  punpcklbw %%mm7, %%mm1\n" \
+        "  pshufw $0x55, %%mm6, %%mm5\n" \
+        "  pmullw %%mm5, %%mm1\n" \
+        "  movd " #offset "(%3), %%mm2\n" \
+        "  punpcklbw %%mm7, %%mm2\n" \
+        "  pshufw $0xaa, %%mm6, %%mm5\n" \
+        "  pmullw %%mm5, %%mm2\n" \
+        "  movd " #offset "(%4), %%mm3\n" \
+        "  punpcklbw %%mm7, %%mm3\n" \
+        "  pshufw $0xff, %%mm6, %%mm5\n" \
+        "  pmullw %%mm5, %%mm3\n" \
+        "  paddw %%mm1, %%mm0\n" \
+        "  paddw %%mm2, %%mm0\n" \
+        "  paddw %%mm3, %%mm0\n" \
+        "  paddw %%mm4, %%mm0\n" \
+        "  psrlw $4, %%mm0\n" \
+        "  packuswb %%mm0, %%mm0\n" \
+        "  movd %%mm0, " #offset "(%0)\n"
+        /* DO_4(off) emits four output bytes at byte offset off of the current row. */
+        DO_4(0)
+        DO_4(4)
+        DO_4(8)
+
+        :
+        : "r" (d), "r" (s1), "r" (s2), "r" (s3), "r" (s4) : "memory");
+    /* "memory" above: the asm loads from s1..s4 and stores through d. */
+    s1 += ss1;
+    s2 += ss2;
+    s3 += ss3;
+    s4 += ss4;
+    d += ds1;
+  }
+  asm volatile ("emms");
+}
+OIL_DEFINE_IMPL_FULL (combine4_12xn_u8_mmx, combine4_12xn_u8, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT); /* pshufw needs MMXEXT */
+
+void
+combine4_8xn_u8_mmx (uint8_t *d, int ds1,
+    uint8_t *s1, int ss1,
+    uint8_t *s2, int ss2,
+    uint8_t *s3, int ss3,
+    uint8_t *s4, int ss4,
+    int16_t *s5_6, int n)
+{
+  int j;
+  /* Per row, 8 columns: d = pack_u8((s5_6[0]*s1 + s5_6[1]*s2 + s5_6[2]*s3 + s5_6[3]*s4 + s5_6[4]) >> 4), 16-bit lanes. */
+  asm volatile ("\n"
+      "  pxor %%mm7, %%mm7\n"
+      "  movq 0(%0), %%mm6\n"
+      "  movd 8(%0), %%mm4\n"
+      "  pshufw $0x00, %%mm4, %%mm4\n"
+      ::"r" (s5_6) : "memory");
+  /* mm6 = weights s5_6[0..3], mm4 = s5_6[4] in every lane; DO_4 is the macro defined in combine4_12xn_u8_mmx. */
+  for(j=0;j<n;j++){
+    asm volatile ("\n"
+        DO_4(0)
+        DO_4(4)
+
+        :
+        : "r" (d), "r" (s1), "r" (s2), "r" (s3), "r" (s4) : "memory");
+    /* "memory" above: the asm loads from s1..s4 and stores through d. */
+    s1 += ss1;
+    s2 += ss2;
+    s3 += ss3;
+    s4 += ss4;
+    d += ds1;
+  }
+  asm volatile ("emms");
+}
+OIL_DEFINE_IMPL_FULL (combine4_8xn_u8_mmx, combine4_8xn_u8, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT); /* pshufw needs MMXEXT */
+
+void
+combine4_16xn_u8_mmx (uint8_t *d, int ds1,
+    uint8_t *s1, int ss1,
+    uint8_t *s2, int ss2,
+    uint8_t *s3, int ss3,
+    uint8_t *s4, int ss4,
+    int16_t *s5_6, int n)
+{
+  int j;
+  /* Per row, 16 columns: d = pack_u8((s5_6[0]*s1 + s5_6[1]*s2 + s5_6[2]*s3 + s5_6[3]*s4 + s5_6[4]) >> 4), 16-bit lanes. */
+  asm volatile ("\n"
+      "  pxor %%mm7, %%mm7\n"
+      "  movq 0(%0), %%mm6\n"
+      "  movd 8(%0), %%mm4\n"
+      "  pshufw $0x00, %%mm4, %%mm4\n"
+      ::"r" (s5_6) : "memory");
+  /* mm6 = weights s5_6[0..3], mm4 = s5_6[4] in every lane; DO_4 is the macro defined in combine4_12xn_u8_mmx. */
+  for(j=0;j<n;j++){
+    asm volatile ("\n"
+        DO_4(0)
+        DO_4(4)
+        DO_4(8)
+        DO_4(12)
+
+        :
+        : "r" (d), "r" (s1), "r" (s2), "r" (s3), "r" (s4) : "memory");
+    /* "memory" above: the asm loads from s1..s4 and stores through d. */
+    s1 += ss1;
+    s2 += ss2;
+    s3 += ss3;
+    s4 += ss4;
+    d += ds1;
+  }
+  asm volatile ("emms");
+}
+OIL_DEFINE_IMPL_FULL (combine4_16xn_u8_mmx, combine4_16xn_u8, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT); /* pshufw needs MMXEXT */
+
diff --git a/liboil/liboilclasses.h b/liboil/liboilclasses.h
index 2ec8640..620237b 100644
--- a/liboil/liboilclasses.h
+++ b/liboil/liboilclasses.h
@@ -119,6 +119,9 @@ OIL_DECLARE_CLASS(clipconv_u8_u16);
 OIL_DECLARE_CLASS(clipconv_u8_u32);
 OIL_DECLARE_CLASS(colorspace_argb);
 OIL_DECLARE_CLASS(colsad8x8_u8);
+OIL_DECLARE_CLASS(combine4_12xn_u8);
+OIL_DECLARE_CLASS(combine4_16xn_u8);
+OIL_DECLARE_CLASS(combine4_8xn_u8);
 OIL_DECLARE_CLASS(compare_u8);
 OIL_DECLARE_CLASS(composite_add_argb);
 OIL_DECLARE_CLASS(composite_add_argb_const_src);
@@ -279,6 +282,7 @@ OIL_DECLARE_CLASS(mas2_add_s16);
 OIL_DECLARE_CLASS(mas4_across_add_s16);
 OIL_DECLARE_CLASS(mas4_add_s16);
 OIL_DECLARE_CLASS(mas8_across_add_s16);
+OIL_DECLARE_CLASS(mas8_across_u8);
 OIL_DECLARE_CLASS(mas8_add_s16);
 OIL_DECLARE_CLASS(mas8_addc_rshift_decim2_u8);
 OIL_DECLARE_CLASS(mas8_u8);
diff --git a/liboil/liboilfuncs-04.h b/liboil/liboilfuncs-04.h
index e8512e8..d831edd 100644
--- a/liboil/liboilfuncs-04.h
+++ b/liboil/liboilfuncs-04.h
@@ -119,6 +119,9 @@ void oil_clipconv_u8_u16 (uint8_t * dest, int dstr, const uint16_t * src, int ss
 void oil_clipconv_u8_u32 (uint8_t * dest, int dstr, const uint32_t * src, int sstr, int n);
 void oil_colorspace_argb (uint32_t * d, const uint32_t * s, const int16_t * s2_24, int n);
 void oil_colsad8x8_u8 (uint32_t * d_1, const uint8_t * s1_8x8, int ss1, const uint8_t * s2_8x8, int ss2);
+void oil_combine4_12xn_u8 (uint8_t * d_12xn, int ds1, const uint8_t * s1_12xn, int ss1, const uint8_t * s2_12xn, int ss2, const uint8_t * s3_12xn, int ss3, const uint8_t * s4_12xn, int ss4, const int16_t * s5_6, int n);
+void oil_combine4_16xn_u8 (uint8_t * d_16xn, int ds1, const uint8_t * s1_16xn, int ss1, const uint8_t * s2_16xn, int ss2, const uint8_t * s3_16xn, int ss3, const uint8_t * s4_16xn, int ss4, const int16_t * s5_6, int n);
+void oil_combine4_8xn_u8 (uint8_t * d_8xn, int ds1, const uint8_t * s1_8xn, int ss1, const uint8_t * s2_8xn, int ss2, const uint8_t * s3_8xn, int ss3, const uint8_t * s4_8xn, int ss4, const int16_t * s5_6, int n);
 void oil_compare_u8 (uint32_t * d_1, const uint8_t * s1, const uint8_t * s2, int n);
 void oil_composite_add_argb (uint32_t * i_n, const uint32_t * s1_n, int n);
 void oil_composite_add_argb_const_src (uint32_t * i_n, const uint32_t * s1_1, int n);
@@ -279,6 +282,7 @@ void oil_mas2_add_s16 (int16_t * d, const int16_t * s1, const int16_t * s2_np1,
 void oil_mas4_across_add_s16 (int16_t * d, const int16_t * s1, const int16_t * s2_nx4, int sstr2, const int16_t * s3_4, const int16_t * s4_2, int n);
 void oil_mas4_add_s16 (int16_t * d, const int16_t * s1, const int16_t * s2_np3, const int16_t * s3_4, const int16_t * s4_2, int n);
 void oil_mas8_across_add_s16 (int16_t * d, const int16_t * s1, const int16_t * s2_nx8, int sstr2, const int16_t * s3_8, const int16_t * s4_2, int n);
+void oil_mas8_across_u8 (uint8_t * d, const uint8_t * s1_nx8, int sstr1, const int16_t * s2_8, const int16_t * s3_2, int n);
 void oil_mas8_add_s16 (int16_t * d, const int16_t * s1, const int16_t * s2_np7, const int16_t * s3_8, const int16_t * s4_2, int n);
 void oil_mas8_addc_rshift_decim2_u8 (uint8_t * d, const uint8_t * s1_2xnp9, const int16_t * s2_8, const int16_t * s3_2, int n);
 void oil_mas8_u8 (uint8_t * d, const uint8_t * s1_np7, const int16_t * s2_8, const int16_t * s3_2, int n);
diff --git a/liboil/liboilfuncs-doc.h b/liboil/liboilfuncs-doc.h
index 7e25d90..7b21a1d 100644
--- a/liboil/liboilfuncs-doc.h
+++ b/liboil/liboilfuncs-doc.h
@@ -83,6 +83,9 @@ void oil_clipconv_u8_u16 (uint8_t * dest, int dstr, const uint16_t * src, int ss
 void oil_clipconv_u8_u32 (uint8_t * dest, int dstr, const uint32_t * src, int sstr, int n);
 void oil_colorspace_argb (uint32_t * d, const uint32_t * s, const int16_t * s2_24, int n);
 void oil_colsad8x8_u8 (uint32_t * d_1, const uint8_t * s1_8x8, int ss1, const uint8_t * s2_8x8, int ss2);
+void oil_combine4_12xn_u8 (uint8_t * d_12xn, int ds1, const uint8_t * s1_12xn, int ss1, const uint8_t * s2_12xn, int ss2, const uint8_t * s3_12xn, int ss3, const uint8_t * s4_12xn, int ss4, const int16_t * s5_6, int n);
+void oil_combine4_16xn_u8 (uint8_t * d_16xn, int ds1, const uint8_t * s1_16xn, int ss1, const uint8_t * s2_16xn, int ss2, const uint8_t * s3_16xn, int ss3, const uint8_t * s4_16xn, int ss4, const int16_t * s5_6, int n);
+void oil_combine4_8xn_u8 (uint8_t * d_8xn, int ds1, const uint8_t * s1_8xn, int ss1, const uint8_t * s2_8xn, int ss2, const uint8_t * s3_8xn, int ss3, const uint8_t * s4_8xn, int ss4, const int16_t * s5_6, int n);
 void oil_compare_u8 (uint32_t * d_1, const uint8_t * s1, const uint8_t * s2, int n);
 void oil_composite_add_argb (uint32_t * i_n, const uint32_t * s1_n, int n);
 void oil_composite_add_argb_const_src (uint32_t * i_n, const uint32_t * s1_1, int n);
@@ -243,6 +246,7 @@ void oil_mas2_add_s16 (int16_t * d, const int16_t * s1, const int16_t * s2_np1,
 void oil_mas4_across_add_s16 (int16_t * d, const int16_t * s1, const int16_t * s2_nx4, int sstr2, const int16_t * s3_4, const int16_t * s4_2, int n);
 void oil_mas4_add_s16 (int16_t * d, const int16_t * s1, const int16_t * s2_np3, const int16_t * s3_4, const int16_t * s4_2, int n);
 void oil_mas8_across_add_s16 (int16_t * d, const int16_t * s1, const int16_t * s2_nx8, int sstr2, const int16_t * s3_8, const int16_t * s4_2, int n);
+void oil_mas8_across_u8 (uint8_t * d, const uint8_t * s1_nx8, int sstr1, const int16_t * s2_8, const int16_t * s3_2, int n);
 void oil_mas8_add_s16 (int16_t * d, const int16_t * s1, const int16_t * s2_np7, const int16_t * s3_8, const int16_t * s4_2, int n);
 void oil_mas8_addc_rshift_decim2_u8 (uint8_t * d, const uint8_t * s1_2xnp9, const int16_t * s2_8, const int16_t * s3_2, int n);
 void oil_mas8_u8 (uint8_t * d, const uint8_t * s1_np7, const int16_t * s2_8, const int16_t * s3_2, int n);
diff --git a/liboil/liboilfuncs.h b/liboil/liboilfuncs.h
index d841485..3ddc8b6 100644
--- a/liboil/liboilfuncs.h
+++ b/liboil/liboilfuncs.h
@@ -285,6 +285,15 @@ typedef void (*_oil_type_colorspace_argb)(uint32_t * d, const uint32_t * s, cons
 extern OilFunctionClass *oil_function_class_ptr_colsad8x8_u8;
 typedef void (*_oil_type_colsad8x8_u8)(uint32_t * d_1, const uint8_t * s1_8x8, int ss1, const uint8_t * s2_8x8, int ss2);
 #define oil_colsad8x8_u8 ((_oil_type_colsad8x8_u8)(*(void **)oil_function_class_ptr_colsad8x8_u8))
+extern OilFunctionClass *oil_function_class_ptr_combine4_12xn_u8;
+typedef void (*_oil_type_combine4_12xn_u8)(uint8_t * d_12xn, int ds1, const uint8_t * s1_12xn, int ss1, const uint8_t * s2_12xn, int ss2, const uint8_t * s3_12xn, int ss3, const uint8_t * s4_12xn, int ss4, const int16_t * s5_6, int n);
+#define oil_combine4_12xn_u8 ((_oil_type_combine4_12xn_u8)(*(void **)oil_function_class_ptr_combine4_12xn_u8))
+extern OilFunctionClass *oil_function_class_ptr_combine4_16xn_u8;
+typedef void (*_oil_type_combine4_16xn_u8)(uint8_t * d_16xn, int ds1, const uint8_t * s1_16xn, int ss1, const uint8_t * s2_16xn, int ss2, const uint8_t * s3_16xn, int ss3, const uint8_t * s4_16xn, int ss4, const int16_t * s5_6, int n);
+#define oil_combine4_16xn_u8 ((_oil_type_combine4_16xn_u8)(*(void **)oil_function_class_ptr_combine4_16xn_u8))
+extern OilFunctionClass *oil_function_class_ptr_combine4_8xn_u8;
+typedef void (*_oil_type_combine4_8xn_u8)(uint8_t * d_8xn, int ds1, const uint8_t * s1_8xn, int ss1, const uint8_t * s2_8xn, int ss2, const uint8_t * s3_8xn, int ss3, const uint8_t * s4_8xn, int ss4, const int16_t * s5_6, int n);
+#define oil_combine4_8xn_u8 ((_oil_type_combine4_8xn_u8)(*(void **)oil_function_class_ptr_combine4_8xn_u8))
 extern OilFunctionClass *oil_function_class_ptr_compare_u8;
 typedef void (*_oil_type_compare_u8)(uint32_t * d_1, const uint8_t * s1, const uint8_t * s2, int n);
 #define oil_compare_u8 ((_oil_type_compare_u8)(*(void **)oil_function_class_ptr_compare_u8))
@@ -765,6 +774,9 @@ typedef void (*_oil_type_mas4_add_s16)(int16_t * d, const int16_t * s1, const in
 extern OilFunctionClass *oil_function_class_ptr_mas8_across_add_s16;
 typedef void (*_oil_type_mas8_across_add_s16)(int16_t * d, const int16_t * s1, const int16_t * s2_nx8, int sstr2, const int16_t * s3_8, const int16_t * s4_2, int n);
 #define oil_mas8_across_add_s16 ((_oil_type_mas8_across_add_s16)(*(void **)oil_function_class_ptr_mas8_across_add_s16))
+extern OilFunctionClass *oil_function_class_ptr_mas8_across_u8;
+typedef void (*_oil_type_mas8_across_u8)(uint8_t * d, const uint8_t * s1_nx8, int sstr1, const int16_t * s2_8, const int16_t * s3_2, int n);
+#define oil_mas8_across_u8 ((_oil_type_mas8_across_u8)(*(void **)oil_function_class_ptr_mas8_across_u8))
 extern OilFunctionClass *oil_function_class_ptr_mas8_add_s16;
 typedef void (*_oil_type_mas8_add_s16)(int16_t * d, const int16_t * s1, const int16_t * s2_np7, const int16_t * s3_8, const int16_t * s4_2, int n);
 #define oil_mas8_add_s16 ((_oil_type_mas8_add_s16)(*(void **)oil_function_class_ptr_mas8_add_s16))
diff --git a/liboil/liboilmarshal.c b/liboil/liboilmarshal.c
index 0adc49c..4ccf671 100644
--- a/liboil/liboilmarshal.c
+++ b/liboil/liboilmarshal.c
@@ -94,6 +94,12 @@ _oil_test_marshal_function (void *func, unsigned long *args, int n_args,
         ((void *)args[0],(void *)args[1],(int)args[2],(void *)args[3],(int)args[4]);
       oil_profile_stop (prof);
       break;
+    case 0x1aaa:
+      oil_profile_start (prof);
+      ((void (*)(void *,int,void *,int,void *,int,void *,int,void *,int,void *,int))func)
+        ((void *)args[0],(int)args[1],(void *)args[2],(int)args[3],(void *)args[4],(int)args[5],(void *)args[6],(int)args[7],(void *)args[8],(int)args[9],(void *)args[10],(int)args[11]);
+      oil_profile_stop (prof);
+      break;
     case 0x006a:
       oil_profile_start (prof);
       ((void (*)(void *,int,void *,int,void *,int))func)
diff --git a/liboil/liboiltest.c b/liboil/liboiltest.c
index 3d63847..b4c6e5d 100644
--- a/liboil/liboiltest.c
+++ b/liboil/liboiltest.c
@@ -39,6 +39,8 @@
 #include <stdio.h>
 #include <math.h>
 
+#define MAX_PARAMS 20
+
 /**
  * SECTION:liboiltest
  * @title:OilTest
@@ -222,7 +224,7 @@ oil_test_check_function (void * priv)
   OilTest *test = priv;
   int i;
   int j;
-  unsigned long args[10];
+  unsigned long args[MAX_PARAMS];
   unsigned int pointer_mask;
 
   oil_test_init (test);
@@ -287,7 +289,7 @@ oil_test_check_ref (OilTest *test)
 {
   int i;
 
-  if (test->proto->n_params > 10) {
+  if (test->proto->n_params > MAX_PARAMS) {
     OIL_ERROR ("function class %s has too many parameters",
         test->klass->name);
     return;
@@ -344,7 +346,7 @@ oil_test_check_impl (OilTest *test, OilFunctionImpl *impl)
   int fail = 0;
   int ret;
 
-  if (test->proto->n_params > 10) {
+  if (test->proto->n_params > MAX_PARAMS) {
     OIL_ERROR ("function has too many parameters");
     return 0;
   }
diff --git a/liboil/liboiltrampolines.c b/liboil/liboiltrampolines.c
index ccc7e4e..710460a 100644
--- a/liboil/liboiltrampolines.c
+++ b/liboil/liboiltrampolines.c
@@ -861,6 +861,36 @@ oil_colsad8x8_u8 (uint32_t * d_1, const uint8_t * s1_8x8, int ss1, const uint8_t
   ((void (*)(uint32_t * d_1, const uint8_t * s1_8x8, int ss1, const uint8_t * s2_8x8, int ss2))(_oil_function_class_colsad8x8_u8.func))(d_1, s1_8x8, ss1, s2_8x8, ss2);
 }
 
+#undef oil_combine4_12xn_u8
+void
+oil_combine4_12xn_u8 (uint8_t * d_12xn, int ds1, const uint8_t * s1_12xn, int ss1, const uint8_t * s2_12xn, int ss2, const uint8_t * s3_12xn, int ss3, const uint8_t * s4_12xn, int ss4, const int16_t * s5_6, int n)
+{ /* Trampoline: forwards every argument unchanged to the class's current .func pointer. */
+  if (_oil_function_class_combine4_12xn_u8.func == NULL) { /* nothing chosen yet */
+    oil_class_optimize (&_oil_function_class_combine4_12xn_u8); /* presumably fills in .func -- confirm in oil_class_optimize */
+  }
+  ((void (*)(uint8_t * d_12xn, int ds1, const uint8_t * s1_12xn, int ss1, const uint8_t * s2_12xn, int ss2, const uint8_t * s3_12xn, int ss3, const uint8_t * s4_12xn, int ss4, const int16_t * s5_6, int n))(_oil_function_class_combine4_12xn_u8.func))(d_12xn, ds1, s1_12xn, ss1, s2_12xn, ss2, s3_12xn, ss3, s4_12xn, ss4, s5_6, n);
+}
+
+#undef oil_combine4_16xn_u8
+void
+oil_combine4_16xn_u8 (uint8_t * d_16xn, int ds1, const uint8_t * s1_16xn, int ss1, const uint8_t * s2_16xn, int ss2, const uint8_t * s3_16xn, int ss3, const uint8_t * s4_16xn, int ss4, const int16_t * s5_6, int n)
+{ /* Trampoline: forwards every argument unchanged to the class's current .func pointer. */
+  if (_oil_function_class_combine4_16xn_u8.func == NULL) { /* nothing chosen yet */
+    oil_class_optimize (&_oil_function_class_combine4_16xn_u8); /* presumably fills in .func -- confirm in oil_class_optimize */
+  }
+  ((void (*)(uint8_t * d_16xn, int ds1, const uint8_t * s1_16xn, int ss1, const uint8_t * s2_16xn, int ss2, const uint8_t * s3_16xn, int ss3, const uint8_t * s4_16xn, int ss4, const int16_t * s5_6, int n))(_oil_function_class_combine4_16xn_u8.func))(d_16xn, ds1, s1_16xn, ss1, s2_16xn, ss2, s3_16xn, ss3, s4_16xn, ss4, s5_6, n);
+}
+
+#undef oil_combine4_8xn_u8
+void
+oil_combine4_8xn_u8 (uint8_t * d_8xn, int ds1, const uint8_t * s1_8xn, int ss1, const uint8_t * s2_8xn, int ss2, const uint8_t * s3_8xn, int ss3, const uint8_t * s4_8xn, int ss4, const int16_t * s5_6, int n)
+{ /* Trampoline: forwards every argument unchanged to the class's current .func pointer. */
+  if (_oil_function_class_combine4_8xn_u8.func == NULL) { /* nothing chosen yet */
+    oil_class_optimize (&_oil_function_class_combine4_8xn_u8); /* presumably fills in .func -- confirm in oil_class_optimize */
+  }
+  ((void (*)(uint8_t * d_8xn, int ds1, const uint8_t * s1_8xn, int ss1, const uint8_t * s2_8xn, int ss2, const uint8_t * s3_8xn, int ss3, const uint8_t * s4_8xn, int ss4, const int16_t * s5_6, int n))(_oil_function_class_combine4_8xn_u8.func))(d_8xn, ds1, s1_8xn, ss1, s2_8xn, ss2, s3_8xn, ss3, s4_8xn, ss4, s5_6, n);
+}
+
 #undef oil_compare_u8
 void
 oil_compare_u8 (uint32_t * d_1, const uint8_t * s1, const uint8_t * s2, int n)
@@ -2461,6 +2491,16 @@ oil_mas8_across_add_s16 (int16_t * d, const int16_t * s1, const int16_t * s2_nx8
   ((void (*)(int16_t * d, const int16_t * s1, const int16_t * s2_nx8, int sstr2, const int16_t * s3_8, const int16_t * s4_2, int n))(_oil_function_class_mas8_across_add_s16.func))(d, s1, s2_nx8, sstr2, s3_8, s4_2, n);
 }
 
+#undef oil_mas8_across_u8
+void
+oil_mas8_across_u8 (uint8_t * d, const uint8_t * s1_nx8, int sstr1, const int16_t * s2_8, const int16_t * s3_2, int n)
+{ /* Trampoline: forwards every argument unchanged to the class's current .func pointer. */
+  if (_oil_function_class_mas8_across_u8.func == NULL) { /* nothing chosen yet */
+    oil_class_optimize (&_oil_function_class_mas8_across_u8); /* presumably fills in .func -- confirm in oil_class_optimize */
+  }
+  ((void (*)(uint8_t * d, const uint8_t * s1_nx8, int sstr1, const int16_t * s2_8, const int16_t * s3_2, int n))(_oil_function_class_mas8_across_u8.func))(d, s1_nx8, sstr1, s2_8, s3_2, n);
+}
+
 #undef oil_mas8_add_s16
 void
 oil_mas8_add_s16 (int16_t * d, const int16_t * s1, const int16_t * s2_np7, const int16_t * s3_8, const int16_t * s4_2, int n)
diff --git a/liboil/ref/mas.c b/liboil/ref/mas.c
index 1ec6234..e098bc2 100644
--- a/liboil/ref/mas.c
+++ b/liboil/ref/mas.c
@@ -250,6 +250,8 @@ OIL_DEFINE_CLASS_FULL (mas12across_addc_rshift_u8,
 OIL_DEFINE_CLASS_FULL (mas8_addc_rshift_decim2_u8,
     "uint8_t *d, uint8_t *s1_2xnp9, int16_t *s2_8, "
     "int16_t *s3_2, int n", mas8_test);
+OIL_DEFINE_CLASS_FULL (mas8_across_u8, "uint8_t *d, uint8_t *s1_nx8, int sstr1, "
+    "int16_t *s2_8, int16_t *s3_2, int n", mas8_u8_test);
 
 void
 mas2_add_s16_ref(int16_t *d1, int16_t *s1, int16_t *s2, int16_t *s3_2,
@@ -421,6 +423,24 @@ mas8_u8_sym_l15_ref (uint8_t *d, const uint8_t *s1_np7,
 }
 OIL_DEFINE_IMPL_REF (mas8_u8_sym_l15_ref, mas8_u8_sym_l15);
 
+void
+mas8_across_u8_ref (uint8_t *d, uint8_t *s1_nx8, int sstr1,
+    int16_t *s2_8, int16_t *s3_2, int n)
+{
+  /* d[k] = CLAMP((s3_2[0] + sum_t s1_nx8[k + t*sstr1]*s2_8[t]) >> s3_2[1], 0, 255) */
+  int col, tap;
+  for (col = 0; col < n; col++) {
+    int acc = s3_2[0];  /* start from the additive offset */
+    for (tap = 0; tap < 8; tap++) {
+      /* byte at `tap` strides of sstr1 bytes past element `col` */
+      acc += OIL_GET(s1_nx8, col*sizeof(uint8_t) + tap*sstr1, uint8_t)*s2_8[tap];
+    }
+    acc >>= s3_2[1];
+    d[col] = CLAMP(acc,0,255);
+  }
+}
+OIL_DEFINE_IMPL_REF (mas8_across_u8_ref, mas8_across_u8);
+
 static void
 mas12_addc_rshift_decim2_u8_ref (uint8_t *dest, const uint8_t *src,
     const int16_t *taps, const int16_t *offsetshift, int n)
diff --git a/liboil/ref/wavelet.c b/liboil/ref/wavelet.c
index b7e8175..bb49eca 100644
--- a/liboil/ref/wavelet.c
+++ b/liboil/ref/wavelet.c
@@ -39,6 +39,20 @@ lshift_test (OilTest *test)
   data = (int16_t *)oil_test_get_source_data (test, OIL_ARG_SRC2);
   data[0] = 12;
 }
+ 
+static void
+combine4_test (OilTest *test)
+{
+  /* Fixed contents for the SRC5 (s5_6) test array: 4, 4, 4, 4, 8, 4. */
+  static const int16_t params[6] = { 4, 4, 4, 4, 8, 4 };
+  int16_t *s5;
+  int k;
+
+  s5 = (int16_t *)oil_test_get_source_data (test, OIL_ARG_SRC5);
+  for (k = 0; k < 6; k++) {
+    s5[k] = params[k];
+  }
+}
 
 OIL_DEFINE_CLASS_FULL (deinterleave,
     "int16_t *d_2xn, int16_t *s_2xn, int n", wavelet_test);
@@ -82,6 +96,15 @@ OIL_DEFINE_CLASS (multiply_and_acc_16xn_s16_u8, "int16_t *i1_16xn, int is1, "
     "int16_t *s1_16xn, int ss1, uint8_t *s2_16xn, int ss2, int n");
 OIL_DEFINE_CLASS (multiply_and_acc_24xn_s16_u8, "int16_t *i1_24xn, int is1, "
     "int16_t *s1_24xn, int ss1, uint8_t *s2_24xn, int ss2, int n");
+OIL_DEFINE_CLASS_FULL (combine4_8xn_u8, "uint8_t *d_8xn, int ds1, "
+    "uint8_t *s1_8xn, int ss1, uint8_t *s2_8xn, int ss2, uint8_t *s3_8xn, "
+    "int ss3, uint8_t *s4_8xn, int ss4, int16_t *s5_6, int n", combine4_test);
+OIL_DEFINE_CLASS_FULL (combine4_12xn_u8, "uint8_t *d_12xn, int ds1, "
+    "uint8_t *s1_12xn, int ss1, uint8_t *s2_12xn, int ss2, uint8_t *s3_12xn, "
+    "int ss3, uint8_t *s4_12xn, int ss4, int16_t *s5_6, int n", combine4_test);
+OIL_DEFINE_CLASS_FULL (combine4_16xn_u8, "uint8_t *d_16xn, int ds1, "
+    "uint8_t *s1_16xn, int ss1, uint8_t *s2_16xn, int ss2, uint8_t *s3_16xn, "
+    "int ss3, uint8_t *s4_16xn, int ss4, int16_t *s5_6, int n", combine4_test);
 
 void
 deinterleave_ref (int16_t *d_2xn, int16_t *s_2xn, int n)
@@ -629,3 +652,87 @@ multiply_and_acc_24xn_s16_u8_ref (int16_t *i1, int is1, int16_t *s1,
 OIL_DEFINE_IMPL_REF (multiply_and_acc_24xn_s16_u8_ref,
     multiply_and_acc_24xn_s16_u8);
 
+void
+combine4_8xn_u8_ref (uint8_t *d, int ds1,
+    uint8_t *s1, int ss1,
+    uint8_t *s2, int ss2,
+    uint8_t *s3, int ss3,
+    uint8_t *s4, int ss4,
+    int16_t *s5_6, int n)
+{
+  /* Per row: d[c] = (sum_k s5_6[k]*s(k+1)[c] + s5_6[4]) >> s5_6[5], 8 columns. */
+  int row, col;
+  for (row = 0; row < n; row++) {
+    for (col = 0; col < 8; col++) {
+      /* Weighted sum of the four sources, seeded with the offset. */
+      int acc = s5_6[4];
+      acc += s5_6[0] * s1[col];
+      acc += s5_6[1] * s2[col];
+      acc += s5_6[2] * s3[col];
+      acc += s5_6[3] * s4[col];
+      d[col] = acc >> s5_6[5];
+    }
+    /* Step every plane to its next row using its own stride. */
+    s1 += ss1; s2 += ss2;
+    s3 += ss3; s4 += ss4;
+    d += ds1;
+  }
+}
+OIL_DEFINE_IMPL_REF (combine4_8xn_u8_ref, combine4_8xn_u8);
+
+void
+combine4_12xn_u8_ref (uint8_t *d, int ds1,
+    uint8_t *s1, int ss1,
+    uint8_t *s2, int ss2,
+    uint8_t *s3, int ss3,
+    uint8_t *s4, int ss4,
+    int16_t *s5_6, int n)
+{
+  /* Per row: d[c] = (sum_k s5_6[k]*s(k+1)[c] + s5_6[4]) >> s5_6[5], 12 columns. */
+  int row, col;
+  for (row = 0; row < n; row++) {
+    for (col = 0; col < 12; col++) {
+      /* Weighted sum of the four sources, seeded with the offset. */
+      int acc = s5_6[4];
+      acc += s5_6[0] * s1[col];
+      acc += s5_6[1] * s2[col];
+      acc += s5_6[2] * s3[col];
+      acc += s5_6[3] * s4[col];
+      d[col] = acc >> s5_6[5];
+    }
+    /* Step every plane to its next row using its own stride. */
+    s1 += ss1; s2 += ss2;
+    s3 += ss3; s4 += ss4;
+    d += ds1;
+  }
+}
+OIL_DEFINE_IMPL_REF (combine4_12xn_u8_ref, combine4_12xn_u8);
+
+void
+combine4_16xn_u8_ref (uint8_t *d, int ds1,
+    uint8_t *s1, int ss1,
+    uint8_t *s2, int ss2,
+    uint8_t *s3, int ss3,
+    uint8_t *s4, int ss4,
+    int16_t *s5_6, int n)
+{
+  /* Per row: d[c] = (sum_k s5_6[k]*s(k+1)[c] + s5_6[4]) >> s5_6[5], 16 columns. */
+  int row, col;
+  for (row = 0; row < n; row++) {
+    for (col = 0; col < 16; col++) {
+      /* Weighted sum of the four sources, seeded with the offset. */
+      int acc = s5_6[4];
+      acc += s5_6[0] * s1[col];
+      acc += s5_6[1] * s2[col];
+      acc += s5_6[2] * s3[col];
+      acc += s5_6[3] * s4[col];
+      d[col] = acc >> s5_6[5];
+    }
+    /* Step every plane to its next row using its own stride. */
+    s1 += ss1; s2 += ss2;
+    s3 += ss3; s4 += ss4;
+    d += ds1;
+  }
+}
+OIL_DEFINE_IMPL_REF (combine4_16xn_u8_ref, combine4_16xn_u8);
+


More information about the Liboil-commit mailing list