[Liboil-commit] 3 commits - liboil/c liboil/i386 liboil/liboilarray.c liboil/liboilclasses.h liboil/liboilcpu-arm.c liboil/liboilfuncs-04.h liboil/liboilfuncs-doc.h liboil/liboilfuncs.h liboil/liboiltrampolines.c liboil/ref

David Schleef ds at kemper.freedesktop.org
Thu Dec 27 16:19:47 PST 2007


 liboil/c/Makefile.am       |    3 
 liboil/c/wavelet.c         |  327 ++-------------------------------------------
 liboil/i386/mas.c          |    7 
 liboil/i386/wavelet.c      |  159 +++++++++++++++++++++
 liboil/liboilarray.c       |   26 +++
 liboil/liboilclasses.h     |    4 
 liboil/liboilcpu-arm.c     |    4 
 liboil/liboilfuncs-04.h    |    4 
 liboil/liboilfuncs-doc.h   |    4 
 liboil/liboilfuncs.h       |   12 +
 liboil/liboiltrampolines.c |   40 +++++
 liboil/ref/wavelet.c       |   76 ++++++++++
 12 files changed, 352 insertions(+), 314 deletions(-)

New commits:
commit 84d2de5aafad0060c5a02f7cbab1bfe86c353bf0
Author: David Schleef <ds at ginger.bigkitten.com>
Date:   Thu Dec 27 16:19:27 2007 -0800

    Fix segfault when /proc isn't mounted on ARM.

diff --git a/liboil/liboilcpu-arm.c b/liboil/liboilcpu-arm.c
index 16041b4..12ac4c0 100644
--- a/liboil/liboilcpu-arm.c
+++ b/liboil/liboilcpu-arm.c
@@ -130,11 +130,13 @@ static void
 oil_cpu_detect_arm(void)
 {
 #ifdef __linux__
-  int arm_implementer, arm_arch;
+  int arm_implementer = 0;
+  int arm_arch;
   char *cpuinfo;
   char *s;
 
   cpuinfo = get_proc_cpuinfo();
+  if (cpuinfo == NULL) return;
 
   s = get_cpuinfo_line(cpuinfo, "CPU implementer");
   if (s) {
commit 8655b453a47f496dffa44250c6183d36ff701079
Author: David Schleef <ds at ginger.bigkitten.com>
Date:   Thu Dec 27 16:17:15 2007 -0800

    Add more classes needed by schro

diff --git a/liboil/c/Makefile.am b/liboil/c/Makefile.am
index e71312c..556134a 100644
--- a/liboil/c/Makefile.am
+++ b/liboil/c/Makefile.am
@@ -13,7 +13,8 @@ c_sources = \
 	ag_clamp.c \
 	composite.c \
 	copy.c \
-	swab.c
+	swab.c \
+	wavelet.c
 
 lib_c_la_SOURCES = \
         $(c_sources)
diff --git a/liboil/c/wavelet.c b/liboil/c/wavelet.c
index fdb6ca2..173aec0 100644
--- a/liboil/c/wavelet.c
+++ b/liboil/c/wavelet.c
@@ -4,320 +4,23 @@
 
 
 void
-split_53_nomix (int16_t *d_2xn, int16_t *s_2xn, int n)
+multiply_and_acc_6xn_s16_u8_c (int16_t *i1, int is1, int16_t *s1,
+    int ss1, uint8_t *s2, int ss2, int n)
 {
-  int i;
-    
-  if (n == 0) return;
-  /* predict */
-  for(i=1;i<n*2-2;i+=2){
-    d_2xn[i] = s_2xn[i] - ((s_2xn[i-1] + s_2xn[i+1]) >> 1);
-  }
-  d_2xn[n*2-1] = s_2xn[n*2-1] - s_2xn[n*2-2];
-
-  /* update */
-  d_2xn[0] = s_2xn[0] + (d_2xn[1] >> 1);
-  for(i=2;i<n*2;i+=2){
-    d_2xn[i] = s_2xn[i] + ((d_2xn[i-1] + d_2xn[i+1]) >> 2);
-  }
-} 
-OIL_DEFINE_IMPL (split_53_nomix, split_53);
-  
-#if 0
-void
-synth_53_nomix (int16_t *d_2xn, int16_t *s_2xn, int n)
-{ 
-  int i;
-    
-  /* predict */ 
-  i_n[0] -= i_n[1] >> 1;
-  for(i=2;i<n*2;i+=2){
-    i_n[i] -= (i_n[i-1] + i_n[i+1]) >> 2;
-  }
-  
-  /* update */
-  for(i=1;i<n*2-2;i+=2){
-    i_n[i] += (i_n[i+1] + i_n[i-1]) >> 1;
-  }
-  i_n[n*2-1] += i_n[n*2-2];
-}
-#endif
-
-
-void
-split_53_c (int16_t *d_2xn, int16_t *s_2xn, int n)
-{
-  int i;
-
-  if (n == 0) return;
-  if (n == 1) {
-    d_2xn[1] = s_2xn[1] - s_2xn[0];
-    d_2xn[0] = s_2xn[0] + (d_2xn[1] >> 1);
-  } else {
-    d_2xn[1] = s_2xn[1] - ((s_2xn[0] + s_2xn[2]) >> 1);
-    d_2xn[0] = s_2xn[0] + (d_2xn[1] >> 1);
-    d_2xn+=2;
-    s_2xn+=2;
-    for(i=0;i<(n*2-4)/2;i++){
-      d_2xn[1] = s_2xn[1] - ((s_2xn[0] + s_2xn[2]) >> 1);
-      d_2xn[0] = s_2xn[0] + ((d_2xn[-1] + d_2xn[1]) >> 2);
-      d_2xn+=2;
-      s_2xn+=2;
-    }
-    d_2xn[1] = s_2xn[1] - s_2xn[0];
-    d_2xn[0] = s_2xn[0] + ((d_2xn[-1] + d_2xn[1]) >> 2);
-  }
-}
-OIL_DEFINE_IMPL (split_53_c, split_53);
-
-void
-synth_53_c (int16_t *d_2xn, int16_t *s_2xn, int n)
-{
-  int i;
-
-  if (n == 0) return;
-  if (n == 1) {
-    d_2xn[0] = s_2xn[0] - (s_2xn[1] >> 1);
-    d_2xn[1] = s_2xn[1] + d_2xn[0];
-  } else {
-    d_2xn[0] = s_2xn[0] - (s_2xn[1] >> 1);
-    for(i=2;i<n*2-2;i+=2){
-      d_2xn[i] = s_2xn[i] - ((s_2xn[i-1] + s_2xn[i+1]) >> 2);
-      d_2xn[i-1] = s_2xn[i-1] + ((d_2xn[i] + d_2xn[i-2]) >> 1);
-    }
-    d_2xn[n*2-2] = s_2xn[n*2-2] - ((s_2xn[n*2-3] + s_2xn[n*2-1]) >> 2);
-    d_2xn[n*2-3] = s_2xn[n*2-3] + ((d_2xn[n*2-2] + d_2xn[n*2-4]) >> 1);
-    d_2xn[n*2-1] = s_2xn[n*2-1] + d_2xn[n*2-2];
-  }
-}
-OIL_DEFINE_IMPL (synth_53_c, synth_53);
-
-void
-deinterleave_c_1 (int16_t *d_2xn, int16_t *s_2xn, int n)
-{
-  int i;
-
-  for(i=0;i<n;i++) {
-    d_2xn[i] = s_2xn[2*i];
-    d_2xn[n + i] = s_2xn[2*i + 1];
-  }
-}
-OIL_DEFINE_IMPL (deinterleave_c_1, deinterleave);
-
-void
-deinterleave_asm (int16_t *d_2xn, int16_t *s_2xn, int n)
-{
-  int16_t *d2;
-
-  if (n == 0) return;
-
-  d2 = d_2xn + n;
-  while (n&1) {
-    d_2xn[0] = s_2xn[0];
-    d2[0] = s_2xn[1];
-    d_2xn++;
-    d2++;
-    s_2xn+=2;
-    n--;
-  }
-
-  asm volatile ("\n"
-      "  mov %3, %%ecx\n"
-      "  sub $2, %%ecx\n"
-      "1:\n"
-      "  movw (%1,%%ecx,4), %%ax\n"
-      "  movw %%ax, (%0,%%ecx,2)\n"
-      "  movw 2(%1,%%ecx,4), %%ax\n"
-      "  movw %%ax, (%2,%%ecx,2)\n"
-      "  movw 4(%1,%%ecx,4), %%ax\n"
-      "  movw %%ax, 2(%0,%%ecx,2)\n"
-      "  movw 6(%1,%%ecx,4), %%ax\n"
-      "  movw %%ax, 2(%2,%%ecx,2)\n"
-      "  sub $2, %%ecx\n"
-      "  jge 1b\n"
-      : "+r" (d_2xn), "+r" (s_2xn), "+r" (d2)
-      : "m" (n)
-      : "eax", "ecx");
-}
-OIL_DEFINE_IMPL (deinterleave_asm, deinterleave);
-
-void
-deinterleave_mmx (int16_t *d_2xn, int16_t *s_2xn, int n)
-{
-  int16_t *d2;
-
-  d2 = d_2xn + n;
-
-  while (n&3) {
-    d_2xn[0] = s_2xn[0];
-    d2[0] = s_2xn[1];
-    d_2xn++;
-    d2++;
-    s_2xn+=2;
-    n--;
-  }
-  if (n==0) return;
-
-  asm volatile ("\n"
-      "  xor %%ecx, %%ecx\n"
-      "1:\n"
-      "  movq (%1,%%ecx,4), %%mm0\n"
-      "  movq 8(%1,%%ecx,4), %%mm1\n"
-      "  pslld $16, %%mm0\n"
-      "  pslld $16, %%mm1\n"
-      "  psrad $16, %%mm0\n"
-      "  psrad $16, %%mm1\n"
-      "  packssdw %%mm1, %%mm0\n"
-      "  movq %%mm0, (%0,%%ecx,2)\n"
-      "  movq (%1,%%ecx,4), %%mm0\n"
-      "  movq 8(%1,%%ecx,4), %%mm1\n"
-      "  psrad $16, %%mm0\n"
-      "  psrad $16, %%mm1\n"
-      "  packssdw %%mm1, %%mm0\n"
-      "  movq %%mm0, (%2,%%ecx,2)\n"
-      "  add $4, %%ecx\n"
-      "  cmp %3, %%ecx\n"
-      "  jl 1b\n"
-      "  emms\n"
-      : "+r" (d_2xn), "+r" (s_2xn), "+r" (d2)
-      : "m" (n)
-      : "eax", "ecx");
-}
-OIL_DEFINE_IMPL (deinterleave_mmx, deinterleave);
-
-void
-deinterleave_mmx_2 (int16_t *d_2xn, int16_t *s_2xn, int n)
-{
-  int16_t *d2;
-
-  d2 = d_2xn + n;
-
-  while (n&3) {
-    d_2xn[0] = s_2xn[0];
-    d2[0] = s_2xn[1];
-    d_2xn++;
-    d2++;
-    s_2xn+=2;
-    n--;
-  }
-  if (n==0) return;
-
-  asm volatile ("\n"
-      "  xor %%ecx, %%ecx\n"
-      "1:\n"
-      "  pshufw $0xd8, (%1,%%ecx,4), %%mm0\n"
-      "  movd %%mm0, (%0,%%ecx,2)\n"
-      "  pshufw $0x8d, (%1,%%ecx,4), %%mm0\n"
-      "  movd %%mm0, (%2,%%ecx,2)\n"
-      "  add $2, %%ecx\n"
-      "  cmp %3, %%ecx\n"
-      "  jl 1b\n"
-      "  emms\n"
-      : "+r" (d_2xn), "+r" (s_2xn), "+r" (d2)
-      : "m" (n)
-      : "eax", "ecx");
-}
-OIL_DEFINE_IMPL (deinterleave_mmx_2, deinterleave);
-
-void
-deinterleave_mmx_3 (int16_t *d_2xn, int16_t *s_2xn, int n)
-{
-  int16_t *d2;
-
-  d2 = d_2xn + n;
-
-  while (n&3) {
-    d_2xn[0] = s_2xn[0];
-    d2[0] = s_2xn[1];
-    d_2xn++;
-    d2++;
-    s_2xn+=2;
-    n--;
-  }
-  if (n==0) return;
-
-  asm volatile ("\n"
-      "  xor %%ecx, %%ecx\n"
-      "1:\n"
-      "  movq (%1,%%ecx,4), %%mm1\n"
-      "  movq (%1,%%ecx,4), %%mm2\n"
-      "  movq 8(%1,%%ecx,4), %%mm0\n"
-      "  punpcklwd %%mm0, %%mm1\n"
-      "  punpckhwd %%mm0, %%mm2\n"
-      "  movq %%mm1, %%mm0\n"
-      "  punpcklwd %%mm2, %%mm0\n"
-      "  punpckhwd %%mm2, %%mm1\n"
-      "  movq %%mm0, (%0,%%ecx,2)\n"
-      "  movq %%mm1, (%2,%%ecx,2)\n"
-      "  add $4, %%ecx\n"
-      "  cmp %3, %%ecx\n"
-      "  jl 1b\n"
-      "  emms\n"
-      : "+r" (d_2xn), "+r" (s_2xn), "+r" (d2)
-      : "m" (n)
-      : "eax", "ecx");
-}
-OIL_DEFINE_IMPL (deinterleave_mmx_3, deinterleave);
-
-void
-deinterleave_mmx_4 (int16_t *d_2xn, int16_t *s_2xn, int n)
-{
-  int16_t *d2;
-
-  d2 = d_2xn + n;
-
-  while (n&7) {
-    d_2xn[0] = s_2xn[0];
-    d2[0] = s_2xn[1];
-    d_2xn++;
-    d2++;
-    s_2xn+=2;
-    n--;
-  }
-  if (n==0) return;
-
-  asm volatile ("\n"
-      "  xor %%ecx, %%ecx\n"
-      "1:\n"
-      "  movq (%1,%%ecx,4), %%mm1\n"
-      "  movq %%mm1, %%mm2\n"
-      "  movq 8(%1,%%ecx,4), %%mm0\n"
-      "   movq 16(%1,%%ecx,4), %%mm5\n"
-      "  punpcklwd %%mm0, %%mm1\n"
-      "   movq %%mm5, %%mm6\n"
-      "  punpckhwd %%mm0, %%mm2\n"
-      "   movq 24(%1,%%ecx,4), %%mm4\n"
-      "  movq %%mm1, %%mm0\n"
-      "   punpcklwd %%mm4, %%mm5\n"
-      "  punpcklwd %%mm2, %%mm0\n"
-      "   punpckhwd %%mm4, %%mm6\n"
-      "  punpckhwd %%mm2, %%mm1\n"
-      "   movq %%mm5, %%mm4\n"
-      "  movq %%mm0, (%0,%%ecx,2)\n"
-      "   punpcklwd %%mm6, %%mm4\n"
-      "  movq %%mm1, (%2,%%ecx,2)\n"
-      "   punpckhwd %%mm6, %%mm5\n"
-      "   movq %%mm4, 8(%0,%%ecx,2)\n"
-      "   movq %%mm5, 8(%2,%%ecx,2)\n"
-      "  add $8, %%ecx\n"
-      "  cmp %3, %%ecx\n"
-      "  jl 1b\n"
-      "  emms\n"
-      : "+r" (d_2xn), "+r" (s_2xn), "+r" (d2)
-      : "m" (n)
-      : "eax", "ecx");
-}
-OIL_DEFINE_IMPL (deinterleave_mmx_4, deinterleave);
-
-void
-interleave_c (int16_t *d_2xn, int16_t *s_2xn, int n)
-{
-  int i;
+  int j;
+  for(j=0;j<n;j++){
+    i1[0] += s1[0]*s2[0];
+    i1[1] += s1[1]*s2[1];
+    i1[2] += s1[2]*s2[2];
+    i1[3] += s1[3]*s2[3];
+    i1[4] += s1[4]*s2[4];
+    i1[5] += s1[5]*s2[5];
 
-  for(i=0;i<n;i++) {
-    d_2xn[2*i] = s_2xn[i];
-    d_2xn[2*i + 1] = s_2xn[n + i];
+    i1 = OIL_OFFSET(i1,is1);
+    s1 = OIL_OFFSET(s1,ss1);
+    s2 = OIL_OFFSET(s2,ss2);
   }
 }
-OIL_DEFINE_IMPL (interleave_c, interleave);
+OIL_DEFINE_IMPL (multiply_and_acc_6xn_s16_u8_c,
+    multiply_and_acc_6xn_s16_u8);
 
diff --git a/liboil/i386/wavelet.c b/liboil/i386/wavelet.c
index f766e40..114cc8d 100644
--- a/liboil/i386/wavelet.c
+++ b/liboil/i386/wavelet.c
@@ -1991,3 +1991,162 @@ lshift_s16_mmx_2(int16_t *d1, int16_t *s1, int16_t *s3_1, int n)
 OIL_DEFINE_IMPL_FULL (lshift_s16_mmx_2, lshift_s16, OIL_IMPL_FLAG_MMX);
 
 
+void
+multiply_and_acc_6xn_s16_u8_mmx (int16_t *i1, int is1, int16_t *s1,
+    int ss1, uint8_t *s2, int ss2, int n)
+{
+  /* FIXME this reads outside the arrays.  Bad. */
+  if (n==0) return;
+  __asm__ __volatile__ ("\n"
+      "  pxor %%mm7, %%mm7\n"
+      "1:\n"
+      "  movd 0(%2), %%mm0\n"
+      "  punpcklbw %%mm7, %%mm0\n"
+      "  pmullw 0(%1), %%mm0\n"
+      "  paddw 0(%0), %%mm0\n"
+      "  movq %%mm0, 0(%0)\n"
+      "   movd 4(%2), %%mm1\n"
+      "   punpcklbw %%mm7, %%mm1\n"
+      "   pmullw 8(%1), %%mm1\n"
+      "   paddw 8(%0), %%mm1\n"
+      "   movd %%mm1, 8(%0)\n"
+
+      "  addl %4, %0\n"
+      "  addl %5, %1\n"
+      "  addl %6, %2\n"
+      "  decl %3\n"
+      "  jnz 1b\n"
+      "  emms\n"
+      : "+r" (i1), "+r" (s1), "+r" (s2), "+r" (n)
+      : "m" (is1), "m" (ss1), "m" (ss2)
+      );
+}
+OIL_DEFINE_IMPL_FULL (multiply_and_acc_6xn_s16_u8_mmx,
+    multiply_and_acc_6xn_s16_u8, OIL_IMPL_FLAG_MMX);
+
+void
+multiply_and_acc_8xn_s16_u8_mmx (int16_t *i1, int is1, int16_t *s1,
+    int ss1, uint8_t *s2, int ss2, int n)
+{
+  if (n==0) return;
+  __asm__ __volatile__ ("\n"
+      "  pxor %%mm7, %%mm7\n"
+      "1:\n"
+      "  movd 0(%2), %%mm0\n"
+      "  punpcklbw %%mm7, %%mm0\n"
+      "  pmullw 0(%1), %%mm0\n"
+      "  paddw 0(%0), %%mm0\n"
+      "  movq %%mm0, 0(%0)\n"
+      "   movd 4(%2), %%mm1\n"
+      "   punpcklbw %%mm7, %%mm1\n"
+      "   pmullw 8(%1), %%mm1\n"
+      "   paddw 8(%0), %%mm1\n"
+      "   movq %%mm1, 8(%0)\n"
+
+      "  addl %4, %0\n"
+      "  addl %5, %1\n"
+      "  addl %6, %2\n"
+      "  decl %3\n"
+      "  jnz 1b\n"
+      "  emms\n"
+      : "+r" (i1), "+r" (s1), "+r" (s2), "+r" (n)
+      : "m" (is1), "m" (ss1), "m" (ss2)
+      );
+}
+OIL_DEFINE_IMPL_FULL (multiply_and_acc_8xn_s16_u8_mmx,
+    multiply_and_acc_8xn_s16_u8, OIL_IMPL_FLAG_MMX);
+
+void
+multiply_and_acc_16xn_s16_u8_mmx (int16_t *i1, int is1, int16_t *s1,
+    int ss1, uint8_t *s2, int ss2, int n)
+{
+  if (n==0) return;
+  __asm__ __volatile__ ("\n"
+      "  pxor %%mm7, %%mm7\n"
+      "1:\n"
+      "  movd 0(%2), %%mm0\n"
+      "  punpcklbw %%mm7, %%mm0\n"
+      "  pmullw 0(%1), %%mm0\n"
+      "  paddw 0(%0), %%mm0\n"
+      "  movq %%mm0, 0(%0)\n"
+      "   movd 4(%2), %%mm1\n"
+      "   punpcklbw %%mm7, %%mm1\n"
+      "   pmullw 8(%1), %%mm1\n"
+      "   paddw 8(%0), %%mm1\n"
+      "   movq %%mm1, 8(%0)\n"
+      "    movd 8(%2), %%mm2\n"
+      "    punpcklbw %%mm7, %%mm2\n"
+      "    pmullw 16(%1), %%mm2\n"
+      "    paddw 16(%0), %%mm2\n"
+      "    movq %%mm2, 16(%0)\n"
+      "     movd 12(%2), %%mm2\n"
+      "     punpcklbw %%mm7, %%mm2\n"
+      "     pmullw 24(%1), %%mm2\n"
+      "     paddw 24(%0), %%mm2\n"
+      "     movq %%mm2, 24(%0)\n"
+
+      "  addl %4, %0\n"
+      "  addl %5, %1\n"
+      "  addl %6, %2\n"
+      "  decl %3\n"
+      "  jnz 1b\n"
+      "  emms\n"
+      : "+r" (i1), "+r" (s1), "+r" (s2), "+r" (n)
+      : "m" (is1), "m" (ss1), "m" (ss2)
+      );
+}
+OIL_DEFINE_IMPL_FULL (multiply_and_acc_16xn_s16_u8_mmx,
+    multiply_and_acc_16xn_s16_u8, OIL_IMPL_FLAG_MMX);
+
+void
+multiply_and_acc_24xn_s16_u8_mmx (int16_t *i1, int is1, int16_t *s1,
+    int ss1, uint8_t *s2, int ss2, int n)
+{
+  if (n==0) return;
+  __asm__ __volatile__ ("\n"
+      "  pxor %%mm7, %%mm7\n"
+      "1:\n"
+      "  movd 0(%2), %%mm0\n"
+      "  punpcklbw %%mm7, %%mm0\n"
+      "  pmullw 0(%1), %%mm0\n"
+      "  paddw 0(%0), %%mm0\n"
+      "  movq %%mm0, 0(%0)\n"
+      "   movd 4(%2), %%mm1\n"
+      "   punpcklbw %%mm7, %%mm1\n"
+      "   pmullw 8(%1), %%mm1\n"
+      "   paddw 8(%0), %%mm1\n"
+      "   movq %%mm1, 8(%0)\n"
+      "    movd 8(%2), %%mm2\n"
+      "    punpcklbw %%mm7, %%mm2\n"
+      "    pmullw 16(%1), %%mm2\n"
+      "    paddw 16(%0), %%mm2\n"
+      "    movq %%mm2, 16(%0)\n"
+      "     movd 12(%2), %%mm2\n"
+      "     punpcklbw %%mm7, %%mm2\n"
+      "     pmullw 24(%1), %%mm2\n"
+      "     paddw 24(%0), %%mm2\n"
+      "     movq %%mm2, 24(%0)\n"
+      " movd 16(%2), %%mm2\n"
+      " punpcklbw %%mm7, %%mm2\n"
+      " pmullw 32(%1), %%mm2\n"
+      " paddw 32(%0), %%mm2\n"
+      " movq %%mm2, 32(%0)\n"
+      "  movd 20(%2), %%mm2\n"
+      "  punpcklbw %%mm7, %%mm2\n"
+      "  pmullw 40(%1), %%mm2\n"
+      "  paddw 40(%0), %%mm2\n"
+      "  movq %%mm2, 40(%0)\n"
+
+      "  addl %4, %0\n"
+      "  addl %5, %1\n"
+      "  addl %6, %2\n"
+      "  decl %3\n"
+      "  jnz 1b\n"
+      "  emms\n"
+      : "+r" (i1), "+r" (s1), "+r" (s2), "+r" (n)
+      : "m" (is1), "m" (ss1), "m" (ss2)
+      );
+}
+OIL_DEFINE_IMPL_FULL (multiply_and_acc_24xn_s16_u8_mmx,
+    multiply_and_acc_24xn_s16_u8, OIL_IMPL_FLAG_MMX);
+
diff --git a/liboil/liboilarray.c b/liboil/liboilarray.c
index 0a28df0..6f38012 100644
--- a/liboil/liboilarray.c
+++ b/liboil/liboilarray.c
@@ -263,6 +263,10 @@ extern OilFunctionClass _oil_function_class_mix_u8;
 extern OilFunctionClass _oil_function_class_mt19937;
 extern OilFunctionClass _oil_function_class_mult8x8_s16;
 extern OilFunctionClass _oil_function_class_multiply_and_acc_12xn_s16_u8;
+extern OilFunctionClass _oil_function_class_multiply_and_acc_16xn_s16_u8;
+extern OilFunctionClass _oil_function_class_multiply_and_acc_24xn_s16_u8;
+extern OilFunctionClass _oil_function_class_multiply_and_acc_6xn_s16_u8;
+extern OilFunctionClass _oil_function_class_multiply_and_acc_8xn_s16_u8;
 extern OilFunctionClass _oil_function_class_multiply_and_add_s16;
 extern OilFunctionClass _oil_function_class_multiply_and_add_s16_u8;
 extern OilFunctionClass _oil_function_class_multiply_f32;
@@ -661,6 +665,10 @@ OilFunctionClass *_oil_function_class_array[] = {
   &_oil_function_class_mt19937,
   &_oil_function_class_mult8x8_s16,
   &_oil_function_class_multiply_and_acc_12xn_s16_u8,
+  &_oil_function_class_multiply_and_acc_16xn_s16_u8,
+  &_oil_function_class_multiply_and_acc_24xn_s16_u8,
+  &_oil_function_class_multiply_and_acc_6xn_s16_u8,
+  &_oil_function_class_multiply_and_acc_8xn_s16_u8,
   &_oil_function_class_multiply_and_add_s16,
   &_oil_function_class_multiply_and_add_s16_u8,
   &_oil_function_class_multiply_f32,
@@ -1545,6 +1553,15 @@ extern OilFunctionImpl _oil_function_impl_mult8x8_s16_mmx;
 extern OilFunctionImpl _oil_function_impl_mult8x8_s16_ref;
 extern OilFunctionImpl _oil_function_impl_multiply_and_acc_12xn_s16_u8_mmx;
 extern OilFunctionImpl _oil_function_impl_multiply_and_acc_12xn_s16_u8_ref;
+extern OilFunctionImpl _oil_function_impl_multiply_and_acc_16xn_s16_u8_mmx;
+extern OilFunctionImpl _oil_function_impl_multiply_and_acc_16xn_s16_u8_ref;
+extern OilFunctionImpl _oil_function_impl_multiply_and_acc_24xn_s16_u8_mmx;
+extern OilFunctionImpl _oil_function_impl_multiply_and_acc_24xn_s16_u8_ref;
+extern OilFunctionImpl _oil_function_impl_multiply_and_acc_6xn_s16_u8_c;
+extern OilFunctionImpl _oil_function_impl_multiply_and_acc_6xn_s16_u8_mmx;
+extern OilFunctionImpl _oil_function_impl_multiply_and_acc_6xn_s16_u8_ref;
+extern OilFunctionImpl _oil_function_impl_multiply_and_acc_8xn_s16_u8_mmx;
+extern OilFunctionImpl _oil_function_impl_multiply_and_acc_8xn_s16_u8_ref;
 extern OilFunctionImpl _oil_function_impl_multiply_and_add_s16_mmx;
 extern OilFunctionImpl _oil_function_impl_multiply_and_add_s16_ref;
 extern OilFunctionImpl _oil_function_impl_multiply_and_add_s16_u8_mmx;
@@ -2607,6 +2624,15 @@ OilFunctionImpl *_oil_function_impl_array[] = {
   &_oil_function_impl_mult8x8_s16_ref,
   &_oil_function_impl_multiply_and_acc_12xn_s16_u8_mmx,
   &_oil_function_impl_multiply_and_acc_12xn_s16_u8_ref,
+  &_oil_function_impl_multiply_and_acc_16xn_s16_u8_mmx,
+  &_oil_function_impl_multiply_and_acc_16xn_s16_u8_ref,
+  &_oil_function_impl_multiply_and_acc_24xn_s16_u8_mmx,
+  &_oil_function_impl_multiply_and_acc_24xn_s16_u8_ref,
+  &_oil_function_impl_multiply_and_acc_6xn_s16_u8_c,
+  &_oil_function_impl_multiply_and_acc_6xn_s16_u8_mmx,
+  &_oil_function_impl_multiply_and_acc_6xn_s16_u8_ref,
+  &_oil_function_impl_multiply_and_acc_8xn_s16_u8_mmx,
+  &_oil_function_impl_multiply_and_acc_8xn_s16_u8_ref,
   &_oil_function_impl_multiply_and_add_s16_mmx,
   &_oil_function_impl_multiply_and_add_s16_ref,
   &_oil_function_impl_multiply_and_add_s16_u8_mmx,
diff --git a/liboil/liboilclasses.h b/liboil/liboilclasses.h
index f05e874..2ec8640 100644
--- a/liboil/liboilclasses.h
+++ b/liboil/liboilclasses.h
@@ -297,6 +297,10 @@ OIL_DECLARE_CLASS(mix_u8);
 OIL_DECLARE_CLASS(mt19937);
 OIL_DECLARE_CLASS(mult8x8_s16);
 OIL_DECLARE_CLASS(multiply_and_acc_12xn_s16_u8);
+OIL_DECLARE_CLASS(multiply_and_acc_16xn_s16_u8);
+OIL_DECLARE_CLASS(multiply_and_acc_24xn_s16_u8);
+OIL_DECLARE_CLASS(multiply_and_acc_6xn_s16_u8);
+OIL_DECLARE_CLASS(multiply_and_acc_8xn_s16_u8);
 OIL_DECLARE_CLASS(multiply_and_add_s16);
 OIL_DECLARE_CLASS(multiply_and_add_s16_u8);
 OIL_DECLARE_CLASS(multiply_f32);
diff --git a/liboil/liboilfuncs-04.h b/liboil/liboilfuncs-04.h
index a4b3a13..e8512e8 100644
--- a/liboil/liboilfuncs-04.h
+++ b/liboil/liboilfuncs-04.h
@@ -297,6 +297,10 @@ void oil_mix_u8 (uint8_t * dest, const uint8_t * src1, const uint8_t * src2, con
 void oil_mt19937 (uint32_t * d_624, uint32_t * i_624);
 void oil_mult8x8_s16 (int16_t * d_8x8, const int16_t * s1_8x8, const int16_t * s2_8x8, int ds, int ss1, int ss2);
 void oil_multiply_and_acc_12xn_s16_u8 (int16_t * i1_12xn, int is1, const int16_t * s1_12xn, int ss1, const uint8_t * s2_12xn, int ss2, int n);
+void oil_multiply_and_acc_16xn_s16_u8 (int16_t * i1_16xn, int is1, const int16_t * s1_16xn, int ss1, const uint8_t * s2_16xn, int ss2, int n);
+void oil_multiply_and_acc_24xn_s16_u8 (int16_t * i1_24xn, int is1, const int16_t * s1_24xn, int ss1, const uint8_t * s2_24xn, int ss2, int n);
+void oil_multiply_and_acc_6xn_s16_u8 (int16_t * i1_6xn, int is1, const int16_t * s1_6xn, int ss1, const uint8_t * s2_6xn, int ss2, int n);
+void oil_multiply_and_acc_8xn_s16_u8 (int16_t * i1_8xn, int is1, const int16_t * s1_8xn, int ss1, const uint8_t * s2_8xn, int ss2, int n);
 void oil_multiply_and_add_s16 (int16_t * d, const int16_t * src1, const int16_t * src2, const int16_t * src3, int n);
 void oil_multiply_and_add_s16_u8 (int16_t * d, const int16_t * src1, const int16_t * src2, const uint8_t * src3, int n);
 void oil_multiply_f32 (float * d, const float * s1, const float * s2, int n);
diff --git a/liboil/liboilfuncs-doc.h b/liboil/liboilfuncs-doc.h
index 21f20cd..7e25d90 100644
--- a/liboil/liboilfuncs-doc.h
+++ b/liboil/liboilfuncs-doc.h
@@ -261,6 +261,10 @@ void oil_mix_u8 (uint8_t * dest, const uint8_t * src1, const uint8_t * src2, con
 void oil_mt19937 (uint32_t * d_624, uint32_t * i_624);
 void oil_mult8x8_s16 (int16_t * d_8x8, const int16_t * s1_8x8, const int16_t * s2_8x8, int ds, int ss1, int ss2);
 void oil_multiply_and_acc_12xn_s16_u8 (int16_t * i1_12xn, int is1, const int16_t * s1_12xn, int ss1, const uint8_t * s2_12xn, int ss2, int n);
+void oil_multiply_and_acc_16xn_s16_u8 (int16_t * i1_16xn, int is1, const int16_t * s1_16xn, int ss1, const uint8_t * s2_16xn, int ss2, int n);
+void oil_multiply_and_acc_24xn_s16_u8 (int16_t * i1_24xn, int is1, const int16_t * s1_24xn, int ss1, const uint8_t * s2_24xn, int ss2, int n);
+void oil_multiply_and_acc_6xn_s16_u8 (int16_t * i1_6xn, int is1, const int16_t * s1_6xn, int ss1, const uint8_t * s2_6xn, int ss2, int n);
+void oil_multiply_and_acc_8xn_s16_u8 (int16_t * i1_8xn, int is1, const int16_t * s1_8xn, int ss1, const uint8_t * s2_8xn, int ss2, int n);
 void oil_multiply_and_add_s16 (int16_t * d, const int16_t * src1, const int16_t * src2, const int16_t * src3, int n);
 void oil_multiply_and_add_s16_u8 (int16_t * d, const int16_t * src1, const int16_t * src2, const uint8_t * src3, int n);
 void oil_multiply_f32 (float * d, const float * s1, const float * s2, int n);
diff --git a/liboil/liboilfuncs.h b/liboil/liboilfuncs.h
index 39c8382..d841485 100644
--- a/liboil/liboilfuncs.h
+++ b/liboil/liboilfuncs.h
@@ -819,6 +819,18 @@ typedef void (*_oil_type_mult8x8_s16)(int16_t * d_8x8, const int16_t * s1_8x8, c
 extern OilFunctionClass *oil_function_class_ptr_multiply_and_acc_12xn_s16_u8;
 typedef void (*_oil_type_multiply_and_acc_12xn_s16_u8)(int16_t * i1_12xn, int is1, const int16_t * s1_12xn, int ss1, const uint8_t * s2_12xn, int ss2, int n);
 #define oil_multiply_and_acc_12xn_s16_u8 ((_oil_type_multiply_and_acc_12xn_s16_u8)(*(void **)oil_function_class_ptr_multiply_and_acc_12xn_s16_u8))
+extern OilFunctionClass *oil_function_class_ptr_multiply_and_acc_16xn_s16_u8;
+typedef void (*_oil_type_multiply_and_acc_16xn_s16_u8)(int16_t * i1_16xn, int is1, const int16_t * s1_16xn, int ss1, const uint8_t * s2_16xn, int ss2, int n);
+#define oil_multiply_and_acc_16xn_s16_u8 ((_oil_type_multiply_and_acc_16xn_s16_u8)(*(void **)oil_function_class_ptr_multiply_and_acc_16xn_s16_u8))
+extern OilFunctionClass *oil_function_class_ptr_multiply_and_acc_24xn_s16_u8;
+typedef void (*_oil_type_multiply_and_acc_24xn_s16_u8)(int16_t * i1_24xn, int is1, const int16_t * s1_24xn, int ss1, const uint8_t * s2_24xn, int ss2, int n);
+#define oil_multiply_and_acc_24xn_s16_u8 ((_oil_type_multiply_and_acc_24xn_s16_u8)(*(void **)oil_function_class_ptr_multiply_and_acc_24xn_s16_u8))
+extern OilFunctionClass *oil_function_class_ptr_multiply_and_acc_6xn_s16_u8;
+typedef void (*_oil_type_multiply_and_acc_6xn_s16_u8)(int16_t * i1_6xn, int is1, const int16_t * s1_6xn, int ss1, const uint8_t * s2_6xn, int ss2, int n);
+#define oil_multiply_and_acc_6xn_s16_u8 ((_oil_type_multiply_and_acc_6xn_s16_u8)(*(void **)oil_function_class_ptr_multiply_and_acc_6xn_s16_u8))
+extern OilFunctionClass *oil_function_class_ptr_multiply_and_acc_8xn_s16_u8;
+typedef void (*_oil_type_multiply_and_acc_8xn_s16_u8)(int16_t * i1_8xn, int is1, const int16_t * s1_8xn, int ss1, const uint8_t * s2_8xn, int ss2, int n);
+#define oil_multiply_and_acc_8xn_s16_u8 ((_oil_type_multiply_and_acc_8xn_s16_u8)(*(void **)oil_function_class_ptr_multiply_and_acc_8xn_s16_u8))
 extern OilFunctionClass *oil_function_class_ptr_multiply_and_add_s16;
 typedef void (*_oil_type_multiply_and_add_s16)(int16_t * d, const int16_t * src1, const int16_t * src2, const int16_t * src3, int n);
 #define oil_multiply_and_add_s16 ((_oil_type_multiply_and_add_s16)(*(void **)oil_function_class_ptr_multiply_and_add_s16))
diff --git a/liboil/liboiltrampolines.c b/liboil/liboiltrampolines.c
index d48ccfa..ccc7e4e 100644
--- a/liboil/liboiltrampolines.c
+++ b/liboil/liboiltrampolines.c
@@ -2641,6 +2641,46 @@ oil_multiply_and_acc_12xn_s16_u8 (int16_t * i1_12xn, int is1, const int16_t * s1
   ((void (*)(int16_t * i1_12xn, int is1, const int16_t * s1_12xn, int ss1, const uint8_t * s2_12xn, int ss2, int n))(_oil_function_class_multiply_and_acc_12xn_s16_u8.func))(i1_12xn, is1, s1_12xn, ss1, s2_12xn, ss2, n);
 }
 
+#undef oil_multiply_and_acc_16xn_s16_u8
+void
+oil_multiply_and_acc_16xn_s16_u8 (int16_t * i1_16xn, int is1, const int16_t * s1_16xn, int ss1, const uint8_t * s2_16xn, int ss2, int n)
+{
+  if (_oil_function_class_multiply_and_acc_16xn_s16_u8.func == NULL) {
+    oil_class_optimize (&_oil_function_class_multiply_and_acc_16xn_s16_u8);
+  }
+  ((void (*)(int16_t * i1_16xn, int is1, const int16_t * s1_16xn, int ss1, const uint8_t * s2_16xn, int ss2, int n))(_oil_function_class_multiply_and_acc_16xn_s16_u8.func))(i1_16xn, is1, s1_16xn, ss1, s2_16xn, ss2, n);
+}
+
+#undef oil_multiply_and_acc_24xn_s16_u8
+void
+oil_multiply_and_acc_24xn_s16_u8 (int16_t * i1_24xn, int is1, const int16_t * s1_24xn, int ss1, const uint8_t * s2_24xn, int ss2, int n)
+{
+  if (_oil_function_class_multiply_and_acc_24xn_s16_u8.func == NULL) {
+    oil_class_optimize (&_oil_function_class_multiply_and_acc_24xn_s16_u8);
+  }
+  ((void (*)(int16_t * i1_24xn, int is1, const int16_t * s1_24xn, int ss1, const uint8_t * s2_24xn, int ss2, int n))(_oil_function_class_multiply_and_acc_24xn_s16_u8.func))(i1_24xn, is1, s1_24xn, ss1, s2_24xn, ss2, n);
+}
+
+#undef oil_multiply_and_acc_6xn_s16_u8
+void
+oil_multiply_and_acc_6xn_s16_u8 (int16_t * i1_6xn, int is1, const int16_t * s1_6xn, int ss1, const uint8_t * s2_6xn, int ss2, int n)
+{
+  if (_oil_function_class_multiply_and_acc_6xn_s16_u8.func == NULL) {
+    oil_class_optimize (&_oil_function_class_multiply_and_acc_6xn_s16_u8);
+  }
+  ((void (*)(int16_t * i1_6xn, int is1, const int16_t * s1_6xn, int ss1, const uint8_t * s2_6xn, int ss2, int n))(_oil_function_class_multiply_and_acc_6xn_s16_u8.func))(i1_6xn, is1, s1_6xn, ss1, s2_6xn, ss2, n);
+}
+
+#undef oil_multiply_and_acc_8xn_s16_u8
+void
+oil_multiply_and_acc_8xn_s16_u8 (int16_t * i1_8xn, int is1, const int16_t * s1_8xn, int ss1, const uint8_t * s2_8xn, int ss2, int n)
+{
+  if (_oil_function_class_multiply_and_acc_8xn_s16_u8.func == NULL) {
+    oil_class_optimize (&_oil_function_class_multiply_and_acc_8xn_s16_u8);
+  }
+  ((void (*)(int16_t * i1_8xn, int is1, const int16_t * s1_8xn, int ss1, const uint8_t * s2_8xn, int ss2, int n))(_oil_function_class_multiply_and_acc_8xn_s16_u8.func))(i1_8xn, is1, s1_8xn, ss1, s2_8xn, ss2, n);
+}
+
 #undef oil_multiply_and_add_s16
 void
 oil_multiply_and_add_s16 (int16_t * d, const int16_t * src1, const int16_t * src2, const int16_t * src3, int n)
diff --git a/liboil/ref/wavelet.c b/liboil/ref/wavelet.c
index a8a41c9..b7e8175 100644
--- a/liboil/ref/wavelet.c
+++ b/liboil/ref/wavelet.c
@@ -72,8 +72,16 @@ OIL_DEFINE_CLASS (multiply_and_add_s16,
     "int16_t *d, int16_t *src1, int16_t *src2, int16_t *src3, int n");
 OIL_DEFINE_CLASS (multiply_and_add_s16_u8,
     "int16_t *d, int16_t *src1, int16_t *src2, uint8_t *src3, int n");
+OIL_DEFINE_CLASS (multiply_and_acc_6xn_s16_u8, "int16_t *i1_6xn, int is1, "
+    "int16_t *s1_6xn, int ss1, uint8_t *s2_6xn, int ss2, int n");
+OIL_DEFINE_CLASS (multiply_and_acc_8xn_s16_u8, "int16_t *i1_8xn, int is1, "
+    "int16_t *s1_8xn, int ss1, uint8_t *s2_8xn, int ss2, int n");
 OIL_DEFINE_CLASS (multiply_and_acc_12xn_s16_u8, "int16_t *i1_12xn, int is1, "
     "int16_t *s1_12xn, int ss1, uint8_t *s2_12xn, int ss2, int n");
+OIL_DEFINE_CLASS (multiply_and_acc_16xn_s16_u8, "int16_t *i1_16xn, int is1, "
+    "int16_t *s1_16xn, int ss1, uint8_t *s2_16xn, int ss2, int n");
+OIL_DEFINE_CLASS (multiply_and_acc_24xn_s16_u8, "int16_t *i1_24xn, int is1, "
+    "int16_t *s1_24xn, int ss1, uint8_t *s2_24xn, int ss2, int n");
 
 void
 deinterleave_ref (int16_t *d_2xn, int16_t *s_2xn, int n)
@@ -537,6 +545,40 @@ multiply_and_add_s16_u8_ref (int16_t *d, int16_t *src1, int16_t *src2,
 OIL_DEFINE_IMPL_REF (multiply_and_add_s16_u8_ref, multiply_and_add_s16_u8);
 
 void
+multiply_and_acc_6xn_s16_u8_ref (int16_t *i1, int is1, int16_t *s1,
+    int ss1, uint8_t *s2, int ss2, int n)
+{
+  int i, j;
+  for(j=0;j<n;j++){
+    for(i=0;i<6;i++){
+      i1[i] += s1[i]*s2[i];
+    }
+    i1 = OIL_OFFSET(i1,is1);
+    s1 = OIL_OFFSET(s1,ss1);
+    s2 = OIL_OFFSET(s2,ss2);
+  }
+}
+OIL_DEFINE_IMPL_REF (multiply_and_acc_6xn_s16_u8_ref,
+    multiply_and_acc_6xn_s16_u8);
+
+void
+multiply_and_acc_8xn_s16_u8_ref (int16_t *i1, int is1, int16_t *s1,
+    int ss1, uint8_t *s2, int ss2, int n)
+{
+  int i, j;
+  for(j=0;j<n;j++){
+    for(i=0;i<8;i++){
+      i1[i] += s1[i]*s2[i];
+    }
+    i1 = OIL_OFFSET(i1,is1);
+    s1 = OIL_OFFSET(s1,ss1);
+    s2 = OIL_OFFSET(s2,ss2);
+  }
+}
+OIL_DEFINE_IMPL_REF (multiply_and_acc_8xn_s16_u8_ref,
+    multiply_and_acc_8xn_s16_u8);
+
+void
 multiply_and_acc_12xn_s16_u8_ref (int16_t *i1, int is1, int16_t *s1,
     int ss1, uint8_t *s2, int ss2, int n)
 {
@@ -553,3 +595,37 @@ multiply_and_acc_12xn_s16_u8_ref (int16_t *i1, int is1, int16_t *s1,
 OIL_DEFINE_IMPL_REF (multiply_and_acc_12xn_s16_u8_ref,
     multiply_and_acc_12xn_s16_u8);
 
+void
+multiply_and_acc_16xn_s16_u8_ref (int16_t *i1, int is1, int16_t *s1,
+    int ss1, uint8_t *s2, int ss2, int n)
+{
+  int i, j;
+  for(j=0;j<n;j++){
+    for(i=0;i<16;i++){
+      i1[i] += s1[i]*s2[i];
+    }
+    i1 = OIL_OFFSET(i1,is1);
+    s1 = OIL_OFFSET(s1,ss1);
+    s2 = OIL_OFFSET(s2,ss2);
+  }
+}
+OIL_DEFINE_IMPL_REF (multiply_and_acc_16xn_s16_u8_ref,
+    multiply_and_acc_16xn_s16_u8);
+
+void
+multiply_and_acc_24xn_s16_u8_ref (int16_t *i1, int is1, int16_t *s1,
+    int ss1, uint8_t *s2, int ss2, int n)
+{
+  int i, j;
+  for(j=0;j<n;j++){
+    for(i=0;i<24;i++){
+      i1[i] += s1[i]*s2[i];
+    }
+    i1 = OIL_OFFSET(i1,is1);
+    s1 = OIL_OFFSET(s1,ss1);
+    s2 = OIL_OFFSET(s2,ss2);
+  }
+}
+OIL_DEFINE_IMPL_REF (multiply_and_acc_24xn_s16_u8_ref,
+    multiply_and_acc_24xn_s16_u8);
+
commit 32b26088358267d84b90171eeeb781d5702ac288
Author: David Schleef <ds at ginger.bigkitten.com>
Date:   Thu Dec 27 16:13:56 2007 -0800

    Fix some missing n==0 checks

diff --git a/liboil/i386/mas.c b/liboil/i386/mas.c
index 4347d90..ebce3f6 100644
--- a/liboil/i386/mas.c
+++ b/liboil/i386/mas.c
@@ -21,6 +21,7 @@ mas10_u8_mmx (uint8_t *d, const uint8_t *s1_np9, const int16_t *s2_10,
     n--;
   }
 
+  if (n == 0) return;
   n>>=2;
   __asm__ __volatile__("\n"
       "  pxor %%mm7, %%mm7\n"
@@ -101,6 +102,7 @@ mas10_u8_mmx_2 (uint8_t *d, const uint8_t *s1_np9, const int16_t *s2_10,
     ptr[4*j + 3] = s2_10[j];
   }
 
+  if (n == 0) return;
   n>>=2;
   __asm__ __volatile__("\n"
       "  pxor %%mm7, %%mm7\n"
@@ -168,6 +170,7 @@ mas10_u8_mmx_3 (uint8_t *d, const uint8_t *s1_np9, const int16_t *s2_10,
     n--;
   }
 
+  if (n == 0) return;
   n>>=2;
   __asm__ __volatile__("\n"
       "  pxor %%mm7, %%mm7\n"
@@ -274,6 +277,7 @@ void
 mas10_u8_mmx_4 (uint8_t *d, const uint8_t *s1_np9, const int16_t *s2_10,
         const int16_t *s3_2, int n)
 {
+  if (n == 0) return;
   __asm__ __volatile__("\n"
       "  pxor %%mm7, %%mm7\n"
 
@@ -342,6 +346,7 @@ mas8_u8_mmx_3 (uint8_t *d, const uint8_t *s1_np7, const int16_t *s2_8,
     n--;
   }
 
+  if (n == 0) return;
   n>>=2;
   __asm__ __volatile__("\n"
       "  pxor %%mm7, %%mm7\n"
@@ -428,6 +433,7 @@ void
 mas8_u8_mmx_4 (uint8_t *d, const uint8_t *s1_np7, const int16_t *s2_8,
         const int16_t *s3_2, int n)
 {
+  if (n == 0) return;
   __asm__ __volatile__("\n"
       "  pxor %%mm7, %%mm7\n"
 
@@ -490,6 +496,7 @@ mas8_u8_sym_mmx_3 (uint8_t *d, const uint8_t *s1_np7, const int16_t *s2_8,
     n--;
   }
 
+  if (n == 0) return;
   n>>=2;
   __asm__ __volatile__("\n"
       "  pxor %%mm7, %%mm7\n"


More information about the Liboil-commit mailing list