[Liboil-commit] 3 commits - liboil/c liboil/i386 liboil/liboilarray.c liboil/liboilclasses.h liboil/liboilcpu-arm.c liboil/liboilfuncs-04.h liboil/liboilfuncs-doc.h liboil/liboilfuncs.h liboil/liboiltrampolines.c liboil/ref
David Schleef
ds at kemper.freedesktop.org
Thu Dec 27 16:19:47 PST 2007
liboil/c/Makefile.am | 3
liboil/c/wavelet.c | 327 ++-------------------------------------------
liboil/i386/mas.c | 7
liboil/i386/wavelet.c | 159 +++++++++++++++++++++
liboil/liboilarray.c | 26 +++
liboil/liboilclasses.h | 4
liboil/liboilcpu-arm.c | 4
liboil/liboilfuncs-04.h | 4
liboil/liboilfuncs-doc.h | 4
liboil/liboilfuncs.h | 12 +
liboil/liboiltrampolines.c | 40 +++++
liboil/ref/wavelet.c | 76 ++++++++++
12 files changed, 352 insertions(+), 314 deletions(-)
New commits:
commit 84d2de5aafad0060c5a02f7cbab1bfe86c353bf0
Author: David Schleef <ds at ginger.bigkitten.com>
Date: Thu Dec 27 16:19:27 2007 -0800
Fix segfault when /proc isn't mounted on ARM.
diff --git a/liboil/liboilcpu-arm.c b/liboil/liboilcpu-arm.c
index 16041b4..12ac4c0 100644
--- a/liboil/liboilcpu-arm.c
+++ b/liboil/liboilcpu-arm.c
@@ -130,11 +130,13 @@ static void
oil_cpu_detect_arm(void)
{
#ifdef __linux__
- int arm_implementer, arm_arch;
+ int arm_implementer = 0;
+ int arm_arch;
char *cpuinfo;
char *s;
cpuinfo = get_proc_cpuinfo();
+ if (cpuinfo == NULL) return;
s = get_cpuinfo_line(cpuinfo, "CPU implementer");
if (s) {
commit 8655b453a47f496dffa44250c6183d36ff701079
Author: David Schleef <ds at ginger.bigkitten.com>
Date: Thu Dec 27 16:17:15 2007 -0800
Add more classes needed by schro
diff --git a/liboil/c/Makefile.am b/liboil/c/Makefile.am
index e71312c..556134a 100644
--- a/liboil/c/Makefile.am
+++ b/liboil/c/Makefile.am
@@ -13,7 +13,8 @@ c_sources = \
ag_clamp.c \
composite.c \
copy.c \
- swab.c
+ swab.c \
+ wavelet.c
lib_c_la_SOURCES = \
$(c_sources)
diff --git a/liboil/c/wavelet.c b/liboil/c/wavelet.c
index fdb6ca2..173aec0 100644
--- a/liboil/c/wavelet.c
+++ b/liboil/c/wavelet.c
@@ -4,320 +4,23 @@
void
-split_53_nomix (int16_t *d_2xn, int16_t *s_2xn, int n)
+multiply_and_acc_6xn_s16_u8_c (int16_t *i1, int is1, int16_t *s1,
+ int ss1, uint8_t *s2, int ss2, int n)
{
- int i;
-
- if (n == 0) return;
- /* predict */
- for(i=1;i<n*2-2;i+=2){
- d_2xn[i] = s_2xn[i] - ((s_2xn[i-1] + s_2xn[i+1]) >> 1);
- }
- d_2xn[n*2-1] = s_2xn[n*2-1] - s_2xn[n*2-2];
-
- /* update */
- d_2xn[0] = s_2xn[0] + (d_2xn[1] >> 1);
- for(i=2;i<n*2;i+=2){
- d_2xn[i] = s_2xn[i] + ((d_2xn[i-1] + d_2xn[i+1]) >> 2);
- }
-}
-OIL_DEFINE_IMPL (split_53_nomix, split_53);
-
-#if 0
-void
-synth_53_nomix (int16_t *d_2xn, int16_t *s_2xn, int n)
-{
- int i;
-
- /* predict */
- i_n[0] -= i_n[1] >> 1;
- for(i=2;i<n*2;i+=2){
- i_n[i] -= (i_n[i-1] + i_n[i+1]) >> 2;
- }
-
- /* update */
- for(i=1;i<n*2-2;i+=2){
- i_n[i] += (i_n[i+1] + i_n[i-1]) >> 1;
- }
- i_n[n*2-1] += i_n[n*2-2];
-}
-#endif
-
-
-void
-split_53_c (int16_t *d_2xn, int16_t *s_2xn, int n)
-{
- int i;
-
- if (n == 0) return;
- if (n == 1) {
- d_2xn[1] = s_2xn[1] - s_2xn[0];
- d_2xn[0] = s_2xn[0] + (d_2xn[1] >> 1);
- } else {
- d_2xn[1] = s_2xn[1] - ((s_2xn[0] + s_2xn[2]) >> 1);
- d_2xn[0] = s_2xn[0] + (d_2xn[1] >> 1);
- d_2xn+=2;
- s_2xn+=2;
- for(i=0;i<(n*2-4)/2;i++){
- d_2xn[1] = s_2xn[1] - ((s_2xn[0] + s_2xn[2]) >> 1);
- d_2xn[0] = s_2xn[0] + ((d_2xn[-1] + d_2xn[1]) >> 2);
- d_2xn+=2;
- s_2xn+=2;
- }
- d_2xn[1] = s_2xn[1] - s_2xn[0];
- d_2xn[0] = s_2xn[0] + ((d_2xn[-1] + d_2xn[1]) >> 2);
- }
-}
-OIL_DEFINE_IMPL (split_53_c, split_53);
-
-void
-synth_53_c (int16_t *d_2xn, int16_t *s_2xn, int n)
-{
- int i;
-
- if (n == 0) return;
- if (n == 1) {
- d_2xn[0] = s_2xn[0] - (s_2xn[1] >> 1);
- d_2xn[1] = s_2xn[1] + d_2xn[0];
- } else {
- d_2xn[0] = s_2xn[0] - (s_2xn[1] >> 1);
- for(i=2;i<n*2-2;i+=2){
- d_2xn[i] = s_2xn[i] - ((s_2xn[i-1] + s_2xn[i+1]) >> 2);
- d_2xn[i-1] = s_2xn[i-1] + ((d_2xn[i] + d_2xn[i-2]) >> 1);
- }
- d_2xn[n*2-2] = s_2xn[n*2-2] - ((s_2xn[n*2-3] + s_2xn[n*2-1]) >> 2);
- d_2xn[n*2-3] = s_2xn[n*2-3] + ((d_2xn[n*2-2] + d_2xn[n*2-4]) >> 1);
- d_2xn[n*2-1] = s_2xn[n*2-1] + d_2xn[n*2-2];
- }
-}
-OIL_DEFINE_IMPL (synth_53_c, synth_53);
-
-void
-deinterleave_c_1 (int16_t *d_2xn, int16_t *s_2xn, int n)
-{
- int i;
-
- for(i=0;i<n;i++) {
- d_2xn[i] = s_2xn[2*i];
- d_2xn[n + i] = s_2xn[2*i + 1];
- }
-}
-OIL_DEFINE_IMPL (deinterleave_c_1, deinterleave);
-
-void
-deinterleave_asm (int16_t *d_2xn, int16_t *s_2xn, int n)
-{
- int16_t *d2;
-
- if (n == 0) return;
-
- d2 = d_2xn + n;
- while (n&1) {
- d_2xn[0] = s_2xn[0];
- d2[0] = s_2xn[1];
- d_2xn++;
- d2++;
- s_2xn+=2;
- n--;
- }
-
- asm volatile ("\n"
- " mov %3, %%ecx\n"
- " sub $2, %%ecx\n"
- "1:\n"
- " movw (%1,%%ecx,4), %%ax\n"
- " movw %%ax, (%0,%%ecx,2)\n"
- " movw 2(%1,%%ecx,4), %%ax\n"
- " movw %%ax, (%2,%%ecx,2)\n"
- " movw 4(%1,%%ecx,4), %%ax\n"
- " movw %%ax, 2(%0,%%ecx,2)\n"
- " movw 6(%1,%%ecx,4), %%ax\n"
- " movw %%ax, 2(%2,%%ecx,2)\n"
- " sub $2, %%ecx\n"
- " jge 1b\n"
- : "+r" (d_2xn), "+r" (s_2xn), "+r" (d2)
- : "m" (n)
- : "eax", "ecx");
-}
-OIL_DEFINE_IMPL (deinterleave_asm, deinterleave);
-
-void
-deinterleave_mmx (int16_t *d_2xn, int16_t *s_2xn, int n)
-{
- int16_t *d2;
-
- d2 = d_2xn + n;
-
- while (n&3) {
- d_2xn[0] = s_2xn[0];
- d2[0] = s_2xn[1];
- d_2xn++;
- d2++;
- s_2xn+=2;
- n--;
- }
- if (n==0) return;
-
- asm volatile ("\n"
- " xor %%ecx, %%ecx\n"
- "1:\n"
- " movq (%1,%%ecx,4), %%mm0\n"
- " movq 8(%1,%%ecx,4), %%mm1\n"
- " pslld $16, %%mm0\n"
- " pslld $16, %%mm1\n"
- " psrad $16, %%mm0\n"
- " psrad $16, %%mm1\n"
- " packssdw %%mm1, %%mm0\n"
- " movq %%mm0, (%0,%%ecx,2)\n"
- " movq (%1,%%ecx,4), %%mm0\n"
- " movq 8(%1,%%ecx,4), %%mm1\n"
- " psrad $16, %%mm0\n"
- " psrad $16, %%mm1\n"
- " packssdw %%mm1, %%mm0\n"
- " movq %%mm0, (%2,%%ecx,2)\n"
- " add $4, %%ecx\n"
- " cmp %3, %%ecx\n"
- " jl 1b\n"
- " emms\n"
- : "+r" (d_2xn), "+r" (s_2xn), "+r" (d2)
- : "m" (n)
- : "eax", "ecx");
-}
-OIL_DEFINE_IMPL (deinterleave_mmx, deinterleave);
-
-void
-deinterleave_mmx_2 (int16_t *d_2xn, int16_t *s_2xn, int n)
-{
- int16_t *d2;
-
- d2 = d_2xn + n;
-
- while (n&3) {
- d_2xn[0] = s_2xn[0];
- d2[0] = s_2xn[1];
- d_2xn++;
- d2++;
- s_2xn+=2;
- n--;
- }
- if (n==0) return;
-
- asm volatile ("\n"
- " xor %%ecx, %%ecx\n"
- "1:\n"
- " pshufw $0xd8, (%1,%%ecx,4), %%mm0\n"
- " movd %%mm0, (%0,%%ecx,2)\n"
- " pshufw $0x8d, (%1,%%ecx,4), %%mm0\n"
- " movd %%mm0, (%2,%%ecx,2)\n"
- " add $2, %%ecx\n"
- " cmp %3, %%ecx\n"
- " jl 1b\n"
- " emms\n"
- : "+r" (d_2xn), "+r" (s_2xn), "+r" (d2)
- : "m" (n)
- : "eax", "ecx");
-}
-OIL_DEFINE_IMPL (deinterleave_mmx_2, deinterleave);
-
-void
-deinterleave_mmx_3 (int16_t *d_2xn, int16_t *s_2xn, int n)
-{
- int16_t *d2;
-
- d2 = d_2xn + n;
-
- while (n&3) {
- d_2xn[0] = s_2xn[0];
- d2[0] = s_2xn[1];
- d_2xn++;
- d2++;
- s_2xn+=2;
- n--;
- }
- if (n==0) return;
-
- asm volatile ("\n"
- " xor %%ecx, %%ecx\n"
- "1:\n"
- " movq (%1,%%ecx,4), %%mm1\n"
- " movq (%1,%%ecx,4), %%mm2\n"
- " movq 8(%1,%%ecx,4), %%mm0\n"
- " punpcklwd %%mm0, %%mm1\n"
- " punpckhwd %%mm0, %%mm2\n"
- " movq %%mm1, %%mm0\n"
- " punpcklwd %%mm2, %%mm0\n"
- " punpckhwd %%mm2, %%mm1\n"
- " movq %%mm0, (%0,%%ecx,2)\n"
- " movq %%mm1, (%2,%%ecx,2)\n"
- " add $4, %%ecx\n"
- " cmp %3, %%ecx\n"
- " jl 1b\n"
- " emms\n"
- : "+r" (d_2xn), "+r" (s_2xn), "+r" (d2)
- : "m" (n)
- : "eax", "ecx");
-}
-OIL_DEFINE_IMPL (deinterleave_mmx_3, deinterleave);
-
-void
-deinterleave_mmx_4 (int16_t *d_2xn, int16_t *s_2xn, int n)
-{
- int16_t *d2;
-
- d2 = d_2xn + n;
-
- while (n&7) {
- d_2xn[0] = s_2xn[0];
- d2[0] = s_2xn[1];
- d_2xn++;
- d2++;
- s_2xn+=2;
- n--;
- }
- if (n==0) return;
-
- asm volatile ("\n"
- " xor %%ecx, %%ecx\n"
- "1:\n"
- " movq (%1,%%ecx,4), %%mm1\n"
- " movq %%mm1, %%mm2\n"
- " movq 8(%1,%%ecx,4), %%mm0\n"
- " movq 16(%1,%%ecx,4), %%mm5\n"
- " punpcklwd %%mm0, %%mm1\n"
- " movq %%mm5, %%mm6\n"
- " punpckhwd %%mm0, %%mm2\n"
- " movq 24(%1,%%ecx,4), %%mm4\n"
- " movq %%mm1, %%mm0\n"
- " punpcklwd %%mm4, %%mm5\n"
- " punpcklwd %%mm2, %%mm0\n"
- " punpckhwd %%mm4, %%mm6\n"
- " punpckhwd %%mm2, %%mm1\n"
- " movq %%mm5, %%mm4\n"
- " movq %%mm0, (%0,%%ecx,2)\n"
- " punpcklwd %%mm6, %%mm4\n"
- " movq %%mm1, (%2,%%ecx,2)\n"
- " punpckhwd %%mm6, %%mm5\n"
- " movq %%mm4, 8(%0,%%ecx,2)\n"
- " movq %%mm5, 8(%2,%%ecx,2)\n"
- " add $8, %%ecx\n"
- " cmp %3, %%ecx\n"
- " jl 1b\n"
- " emms\n"
- : "+r" (d_2xn), "+r" (s_2xn), "+r" (d2)
- : "m" (n)
- : "eax", "ecx");
-}
-OIL_DEFINE_IMPL (deinterleave_mmx_4, deinterleave);
-
-void
-interleave_c (int16_t *d_2xn, int16_t *s_2xn, int n)
-{
- int i;
+ int j;
+ for(j=0;j<n;j++){
+ i1[0] += s1[0]*s2[0];
+ i1[1] += s1[1]*s2[1];
+ i1[2] += s1[2]*s2[2];
+ i1[3] += s1[3]*s2[3];
+ i1[4] += s1[4]*s2[4];
+ i1[5] += s1[5]*s2[5];
- for(i=0;i<n;i++) {
- d_2xn[2*i] = s_2xn[i];
- d_2xn[2*i + 1] = s_2xn[n + i];
+ i1 = OIL_OFFSET(i1,is1);
+ s1 = OIL_OFFSET(s1,ss1);
+ s2 = OIL_OFFSET(s2,ss2);
}
}
-OIL_DEFINE_IMPL (interleave_c, interleave);
+OIL_DEFINE_IMPL (multiply_and_acc_6xn_s16_u8_c,
+ multiply_and_acc_6xn_s16_u8);
diff --git a/liboil/i386/wavelet.c b/liboil/i386/wavelet.c
index f766e40..114cc8d 100644
--- a/liboil/i386/wavelet.c
+++ b/liboil/i386/wavelet.c
@@ -1991,3 +1991,162 @@ lshift_s16_mmx_2(int16_t *d1, int16_t *s1, int16_t *s3_1, int n)
OIL_DEFINE_IMPL_FULL (lshift_s16_mmx_2, lshift_s16, OIL_IMPL_FLAG_MMX);
+void
+multiply_and_acc_6xn_s16_u8_mmx (int16_t *i1, int is1, int16_t *s1,
+ int ss1, uint8_t *s2, int ss2, int n)
+{
+ /* FIXME this reads outside the arrays. Bad. */
+ if (n==0) return;
+ __asm__ __volatile__ ("\n"
+ " pxor %%mm7, %%mm7\n"
+ "1:\n"
+ " movd 0(%2), %%mm0\n"
+ " punpcklbw %%mm7, %%mm0\n"
+ " pmullw 0(%1), %%mm0\n"
+ " paddw 0(%0), %%mm0\n"
+ " movq %%mm0, 0(%0)\n"
+ " movd 4(%2), %%mm1\n"
+ " punpcklbw %%mm7, %%mm1\n"
+ " pmullw 8(%1), %%mm1\n"
+ " paddw 8(%0), %%mm1\n"
+ " movd %%mm1, 8(%0)\n"
+
+ " addl %4, %0\n"
+ " addl %5, %1\n"
+ " addl %6, %2\n"
+ " decl %3\n"
+ " jnz 1b\n"
+ " emms\n"
+ : "+r" (i1), "+r" (s1), "+r" (s2), "+r" (n)
+ : "m" (is1), "m" (ss1), "m" (ss2)
+ );
+}
+OIL_DEFINE_IMPL_FULL (multiply_and_acc_6xn_s16_u8_mmx,
+ multiply_and_acc_6xn_s16_u8, OIL_IMPL_FLAG_MMX);
+
+void
+multiply_and_acc_8xn_s16_u8_mmx (int16_t *i1, int is1, int16_t *s1,
+ int ss1, uint8_t *s2, int ss2, int n)
+{
+ if (n==0) return;
+ __asm__ __volatile__ ("\n"
+ " pxor %%mm7, %%mm7\n"
+ "1:\n"
+ " movd 0(%2), %%mm0\n"
+ " punpcklbw %%mm7, %%mm0\n"
+ " pmullw 0(%1), %%mm0\n"
+ " paddw 0(%0), %%mm0\n"
+ " movq %%mm0, 0(%0)\n"
+ " movd 4(%2), %%mm1\n"
+ " punpcklbw %%mm7, %%mm1\n"
+ " pmullw 8(%1), %%mm1\n"
+ " paddw 8(%0), %%mm1\n"
+ " movq %%mm1, 8(%0)\n"
+
+ " addl %4, %0\n"
+ " addl %5, %1\n"
+ " addl %6, %2\n"
+ " decl %3\n"
+ " jnz 1b\n"
+ " emms\n"
+ : "+r" (i1), "+r" (s1), "+r" (s2), "+r" (n)
+ : "m" (is1), "m" (ss1), "m" (ss2)
+ );
+}
+OIL_DEFINE_IMPL_FULL (multiply_and_acc_8xn_s16_u8_mmx,
+ multiply_and_acc_8xn_s16_u8, OIL_IMPL_FLAG_MMX);
+
+void
+multiply_and_acc_16xn_s16_u8_mmx (int16_t *i1, int is1, int16_t *s1,
+ int ss1, uint8_t *s2, int ss2, int n)
+{
+ if (n==0) return;
+ __asm__ __volatile__ ("\n"
+ " pxor %%mm7, %%mm7\n"
+ "1:\n"
+ " movd 0(%2), %%mm0\n"
+ " punpcklbw %%mm7, %%mm0\n"
+ " pmullw 0(%1), %%mm0\n"
+ " paddw 0(%0), %%mm0\n"
+ " movq %%mm0, 0(%0)\n"
+ " movd 4(%2), %%mm1\n"
+ " punpcklbw %%mm7, %%mm1\n"
+ " pmullw 8(%1), %%mm1\n"
+ " paddw 8(%0), %%mm1\n"
+ " movq %%mm1, 8(%0)\n"
+ " movd 8(%2), %%mm2\n"
+ " punpcklbw %%mm7, %%mm2\n"
+ " pmullw 16(%1), %%mm2\n"
+ " paddw 16(%0), %%mm2\n"
+ " movq %%mm2, 16(%0)\n"
+ " movd 12(%2), %%mm2\n"
+ " punpcklbw %%mm7, %%mm2\n"
+ " pmullw 24(%1), %%mm2\n"
+ " paddw 24(%0), %%mm2\n"
+ " movq %%mm2, 24(%0)\n"
+
+ " addl %4, %0\n"
+ " addl %5, %1\n"
+ " addl %6, %2\n"
+ " decl %3\n"
+ " jnz 1b\n"
+ " emms\n"
+ : "+r" (i1), "+r" (s1), "+r" (s2), "+r" (n)
+ : "m" (is1), "m" (ss1), "m" (ss2)
+ );
+}
+OIL_DEFINE_IMPL_FULL (multiply_and_acc_16xn_s16_u8_mmx,
+ multiply_and_acc_16xn_s16_u8, OIL_IMPL_FLAG_MMX);
+
+void
+multiply_and_acc_24xn_s16_u8_mmx (int16_t *i1, int is1, int16_t *s1,
+ int ss1, uint8_t *s2, int ss2, int n)
+{
+ if (n==0) return;
+ __asm__ __volatile__ ("\n"
+ " pxor %%mm7, %%mm7\n"
+ "1:\n"
+ " movd 0(%2), %%mm0\n"
+ " punpcklbw %%mm7, %%mm0\n"
+ " pmullw 0(%1), %%mm0\n"
+ " paddw 0(%0), %%mm0\n"
+ " movq %%mm0, 0(%0)\n"
+ " movd 4(%2), %%mm1\n"
+ " punpcklbw %%mm7, %%mm1\n"
+ " pmullw 8(%1), %%mm1\n"
+ " paddw 8(%0), %%mm1\n"
+ " movq %%mm1, 8(%0)\n"
+ " movd 8(%2), %%mm2\n"
+ " punpcklbw %%mm7, %%mm2\n"
+ " pmullw 16(%1), %%mm2\n"
+ " paddw 16(%0), %%mm2\n"
+ " movq %%mm2, 16(%0)\n"
+ " movd 12(%2), %%mm2\n"
+ " punpcklbw %%mm7, %%mm2\n"
+ " pmullw 24(%1), %%mm2\n"
+ " paddw 24(%0), %%mm2\n"
+ " movq %%mm2, 24(%0)\n"
+ " movd 16(%2), %%mm2\n"
+ " punpcklbw %%mm7, %%mm2\n"
+ " pmullw 32(%1), %%mm2\n"
+ " paddw 32(%0), %%mm2\n"
+ " movq %%mm2, 32(%0)\n"
+ " movd 20(%2), %%mm2\n"
+ " punpcklbw %%mm7, %%mm2\n"
+ " pmullw 40(%1), %%mm2\n"
+ " paddw 40(%0), %%mm2\n"
+ " movq %%mm2, 40(%0)\n"
+
+ " addl %4, %0\n"
+ " addl %5, %1\n"
+ " addl %6, %2\n"
+ " decl %3\n"
+ " jnz 1b\n"
+ " emms\n"
+ : "+r" (i1), "+r" (s1), "+r" (s2), "+r" (n)
+ : "m" (is1), "m" (ss1), "m" (ss2)
+ );
+}
+OIL_DEFINE_IMPL_FULL (multiply_and_acc_24xn_s16_u8_mmx,
+ multiply_and_acc_24xn_s16_u8, OIL_IMPL_FLAG_MMX);
+
diff --git a/liboil/liboilarray.c b/liboil/liboilarray.c
index 0a28df0..6f38012 100644
--- a/liboil/liboilarray.c
+++ b/liboil/liboilarray.c
@@ -263,6 +263,10 @@ extern OilFunctionClass _oil_function_class_mix_u8;
extern OilFunctionClass _oil_function_class_mt19937;
extern OilFunctionClass _oil_function_class_mult8x8_s16;
extern OilFunctionClass _oil_function_class_multiply_and_acc_12xn_s16_u8;
+extern OilFunctionClass _oil_function_class_multiply_and_acc_16xn_s16_u8;
+extern OilFunctionClass _oil_function_class_multiply_and_acc_24xn_s16_u8;
+extern OilFunctionClass _oil_function_class_multiply_and_acc_6xn_s16_u8;
+extern OilFunctionClass _oil_function_class_multiply_and_acc_8xn_s16_u8;
extern OilFunctionClass _oil_function_class_multiply_and_add_s16;
extern OilFunctionClass _oil_function_class_multiply_and_add_s16_u8;
extern OilFunctionClass _oil_function_class_multiply_f32;
@@ -661,6 +665,10 @@ OilFunctionClass *_oil_function_class_array[] = {
&_oil_function_class_mt19937,
&_oil_function_class_mult8x8_s16,
&_oil_function_class_multiply_and_acc_12xn_s16_u8,
+ &_oil_function_class_multiply_and_acc_16xn_s16_u8,
+ &_oil_function_class_multiply_and_acc_24xn_s16_u8,
+ &_oil_function_class_multiply_and_acc_6xn_s16_u8,
+ &_oil_function_class_multiply_and_acc_8xn_s16_u8,
&_oil_function_class_multiply_and_add_s16,
&_oil_function_class_multiply_and_add_s16_u8,
&_oil_function_class_multiply_f32,
@@ -1545,6 +1553,15 @@ extern OilFunctionImpl _oil_function_impl_mult8x8_s16_mmx;
extern OilFunctionImpl _oil_function_impl_mult8x8_s16_ref;
extern OilFunctionImpl _oil_function_impl_multiply_and_acc_12xn_s16_u8_mmx;
extern OilFunctionImpl _oil_function_impl_multiply_and_acc_12xn_s16_u8_ref;
+extern OilFunctionImpl _oil_function_impl_multiply_and_acc_16xn_s16_u8_mmx;
+extern OilFunctionImpl _oil_function_impl_multiply_and_acc_16xn_s16_u8_ref;
+extern OilFunctionImpl _oil_function_impl_multiply_and_acc_24xn_s16_u8_mmx;
+extern OilFunctionImpl _oil_function_impl_multiply_and_acc_24xn_s16_u8_ref;
+extern OilFunctionImpl _oil_function_impl_multiply_and_acc_6xn_s16_u8_c;
+extern OilFunctionImpl _oil_function_impl_multiply_and_acc_6xn_s16_u8_mmx;
+extern OilFunctionImpl _oil_function_impl_multiply_and_acc_6xn_s16_u8_ref;
+extern OilFunctionImpl _oil_function_impl_multiply_and_acc_8xn_s16_u8_mmx;
+extern OilFunctionImpl _oil_function_impl_multiply_and_acc_8xn_s16_u8_ref;
extern OilFunctionImpl _oil_function_impl_multiply_and_add_s16_mmx;
extern OilFunctionImpl _oil_function_impl_multiply_and_add_s16_ref;
extern OilFunctionImpl _oil_function_impl_multiply_and_add_s16_u8_mmx;
@@ -2607,6 +2624,15 @@ OilFunctionImpl *_oil_function_impl_array[] = {
&_oil_function_impl_mult8x8_s16_ref,
&_oil_function_impl_multiply_and_acc_12xn_s16_u8_mmx,
&_oil_function_impl_multiply_and_acc_12xn_s16_u8_ref,
+ &_oil_function_impl_multiply_and_acc_16xn_s16_u8_mmx,
+ &_oil_function_impl_multiply_and_acc_16xn_s16_u8_ref,
+ &_oil_function_impl_multiply_and_acc_24xn_s16_u8_mmx,
+ &_oil_function_impl_multiply_and_acc_24xn_s16_u8_ref,
+ &_oil_function_impl_multiply_and_acc_6xn_s16_u8_c,
+ &_oil_function_impl_multiply_and_acc_6xn_s16_u8_mmx,
+ &_oil_function_impl_multiply_and_acc_6xn_s16_u8_ref,
+ &_oil_function_impl_multiply_and_acc_8xn_s16_u8_mmx,
+ &_oil_function_impl_multiply_and_acc_8xn_s16_u8_ref,
&_oil_function_impl_multiply_and_add_s16_mmx,
&_oil_function_impl_multiply_and_add_s16_ref,
&_oil_function_impl_multiply_and_add_s16_u8_mmx,
diff --git a/liboil/liboilclasses.h b/liboil/liboilclasses.h
index f05e874..2ec8640 100644
--- a/liboil/liboilclasses.h
+++ b/liboil/liboilclasses.h
@@ -297,6 +297,10 @@ OIL_DECLARE_CLASS(mix_u8);
OIL_DECLARE_CLASS(mt19937);
OIL_DECLARE_CLASS(mult8x8_s16);
OIL_DECLARE_CLASS(multiply_and_acc_12xn_s16_u8);
+OIL_DECLARE_CLASS(multiply_and_acc_16xn_s16_u8);
+OIL_DECLARE_CLASS(multiply_and_acc_24xn_s16_u8);
+OIL_DECLARE_CLASS(multiply_and_acc_6xn_s16_u8);
+OIL_DECLARE_CLASS(multiply_and_acc_8xn_s16_u8);
OIL_DECLARE_CLASS(multiply_and_add_s16);
OIL_DECLARE_CLASS(multiply_and_add_s16_u8);
OIL_DECLARE_CLASS(multiply_f32);
diff --git a/liboil/liboilfuncs-04.h b/liboil/liboilfuncs-04.h
index a4b3a13..e8512e8 100644
--- a/liboil/liboilfuncs-04.h
+++ b/liboil/liboilfuncs-04.h
@@ -297,6 +297,10 @@ void oil_mix_u8 (uint8_t * dest, const uint8_t * src1, const uint8_t * src2, con
void oil_mt19937 (uint32_t * d_624, uint32_t * i_624);
void oil_mult8x8_s16 (int16_t * d_8x8, const int16_t * s1_8x8, const int16_t * s2_8x8, int ds, int ss1, int ss2);
void oil_multiply_and_acc_12xn_s16_u8 (int16_t * i1_12xn, int is1, const int16_t * s1_12xn, int ss1, const uint8_t * s2_12xn, int ss2, int n);
+void oil_multiply_and_acc_16xn_s16_u8 (int16_t * i1_16xn, int is1, const int16_t * s1_16xn, int ss1, const uint8_t * s2_16xn, int ss2, int n);
+void oil_multiply_and_acc_24xn_s16_u8 (int16_t * i1_24xn, int is1, const int16_t * s1_24xn, int ss1, const uint8_t * s2_24xn, int ss2, int n);
+void oil_multiply_and_acc_6xn_s16_u8 (int16_t * i1_6xn, int is1, const int16_t * s1_6xn, int ss1, const uint8_t * s2_6xn, int ss2, int n);
+void oil_multiply_and_acc_8xn_s16_u8 (int16_t * i1_8xn, int is1, const int16_t * s1_8xn, int ss1, const uint8_t * s2_8xn, int ss2, int n);
void oil_multiply_and_add_s16 (int16_t * d, const int16_t * src1, const int16_t * src2, const int16_t * src3, int n);
void oil_multiply_and_add_s16_u8 (int16_t * d, const int16_t * src1, const int16_t * src2, const uint8_t * src3, int n);
void oil_multiply_f32 (float * d, const float * s1, const float * s2, int n);
diff --git a/liboil/liboilfuncs-doc.h b/liboil/liboilfuncs-doc.h
index 21f20cd..7e25d90 100644
--- a/liboil/liboilfuncs-doc.h
+++ b/liboil/liboilfuncs-doc.h
@@ -261,6 +261,10 @@ void oil_mix_u8 (uint8_t * dest, const uint8_t * src1, const uint8_t * src2, con
void oil_mt19937 (uint32_t * d_624, uint32_t * i_624);
void oil_mult8x8_s16 (int16_t * d_8x8, const int16_t * s1_8x8, const int16_t * s2_8x8, int ds, int ss1, int ss2);
void oil_multiply_and_acc_12xn_s16_u8 (int16_t * i1_12xn, int is1, const int16_t * s1_12xn, int ss1, const uint8_t * s2_12xn, int ss2, int n);
+void oil_multiply_and_acc_16xn_s16_u8 (int16_t * i1_16xn, int is1, const int16_t * s1_16xn, int ss1, const uint8_t * s2_16xn, int ss2, int n);
+void oil_multiply_and_acc_24xn_s16_u8 (int16_t * i1_24xn, int is1, const int16_t * s1_24xn, int ss1, const uint8_t * s2_24xn, int ss2, int n);
+void oil_multiply_and_acc_6xn_s16_u8 (int16_t * i1_6xn, int is1, const int16_t * s1_6xn, int ss1, const uint8_t * s2_6xn, int ss2, int n);
+void oil_multiply_and_acc_8xn_s16_u8 (int16_t * i1_8xn, int is1, const int16_t * s1_8xn, int ss1, const uint8_t * s2_8xn, int ss2, int n);
void oil_multiply_and_add_s16 (int16_t * d, const int16_t * src1, const int16_t * src2, const int16_t * src3, int n);
void oil_multiply_and_add_s16_u8 (int16_t * d, const int16_t * src1, const int16_t * src2, const uint8_t * src3, int n);
void oil_multiply_f32 (float * d, const float * s1, const float * s2, int n);
diff --git a/liboil/liboilfuncs.h b/liboil/liboilfuncs.h
index 39c8382..d841485 100644
--- a/liboil/liboilfuncs.h
+++ b/liboil/liboilfuncs.h
@@ -819,6 +819,18 @@ typedef void (*_oil_type_mult8x8_s16)(int16_t * d_8x8, const int16_t * s1_8x8, c
extern OilFunctionClass *oil_function_class_ptr_multiply_and_acc_12xn_s16_u8;
typedef void (*_oil_type_multiply_and_acc_12xn_s16_u8)(int16_t * i1_12xn, int is1, const int16_t * s1_12xn, int ss1, const uint8_t * s2_12xn, int ss2, int n);
#define oil_multiply_and_acc_12xn_s16_u8 ((_oil_type_multiply_and_acc_12xn_s16_u8)(*(void **)oil_function_class_ptr_multiply_and_acc_12xn_s16_u8))
+extern OilFunctionClass *oil_function_class_ptr_multiply_and_acc_16xn_s16_u8;
+typedef void (*_oil_type_multiply_and_acc_16xn_s16_u8)(int16_t * i1_16xn, int is1, const int16_t * s1_16xn, int ss1, const uint8_t * s2_16xn, int ss2, int n);
+#define oil_multiply_and_acc_16xn_s16_u8 ((_oil_type_multiply_and_acc_16xn_s16_u8)(*(void **)oil_function_class_ptr_multiply_and_acc_16xn_s16_u8))
+extern OilFunctionClass *oil_function_class_ptr_multiply_and_acc_24xn_s16_u8;
+typedef void (*_oil_type_multiply_and_acc_24xn_s16_u8)(int16_t * i1_24xn, int is1, const int16_t * s1_24xn, int ss1, const uint8_t * s2_24xn, int ss2, int n);
+#define oil_multiply_and_acc_24xn_s16_u8 ((_oil_type_multiply_and_acc_24xn_s16_u8)(*(void **)oil_function_class_ptr_multiply_and_acc_24xn_s16_u8))
+extern OilFunctionClass *oil_function_class_ptr_multiply_and_acc_6xn_s16_u8;
+typedef void (*_oil_type_multiply_and_acc_6xn_s16_u8)(int16_t * i1_6xn, int is1, const int16_t * s1_6xn, int ss1, const uint8_t * s2_6xn, int ss2, int n);
+#define oil_multiply_and_acc_6xn_s16_u8 ((_oil_type_multiply_and_acc_6xn_s16_u8)(*(void **)oil_function_class_ptr_multiply_and_acc_6xn_s16_u8))
+extern OilFunctionClass *oil_function_class_ptr_multiply_and_acc_8xn_s16_u8;
+typedef void (*_oil_type_multiply_and_acc_8xn_s16_u8)(int16_t * i1_8xn, int is1, const int16_t * s1_8xn, int ss1, const uint8_t * s2_8xn, int ss2, int n);
+#define oil_multiply_and_acc_8xn_s16_u8 ((_oil_type_multiply_and_acc_8xn_s16_u8)(*(void **)oil_function_class_ptr_multiply_and_acc_8xn_s16_u8))
extern OilFunctionClass *oil_function_class_ptr_multiply_and_add_s16;
typedef void (*_oil_type_multiply_and_add_s16)(int16_t * d, const int16_t * src1, const int16_t * src2, const int16_t * src3, int n);
#define oil_multiply_and_add_s16 ((_oil_type_multiply_and_add_s16)(*(void **)oil_function_class_ptr_multiply_and_add_s16))
diff --git a/liboil/liboiltrampolines.c b/liboil/liboiltrampolines.c
index d48ccfa..ccc7e4e 100644
--- a/liboil/liboiltrampolines.c
+++ b/liboil/liboiltrampolines.c
@@ -2641,6 +2641,46 @@ oil_multiply_and_acc_12xn_s16_u8 (int16_t * i1_12xn, int is1, const int16_t * s1
((void (*)(int16_t * i1_12xn, int is1, const int16_t * s1_12xn, int ss1, const uint8_t * s2_12xn, int ss2, int n))(_oil_function_class_multiply_and_acc_12xn_s16_u8.func))(i1_12xn, is1, s1_12xn, ss1, s2_12xn, ss2, n);
}
+#undef oil_multiply_and_acc_16xn_s16_u8
+void
+oil_multiply_and_acc_16xn_s16_u8 (int16_t * i1_16xn, int is1, const int16_t * s1_16xn, int ss1, const uint8_t * s2_16xn, int ss2, int n)
+{
+ if (_oil_function_class_multiply_and_acc_16xn_s16_u8.func == NULL) {
+ oil_class_optimize (&_oil_function_class_multiply_and_acc_16xn_s16_u8);
+ }
+ ((void (*)(int16_t * i1_16xn, int is1, const int16_t * s1_16xn, int ss1, const uint8_t * s2_16xn, int ss2, int n))(_oil_function_class_multiply_and_acc_16xn_s16_u8.func))(i1_16xn, is1, s1_16xn, ss1, s2_16xn, ss2, n);
+}
+
+#undef oil_multiply_and_acc_24xn_s16_u8
+void
+oil_multiply_and_acc_24xn_s16_u8 (int16_t * i1_24xn, int is1, const int16_t * s1_24xn, int ss1, const uint8_t * s2_24xn, int ss2, int n)
+{
+ if (_oil_function_class_multiply_and_acc_24xn_s16_u8.func == NULL) {
+ oil_class_optimize (&_oil_function_class_multiply_and_acc_24xn_s16_u8);
+ }
+ ((void (*)(int16_t * i1_24xn, int is1, const int16_t * s1_24xn, int ss1, const uint8_t * s2_24xn, int ss2, int n))(_oil_function_class_multiply_and_acc_24xn_s16_u8.func))(i1_24xn, is1, s1_24xn, ss1, s2_24xn, ss2, n);
+}
+
+#undef oil_multiply_and_acc_6xn_s16_u8
+void
+oil_multiply_and_acc_6xn_s16_u8 (int16_t * i1_6xn, int is1, const int16_t * s1_6xn, int ss1, const uint8_t * s2_6xn, int ss2, int n)
+{
+ if (_oil_function_class_multiply_and_acc_6xn_s16_u8.func == NULL) {
+ oil_class_optimize (&_oil_function_class_multiply_and_acc_6xn_s16_u8);
+ }
+ ((void (*)(int16_t * i1_6xn, int is1, const int16_t * s1_6xn, int ss1, const uint8_t * s2_6xn, int ss2, int n))(_oil_function_class_multiply_and_acc_6xn_s16_u8.func))(i1_6xn, is1, s1_6xn, ss1, s2_6xn, ss2, n);
+}
+
+#undef oil_multiply_and_acc_8xn_s16_u8
+void
+oil_multiply_and_acc_8xn_s16_u8 (int16_t * i1_8xn, int is1, const int16_t * s1_8xn, int ss1, const uint8_t * s2_8xn, int ss2, int n)
+{
+ if (_oil_function_class_multiply_and_acc_8xn_s16_u8.func == NULL) {
+ oil_class_optimize (&_oil_function_class_multiply_and_acc_8xn_s16_u8);
+ }
+ ((void (*)(int16_t * i1_8xn, int is1, const int16_t * s1_8xn, int ss1, const uint8_t * s2_8xn, int ss2, int n))(_oil_function_class_multiply_and_acc_8xn_s16_u8.func))(i1_8xn, is1, s1_8xn, ss1, s2_8xn, ss2, n);
+}
+
#undef oil_multiply_and_add_s16
void
oil_multiply_and_add_s16 (int16_t * d, const int16_t * src1, const int16_t * src2, const int16_t * src3, int n)
diff --git a/liboil/ref/wavelet.c b/liboil/ref/wavelet.c
index a8a41c9..b7e8175 100644
--- a/liboil/ref/wavelet.c
+++ b/liboil/ref/wavelet.c
@@ -72,8 +72,16 @@ OIL_DEFINE_CLASS (multiply_and_add_s16,
"int16_t *d, int16_t *src1, int16_t *src2, int16_t *src3, int n");
OIL_DEFINE_CLASS (multiply_and_add_s16_u8,
"int16_t *d, int16_t *src1, int16_t *src2, uint8_t *src3, int n");
+OIL_DEFINE_CLASS (multiply_and_acc_6xn_s16_u8, "int16_t *i1_6xn, int is1, "
+ "int16_t *s1_6xn, int ss1, uint8_t *s2_6xn, int ss2, int n");
+OIL_DEFINE_CLASS (multiply_and_acc_8xn_s16_u8, "int16_t *i1_8xn, int is1, "
+ "int16_t *s1_8xn, int ss1, uint8_t *s2_8xn, int ss2, int n");
OIL_DEFINE_CLASS (multiply_and_acc_12xn_s16_u8, "int16_t *i1_12xn, int is1, "
"int16_t *s1_12xn, int ss1, uint8_t *s2_12xn, int ss2, int n");
+OIL_DEFINE_CLASS (multiply_and_acc_16xn_s16_u8, "int16_t *i1_16xn, int is1, "
+ "int16_t *s1_16xn, int ss1, uint8_t *s2_16xn, int ss2, int n");
+OIL_DEFINE_CLASS (multiply_and_acc_24xn_s16_u8, "int16_t *i1_24xn, int is1, "
+ "int16_t *s1_24xn, int ss1, uint8_t *s2_24xn, int ss2, int n");
void
deinterleave_ref (int16_t *d_2xn, int16_t *s_2xn, int n)
@@ -537,6 +545,40 @@ multiply_and_add_s16_u8_ref (int16_t *d, int16_t *src1, int16_t *src2,
OIL_DEFINE_IMPL_REF (multiply_and_add_s16_u8_ref, multiply_and_add_s16_u8);
void
+multiply_and_acc_6xn_s16_u8_ref (int16_t *i1, int is1, int16_t *s1,
+ int ss1, uint8_t *s2, int ss2, int n)
+{
+ int i, j;
+ for(j=0;j<n;j++){
+ for(i=0;i<6;i++){
+ i1[i] += s1[i]*s2[i];
+ }
+ i1 = OIL_OFFSET(i1,is1);
+ s1 = OIL_OFFSET(s1,ss1);
+ s2 = OIL_OFFSET(s2,ss2);
+ }
+}
+OIL_DEFINE_IMPL_REF (multiply_and_acc_6xn_s16_u8_ref,
+ multiply_and_acc_6xn_s16_u8);
+
+void
+multiply_and_acc_8xn_s16_u8_ref (int16_t *i1, int is1, int16_t *s1,
+ int ss1, uint8_t *s2, int ss2, int n)
+{
+ int i, j;
+ for(j=0;j<n;j++){
+ for(i=0;i<8;i++){
+ i1[i] += s1[i]*s2[i];
+ }
+ i1 = OIL_OFFSET(i1,is1);
+ s1 = OIL_OFFSET(s1,ss1);
+ s2 = OIL_OFFSET(s2,ss2);
+ }
+}
+OIL_DEFINE_IMPL_REF (multiply_and_acc_8xn_s16_u8_ref,
+ multiply_and_acc_8xn_s16_u8);
+
+void
multiply_and_acc_12xn_s16_u8_ref (int16_t *i1, int is1, int16_t *s1,
int ss1, uint8_t *s2, int ss2, int n)
{
@@ -553,3 +595,37 @@ multiply_and_acc_12xn_s16_u8_ref (int16_t *i1, int is1, int16_t *s1,
OIL_DEFINE_IMPL_REF (multiply_and_acc_12xn_s16_u8_ref,
multiply_and_acc_12xn_s16_u8);
+void
+multiply_and_acc_16xn_s16_u8_ref (int16_t *i1, int is1, int16_t *s1,
+ int ss1, uint8_t *s2, int ss2, int n)
+{
+ int i, j;
+ for(j=0;j<n;j++){
+ for(i=0;i<16;i++){
+ i1[i] += s1[i]*s2[i];
+ }
+ i1 = OIL_OFFSET(i1,is1);
+ s1 = OIL_OFFSET(s1,ss1);
+ s2 = OIL_OFFSET(s2,ss2);
+ }
+}
+OIL_DEFINE_IMPL_REF (multiply_and_acc_16xn_s16_u8_ref,
+ multiply_and_acc_16xn_s16_u8);
+
+void
+multiply_and_acc_24xn_s16_u8_ref (int16_t *i1, int is1, int16_t *s1,
+ int ss1, uint8_t *s2, int ss2, int n)
+{
+ int i, j;
+ for(j=0;j<n;j++){
+ for(i=0;i<24;i++){
+ i1[i] += s1[i]*s2[i];
+ }
+ i1 = OIL_OFFSET(i1,is1);
+ s1 = OIL_OFFSET(s1,ss1);
+ s2 = OIL_OFFSET(s2,ss2);
+ }
+}
+OIL_DEFINE_IMPL_REF (multiply_and_acc_24xn_s16_u8_ref,
+ multiply_and_acc_24xn_s16_u8);
+
commit 32b26088358267d84b90171eeeb781d5702ac288
Author: David Schleef <ds at ginger.bigkitten.com>
Date: Thu Dec 27 16:13:56 2007 -0800
Fix some missing n==0 checks
diff --git a/liboil/i386/mas.c b/liboil/i386/mas.c
index 4347d90..ebce3f6 100644
--- a/liboil/i386/mas.c
+++ b/liboil/i386/mas.c
@@ -21,6 +21,7 @@ mas10_u8_mmx (uint8_t *d, const uint8_t *s1_np9, const int16_t *s2_10,
n--;
}
+ if (n == 0) return;
n>>=2;
__asm__ __volatile__("\n"
" pxor %%mm7, %%mm7\n"
@@ -101,6 +102,7 @@ mas10_u8_mmx_2 (uint8_t *d, const uint8_t *s1_np9, const int16_t *s2_10,
ptr[4*j + 3] = s2_10[j];
}
+ if (n == 0) return;
n>>=2;
__asm__ __volatile__("\n"
" pxor %%mm7, %%mm7\n"
@@ -168,6 +170,7 @@ mas10_u8_mmx_3 (uint8_t *d, const uint8_t *s1_np9, const int16_t *s2_10,
n--;
}
+ if (n == 0) return;
n>>=2;
__asm__ __volatile__("\n"
" pxor %%mm7, %%mm7\n"
@@ -274,6 +277,7 @@ void
mas10_u8_mmx_4 (uint8_t *d, const uint8_t *s1_np9, const int16_t *s2_10,
const int16_t *s3_2, int n)
{
+ if (n == 0) return;
__asm__ __volatile__("\n"
" pxor %%mm7, %%mm7\n"
@@ -342,6 +346,7 @@ mas8_u8_mmx_3 (uint8_t *d, const uint8_t *s1_np7, const int16_t *s2_8,
n--;
}
+ if (n == 0) return;
n>>=2;
__asm__ __volatile__("\n"
" pxor %%mm7, %%mm7\n"
@@ -428,6 +433,7 @@ void
mas8_u8_mmx_4 (uint8_t *d, const uint8_t *s1_np7, const int16_t *s2_8,
const int16_t *s3_2, int n)
{
+ if (n == 0) return;
__asm__ __volatile__("\n"
" pxor %%mm7, %%mm7\n"
@@ -490,6 +496,7 @@ mas8_u8_sym_mmx_3 (uint8_t *d, const uint8_t *s1_np7, const int16_t *s2_8,
n--;
}
+ if (n == 0) return;
n>>=2;
__asm__ __volatile__("\n"
" pxor %%mm7, %%mm7\n"
More information about the Liboil-commit mailing list