[Liboil-commit] liboil/i386 liboil/i386_amd64

David Schleef ds at kemper.freedesktop.org
Mon Feb 25 23:49:30 PST 2008


 liboil/i386/copy_i386.c  |  419 -----------------------------------------------
 liboil/i386_amd64/copy.c |    9 -
 2 files changed, 6 insertions(+), 422 deletions(-)

New commits:
commit 02069f6470b0864715fe6a83f673690256d8bda0
Author: David Schleef <ds at ginger.bigkitten.com>
Date:   Mon Feb 25 23:51:09 2008 -0800

    Try again.  Add ecx to clobbered registers.  Fixes #14643.

diff --git a/liboil/i386/copy_i386.c b/liboil/i386/copy_i386.c
deleted file mode 100644
index a48738b..0000000
--- a/liboil/i386/copy_i386.c
+++ /dev/null
@@ -1,419 +0,0 @@
-/*
- * LIBOIL - Library of Optimized Inner Loops
- * Copyright (c) 2004 David A. Schleef <ds at schleef.org>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
- * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
- * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
- * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
- * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifdef HAVE_CONFIG_H
-#include "config.h"
-#endif
-
-#include <liboil/liboilfunction.h>
-#include <liboil/liboilclasses.h>
-
-
-static void
-copy_u8_i386_mmx (uint8_t *dest, uint8_t *src, int n)
-{
-  if (n&4) {
-    *(uint32_t *)dest = *(uint32_t *)src;
-    dest += 4;
-    src += 4;
-    n-=4;
-  }
-  while(n&0x7) {
-    *dest++ = *src++;
-    n--;
-  }
-  if (n) asm volatile (
-      "  mov $0, %%eax\n"
-      "1:\n"
-      "  movq (%1,%%eax), %%mm0\n"
-      "  movq %%mm0, (%0,%%eax)\n"
-      "  add $8, %%eax\n"
-      "  decl %%ecx\n"
-      "  jne 1b\n"
-      "  emms\n"
-      : "+r" (dest), "+r" (src)
-      : "c" (n/8)
-      : "eax");
-}
-OIL_DEFINE_IMPL_FULL (copy_u8_i386_mmx, copy_u8, OIL_IMPL_FLAG_MMX);
-
-static void
-copy_u8_mmx2 (uint8_t *dest, uint8_t *src, int n)
-{
-  while (n&0xc) {
-    *(uint32_t *)dest = *(uint32_t *)src;
-    dest += 4;
-    src += 4;
-    n-=4;
-  }
-  while(n&0xf) {
-    *dest++ = *src++;
-    n--;
-  }
-  if (n) asm volatile (
-      "  mov $0, %%eax\n"
-      "1:\n"
-      "  movq (%1,%%eax), %%mm0\n"
-      "  movq %%mm0, (%0,%%eax)\n"
-      "  movq 8(%1,%%eax), %%mm0\n"
-      "  movq %%mm0, 8(%0,%%eax)\n"
-      "  add $16, %%eax\n"
-      "  decl %%ecx\n"
-      "  jne 1b\n"
-      "  emms\n"
-      : "+r" (dest), "+r" (src)
-      : "c" (n/16)
-      : "eax");
-}
-OIL_DEFINE_IMPL_FULL (copy_u8_mmx2, copy_u8, OIL_IMPL_FLAG_MMX);
-
-#if 0
-static void
-copy_u8_mmx3 (uint8_t *dest, uint8_t *src, int n)
-{
-  /* make sure destination is cache-line aligned for output */
-  if (n < 64) {
-    while (n>0) {
-      *dest++ = *src++;
-      n--;
-    }
-    return;
-  }
-  while (((unsigned long)dest) & 0x3) {
-    *dest++ = *src++;
-    n--;
-  }
-  while (((unsigned long)dest) & 0x3f) {
-    *(uint32_t *)dest = *(uint32_t *)src;
-    dest += 4;
-    src += 4;
-    n-=4;
-  }
-  if (n > 64) asm volatile (
-      "  mov $0, %%eax\n"
-      "1:\n"
-      //"  prefetchnta 128(%1,%%eax)\n"
-      "  movq (%1,%%eax), %%mm0\n"
-      "  movq 8(%1,%%eax), %%mm1\n"
-      "  movq 16(%1,%%eax), %%mm2\n"
-      "  movq 24(%1,%%eax), %%mm3\n"
-      "  movq 32(%1,%%eax), %%mm4\n"
-      "  movq 40(%1,%%eax), %%mm5\n"
-      "  movq 48(%1,%%eax), %%mm6\n"
-      "  movq 56(%1,%%eax), %%mm7\n"
-      "  movntq %%mm0, (%0,%%eax)\n"
-      "  movntq %%mm1, 8(%0,%%eax)\n"
-      "  movntq %%mm2, 16(%0,%%eax)\n"
-      "  movntq %%mm3, 24(%0,%%eax)\n"
-      "  movntq %%mm4, 32(%0,%%eax)\n"
-      "  movntq %%mm5, 40(%0,%%eax)\n"
-      "  movntq %%mm6, 48(%0,%%eax)\n"
-      "  movntq %%mm7, 56(%0,%%eax)\n"
-      "  add $64, %%eax\n"
-      "  decl %%ecx\n"
-      "  jne 1b\n"
-      "  sfence\n"
-      "  emms\n"
-      : "+r" (dest), "+r" (src)
-      : "c" (n>>6)
-      : "eax");
-
-  dest += n&(~(0x3f));
-  src += n&(~(0x3f));
-  n &= 0x3f;
-  while (n > 3) {
-    *(uint32_t *)dest = *(uint32_t *)src;
-    dest += 4;
-    src += 4;
-    n-=4;
-  }
-  while (n > 0) {
-    *dest++ = *src++;
-    n--;
-  }
-}
-OIL_DEFINE_IMPL_FULL (copy_u8_mmx3, copy_u8, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT);
-#endif
-
-#if 0
-static void
-copy_u8_mmx4 (uint8_t *dest, uint8_t *src, int n)
-{
-  /* make sure destination is cache-line aligned for output */
-  if (n < 32) {
-    while (n>0) {
-      *dest++ = *src++;
-      n--;
-    }
-    return;
-  }
-  while (((unsigned long)dest) & 0x3) {
-    *dest++ = *src++;
-    n--;
-  }
-  while (((unsigned long)dest) & 0x1f) {
-    *(uint32_t *)dest = *(uint32_t *)src;
-    dest += 4;
-    src += 4;
-    n-=4;
-  }
-  if (n > 32) asm volatile (
-      "  mov $0, %%eax\n"
-      "1:\n"
-      //"  prefetchnta 128(%1,%%eax)\n"
-      "  movq (%1,%%eax), %%mm0\n"
-      "  movq 8(%1,%%eax), %%mm1\n"
-      "  movq 16(%1,%%eax), %%mm2\n"
-      "  movq 24(%1,%%eax), %%mm3\n"
-      "  movntq %%mm0, (%0,%%eax)\n"
-      "  movntq %%mm1, 8(%0,%%eax)\n"
-      "  movntq %%mm2, 16(%0,%%eax)\n"
-      "  movntq %%mm3, 24(%0,%%eax)\n"
-      "  add $32, %%eax\n"
-      "  decl %%ecx\n"
-      "  jne 1b\n"
-      "  sfence\n"
-      "  emms\n"
-      : "+r" (dest), "+r" (src)
-      : "c" (n>>5)
-      : "eax");
-
-  dest += n&(~(0x1f));
-  src += n&(~(0x1f));
-  n &= 0x1f;
-  while (n > 3) {
-    *(uint32_t *)dest = *(uint32_t *)src;
-    dest += 4;
-    src += 4;
-    n-=4;
-  }
-  while (n > 0) {
-    *dest++ = *src++;
-    n--;
-  }
-}
-OIL_DEFINE_IMPL_FULL (copy_u8_mmx4, copy_u8, OIL_IMPL_FLAG_MMX | OIL_IMPL_FLAG_MMXEXT);
-#endif
-
-static void
-copy_u8_mmx5 (uint8_t *dest, uint8_t *src, int n)
-{
-  while (n&0xc) {
-    *(uint32_t *)dest = *(uint32_t *)src;
-    dest += 4;
-    src += 4;
-    n-=4;
-  }
-  while(n&0xf) {
-    *dest++ = *src++;
-    n--;
-  }
-  if (n) asm volatile (
-      "  mov $0, %%eax\n"
-      "1:\n"
-      "  movq (%1,%%eax), %%mm0\n"
-      "  movq 8(%1,%%eax), %%mm1\n"
-      "  movq %%mm0, (%0,%%eax)\n"
-      "  movq %%mm1, 8(%0,%%eax)\n"
-      "  add $16, %%eax\n"
-      "  decl %%ecx\n"
-      "  jne 1b\n"
-      "  emms\n"
-      : "+r" (dest), "+r" (src)
-      : "c" (n/16)
-      : "eax");
-}
-OIL_DEFINE_IMPL_FULL (copy_u8_mmx5, copy_u8, OIL_IMPL_FLAG_MMX);
-
-
-static void splat_u8_ns_i386_mmx (uint8_t *dest, const uint8_t *param, int n)
-{
-  uint32_t p;
-  while(n&7) {
-    *dest = *param;
-    dest++;
-    n--;
-  }
-  if (n==0) return;
-  n >>= 3;
-  p = (*param<<24) | (*param<<16) | (*param<<8) | (*param);
-  asm volatile (
-    "  movd %2, %%mm0\n"
-    "  punpcklbw %%mm0, %%mm0\n"
-    "1:\n"
-    "  movq %%mm0, (%0)\n"
-    "  add $8, %0\n"
-    "  decl %1\n"
-    "  jnz 1b\n"
-    "  emms\n"
-    : "+r" (dest), "+r" (n), "+r" (p));
-}
-OIL_DEFINE_IMPL_FULL (splat_u8_ns_i386_mmx, splat_u8_ns, OIL_IMPL_FLAG_MMX);
-
-static void splat_u8_ns_mmx2 (uint8_t *dest, const uint8_t *param, int n)
-{
-  uint32_t p;
-  while(n&15) {
-    *dest = *param;
-    dest++;
-    n--;
-  }
-  if (n==0) return;
-  n >>= 4;
-  p = (*param<<24) | (*param<<16) | (*param<<8) | (*param);
-  asm volatile (
-    "  movd %2, %%mm0\n"
-    "  punpcklbw %%mm0, %%mm0\n"
-    "1:\n"
-    "  movq %%mm0, (%0)\n"
-    "  movq %%mm0, 8(%0)\n"
-    "  add $16, %0\n"
-    "  decl %1\n"
-    "  jnz 1b\n"
-    "  emms\n"
-    : "+r" (dest), "+r" (n), "+r" (p));
-}
-OIL_DEFINE_IMPL_FULL(splat_u8_ns_mmx2, splat_u8_ns, OIL_IMPL_FLAG_MMX);
-
-static void splat_u8_ns_mmx2a (uint8_t *dest, const uint8_t *param, int n)
-{
-  uint32_t p;
-  p = *param;
-  p |= p<<8;
-  p |= p<<16;
-  if (n<16) {
-    while(n>0) {
-      *dest = *param;
-      dest++;
-      n--;
-    }
-    return;
-  }
-  asm volatile (
-    "  movd %2, %%mm0\n"
-    "  punpcklbw %%mm0, %%mm0\n"
-    "  movq %%mm0, (%0)\n"
-    "  movq %%mm0, 8(%0)\n"
-    "  movl %1, %%eax\n"
-    "  and $0xf, %%eax\n"
-    "  add %%eax, %0\n"
-    "  shr $4, %1\n"
-    "1:\n"
-    "  movq %%mm0, (%0)\n"
-    "  movq %%mm0, 8(%0)\n"
-    "  add $16, %0\n"
-    "  decl %1\n"
-    "  jnz 1b\n"
-    "  emms\n"
-    : "+r" (dest), "+r" (n), "+r" (p)
-    :
-    : "eax");
-}
-OIL_DEFINE_IMPL_FULL(splat_u8_ns_mmx2a, splat_u8_ns, OIL_IMPL_FLAG_MMX);
-
-static void splat_u16_ns_mmx (uint16_t *dest, const uint16_t *src, int n)
-{
-  while(n&3) {
-    *dest = *src;
-    dest++;
-    n--;
-  }
-  if (n==0) return;
-  n >>= 2;
-  asm volatile (
-    "  movzwl 0(%[src]), %%ecx\n"
-    "  movd %%ecx, %%mm0\n"
-    "  pshufw $00, %%mm0, %%mm0\n"
-    "1:\n"
-    "  movq %%mm0, (%[dest])\n"
-    "  add $8, %0\n"
-    "  decl %[n]\n"
-    "  jnz 1b\n"
-    "  emms\n"
-    : [dest] "+r" (dest),
-      [n] "+r" (n)
-    : [src] "r" (src)
-    : "ecx");
-}
-OIL_DEFINE_IMPL_FULL (splat_u16_ns_mmx, splat_u16_ns, OIL_IMPL_FLAG_MMX);
-
-static void splat_u16_ns_mmx_2 (uint16_t *dest, const uint16_t *src, int n)
-{
-  while(n&7) {
-    *dest = *src;
-    dest++;
-    n--;
-  }
-  if (n==0) return;
-  n >>= 3;
-  asm volatile (
-    "  movzwl 0(%[src]), %%ecx\n"
-    "  movd %%ecx, %%mm0\n"
-    "  pshufw $00, %%mm0, %%mm0\n"
-    "1:\n"
-    "  movq %%mm0, 0(%[dest])\n"
-    "  movq %%mm0, 8(%[dest])\n"
-    "  add $16, %0\n"
-    "  decl %[n]\n"
-    "  jnz 1b\n"
-    "  emms\n"
-    : [dest] "+r" (dest),
-      [n] "+r" (n)
-    : [src] "r" (src),
-    : "ecx");
-}
-OIL_DEFINE_IMPL_FULL (splat_u16_ns_mmx_2, splat_u16_ns, OIL_IMPL_FLAG_MMX);
-
-static void splat_u16_ns_mmx_3 (uint16_t *dest, const uint16_t *src, int n)
-{
-  while(n&15) {
-    *dest = *src;
-    dest++;
-    n--;
-  }
-  if (n==0) return;
-  n >>= 4;
-  asm volatile (
-    "  movzwl 0(%[src]), %%ecx\n"
-    "  movd %%ecx, %%mm0\n"
-    "  pshufw $00, %%mm0, %%mm0\n"
-    "1:\n"
-    "  movq %%mm0, 0(%[dest])\n"
-    "  movq %%mm0, 8(%[dest])\n"
-    "  movq %%mm0, 16(%[dest])\n"
-    "  movq %%mm0, 24(%[dest])\n"
-    "  add $32, %0\n"
-    "  decl %[n]\n"
-    "  jnz 1b\n"
-    "  emms\n"
-    : [dest] "+r" (dest),
-      [n] "+r" (n)
-    : [src] "r" (src)
-    : "ecx");
-}
-OIL_DEFINE_IMPL_FULL (splat_u16_ns_mmx_3, splat_u16_ns, OIL_IMPL_FLAG_MMX);
-
diff --git a/liboil/i386_amd64/copy.c b/liboil/i386_amd64/copy.c
index 763283d..e9dbae8 100644
--- a/liboil/i386_amd64/copy.c
+++ b/liboil/i386_amd64/copy.c
@@ -375,7 +375,8 @@ static void splat_u16_ns_mmx (uint16_t *dest, const uint16_t *src, int n)
     "  emms\n"
     : [dest] "+r" (dest),
       [n] "+r" (n)
-    : [src] "r" (src));
+    : [src] "r" (src)
+    : "ecx");
 }
 OIL_DEFINE_IMPL_FULL (splat_u16_ns_mmx, splat_u16_ns, OIL_IMPL_FLAG_MMX|OIL_IMPL_FLAG_MMXEXT);
 
@@ -401,7 +402,8 @@ static void splat_u16_ns_mmx_2 (uint16_t *dest, const uint16_t *src, int n)
     "  emms\n"
     : [dest] "+r" (dest),
       [n] "+r" (n)
-    : [src] "r" (src));
+    : [src] "r" (src)
+    : "ecx");
 }
 OIL_DEFINE_IMPL_FULL (splat_u16_ns_mmx_2, splat_u16_ns, OIL_IMPL_FLAG_MMX|OIL_IMPL_FLAG_MMXEXT);
 
@@ -429,7 +431,8 @@ static void splat_u16_ns_mmx_3 (uint16_t *dest, const uint16_t *src, int n)
     "  emms\n"
     : [dest] "+r" (dest),
       [n] "+r" (n)
-    : [src] "r" (src));
+    : [src] "r" (src)
+    : "ecx");
 }
 OIL_DEFINE_IMPL_FULL (splat_u16_ns_mmx_3, splat_u16_ns, OIL_IMPL_FLAG_MMX|OIL_IMPL_FLAG_MMXEXT);
 


More information about the Liboil-commit mailing list