[PATCH] Add DEC Alpha sum_s16 fast path

Matt Turner mattst88 at gmail.com
Thu Nov 5 14:25:16 PST 2009


The assembly loop is lifted from Compaq's Compiler Writer's Guide for the
Alpha 21264, Appendix B.

http://h18000.www1.hp.com/cpq-alphaserver/technology/literature/cmpwrgd.pdf

Signed-off-by: Matt Turner <mattst88 at gmail.com>
---
 configure.ac             |    1 +
 liboil/Makefile.am       |    9 +++++-
 liboil/alpha/Makefile.am |    8 +++++
 liboil/alpha/sum.c       |   78 ++++++++++++++++++++++++++++++++++++++++++++++
 m4/as-host-defines.m4    |    5 +++
 5 files changed, 100 insertions(+), 1 deletions(-)
 create mode 100644 liboil/alpha/Makefile.am
 create mode 100644 liboil/alpha/sum.c

diff --git a/configure.ac b/configure.ac
index 8145e64..d60da26 100644
--- a/configure.ac
+++ b/configure.ac
@@ -256,6 +256,7 @@ AC_CONFIG_FILES([
 Makefile 
 doc/Makefile
 liboil/Makefile
+liboil/alpha/Makefile
 liboil/amd64/Makefile
 liboil/3dnow/Makefile
 liboil/c/Makefile
diff --git a/liboil/Makefile.am b/liboil/Makefile.am
index 4991a34..7367ecb 100644
--- a/liboil/Makefile.am
+++ b/liboil/Makefile.am
@@ -1,7 +1,7 @@
 
 pkgincludedir = $(includedir)/liboil-@LIBOIL_MAJORMINOR@/liboil
 
-DIST_SUBDIRS = amd64 3dnow c colorspace conv copy dct fb i386 i386_amd64 jpeg math md5 mmx motovec powerpc powerpc_asm_blocks ref simdpack sse utf8 deprecated arm
+DIST_SUBDIRS = alpha amd64 3dnow c colorspace conv copy dct fb i386 i386_amd64 jpeg math md5 mmx motovec powerpc powerpc_asm_blocks ref simdpack sse utf8 deprecated arm
 SUBDIRS = c colorspace conv copy dct jpeg math md5 ref simdpack utf8 deprecated
 
 EXTRA_DIST = README
@@ -53,6 +53,13 @@ libs += arm/libarm.la
 endif
 endif
 
+if HAVE_ALPHA
+if HAVE_GCC_ASM
+SUBDIRS += alpha
+libs += alpha/libalpha.la
+endif
+endif
+
 
 lib_LTLIBRARIES = liboil-@LIBOIL_MAJORMINOR@.la
 if USE_NEW_ABI
diff --git a/liboil/alpha/Makefile.am b/liboil/alpha/Makefile.am
new file mode 100644
index 0000000..1a8f913
--- /dev/null
+++ b/liboil/alpha/Makefile.am
@@ -0,0 +1,8 @@
+
+noinst_LTLIBRARIES = libalpha.la
+
+libalpha_la_SOURCES = \
+	sum.c
+
+libalpha_la_CFLAGS = $(LIBOIL_CFLAGS)
+
diff --git a/liboil/alpha/sum.c b/liboil/alpha/sum.c
new file mode 100644
index 0000000..238b2b3
--- /dev/null
+++ b/liboil/alpha/sum.c
@@ -0,0 +1,78 @@
+
+#include <liboil/liboilfunction.h>
+#include <liboil/liboilclasses.h>
+
+void
+sum_s16_alpha (int16_t *d, int16_t *s, int n)
+{
+	/* Adds the n int16_t values at s and stores the sum, converted to
+	#  int16_t, in *d.  The numbers 16, 17, ... below are presumably the
+	#  register numbers of the listing this was lifted from; here they are:
+	#  16 -> %7 (s, the pointer p)      17 -> %8 (n, the counter length)
+	#  24, 25 -> %4, %5 (tmp5, tmp6) hold the partial sums on loop exit
+	#  18, 19 -> %2, %3 (tmp3, tmp4) have the first two quadwords of data:
+	#     18: HGFEDCBA   19: PONMLKJI   (one letter per byte)
+	#  NOTE(review): %7/%8 are "r" inputs yet lda writes them -- confirm. */
+
+	unsigned long tmp1, tmp2; /* 0 and 1 */
+	unsigned long tmp3, tmp4; /* 18 and 19 */
+	unsigned long tmp5, tmp6; /* 24 and 25 */
+	unsigned long tmp7;       /* 27 */
+	unsigned int sum = 0;
+  
+	while(n&15) { /* scalar adds until n is a multiple of 16 */
+		sum += s[0];
+		s++;
+		n--;
+	}
+
+	if (n > 0) {
+
+		/* Each pass handles 16 bytes (8 values): lda %8,-8 / lda %7,16.
+		 * NOTE(review): the final pass's ldq 16(%7)/24(%7) read 16 bytes
+		 * beyond the data that gets summed -- confirm this is acceptable. */
+		asm volatile(
+			"	ldq    %2, 0(%7)\n"
+			"	bis    $31, $31,   %4\n"
+			"	ldq    %3, 8(%7)\n"
+			"	bis    $31, $31,   %5\n"
+
+			".align 4                      # Octaword alignment\n"
+			"1:	zapnot %2,  0x33,   %0 # U1 chunk 0: 00FE00BA\n"
+			"	bis    $31, $31,   $31 # L  NOP\n"
+			"	zap    %2,  0x33,   %1 # U0 chunk 1: HG00DC00\n"
+			"	ldq    %2,  16(%7)     # L1 load 2 ahead *p: HGFEDCBA\n"
+
+			"	addq   %4,  %0,     %4 # U1 accumulate 0\n"
+			"	bis    $31, $31,   $31 # L  NOP\n"
+			"	srl    %1,  16,     %1 # U0 shift: 00HG00DC\n"
+			"	lda    %8,  -8(%8)     # L0 countdown\n"
+
+			"	zapnot %3,  0x33,   %0 # U1 chunk 3: 00NM00JI\n"
+			"	bis    $31, $31,   $31 # L  NOP\n"
+			"	zap    %3,  0x33,   %6 # U0 chunk 4: PO00LK00\n"
+			"	ldq    %3,  24(%7)     # L0 load 2 ahead *p: PONMLKJIL1\n"
+
+			"	addq   %4,  %0,     %4 # U1 accumulate 0\n"
+			"	addq   %5,  %1,     %5 # L0 accumulate 1\n"
+			"	srl    %6,  16,     %6 # U0 shift: 00PO00LK\n"
+			"	ldl    $31, 512(%7)    # L1 prefetch\n"
+
+			"	lda    %7,  16(%7)     # U1 p++\n"
+			"	addq   %5,  %6,     %5 # L0 accumulate 1\n"
+			"	bgt    %8,  1b         # U0 loop control\n"
+			"	bis    $31, $31,   $31 # L  NOP (replace with fall through)\n"
+
+			: "=&r" (tmp1), "=&r" (tmp2), "=&r" (tmp3),
+			  "=&r" (tmp4), "=&r" (tmp5), "=&r" (tmp6), "=&r" (tmp7)
+			: "r" (s), "r" (n)
+		);
+
+		tmp1 = tmp5 >> 32;        /* split each accumulator into */
+		tmp2 = tmp5 & 0xffffffff; /* its upper and lower 32-bit  */
+		tmp3 = tmp6 >> 32;        /* halves, which carry separate */
+		tmp4 = tmp6 & 0xffffffff; /* running sums                */
+
+		sum += tmp1 + tmp2 + tmp3 + tmp4;
+	}
+
+	*d = sum;
+}
+OIL_DEFINE_IMPL_FULL (sum_s16_alpha, sum_s16, OIL_IMPL_FLAG_ASM);
diff --git a/m4/as-host-defines.m4 b/m4/as-host-defines.m4
index 8b813bb..8549971 100644
--- a/m4/as-host-defines.m4
+++ b/m4/as-host-defines.m4
@@ -27,12 +27,17 @@ AC_DEFUN([AS_HOST_DEFINES],
       HAVE_ARM=yes
       AC_DEFINE(HAVE_ARM, 1, [Defined if host is arm])
       ;;
+    xalpha)
+      HAVE_ALPHA=yes
+      AC_DEFINE(HAVE_ALPHA, 1, [Defined if host is alpha])
+      ;;
   esac
 
 AM_CONDITIONAL(HAVE_I386, test "x$HAVE_I386" = "xyes")
 AM_CONDITIONAL(HAVE_AMD64, test "x$HAVE_AMD64" = "xyes")
 AM_CONDITIONAL(HAVE_POWERPC, test "x$HAVE_POWERPC" = "xyes")
 AM_CONDITIONAL(HAVE_ARM, test "x$HAVE_ARM" = "xyes")
+AM_CONDITIONAL(HAVE_ALPHA, test "x$HAVE_ALPHA" = "xyes")
 
   case "${host_os}" in
     mingw*)
-- 
1.6.4.4



More information about the xorg-devel mailing list