[Pixman] [PATCH 1/3] Add AVX fetcher for x8r8g8b8

Matt Turner mattst88 at gmail.com
Sun May 15 20:44:38 PDT 2011


Signed-off-by: Matt Turner <mattst88 at gmail.com>
---
 configure.ac            |   53 ++++++++++++
 pixman/Makefile.am      |   13 +++
 pixman/pixman-avx.c     |  204 +++++++++++++++++++++++++++++++++++++++++++++++
 pixman/pixman-cpu.c     |   24 ++++++
 pixman/pixman-private.h |    5 +
 5 files changed, 299 insertions(+), 0 deletions(-)
 create mode 100644 pixman/pixman-avx.c

diff --git a/configure.ac b/configure.ac
index 2defbd4..20f9b1c 100644
--- a/configure.ac
+++ b/configure.ac
@@ -379,6 +379,59 @@ fi
 AM_CONDITIONAL(USE_SSE2, test $have_sse2_intrinsics = yes)
 
 dnl ===========================================================================
+dnl Check for AVX
+dnl
+dnl AVX_CFLAGS may be preset by the user; otherwise a per-compiler default is
+dnl picked below.  A small test program is then compiled with those flags to
+dnl decide whether the AVX intrinsics are usable.  --disable-avx turns the
+dnl result off, and --enable-avx makes a failed detection a hard error.
+
+if test "x$AVX_CFLAGS" = "x" ; then
+   if test "x$SUNCC" = "xyes"; then
+      # AVX is enabled by default in the Sun Studio 64-bit environment
+      if test "$AMD64_ABI" = "no" ; then
+         AVX_CFLAGS="-xarch=avx"
+      fi
+   else
+      AVX_CFLAGS="-mavx -Winline"
+   fi
+fi
+
+have_avx_intrinsics=no
+AC_MSG_CHECKING(whether to use AVX intrinsics)
+xserver_save_CFLAGS=$CFLAGS
+CFLAGS="$AVX_CFLAGS $CFLAGS"
+
+AC_COMPILE_IFELSE([
+#if defined(__GNUC__) && (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 5))
+#   if !defined(__amd64__) && !defined(__x86_64__)
+#      error "Need GCC >= 4.5 for AVX intrinsics on x86"
+#   endif
+#endif
+#include <immintrin.h>
+int main () {
+    __m256i a = _mm256_set1_epi32 (0), b = _mm256_set1_epi32 (0), c;
+    c = _mm256_permute2f128_si256 (a, b, 0);
+    return 0;
+}], have_avx_intrinsics=yes)
+CFLAGS=$xserver_save_CFLAGS
+
+AC_ARG_ENABLE(avx,
+   [AC_HELP_STRING([--disable-avx],
+                   [disable AVX fast paths])],
+   [enable_avx=$enableval], [enable_avx=auto])
+
+if test $enable_avx = no ; then
+   have_avx_intrinsics=disabled
+fi
+
+if test $have_avx_intrinsics = yes ; then
+   AC_DEFINE(USE_AVX, 1, [use AVX compiler intrinsics])
+fi
+
+AC_MSG_RESULT($have_avx_intrinsics)
+if test $enable_avx = yes && test $have_avx_intrinsics = no ; then
+   AC_MSG_ERROR([AVX intrinsics not detected])
+fi
+
+AM_CONDITIONAL(USE_AVX, test $have_avx_intrinsics = yes)
+
+dnl Export the flags: pixman/Makefile.am expands $(AVX_CFLAGS) and
+dnl $(AVX_LDFLAGS), which stay empty unless they are substituted here.
+AC_SUBST(AVX_CFLAGS)
+AC_SUBST(AVX_LDFLAGS)
+
+dnl ===========================================================================
 dnl Other special flags needed when building code using MMX or SSE instructions
 case $host_os in
    solaris*)
diff --git a/pixman/Makefile.am b/pixman/Makefile.am
index be08266..2740520 100644
--- a/pixman/Makefile.am
+++ b/pixman/Makefile.am
@@ -124,5 +124,18 @@ libpixman_1_la_LIBADD += libpixman-arm-neon.la
 ASM_CFLAGS_arm_neon=
 endif
 
+# AVX code: built as its own convenience library so that pixman-avx.c is compiled with $(AVX_CFLAGS)
+if USE_AVX
+noinst_LTLIBRARIES += libpixman-avx.la
+libpixman_avx_la_SOURCES = \
+	pixman-avx.c
+libpixman_avx_la_CFLAGS = $(DEP_CFLAGS) $(AVX_CFLAGS)
+libpixman_avx_la_LIBADD = $(DEP_LIBS)
+libpixman_1_la_LDFLAGS += $(AVX_LDFLAGS)
+libpixman_1_la_LIBADD += libpixman-avx.la
+
+ASM_CFLAGS_avx=$(AVX_CFLAGS)
+endif
+
 .c.s : $(libpixmaninclude_HEADERS) $(BUILT_SOURCES)
 	$(CC) $(CFLAGS) $(ASM_CFLAGS_$(@:pixman-%.s=%)) $(ASM_CFLAGS_$(@:pixman-arm-%.s=arm_%)) -DHAVE_CONFIG_H -I$(srcdir) -I$(builddir) -I$(top_builddir) -S -o $@ $<
diff --git a/pixman/pixman-avx.c b/pixman/pixman-avx.c
new file mode 100644
index 0000000..09b867d
--- /dev/null
+++ b/pixman/pixman-avx.c
@@ -0,0 +1,204 @@
+/*
+ * Copyright © 2008 Rodrigo Kumpera
+ * Copyright © 2008 André Tupinambá
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of Red Hat not be used in advertising or
+ * publicity pertaining to distribution of the software without specific,
+ * written prior permission.  Red Hat makes no representations about the
+ * suitability of this software for any purpose.  It is provided "as is"
+ * without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
+ * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
+ * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ *
+ * Author:  Rodrigo Kumpera (kumpera at gmail.com)
+ *          André Tupinambá (andrelrt at gmail.com)
+ *
+ * Based on work by Owen Taylor and Søren Sandmann
+ *
+ * AVX code mostly renamed from pixman-sse2.c by Matt Turner.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <immintrin.h> /* for AVX intrinsics */
+#include "pixman-private.h"
+#include "pixman-combine32.h"
+#include "pixman-fast-path.h"
+
+static __m256i mask_ff000000; /* 0xff000000 in each 32-bit lane; set by _pixman_implementation_create_avx () */
+
+/*
+ * Load 8 pixels (one 256-bit vector) from src with _mm256_load_si256 (),
+ * so src must be aligned on a 32-byte boundary.
+ *
+ * src is const-qualified to match load_256_unaligned (); the function
+ * only reads through the pointer.
+ */
+static force_inline __m256i
+load_256_aligned (const __m256i* src)
+{
+    return _mm256_load_si256 (src);
+}
+
+/*
+ * Load 8 pixels (one 256-bit vector) from src using the unaligned load
+ * intrinsic, so src may be an unaligned address.
+ */
+static force_inline __m256i
+load_256_unaligned (const __m256i* src)
+{
+    __m256i pixels = _mm256_loadu_si256 (src);
+
+    return pixels;
+}
+
+/*
+ * Save 8 pixels to dst with the streaming store intrinsic
+ * (_mm256_stream_si256), i.e. using Write Combining memory.
+ * dst must be aligned on a 32-byte boundary.
+ */
+static force_inline void
+save_256_write_combining (__m256i* dst, __m256i data)
+{
+    _mm256_stream_si256 (dst, data);
+}
+
+/*
+ * Save 8 pixels (one 256-bit vector) to dst, which must be aligned on a
+ * 32-byte boundary.
+ */
+static force_inline void
+save_256_aligned (__m256i* dst, __m256i data)
+{
+    _mm256_store_si256 (dst, data);
+}
+
+/*
+ * Save 8 pixels (one 256-bit vector) to dst using the unaligned store
+ * intrinsic, so dst may be an unaligned address.
+ */
+static force_inline void
+save_256_unaligned (__m256i* dst, __m256i data)
+{
+    _mm256_storeu_si256 (dst, data);
+}
+
+/*
+ * Build a 256-bit vector from the pair (mask0, mask1) repeated four times,
+ * passed to _mm256_set_epi32 () in the order mask0, mask1, mask0, mask1, ...
+ *
+ * For Sun Studio 12 and later (__SUNPRO_C >= 0x590) a macro is used instead
+ * of the inline function, to work around a code generation bug in that
+ * compiler.
+ */
+#if defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590)
+# define create_mask_2x32_256(mask0, mask1)				\
+    (_mm256_set_epi32 ((mask0), (mask1), (mask0), (mask1),		\
+		       (mask0), (mask1), (mask0), (mask1)))
+#else
+static force_inline __m256i
+create_mask_2x32_256 (uint32_t mask0, uint32_t mask1)
+{
+    __m256i mask = _mm256_set_epi32 (mask0, mask1, mask0, mask1,
+				     mask0, mask1, mask0, mask1);
+
+    return mask;
+}
+#endif
+
+/* Fast path table handed to _pixman_implementation_create ().  It holds
+ * only a PIXMAN_OP_NONE entry (presumably the end-of-table marker), so no
+ * composite fast paths are listed here.
+ */
+static const pixman_fast_path_t avx_fast_paths[] = { { PIXMAN_OP_NONE } };
+
+/*
+ * Scanline fetcher registered for PIXMAN_x8r8g8b8 in fetchers[].
+ *
+ * Copies iter->width pixels from iter->bits to iter->buffer, ORing
+ * 0xff000000 into each one (the top byte of every pixel becomes 0xff),
+ * and advances iter->bits by iter->stride for the next call.
+ *
+ * The mask argument is not used.  Returns iter->buffer.
+ */
+static uint32_t *
+avx_fetch_x8r8g8b8 (pixman_iter_t *iter, const uint32_t *mask)
+{
+    int w = iter->width;
+    __m256 ff000000 = _mm256_castsi256_ps (mask_ff000000);
+    uint32_t *dst = iter->buffer;
+    const uint32_t *src = (const uint32_t *)iter->bits;
+
+    iter->bits += iter->stride;
+
+    /* Scalar head: one pixel at a time until dst is 32-byte aligned, as
+     * save_256_aligned () requires.
+     */
+    while (w && ((unsigned long)dst) & 31)
+    {
+	*dst++ = (*src++) | 0xff000000;
+	w--;
+    }
+
+    /* Vector body: 8 pixels per iteration; src may be unaligned. */
+    while (w >= 8)
+    {
+	/* _mm256_or_ps is a hack, since AVX doesn't have integer OR.  The
+	 * _mm256_cast* intrinsics only reinterpret the bits; they replace
+	 * C-style casts between vector types, which are a GCC extension.
+	 */
+	__m256 pixels = _mm256_castsi256_ps (
+	    load_256_unaligned ((const __m256i *)src));
+
+	save_256_aligned (
+	    (__m256i *)dst,
+	    _mm256_castps_si256 (_mm256_or_ps (pixels, ff000000)));
+
+	dst += 8;
+	src += 8;
+	w -= 8;
+    }
+
+    /* Scalar tail: fewer than 8 pixels remain. */
+    while (w)
+    {
+	*dst++ = (*src++) | 0xff000000;
+	w--;
+    }
+
+    return iter->buffer;
+}
+
+/* Pairs a pixel format with the scanline fetcher that handles it. */
+typedef struct
+{
+    pixman_format_code_t       format;
+    pixman_iter_get_scanline_t get_scanline;
+} fetcher_info_t;
+
+/* Formats that have an AVX fetcher.  The PIXMAN_null entry ends the list;
+ * it is the stop condition of the lookup loop in avx_src_iter_init ().
+ */
+static const fetcher_info_t fetchers[] =
+{
+    { PIXMAN_x8r8g8b8, avx_fetch_x8r8g8b8 },
+    { PIXMAN_null }
+};
+
+/*
+ * Source iterator setup for the AVX implementation.
+ *
+ * An AVX fetcher is installed when all of the following hold: the
+ * iterator has ITER_NARROW set, the image has every bit of FLAGS set, the
+ * requested rectangle lies inside the image, and the image format has an
+ * entry in fetchers[].  In that case iter->bits is pointed at the first
+ * requested pixel, iter->stride is set to image->bits.rowstride * 4 and
+ * iter->get_scanline to the matching fetcher.
+ *
+ * In every other case the request is passed on to imp->delegate.
+ */
+static void
+avx_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
+{
+    pixman_image_t *image = iter->image;
+    int x = iter->x;
+    int y = iter->y;
+    int width = iter->width;
+    int height = iter->height;
+    const fetcher_info_t *f;
+
+#define FLAGS								\
+    (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM)
+
+    /* Anything the fetchers cannot handle goes straight to the delegate. */
+    if (!(iter->flags & ITER_NARROW)				||
+	(image->common.flags & FLAGS) != FLAGS			||
+	x < 0 || y < 0						||
+	x + width > image->bits.width				||
+	y + height > image->bits.height)
+    {
+	goto delegate;
+    }
+
+    for (f = fetchers; f->format != PIXMAN_null; f++)
+    {
+	uint8_t *first_row;
+	int row_bytes;
+
+	if (image->common.extended_format_code != f->format)
+	    continue;
+
+	first_row = (uint8_t *)image->bits.bits;
+	row_bytes = image->bits.rowstride * 4;
+
+	iter->bits = first_row + row_bytes * iter->y + x * PIXMAN_FORMAT_BPP (f->format) / 8;
+	iter->stride = row_bytes;
+	iter->get_scanline = f->get_scanline;
+	return;
+    }
+
+delegate:
+    imp->delegate->src_iter_init (imp->delegate, iter);
+}
+
+/*
+ * Create the AVX implementation layered on top of fallback.
+ *
+ * Fills in the mask_ff000000 constant, creates the implementation with
+ * the avx_fast_paths table and installs avx_src_iter_init as its source
+ * iterator initializer.
+ *
+ * With GCC on targets other than x86-64 the function is marked
+ * force_align_arg_pointer (presumably because callers are not guaranteed
+ * to keep the stack aligned for vector code -- confirm).
+ */
+#if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__)
+__attribute__((__force_align_arg_pointer__))
+#endif
+pixman_implementation_t *
+_pixman_implementation_create_avx (pixman_implementation_t *fallback)
+{
+    pixman_implementation_t *avx_imp;
+
+    /* AVX constants */
+    mask_ff000000 = create_mask_2x32_256 (0xff000000, 0xff000000);
+
+    avx_imp = _pixman_implementation_create (fallback, avx_fast_paths);
+    avx_imp->src_iter_init = avx_src_iter_init;
+
+    return avx_imp;
+}
diff --git a/pixman/pixman-cpu.c b/pixman/pixman-cpu.c
index 0e14ecb..53e1fa2 100644
--- a/pixman/pixman-cpu.c
+++ b/pixman/pixman-cpu.c
@@ -563,6 +563,7 @@ pixman_have_sse2 (void)
 
 #endif
 
+
 #else /* __amd64__ */
 #ifdef USE_MMX
 #define pixman_have_mmx() TRUE
@@ -570,6 +571,24 @@ pixman_have_sse2 (void)
 #ifdef USE_SSE2
 #define pixman_have_sse2() TRUE
 #endif
+#ifdef USE_AVX
+static pixman_bool_t
+pixman_have_avx (void)
+{
+    static pixman_bool_t initialized = FALSE;
+    static pixman_bool_t avx_present = TRUE;
+/*
+    if (!initialized)
+    {
+	unsigned int features = detect_cpu_features ();
+	avx_present = (features & (MMX | MMX_EXTENSIONS | SSE | SSE2 | AVX)) == (MMX | MMX_EXTENSIONS | SSE | SSE2 | AVX);
+	initialized = TRUE;
+    }
+*/
+    return avx_present;
+}
+
+#endif
 #endif /* __amd64__ */
 #endif
 
@@ -591,6 +610,11 @@ _pixman_choose_implementation (void)
 	imp = _pixman_implementation_create_sse2 (imp);
 #endif
 
+#ifdef USE_AVX
+    if (pixman_have_avx ())
+	imp = _pixman_implementation_create_avx (imp);
+#endif
+
 #ifdef USE_ARM_SIMD
     if (pixman_have_arm_simd ())
 	imp = _pixman_implementation_create_arm_simd (imp);
diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
index 60060a9..d979448 100644
--- a/pixman/pixman-private.h
+++ b/pixman/pixman-private.h
@@ -533,6 +533,11 @@ pixman_implementation_t *
 _pixman_implementation_create_sse2 (pixman_implementation_t *fallback);
 #endif
 
+#ifdef USE_AVX
+pixman_implementation_t *
+_pixman_implementation_create_avx (pixman_implementation_t *fallback);
+#endif
+
 #ifdef USE_ARM_SIMD
 pixman_implementation_t *
 _pixman_implementation_create_arm_simd (pixman_implementation_t *fallback);
-- 
1.7.3.4



More information about the Pixman mailing list