[Pixman] [PATCH 1/3] Add AVX fetcher for x8r8g8b8
Matt Turner
mattst88 at gmail.com
Sun May 15 20:44:38 PDT 2011
Signed-off-by: Matt Turner <mattst88 at gmail.com>
---
configure.ac | 53 ++++++++++++
pixman/Makefile.am | 13 +++
pixman/pixman-avx.c | 204 +++++++++++++++++++++++++++++++++++++++++++++++
pixman/pixman-cpu.c | 24 ++++++
pixman/pixman-private.h | 5 +
5 files changed, 299 insertions(+), 0 deletions(-)
create mode 100644 pixman/pixman-avx.c
diff --git a/configure.ac b/configure.ac
index 2defbd4..20f9b1c 100644
--- a/configure.ac
+++ b/configure.ac
@@ -379,6 +379,59 @@ fi
AM_CONDITIONAL(USE_SSE2, test $have_sse2_intrinsics = yes)
dnl ===========================================================================
+dnl Check for AVX
+dnl
+dnl Outline of this section:
+dnl   1. choose default AVX_CFLAGS unless the user already set them,
+dnl   2. test-compile a small AVX intrinsics program with those flags,
+dnl   3. apply the --enable-avx / --disable-avx override,
+dnl   4. publish the result as the USE_AVX define and automake conditional.
+
+if test "x$AVX_CFLAGS" = "x" ; then
+ if test "x$SUNCC" = "xyes"; then
+ # AVX is enabled by default in the Sun Studio 64-bit environment
+ # NOTE(review): presumably modelled on the SSE2 check; confirm that Sun
+ # Studio really enables AVX by default when AMD64_ABI is not "no".
+ if test "$AMD64_ABI" = "no" ; then
+ AVX_CFLAGS="-xarch=avx"
+ fi
+ else
+ AVX_CFLAGS="-mavx -Winline"
+ fi
+fi
+
+dnl Probe the compiler with AVX_CFLAGS prepended to CFLAGS.  The original
+dnl CFLAGS are saved first and restored right after the test compile.
+have_avx_intrinsics=no
+AC_MSG_CHECKING(whether to use AVX intrinsics)
+xserver_save_CFLAGS=$CFLAGS
+CFLAGS="$AVX_CFLAGS $CFLAGS"
+
+AC_COMPILE_IFELSE([
+#if defined(__GNUC__) && (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 5))
+# if !defined(__amd64__) && !defined(__x86_64__)
+# error "Need GCC >= 4.5 for AVX intrinsics on x86"
+# endif
+#endif
+#include <immintrin.h>
+int main () {
+ __m256i a = _mm256_set1_epi32 (0), b = _mm256_set1_epi32 (0), c;
+ c = _mm256_permute2f128_si256 (a, b, 0);
+ return 0;
+}], have_avx_intrinsics=yes)
+CFLAGS=$xserver_save_CFLAGS
+
+dnl enable_avx is "auto" when neither --enable-avx nor --disable-avx is given.
+AC_ARG_ENABLE(avx,
+ [AC_HELP_STRING([--disable-avx],
+ [disable AVX fast paths])],
+ [enable_avx=$enableval], [enable_avx=auto])
+
+dnl An explicit --disable-avx overrides a successful probe.
+if test $enable_avx = no ; then
+ have_avx_intrinsics=disabled
+fi
+
+if test $have_avx_intrinsics = yes ; then
+ AC_DEFINE(USE_AVX, 1, [use AVX compiler intrinsics])
+fi
+
+dnl Report yes, no or disabled.  An explicit --enable-avx turns a failed
+dnl probe into a configure error instead of a silent fallback.
+AC_MSG_RESULT($have_avx_intrinsics)
+if test $enable_avx = yes && test $have_avx_intrinsics = no ; then
+ AC_MSG_ERROR([AVX intrinsics not detected])
+fi
+
+AM_CONDITIONAL(USE_AVX, test $have_avx_intrinsics = yes)
+
+dnl ===========================================================================
dnl Other special flags needed when building code using MMX or SSE instructions
case $host_os in
solaris*)
diff --git a/pixman/Makefile.am b/pixman/Makefile.am
index be08266..2740520 100644
--- a/pixman/Makefile.am
+++ b/pixman/Makefile.am
@@ -124,5 +124,18 @@ libpixman_1_la_LIBADD += libpixman-arm-neon.la
ASM_CFLAGS_arm_neon=
endif
+# avx code
+# pixman-avx.c is built as its own convenience library so that it alone is
+# compiled with $(AVX_CFLAGS); the result is then added to libpixman-1.
+if USE_AVX
+noinst_LTLIBRARIES += libpixman-avx.la
+libpixman_avx_la_SOURCES = \
+ pixman-avx.c
+libpixman_avx_la_CFLAGS = $(DEP_CFLAGS) $(AVX_CFLAGS)
+libpixman_avx_la_LIBADD = $(DEP_LIBS)
+# NOTE(review): the AVX check in configure.ac does not appear to set
+# AVX_LDFLAGS; confirm it is defined elsewhere, otherwise this expands to
+# nothing.
+libpixman_1_la_LDFLAGS += $(AVX_LDFLAGS)
+libpixman_1_la_LIBADD += libpixman-avx.la
+
+# Flags looked up by the .c.s rule (via ASM_CFLAGS_<name>) when generating
+# pixman-avx.s.
+ASM_CFLAGS_avx=$(AVX_CFLAGS)
+endif
+
.c.s : $(libpixmaninclude_HEADERS) $(BUILT_SOURCES)
$(CC) $(CFLAGS) $(ASM_CFLAGS_$(@:pixman-%.s=%)) $(ASM_CFLAGS_$(@:pixman-arm-%.s=arm_%)) -DHAVE_CONFIG_H -I$(srcdir) -I$(builddir) -I$(top_builddir) -S -o $@ $<
diff --git a/pixman/pixman-avx.c b/pixman/pixman-avx.c
new file mode 100644
index 0000000..09b867d
--- /dev/null
+++ b/pixman/pixman-avx.c
@@ -0,0 +1,204 @@
+/*
+ * Copyright © 2008 Rodrigo Kumpera
+ * Copyright © 2008 André Tupinambá
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of Red Hat not be used in advertising or
+ * publicity pertaining to distribution of the software without specific,
+ * written prior permission. Red Hat makes no representations about the
+ * suitability of this software for any purpose. It is provided "as is"
+ * without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
+ * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
+ * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ *
+ * Author: Rodrigo Kumpera (kumpera at gmail.com)
+ * André Tupinambá (andrelrt at gmail.com)
+ *
+ * Based on work by Owen Taylor and Søren Sandmann
+ *
+ * AVX code mostly renamed from pixman-sse2.c by Matt Turner.
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <immintrin.h> /* for AVX intrinsics */
+#include "pixman-private.h"
+#include "pixman-combine32.h"
+#include "pixman-fast-path.h"
+
+/* 256-bit constant that avx_fetch_x8r8g8b8 () ORs into eight pixels at a
+ * time.  It has static storage and is assigned its real value
+ * (0xff000000 in every 32-bit lane) by _pixman_implementation_create_avx ().
+ */
+static __m256i mask_ff000000;
+
+/* Load 8 pixels (one 256-bit vector) from a 32-byte-aligned address.
+ *
+ * src is const-qualified, matching load_256_unaligned (): the load never
+ * writes through the pointer, so callers holding read-only pixel data do
+ * not have to cast const away.  Callers passing a non-const pointer are
+ * unaffected.
+ */
+static force_inline __m256i
+load_256_aligned (const __m256i* src)
+{
+    return _mm256_load_si256 (src);
+}
+
+/* Load 8 pixels (one 256-bit vector) from an address that need not be
+ * aligned; thin wrapper around _mm256_loadu_si256 ().
+ */
+static force_inline __m256i
+load_256_unaligned (const __m256i* src)
+{
+ return _mm256_loadu_si256 (src);
+}
+
+/* Store 8 pixels (one 256-bit vector) to a 32-byte-aligned address using
+ * _mm256_stream_si256 (), i.e. a store carrying a non-temporal hint,
+ * intended for write-combining style output.
+ */
+static force_inline void
+save_256_write_combining (__m256i* dst,
+ __m256i data)
+{
+ _mm256_stream_si256 (dst, data);
+}
+
+/* Store 8 pixels (one 256-bit vector) to a 32-byte-aligned address; thin
+ * wrapper around _mm256_store_si256 ().
+ */
+static force_inline void
+save_256_aligned (__m256i* dst,
+ __m256i data)
+{
+ _mm256_store_si256 (dst, data);
+}
+
+/* Store 8 pixels (one 256-bit vector) to an address that need not be
+ * aligned; thin wrapper around _mm256_storeu_si256 ().
+ */
+static force_inline void
+save_256_unaligned (__m256i* dst,
+ __m256i data)
+{
+ _mm256_storeu_si256 (dst, data);
+}
+
+/* create_mask_2x32_256 (mask0, mask1): build a 256-bit vector holding the
+ * two 32-bit values repeated four times.  _mm256_set_epi32 () takes its
+ * arguments from the highest element to the lowest, so mask0 lands in
+ * elements 7, 5, 3, 1 and mask1 in elements 6, 4, 2, 0.
+ *
+ * Two equivalent forms are provided: a macro for Sun Studio and an inline
+ * function for every other compiler.
+ */
+/* Work around a code generation bug in Sun Studio 12. */
+/* NOTE(review): presumably inherited from the SSE2 version of this helper;
+ * confirm the bug also affects the 256-bit variant.
+ */
+#if defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590)
+# define create_mask_2x32_256(mask0, mask1) \
+ (_mm256_set_epi32 ((mask0), (mask1), (mask0), (mask1), \
+ (mask0), (mask1), (mask0), (mask1)))
+#else
+static force_inline __m256i
+create_mask_2x32_256 (uint32_t mask0,
+ uint32_t mask1)
+{
+ return _mm256_set_epi32 (mask0, mask1, mask0, mask1,
+ mask0, mask1, mask0, mask1);
+}
+#endif
+
+/* Fast-path table handed to _pixman_implementation_create ().  It holds
+ * only the PIXMAN_OP_NONE entry (presumably the table terminator), so this
+ * implementation registers no composite fast paths.
+ */
+static const pixman_fast_path_t avx_fast_paths[] =
+{
+ { PIXMAN_OP_NONE },
+};
+
+/* Scanline fetcher installed for PIXMAN_x8r8g8b8 images.
+ *
+ * Copies iter->width pixels from iter->bits to iter->buffer, OR-ing
+ * 0xff000000 into every pixel, and advances iter->bits by iter->stride so
+ * the next call reads the following row.  The mask argument is not used.
+ *
+ * Returns iter->buffer.
+ */
+static uint32_t *
+avx_fetch_x8r8g8b8 (pixman_iter_t *iter, const uint32_t *mask)
+{
+    int w = iter->width;
+    /* AVX has no 256-bit integer OR, so the OR is done with the float
+     * instruction.  The _mm256_cast* intrinsics only reinterpret the bits
+     * (no value conversion) and, unlike C-style casts between vector types
+     * (a GNU C extension), are provided by <immintrin.h> itself.
+     */
+    __m256 ff000000 = _mm256_castsi256_ps (mask_ff000000);
+    uint32_t *dst = iter->buffer;
+    const uint32_t *src = (const uint32_t *)iter->bits;
+
+    iter->bits += iter->stride;
+
+    /* Scalar head: step one pixel at a time until dst is 32-byte aligned,
+     * which the aligned store in the vector loop requires.
+     */
+    while (w && (((unsigned long)dst) & 31))
+    {
+        *dst++ = (*src++) | 0xff000000;
+        w--;
+    }
+
+    /* Vector body: 8 pixels per iteration.  Only dst is known to be
+     * aligned, so src is read with an unaligned load.
+     */
+    while (w >= 8)
+    {
+        __m256 pixels = _mm256_castsi256_ps (
+            load_256_unaligned ((const __m256i *)src));
+
+        save_256_aligned (
+            (__m256i *)dst,
+            _mm256_castps_si256 (_mm256_or_ps (pixels, ff000000)));
+
+        dst += 8;
+        src += 8;
+        w -= 8;
+    }
+
+    /* Scalar tail: the remaining 0-7 pixels. */
+    while (w)
+    {
+        *dst++ = (*src++) | 0xff000000;
+        w--;
+    }
+
+    return iter->buffer;
+}
+
+/* One entry of the fetcher lookup table: pairs a pixel format code with
+ * the scanline function that avx_src_iter_init () installs for it.
+ */
+typedef struct
+{
+ pixman_format_code_t format;
+ pixman_iter_get_scanline_t get_scanline;
+} fetcher_info_t;
+
+/* Formats with an AVX scanline fetcher.  avx_src_iter_init () scans this
+ * table until it reaches the PIXMAN_null entry, which must stay last.
+ */
+static const fetcher_info_t fetchers[] =
+{
+ { PIXMAN_x8r8g8b8, avx_fetch_x8r8g8b8 },
+ { PIXMAN_null }
+};
+
+/* src_iter_init hook of the AVX implementation.
+ *
+ * The iterator is given an AVX scanline fetcher when all of the following
+ * hold: ITER_NARROW is set in iter->flags, the image has every bit of
+ * FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM set, the requested
+ * rectangle lies inside the image, and the image's extended format code
+ * has an entry in fetchers[].  In that case iter->bits is pointed at the
+ * first requested pixel and iter->stride is set to the row size in bytes.
+ *
+ * In every other case the request is passed on to imp->delegate.
+ */
+static void
+avx_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
+{
+    pixman_image_t *img = iter->image;
+    const fetcher_info_t *info;
+    int col = iter->x;
+    int row = iter->y;
+    int span = iter->width;
+    int rows = iter->height;
+
+#define FLAGS \
+    (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM)
+
+    /* Guard: anything this implementation cannot handle goes straight to
+     * the delegate.  The tests run in the same order as before and still
+     * short-circuit, so the bounds are only examined once the flags match.
+     */
+    if (!(iter->flags & ITER_NARROW)            ||
+        (img->common.flags & FLAGS) != FLAGS    ||
+        col < 0 || row < 0                      ||
+        col + span > img->bits.width            ||
+        row + rows > img->bits.height)
+    {
+        imp->delegate->src_iter_init (imp->delegate, iter);
+        return;
+    }
+
+    for (info = &fetchers[0]; info->format != PIXMAN_null; info++)
+    {
+        uint8_t *base;
+        int row_bytes;
+
+        if (img->common.extended_format_code != info->format)
+            continue;
+
+        base = (uint8_t *)img->bits.bits;
+        /* rowstride is scaled by 4 to obtain a byte count. */
+        row_bytes = img->bits.rowstride * 4;
+
+        iter->bits = base + row_bytes * iter->y + col * PIXMAN_FORMAT_BPP (info->format) / 8;
+        iter->stride = row_bytes;
+
+        iter->get_scanline = info->get_scanline;
+        return;
+    }
+
+    /* No AVX fetcher for this format. */
+    imp->delegate->src_iter_init (imp->delegate, iter);
+}
+
+/* Create the AVX implementation, layered on top of fallback.
+ *
+ * Initializes the file's vector constant, creates an implementation with
+ * the (empty) AVX fast-path table and installs avx_src_iter_init as its
+ * src_iter_init hook.  Returns the new implementation.
+ *
+ * When built with GCC for 32-bit x86, the function carries
+ * force_align_arg_pointer so the stack is realigned on entry; presumably
+ * callers there do not guarantee the alignment the vector code expects.
+ */
+#if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__)
+__attribute__((__force_align_arg_pointer__))
+#endif
+pixman_implementation_t *
+_pixman_implementation_create_avx (pixman_implementation_t *fallback)
+{
+    pixman_implementation_t *avx_imp;
+
+    /* AVX constants */
+    mask_ff000000 = create_mask_2x32_256 (0xff000000, 0xff000000);
+
+    avx_imp = _pixman_implementation_create (fallback, avx_fast_paths);
+    avx_imp->src_iter_init = avx_src_iter_init;
+
+    return avx_imp;
+}
diff --git a/pixman/pixman-cpu.c b/pixman/pixman-cpu.c
index 0e14ecb..53e1fa2 100644
--- a/pixman/pixman-cpu.c
+++ b/pixman/pixman-cpu.c
@@ -563,6 +563,7 @@ pixman_have_sse2 (void)
#endif
+
#else /* __amd64__ */
#ifdef USE_MMX
#define pixman_have_mmx() TRUE
@@ -570,6 +571,24 @@ pixman_have_sse2 (void)
#ifdef USE_SSE2
#define pixman_have_sse2() TRUE
#endif
+#ifdef USE_AVX
+/* Report whether AVX instructions may be executed on this machine.
+ *
+ * Being compiled with AVX support says nothing about the CPU the library
+ * ends up running on, so the answer is determined at run time, once, and
+ * cached in a function-local static.  AVX is usable only when
+ *
+ *   - CPUID leaf 1 reports OSXSAVE (ECX bit 27) and AVX (ECX bit 28), and
+ *   - XCR0, read with XGETBV, has bits 1 and 2 set, i.e. the operating
+ *     system saves and restores both the XMM and the YMM register state.
+ *
+ * The probe uses GNU-style inline assembly.  With compilers that do not
+ * define __GNUC__ no probe is available and FALSE is returned, so the AVX
+ * implementation is simply not selected there.
+ */
+static pixman_bool_t
+pixman_have_avx (void)
+{
+    static pixman_bool_t initialized = FALSE;
+    static pixman_bool_t avx_present = FALSE;
+
+    if (!initialized)
+    {
+#if defined(__GNUC__)
+        unsigned int eax, ebx, ecx, edx;
+
+        __asm__ volatile ("cpuid"
+                          : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
+                          : "a" (1), "c" (0));
+
+        if ((ecx & (1u << 27)) && (ecx & (1u << 28)))
+        {
+            unsigned int xcr0_lo, xcr0_hi;
+
+            /* xgetbv with ECX = 0 reads XCR0.  It is emitted as raw
+             * opcode bytes so that assemblers which do not know the
+             * mnemonic still accept it.  It is only executed after
+             * OSXSAVE has been confirmed above.
+             */
+            __asm__ volatile (".byte 0x0f, 0x01, 0xd0"
+                              : "=a" (xcr0_lo), "=d" (xcr0_hi)
+                              : "c" (0));
+            (void) xcr0_hi;
+
+            avx_present = (xcr0_lo & 0x6) == 0x6;
+        }
+
+        (void) eax;
+        (void) ebx;
+        (void) edx;
+#endif
+        initialized = TRUE;
+    }
+
+    return avx_present;
+}
+
+#endif
#endif /* __amd64__ */
#endif
@@ -591,6 +610,11 @@ _pixman_choose_implementation (void)
imp = _pixman_implementation_create_sse2 (imp);
#endif
+#ifdef USE_AVX
+ if (pixman_have_avx ())
+ imp = _pixman_implementation_create_avx (imp);
+#endif
+
#ifdef USE_ARM_SIMD
if (pixman_have_arm_simd ())
imp = _pixman_implementation_create_arm_simd (imp);
diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
index 60060a9..d979448 100644
--- a/pixman/pixman-private.h
+++ b/pixman/pixman-private.h
@@ -533,6 +533,11 @@ pixman_implementation_t *
_pixman_implementation_create_sse2 (pixman_implementation_t *fallback);
#endif
+#ifdef USE_AVX
+/* Defined in pixman-avx.c: returns a new implementation that installs the
+ * AVX source-iterator hook and is layered on top of fallback.
+ */
+pixman_implementation_t *
+_pixman_implementation_create_avx (pixman_implementation_t *fallback);
+#endif
+
#ifdef USE_ARM_SIMD
pixman_implementation_t *
_pixman_implementation_create_arm_simd (pixman_implementation_t *fallback);
--
1.7.3.4
More information about the Pixman
mailing list