[Pixman] [PATCH] Adding infrastructure to permit future AVX2 implementations
Devulapalli, Raghuveer
raghuveer.devulapalli at intel.com
Thu Sep 6 19:10:21 UTC 2018
Thanks Matt and Chris for your valuable feedback. I am incorporating your feedback and preparing updated patches. Unfortunately I am out of office for the next 2-3 weeks, but I will post them shortly after I get back.
Thanks,
Raghuveer
-----Original Message-----
From: Matt Turner [mailto:mattst88 at gmail.com]
Sent: Wednesday, August 29, 2018 11:43 AM
To: Devulapalli, Raghuveer <raghuveer.devulapalli at intel.com>
Cc: pixman at lists.freedesktop.org
Subject: Re: [Pixman] [PATCH] Adding infrastructure to permit future AVX2 implementations
Thank you for the patches! Some comments inline.
On Wed, Aug 22, 2018 at 10:03 AM raghuveer devulapalli <raghuveer.devulapalli at intel.com> wrote:
>
> ---
> configure.ac | 44 ++++++++++++++++++++++++++++++++++++++++++++
> pixman/Makefile.am | 12 ++++++++++++
> pixman/pixman-avx2.c | 32 ++++++++++++++++++++++++++++++++
> pixman/pixman-private.h | 5 +++++
> pixman/pixman-x86.c | 15 +++++++++++++--
> 5 files changed, 106 insertions(+), 2 deletions(-) create mode
> 100644 pixman/pixman-avx2.c
>
> diff --git a/configure.ac b/configure.ac index e833e45..27f4305 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -503,6 +503,48 @@ fi
> AM_CONDITIONAL(USE_SSSE3, test $have_ssse3_intrinsics = yes)
>
> dnl
> ======================================================================
> =====
> +dnl Check for AVX2
Trailing whitespace
> +
> +if test "x$AVX2_CFLAGS" = "x" ; then
> + AVX2_CFLAGS="-mavx2 -Winline"
> +fi
> +
> +have_avx2_intrinsics=no
> +AC_MSG_CHECKING(whether to use AVX2 intrinsics)
> +xserver_save_CFLAGS=$CFLAGS CFLAGS="$AVX2_CFLAGS $CFLAGS"
> +
> +AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
> +#include <immintrin.h>
> +int param;
> +int main () {
> + __m256i a = _mm256_set1_epi32 (param), b = _mm256_set1_epi32 (param + 1), c;
> + c = _mm256_maddubs_epi16 (a, b);
> + return _mm256_cvtsi256_si32(c);
> +}]])], have_avx2_intrinsics=yes)
> +CFLAGS=$xserver_save_CFLAGS
> +
> +AC_ARG_ENABLE(avx2,
> + [AC_HELP_STRING([--disable-avx2],
> + [disable AVX2 fast paths])],
> + [enable_avx2=$enableval], [enable_avx2=auto])
> +
> +if test $enable_avx2 = no ; then
> + have_avx2_intrinsics=disabled
> +fi
> +
> +if test $have_avx2_intrinsics = yes ; then
> + AC_DEFINE(USE_AVX2, 1, [use AVX2 compiler intrinsics]) fi
> +
> +AC_MSG_RESULT($have_avx2_intrinsics)
> +if test $enable_avx2 = yes && test $have_avx2_intrinsics = no ; then
> + AC_MSG_ERROR([AVX2 intrinsics not detected]) fi
> +
> +AM_CONDITIONAL(USE_AVX2, test $have_avx2_intrinsics = yes)
> +
> +dnl
> +=====================================================================
> +======
> dnl Other special flags needed when building code using MMX or SSE
> instructions case $host_os in
> solaris*)
> @@ -538,6 +580,8 @@ AC_SUBST(MMX_LDFLAGS)
> AC_SUBST(SSE2_CFLAGS)
> AC_SUBST(SSE2_LDFLAGS)
> AC_SUBST(SSSE3_CFLAGS)
> +AC_SUBST(AVX2_CFLAGS)
> +AC_SUBST(AVX2_LDFLAGS)
>
> dnl
> ======================================================================
> =====
> dnl Check for VMX/Altivec
> diff --git a/pixman/Makefile.am b/pixman/Makefile.am index
> 581b6f6..7204621 100644
> --- a/pixman/Makefile.am
> +++ b/pixman/Makefile.am
> @@ -64,6 +64,18 @@ libpixman_1_la_LIBADD += libpixman-ssse3.la
> ASM_CFLAGS_ssse3=$(SSSE3_CFLAGS)
> endif
>
> +# avx2 code
> +if USE_AVX2
> +noinst_LTLIBRARIES += libpixman-avx2.la libpixman_avx2_la_SOURCES = \
> + pixman-avx2.c
> +libpixman_avx2_la_CFLAGS = $(AVX2_CFLAGS) libpixman_1_la_LDFLAGS +=
> +$(AVX2_LDFLAGS) libpixman_1_la_LIBADD += libpixman-avx2.la
> +
> +ASM_CFLAGS_avx2=$(AVX2_CFLAGS)
> +endif
> +
> # arm simd code
> if USE_ARM_SIMD
> noinst_LTLIBRARIES += libpixman-arm-simd.la diff --git
> a/pixman/pixman-avx2.c b/pixman/pixman-avx2.c new file mode 100644
> index 0000000..d860d67
> --- /dev/null
> +++ b/pixman/pixman-avx2.c
> @@ -0,0 +1,32 @@
> +#ifdef HAVE_CONFIG_H
> +#include <config.h>
> +#endif
> +
> +#include <immintrin.h> /* for AVX2 intrinsics */ #include
> +"pixman-private.h"
> +#include "pixman-combine32.h"
> +#include "pixman-inlines.h"
> +
> +static const pixman_fast_path_t avx2_fast_paths[] = {
> + { PIXMAN_OP_NONE },
> +};
> +
> +static const pixman_iter_info_t avx2_iters[] =
Trailing whitespace
> +{
> + { PIXMAN_null },
> +};
> +
> +#if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__)
> +__attribute__((__force_align_arg_pointer__))
> +#endif
> +pixman_implementation_t *
> +_pixman_implementation_create_avx2 (pixman_implementation_t
> +*fallback) {
> + pixman_implementation_t *imp = _pixman_implementation_create
> +(fallback, avx2_fast_paths);
> +
> + /* Set up function pointers */
> + imp->iter_info = avx2_iters;
> +
> + return imp;
> +}
> diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h index
> 73a5414..b6b15df 100644
> --- a/pixman/pixman-private.h
> +++ b/pixman/pixman-private.h
> @@ -597,6 +597,11 @@ pixman_implementation_t *
> _pixman_implementation_create_ssse3 (pixman_implementation_t
> *fallback); #endif
>
> +#ifdef USE_AVX2
> +pixman_implementation_t *
> +_pixman_implementation_create_avx2 (pixman_implementation_t
> +*fallback); #endif
> +
> #ifdef USE_ARM_SIMD
> pixman_implementation_t *
> _pixman_implementation_create_arm_simd (pixman_implementation_t
> *fallback); diff --git a/pixman/pixman-x86.c b/pixman/pixman-x86.c
> index 05297c4..687c83b 100644
> --- a/pixman/pixman-x86.c
> +++ b/pixman/pixman-x86.c
At the top of this file there is a preprocessor check:
#if defined(USE_X86_MMX) || defined (USE_SSE2) || defined (USE_SSSE3)
I think || defined (USE_AVX2) should be added here.
> @@ -40,7 +40,8 @@ typedef enum
> X86_SSE = (1 << 2) | X86_MMX_EXTENSIONS,
> X86_SSE2 = (1 << 3),
> X86_CMOV = (1 << 4),
> - X86_SSSE3 = (1 << 5)
> + X86_SSSE3 = (1 << 5),
> + X86_AVX2 = (1 << 6),
I'm not 100% we can use trailing commas in pixman due to MSVC.
Probably safer just to leave it off.
> } cpu_features_t;
>
> #ifdef HAVE_GETISAX
> @@ -119,7 +120,7 @@ pixman_cpuid (uint32_t feature,
> __asm__ volatile (
> "cpuid" "\n\t"
> : "=a" (*a), "=b" (*b), "=c" (*c), "=d" (*d)
> - : "a" (feature));
> + : "a" (feature), "c" (0));
Just to make sure I'm understanding: cpuid returns AVX2 presence in bit 5 of ebx when it is executed with eax=7 and ecx=0, so we need to ensure ecx is set to 0?
I think that's fine. It seems like ecx isn't required to be any particular value for the other cases. Perhaps a comment would help future readers understand.
> #else
> /* On x86-32 we need to be careful about the handling of %ebx
> * and %esp. We can't declare either one as clobbered @@ -172,6
> +173,10 @@ detect_cpu_features (void)
> features |= X86_SSE2;
> if (c & (1 << 9))
> features |= X86_SSSE3;
> +
Spurious whitespace
> + pixman_cpuid (0x07, &a, &b, &c, &d);
> + if (b & (1 << 5))
> + features |= X86_AVX2;
>
> /* Check for AMD specific features */
> if ((features & X86_MMX) && !(features & X86_SSE)) @@ -228,6
> +233,7 @@ _pixman_x86_get_implementations (pixman_implementation_t
> *imp) #define MMX_BITS (X86_MMX | X86_MMX_EXTENSIONS) #define
> SSE2_BITS (X86_MMX | X86_MMX_EXTENSIONS | X86_SSE | X86_SSE2) #define
> SSSE3_BITS (X86_SSE | X86_SSE2 | X86_SSSE3)
> +#define AVX2_BITS (X86_AVX2)
>
> #ifdef USE_X86_MMX
> if (!_pixman_disabled ("mmx") && have_feature (MMX_BITS)) @@
> -244,5 +250,10 @@ _pixman_x86_get_implementations (pixman_implementation_t *imp)
> imp = _pixman_implementation_create_ssse3 (imp); #endif
>
> +#if (defined USE_AVX2 && defined USE_SSE2)
> + if (!_pixman_disabled ("avx2") && have_feature (AVX2_BITS))
> + imp = _pixman_implementation_create_avx2(imp);
> +#endif
> +
> return imp;
> }
> --
> 2.7.4
More information about the Pixman
mailing list