[Pixman] [PATCH 1/3] Rev2 of patch: Adding infrastructure for AVX2 implementations

Chris Wilson chris at chris-wilson.co.uk
Thu Jan 17 08:23:40 UTC 2019


Quoting Raghuveer Devulapalli (2019-01-17 00:59:58)
> From: raghuveer devulapalli <raghuveer.devulapalli at intel.com>
> 
> Based on Chris Wilson's comment, added a check to ensure that OS
> supports AVX2.  This is done using xgetbv instruction which tells if you
> can save the YMM registers.
> ---
>  configure.ac            | 44 +++++++++++++++++++++++++++++++++++++++++
>  pixman/Makefile.am      | 12 +++++++++++
>  pixman/pixman-avx2.c    | 32 ++++++++++++++++++++++++++++++
>  pixman/pixman-private.h |  5 +++++
>  pixman/pixman-x86.c     | 38 ++++++++++++++++++++++++++++++++---
>  5 files changed, 128 insertions(+), 3 deletions(-)
>  create mode 100644 pixman/pixman-avx2.c
> 
> diff --git a/configure.ac b/configure.ac
> index 6f6bd7e..97b1a85 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -502,6 +502,48 @@ fi
>  
>  AM_CONDITIONAL(USE_SSSE3, test $have_ssse3_intrinsics = yes)
>  
> +dnl ===========================================================================
> +dnl Check for AVX2
> +
> +if test "x$AVX2_CFLAGS" = "x" ; then
> +    AVX2_CFLAGS="-mavx2 -Winline"
> +fi

Hmm, -Winline seems useful. Do we have a cc_opt checker, like
CAIRO_CC_TRY_FLAG? Apparently not, and no convenient lib from aclocal.
Boo.

> +have_avx2_intrinsics=no
> +AC_MSG_CHECKING(whether to use AVX2 intrinsics)
> +xserver_save_CFLAGS=$CFLAGS
> +CFLAGS="$AVX2_CFLAGS $CFLAGS"
> +
> +AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
> +#include <immintrin.h>
> +int param;
> +int main () {
> +    __m256i a = _mm256_set1_epi32 (param), b = _mm256_set1_epi32 (param + 1), c;
> +    c = _mm256_maddubs_epi16 (a, b);
> +    return _mm256_cvtsi256_si32(c);
> +}]])], have_avx2_intrinsics=yes)
> +CFLAGS=$xserver_save_CFLAGS

Which also does the verification of cflags. Ok.

> +
> +AC_ARG_ENABLE(avx2,
> +   [AC_HELP_STRING([--disable-avx2],
> +                   [disable AVX2 fast paths])],
> +   [enable_avx2=$enableval], [enable_avx2=auto])
> +
> +if test $enable_avx2 = no ; then
> +   have_avx2_intrinsics=disabled
> +fi
> +
> +if test $have_avx2_intrinsics = yes ; then
> +   AC_DEFINE(USE_AVX2, 1, [use AVX2 compiler intrinsics])
> +fi
> +
> +AC_MSG_RESULT($have_avx2_intrinsics)
> +if test $enable_avx2 = yes && test $have_avx2_intrinsics = no ; then
> +   AC_MSG_ERROR([AVX2 intrinsics not detected])
> +fi
> +
> +AM_CONDITIONAL(USE_AVX2, test $have_avx2_intrinsics = yes)
> +
>  dnl ===========================================================================
>  dnl Other special flags needed when building code using MMX or SSE instructions
>  case $host_os in
> @@ -538,6 +580,8 @@ AC_SUBST(MMX_LDFLAGS)
>  AC_SUBST(SSE2_CFLAGS)
>  AC_SUBST(SSE2_LDFLAGS)
>  AC_SUBST(SSSE3_CFLAGS)
> +AC_SUBST(AVX2_CFLAGS)
> +AC_SUBST(AVX2_LDFLAGS)
>  
>  dnl ===========================================================================
>  dnl Check for VMX/Altivec
> diff --git a/pixman/Makefile.am b/pixman/Makefile.am
> index 581b6f6..7204621 100644
> --- a/pixman/Makefile.am
> +++ b/pixman/Makefile.am
> @@ -64,6 +64,18 @@ libpixman_1_la_LIBADD += libpixman-ssse3.la
>  ASM_CFLAGS_ssse3=$(SSSE3_CFLAGS)
>  endif
>  
> +# avx2 code
> +if USE_AVX2
> +noinst_LTLIBRARIES += libpixman-avx2.la
> +libpixman_avx2_la_SOURCES = \
> +       pixman-avx2.c
> +libpixman_avx2_la_CFLAGS = $(AVX2_CFLAGS)
> +libpixman_1_la_LDFLAGS += $(AVX2_LDFLAGS)
> +libpixman_1_la_LIBADD += libpixman-avx2.la
> +
> +ASM_CFLAGS_avx2=$(AVX2_CFLAGS)
> +endif
> +
>  # arm simd code
>  if USE_ARM_SIMD
>  noinst_LTLIBRARIES += libpixman-arm-simd.la
> diff --git a/pixman/pixman-avx2.c b/pixman/pixman-avx2.c
> new file mode 100644
> index 0000000..d860d67
> --- /dev/null
> +++ b/pixman/pixman-avx2.c
> @@ -0,0 +1,32 @@
> +#ifdef HAVE_CONFIG_H
> +#include <config.h>
> +#endif
> +
> +#include <immintrin.h> /* for AVX2 intrinsics */
> +#include "pixman-private.h"
> +#include "pixman-combine32.h"
> +#include "pixman-inlines.h"
> +
> +static const pixman_fast_path_t avx2_fast_paths[] =
> +{
> +    { PIXMAN_OP_NONE },
> +};
> +
> +static const pixman_iter_info_t avx2_iters[] = 
> +{
> +    { PIXMAN_null },
> +};
> +
> +#if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__)
> +__attribute__((__force_align_arg_pointer__))
> +#endif
> +pixman_implementation_t *
> +_pixman_implementation_create_avx2 (pixman_implementation_t *fallback)
> +{
> +    pixman_implementation_t *imp = _pixman_implementation_create (fallback, avx2_fast_paths);
> +
> +    /* Set up function pointers */
> +    imp->iter_info = avx2_iters;
> +
> +    return imp;
> +}
> diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
> index 73a5414..b6b15df 100644
> --- a/pixman/pixman-private.h
> +++ b/pixman/pixman-private.h
> @@ -597,6 +597,11 @@ pixman_implementation_t *
>  _pixman_implementation_create_ssse3 (pixman_implementation_t *fallback);
>  #endif
>  
> +#ifdef USE_AVX2
> +pixman_implementation_t *
> +_pixman_implementation_create_avx2 (pixman_implementation_t *fallback);
> +#endif
> +
>  #ifdef USE_ARM_SIMD
>  pixman_implementation_t *
>  _pixman_implementation_create_arm_simd (pixman_implementation_t *fallback);
> diff --git a/pixman/pixman-x86.c b/pixman/pixman-x86.c
> index 05297c4..ac4edbc 100644
> --- a/pixman/pixman-x86.c
> +++ b/pixman/pixman-x86.c
> @@ -25,7 +25,10 @@
>  
>  #include "pixman-private.h"
>  
> -#if defined(USE_X86_MMX) || defined (USE_SSE2) || defined (USE_SSSE3)
> +#define xgetbv(index,eax,edx)                                   \
> +       __asm__ ("xgetbv" : "=a"(eax), "=d"(edx) : "c" (index))
> +
> +#if defined(USE_X86_MMX) || defined (USE_SSE2) || defined (USE_SSSE3) || defined (USE_AVX2)
>  
>  /* The CPU detection code needs to be in a file not compiled with
>   * "-mmmx -msse", as gcc would generate CMOV instructions otherwise
> @@ -40,7 +43,8 @@ typedef enum
>      X86_SSE                    = (1 << 2) | X86_MMX_EXTENSIONS,
>      X86_SSE2                   = (1 << 3),
>      X86_CMOV                   = (1 << 4),
> -    X86_SSSE3                  = (1 << 5)
> +    X86_SSSE3                  = (1 << 5),
> +    X86_AVX2                   = (1 << 6)
>  } cpu_features_t;
>  
>  #ifdef HAVE_GETISAX
> @@ -116,10 +120,13 @@ pixman_cpuid (uint32_t feature,
>  #if defined (__GNUC__)
>  
>  #if _PIXMAN_X86_64
> +    /* To check presence of AVX2, cpuid needs to be executed with eax=7 and
> +     * ecx=0. Value of ecx does not matter for other cases.
> +     */
>      __asm__ volatile (
>          "cpuid"                                "\n\t"
>         : "=a" (*a), "=b" (*b), "=c" (*c), "=d" (*d)
> -       : "a" (feature));
> +       : "a" (feature), "c" (0));
>  #else
>      /* On x86-32 we need to be careful about the handling of %ebx
>       * and %esp. We can't declare either one as clobbered
> @@ -151,10 +158,16 @@ pixman_cpuid (uint32_t feature,
>  #endif
>  }
>  
> +

Stray.

>  static cpu_features_t
>  detect_cpu_features (void)
>  {
>      uint32_t a, b, c, d;
> +    unsigned int extra = 0;

> +    unsigned int has_YMM = 0x1;
> +    unsigned int XMM_STATE = (0x01 << 1);
> +    unsigned int YMM_STATE = (0x01 << 2);
> +    unsigned int AVX_STATE = (XMM_STATE | YMM_STATE);

These are constants though, might be less surprising to use
enums/defines; at the very least mark them as constant.

>      cpu_features_t features = 0;
>  
>      if (!have_cpuid())
> @@ -173,6 +186,19 @@ detect_cpu_features (void)
>      if (c & (1 << 9))
>         features |= X86_SSSE3;
>  
> +    /* Ensure OS supports saving YMM registers */
> +    if (c & (1 << 27))
> +    {
> +       unsigned int bv_eax, bv_edx;
> +       xgetbv(0x00, bv_eax, bv_edx);
> +       if ((bv_eax & AVX_STATE) == AVX_STATE)
> +           extra |= has_YMM;
> +    }
> +
> +    pixman_cpuid (0x07, &a, &b, &c, &d);
> +    if ((extra & has_YMM) && (b & (1 << 5)))
> +       features |= X86_AVX2;

Ok, that matches my understanding of how one is meant to probe for
usable avx2.

Reviewed-by: Chris Wilson <chris at chris-wilson.co.uk>
-Chris


More information about the Pixman mailing list