[Mesa-dev] [PATCH 03/14] swr: [rasterizer] add support for building avx512 version
Chuck Atkins
chuck.atkins at kitware.com
Mon Jun 20 14:27:34 UTC 2016
Doesn't this also need corresponding compiler flags in configure.ac to
populate SWR_AVX512_CXXFLAGS?
- Chuck
On Fri, Jun 17, 2016 at 3:25 PM, Tim Rowley <timothy.o.rowley at intel.com> wrote:
> Currently, most code paths between AVX2 and AVX512 are identical
> (see changes to knobs.h).
> ---
> src/gallium/drivers/swr/rasterizer/common/simdintrin.h | 4 ++--
> src/gallium/drivers/swr/rasterizer/core/format_types.h | 8 ++++----
> src/gallium/drivers/swr/rasterizer/core/knobs.h | 15 ++++++++++-----
> src/gallium/drivers/swr/rasterizer/memory/Convert.h | 4 ++--
> src/gallium/drivers/swr/rasterizer/memory/StoreTile.cpp | 4 ++--
> 5 files changed, 20 insertions(+), 15 deletions(-)
>
> diff --git a/src/gallium/drivers/swr/rasterizer/common/simdintrin.h
> b/src/gallium/drivers/swr/rasterizer/common/simdintrin.h
> index 5ec1f71..cc29b5d 100644
> --- a/src/gallium/drivers/swr/rasterizer/common/simdintrin.h
> +++ b/src/gallium/drivers/swr/rasterizer/common/simdintrin.h
> @@ -1002,7 +1002,7 @@ static INLINE simdscalar _simd_abs_ps(simdscalar a)
> INLINE
> UINT pdep_u32(UINT a, UINT mask)
> {
> -#if KNOB_ARCH==KNOB_ARCH_AVX2
> +#if KNOB_ARCH >= KNOB_ARCH_AVX2
> return _pdep_u32(a, mask);
> #else
> UINT result = 0;
> @@ -1035,7 +1035,7 @@ UINT pdep_u32(UINT a, UINT mask)
> INLINE
> UINT pext_u32(UINT a, UINT mask)
> {
> -#if KNOB_ARCH==KNOB_ARCH_AVX2
> +#if KNOB_ARCH >= KNOB_ARCH_AVX2
> return _pext_u32(a, mask);
> #else
> UINT result = 0;
> diff --git a/src/gallium/drivers/swr/rasterizer/core/format_types.h
> b/src/gallium/drivers/swr/rasterizer/core/format_types.h
> index afb6337..6612c83 100644
> --- a/src/gallium/drivers/swr/rasterizer/core/format_types.h
> +++ b/src/gallium/drivers/swr/rasterizer/core/format_types.h
> @@ -98,7 +98,7 @@ struct PackTraits<8, false>
> __m256i result = _mm256_castsi128_si256(resLo);
> result = _mm256_insertf128_si256(result, resHi, 1);
> return _mm256_castsi256_ps(result);
> -#elif KNOB_ARCH==KNOB_ARCH_AVX2
> +#elif KNOB_ARCH>=KNOB_ARCH_AVX2
> return
> _mm256_castsi256_ps(_mm256_cvtepu8_epi32(_mm_castps_si128(_mm256_castps256_ps128(in))));
> #endif
> #else
> @@ -161,7 +161,7 @@ struct PackTraits<8, true>
> __m256i result = _mm256_castsi128_si256(resLo);
> result = _mm256_insertf128_si256(result, resHi, 1);
> return _mm256_castsi256_ps(result);
> -#elif KNOB_ARCH==KNOB_ARCH_AVX2
> +#elif KNOB_ARCH>=KNOB_ARCH_AVX2
> return
> _mm256_castsi256_ps(_mm256_cvtepi8_epi32(_mm_castps_si128(_mm256_castps256_ps128(in))));
> #endif
> #else
> @@ -223,7 +223,7 @@ struct PackTraits<16, false>
> __m256i result = _mm256_castsi128_si256(resLo);
> result = _mm256_insertf128_si256(result, resHi, 1);
> return _mm256_castsi256_ps(result);
> -#elif KNOB_ARCH==KNOB_ARCH_AVX2
> +#elif KNOB_ARCH>=KNOB_ARCH_AVX2
> return
> _mm256_castsi256_ps(_mm256_cvtepu16_epi32(_mm_castps_si128(_mm256_castps256_ps128(in))));
> #endif
> #else
> @@ -285,7 +285,7 @@ struct PackTraits<16, true>
> __m256i result = _mm256_castsi128_si256(resLo);
> result = _mm256_insertf128_si256(result, resHi, 1);
> return _mm256_castsi256_ps(result);
> -#elif KNOB_ARCH==KNOB_ARCH_AVX2
> +#elif KNOB_ARCH>=KNOB_ARCH_AVX2
> return
> _mm256_castsi256_ps(_mm256_cvtepi16_epi32(_mm_castps_si128(_mm256_castps256_ps128(in))));
> #endif
> #else
> diff --git a/src/gallium/drivers/swr/rasterizer/core/knobs.h
> b/src/gallium/drivers/swr/rasterizer/core/knobs.h
> index 55a22a6..2629276 100644
> --- a/src/gallium/drivers/swr/rasterizer/core/knobs.h
> +++ b/src/gallium/drivers/swr/rasterizer/core/knobs.h
> @@ -52,11 +52,16 @@
> #define KNOB_SIMD_WIDTH 8
> #define KNOB_SIMD_BYTES 32
> #elif (KNOB_ARCH == KNOB_ARCH_AVX512)
> -#define KNOB_ARCH_ISA AVX512F
> -#define KNOB_ARCH_STR "AVX512"
> -#define KNOB_SIMD_WIDTH 16
> -#define KNOB_SIMD_BYTES 64
> -#error "AVX512 not yet supported"
> +#define KNOB_ARCH_ISA AVX2
> +#define KNOB_ARCH_STR "AVX2"
> +#define KNOB_SIMD_WIDTH 8
> +#define KNOB_SIMD_BYTES 32
> +// Disable AVX512 for now...
> +//#define KNOB_ARCH_ISA AVX512F
> +//#define KNOB_ARCH_STR "AVX512"
> +//#define KNOB_SIMD_WIDTH 16
> +//#define KNOB_SIMD_BYTES 64
> +//#error "AVX512 not yet supported"
> #else
> #error "Unknown architecture"
> #endif
> diff --git a/src/gallium/drivers/swr/rasterizer/memory/Convert.h
> b/src/gallium/drivers/swr/rasterizer/memory/Convert.h
> index 42b973c..b790d35 100644
> --- a/src/gallium/drivers/swr/rasterizer/memory/Convert.h
> +++ b/src/gallium/drivers/swr/rasterizer/memory/Convert.h
> @@ -336,7 +336,7 @@ static void ConvertPixelFromFloat(
> // Convert from 32-bit float to 16-bit float using _mm_cvtps_ph
> // @todo 16bit float instruction support is orthogonal to avx support. need to
> // add check for F16C support instead.
> -#if KNOB_ARCH == KNOB_ARCH_AVX2
> +#if KNOB_ARCH >= KNOB_ARCH_AVX2
> __m128 src128 = _mm_set1_ps(src);
> __m128i srci128 = _mm_cvtps_ph(src128, _MM_FROUND_TRUNC);
> UINT value = _mm_extract_epi16(srci128, 0);
> @@ -519,7 +519,7 @@ INLINE static void ConvertPixelToFloat(
> float dst;
> if (FormatTraits<SrcFormat>::GetBPC(comp) == 16)
> {
> -#if KNOB_ARCH == KNOB_ARCH_AVX2
> +#if KNOB_ARCH >= KNOB_ARCH_AVX2
> // Convert from 16-bit float to 32-bit float using _mm_cvtph_ps
> // @todo 16bit float instruction support is orthogonal to avx support. need to
> // add check for F16C support instead.
> diff --git a/src/gallium/drivers/swr/rasterizer/memory/StoreTile.cpp
> b/src/gallium/drivers/swr/rasterizer/memory/StoreTile.cpp
> index 2ab2936..8a26ff6 100644
> --- a/src/gallium/drivers/swr/rasterizer/memory/StoreTile.cpp
> +++ b/src/gallium/drivers/swr/rasterizer/memory/StoreTile.cpp
> @@ -454,7 +454,7 @@ INLINE static void FlatConvert(const uint8_t* pSrc,
> uint8_t* pDst, uint8_t* pDst
> __m256i final = _mm256_castsi128_si256(vRow00);
> final = _mm256_insertf128_si256(final, vRow10, 1);
>
> -#elif KNOB_ARCH == KNOB_ARCH_AVX2
> +#elif KNOB_ARCH >= KNOB_ARCH_AVX2
>
> // logic is as above, only wider
> src1 = _mm256_slli_si256(src1, 1);
> @@ -542,7 +542,7 @@ INLINE static void FlatConvertNoAlpha(const uint8_t*
> pSrc, uint8_t* pDst, uint8_
> __m256i final = _mm256_castsi128_si256(vRow00);
> final = _mm256_insertf128_si256(final, vRow10, 1);
>
> -#elif KNOB_ARCH == KNOB_ARCH_AVX2
> +#elif KNOB_ARCH >= KNOB_ARCH_AVX2
>
> // logic is as above, only wider
> src1 = _mm256_slli_si256(src1, 1);
> --
> 1.9.1
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20160620/cba7e46f/attachment.html>
More information about the mesa-dev mailing list