Mesa (llvmpipe-rast-64): llvmpipe: Take back AVX2 intrinsic support.

Tue Oct 29 13:19:21 UTC 2013

Module: Mesa
Branch: llvmpipe-rast-64
Commit: 410266f5477ebe683f15682efdcaaa0e9e1d9a8f
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=410266f5477ebe683f15682efdcaaa0e9e1d9a8f

Author: José Fonseca <jfonseca at vmware.com>
Date:   Tue Oct 29 12:39:10 2013 +0000

llvmpipe: Take back AVX2 intrinsic support.

The percentage of AVX2 systems out there is so small that it's hard to
justify hand-coding AVX2 intrinsic code paths at this moment.

(Of course, we can still support AVX2 in LLVM IR, but at the moment the
rasterization code does not use the LLVM JIT.)

---

 scons/gallium.py                            |    2 +-
 src/gallium/auxiliary/util/u_sse.h          |   25 ---------
 src/gallium/drivers/llvmpipe/lp_setup_tri.c |   71 +--------------------------
 src/gallium/include/pipe/p_config.h         |   14 +-----
 4 files changed, 4 insertions(+), 108 deletions(-)

diff --git a/scons/gallium.py b/scons/gallium.py
index 21cd7c0..be3c3e7 100755
--- a/scons/gallium.py
+++ b/scons/gallium.py
@@ -367,7 +367,7 @@ def generate(env):
                     '-mtune=i686' # use i686 where we can
                 ]
         if env['machine'] == 'x86_64':
-            ccflags += ['-m64', '-mavx', '-mavx2']
+            ccflags += ['-m64']
             if platform == 'darwin':
                 ccflags += ['-fno-common']
         if env['platform'] not in ('cygwin', 'haiku', 'windows'):
diff --git a/src/gallium/auxiliary/util/u_sse.h b/src/gallium/auxiliary/util/u_sse.h
index 4ba8b61..d100c47 100644
--- a/src/gallium/auxiliary/util/u_sse.h
+++ b/src/gallium/auxiliary/util/u_sse.h
@@ -254,31 +254,6 @@ transpose4_epi32(const __m128i * restrict a,
 
 #define SCALAR_EPI32(m, i) _mm_shuffle_epi32((m), _MM_SHUFFLE(i,i,i,i))
 
-#if defined(PIPE_ARCH_AVX2)
-
-#include <immintrin.h>
-
-static INLINE void
-transpose4_epi64(const __m256i * restrict a,
-                 const __m256i * restrict b,
-                 const __m256i * restrict c,
-                 const __m256i * restrict d,
-                 __m256i * restrict o,
-                 __m256i * restrict p,
-                 __m256i * restrict q,
-                 __m256i * restrict r)
-{
-  __m256i t0 = _mm256_unpacklo_epi64(*a, *b);
-  __m256i t1 = _mm256_unpacklo_epi64(*c, *d);
-  __m256i t2 = _mm256_unpackhi_epi64(*a, *b);
-  __m256i t3 = _mm256_unpackhi_epi64(*c, *d);
-
-  *o = _mm256_unpacklo_epi64(t0, t1);
-  *p = _mm256_unpackhi_epi64(t0, t1);
-  *q = _mm256_unpacklo_epi64(t2, t3);
-  *r = _mm256_unpackhi_epi64(t2, t3);
-}
-#endif /* PIPE_ARCH_AVX2 */
 
 #endif /* PIPE_ARCH_SSE */
 
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
index aae48e7..1942c21 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
@@ -42,10 +42,6 @@
 
 #define NUM_CHANNELS 4
 
-#if defined(PIPE_ARCH_AVX2)
-#include <immintrin.h>
-#endif
-
 #if defined(PIPE_ARCH_SSE)
 #include <emmintrin.h>
 #endif
@@ -366,72 +362,7 @@ do_triangle_ccw(struct lp_setup_context *setup,
 
    plane = GET_PLANES(tri);
 
-#if defined(PIPE_ARCH_AVX2)
-   {
-      __m256i vertx, verty;
-      __m256i shufx, shufy;
-      __m256i dcdx, dcdy, c;
-      __m256i unused;
-      __m256i dcdx_neg_mask;
-      __m256i dcdy_neg_mask;
-      __m256i dcdx_zero_mask;
-      __m256i top_left_flag;
-      __m256i c_inc_mask, c_inc;
-      __m256i eo, p0, p1, p2;
-      __m256i zero = _mm256_setzero_si256();
-
-      vertx = _mm256_loadu_si256((__m256i *)position->x); /* vertex x coords */
-      verty = _mm256_loadu_si256((__m256i *)position->y); /* vertex y coords */
-
-      shufx = _mm256_shuffle_epi64(vertx, _MM_SHUFFLE(3,0,2,1));
-      shufy = _mm256_shuffle_epi64(verty, _MM_SHUFFLE(3,0,2,1));
-
-      dcdx = _mm256_sub_epi64(verty, shufy);
-      dcdy = _mm256_sub_epi64(vertx, shufx);
-
-      dcdx_neg_mask = _mm256_srai_epi64(dcdx, FIXED_SHIFT);
-      dcdx_zero_mask = _mm256_cmpeq_epi64(dcdx, zero);
-      dcdy_neg_mask = _mm256_srai_epi64(dcdy, FIXED_SHIFT);
-
-      top_left_flag = _mm256_set1_epi64x(
-         (setup->bottom_edge_rule == 0) ? ~0 : 0);
-
-      c_inc_mask = _mm256_or_si256(
-         dcdx_neg_mask,
-         _mm256_and_si256(dcdx_zero_mask,
-                          _mm256_xor_si256(dcdy_neg_mask,
-                                           top_left_flag)));
-
-      c_inc = _mm256_srli_epi64(c_inc_mask, FIXED_SHIFT);
-
-      c = _mm256_sub_epi64(mm256_mullo_epi64(dcdx, vertx),
-                           mm256_mullo_epi64(dcdy, verty));
-
-      c = _mm256_add_epi64(c, c_inc);
-
-      /* Scale up to match c:
-       */
-      dcdx = _mm256_slli_epi64(dcdx, FIXED_ORDER);
-      dcdy = _mm256_slli_epi64(dcdy, FIXED_ORDER);
-
-      /* Calculate trivial reject values:
-       */
-      eo = _mm256_sub_epi64(_mm256_andnot_si256(dcdy_neg_mask, dcdy),
-                            _mm256_and_si256(dcdx_neg_mask, dcdx));
-
-      /* ei = _mm_sub_epi32(_mm_sub_epi32(dcdy, dcdx), eo); */
-
-      /* Pointless transpose which gets undone immediately in
-       * rasterization:
-       */
-      transpose4_epi64(&c, &dcdx, &dcdy, &eo,
-                       &p0, &p1, &p2, &unused);
-
-      _mm256_store_si256((__m256i *)&plane[0], p0);
-      _mm256_store_si256((__m256i *)&plane[1], p1);
-      _mm256_store_si256((__m256i *)&plane[2], p2);
-   }
-#elif defined(PIPE_ARCH_SSE)
+#if defined(PIPE_ARCH_SSE)
    {
       __m128i vertx, verty;
       __m128i shufx, shufy;
diff --git a/src/gallium/include/pipe/p_config.h b/src/gallium/include/pipe/p_config.h
index 374d77a..9bccf32 100644
--- a/src/gallium/include/pipe/p_config.h
+++ b/src/gallium/include/pipe/p_config.h
@@ -98,22 +98,12 @@
 #if defined(PIPE_CC_GCC) && !defined(__SSE2__)
 /* #warning SSE2 support requires -msse -msse2 compiler options */
 #else
-//#define PIPE_ARCH_SSE
+#define PIPE_ARCH_SSE
 #endif
 #if defined(PIPE_CC_GCC) && !defined(__SSSE3__)
 /* #warning SSE3 support requires -msse3 compiler options */
 #else
-//#define PIPE_ARCH_SSSE3
-#endif
-#if defined(PIPE_CC_GCC) && !defined(__AVX__)
-/* #warning AVX support requires -mavx compiler options */
-#else
-//#define PIPE_ARCH_AVX
-#endif
-#if defined(PIPE_CC_GCC) && !defined(__AVX2__)
-/* #warning AVX2 support requires -mavx2 compiler options */
-#else
-//#define PIPE_ARCH_AVX2
+#define PIPE_ARCH_SSSE3
 #endif
 #endif