Mesa (llvmpipe-rast-64): llvmpipe: Take back AVX2 intrinsic support.
Jose Fonseca
jrfonseca at kemper.freedesktop.org
Tue Oct 29 13:19:21 UTC 2013
Module: Mesa
Branch: llvmpipe-rast-64
Commit: 410266f5477ebe683f15682efdcaaa0e9e1d9a8f
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=410266f5477ebe683f15682efdcaaa0e9e1d9a8f
Author: José Fonseca <jfonseca at vmware.com>
Date: Tue Oct 29 12:39:10 2013 +0000
llvmpipe: Take back AVX2 intrinsic support.
The percentage of AVX2 systems out there is so small that it's hard to
justify hand-coding AVX2 intrinsic code paths at this moment.
(Of course, we can still support AVX2 in LLVM IR, but at the moment the
rasterization code does not use the LLVM JIT.)
---
scons/gallium.py | 2 +-
src/gallium/auxiliary/util/u_sse.h | 25 ---------
src/gallium/drivers/llvmpipe/lp_setup_tri.c | 71 +--------------------------
src/gallium/include/pipe/p_config.h | 14 +-----
4 files changed, 4 insertions(+), 108 deletions(-)
diff --git a/scons/gallium.py b/scons/gallium.py
index 21cd7c0..be3c3e7 100755
--- a/scons/gallium.py
+++ b/scons/gallium.py
@@ -367,7 +367,7 @@ def generate(env):
'-mtune=i686' # use i686 where we can
]
if env['machine'] == 'x86_64':
- ccflags += ['-m64', '-mavx', '-mavx2']
+ ccflags += ['-m64']
if platform == 'darwin':
ccflags += ['-fno-common']
if env['platform'] not in ('cygwin', 'haiku', 'windows'):
diff --git a/src/gallium/auxiliary/util/u_sse.h b/src/gallium/auxiliary/util/u_sse.h
index 4ba8b61..d100c47 100644
--- a/src/gallium/auxiliary/util/u_sse.h
+++ b/src/gallium/auxiliary/util/u_sse.h
@@ -254,31 +254,6 @@ transpose4_epi32(const __m128i * restrict a,
#define SCALAR_EPI32(m, i) _mm_shuffle_epi32((m), _MM_SHUFFLE(i,i,i,i))
-#if defined(PIPE_ARCH_AVX2)
-
-#include <immintrin.h>
-
-static INLINE void
-transpose4_epi64(const __m256i * restrict a,
- const __m256i * restrict b,
- const __m256i * restrict c,
- const __m256i * restrict d,
- __m256i * restrict o,
- __m256i * restrict p,
- __m256i * restrict q,
- __m256i * restrict r)
-{
- __m256i t0 = _mm256_unpacklo_epi64(*a, *b);
- __m256i t1 = _mm256_unpacklo_epi64(*c, *d);
- __m256i t2 = _mm256_unpackhi_epi64(*a, *b);
- __m256i t3 = _mm256_unpackhi_epi64(*c, *d);
-
- *o = _mm256_unpacklo_epi64(t0, t1);
- *p = _mm256_unpackhi_epi64(t0, t1);
- *q = _mm256_unpacklo_epi64(t2, t3);
- *r = _mm256_unpackhi_epi64(t2, t3);
-}
-#endif /* PIPE_ARCH_AVX2 */
#endif /* PIPE_ARCH_SSE */
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
index aae48e7..1942c21 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
@@ -42,10 +42,6 @@
#define NUM_CHANNELS 4
-#if defined(PIPE_ARCH_AVX2)
-#include <immintrin.h>
-#endif
-
#if defined(PIPE_ARCH_SSE)
#include <emmintrin.h>
#endif
@@ -366,72 +362,7 @@ do_triangle_ccw(struct lp_setup_context *setup,
plane = GET_PLANES(tri);
-#if defined(PIPE_ARCH_AVX2)
- {
- __m256i vertx, verty;
- __m256i shufx, shufy;
- __m256i dcdx, dcdy, c;
- __m256i unused;
- __m256i dcdx_neg_mask;
- __m256i dcdy_neg_mask;
- __m256i dcdx_zero_mask;
- __m256i top_left_flag;
- __m256i c_inc_mask, c_inc;
- __m256i eo, p0, p1, p2;
- __m256i zero = _mm256_setzero_si256();
-
- vertx = _mm256_loadu_si256((__m256i *)position->x); /* vertex x coords */
- verty = _mm256_loadu_si256((__m256i *)position->y); /* vertex y coords */
-
- shufx = _mm256_shuffle_epi64(vertx, _MM_SHUFFLE(3,0,2,1));
- shufy = _mm256_shuffle_epi64(verty, _MM_SHUFFLE(3,0,2,1));
-
- dcdx = _mm256_sub_epi64(verty, shufy);
- dcdy = _mm256_sub_epi64(vertx, shufx);
-
- dcdx_neg_mask = _mm256_srai_epi64(dcdx, FIXED_SHIFT);
- dcdx_zero_mask = _mm256_cmpeq_epi64(dcdx, zero);
- dcdy_neg_mask = _mm256_srai_epi64(dcdy, FIXED_SHIFT);
-
- top_left_flag = _mm256_set1_epi64x(
- (setup->bottom_edge_rule == 0) ? ~0 : 0);
-
- c_inc_mask = _mm256_or_si256(
- dcdx_neg_mask,
- _mm256_and_si256(dcdx_zero_mask,
- _mm256_xor_si256(dcdy_neg_mask,
- top_left_flag)));
-
- c_inc = _mm256_srli_epi64(c_inc_mask, FIXED_SHIFT);
-
- c = _mm256_sub_epi64(mm256_mullo_epi64(dcdx, vertx),
- mm256_mullo_epi64(dcdy, verty));
-
- c = _mm256_add_epi64(c, c_inc);
-
- /* Scale up to match c:
- */
- dcdx = _mm256_slli_epi64(dcdx, FIXED_ORDER);
- dcdy = _mm256_slli_epi64(dcdy, FIXED_ORDER);
-
- /* Calculate trivial reject values:
- */
- eo = _mm256_sub_epi64(_mm256_andnot_si256(dcdy_neg_mask, dcdy),
- _mm256_and_si256(dcdx_neg_mask, dcdx));
-
- /* ei = _mm_sub_epi32(_mm_sub_epi32(dcdy, dcdx), eo); */
-
- /* Pointless transpose which gets undone immediately in
- * rasterization:
- */
- transpose4_epi64(&c, &dcdx, &dcdy, &eo,
- &p0, &p1, &p2, &unused);
-
- _mm256_store_si256((__m256i *)&plane[0], p0);
- _mm256_store_si256((__m256i *)&plane[1], p1);
- _mm256_store_si256((__m256i *)&plane[2], p2);
- }
-#elif defined(PIPE_ARCH_SSE)
+#if defined(PIPE_ARCH_SSE)
{
__m128i vertx, verty;
__m128i shufx, shufy;
diff --git a/src/gallium/include/pipe/p_config.h b/src/gallium/include/pipe/p_config.h
index 374d77a..9bccf32 100644
--- a/src/gallium/include/pipe/p_config.h
+++ b/src/gallium/include/pipe/p_config.h
@@ -98,22 +98,12 @@
#if defined(PIPE_CC_GCC) && !defined(__SSE2__)
/* #warning SSE2 support requires -msse -msse2 compiler options */
#else
-//#define PIPE_ARCH_SSE
+#define PIPE_ARCH_SSE
#endif
#if defined(PIPE_CC_GCC) && !defined(__SSSE3__)
/* #warning SSE3 support requires -msse3 compiler options */
#else
-//#define PIPE_ARCH_SSSE3
-#endif
-#if defined(PIPE_CC_GCC) && !defined(__AVX__)
-/* #warning AVX support requires -mavx compiler options */
-#else
-//#define PIPE_ARCH_AVX
-#endif
-#if defined(PIPE_CC_GCC) && !defined(__AVX2__)
-/* #warning AVX2 support requires -mavx2 compiler options */
-#else
-//#define PIPE_ARCH_AVX2
+#define PIPE_ARCH_SSSE3
#endif
#endif
More information about the mesa-commit
mailing list