<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
</head>
<body style="word-wrap: break-word; -webkit-nbsp-mode: space; -webkit-line-break: after-white-space;" class="">
Since there isn’t much code difference at this point, I was holding off on this until we had made changes to how the different architecture swr builds are built/linked to minimize build time and disk space.
<div class=""><br class="">
</div>
<div class="">-Tim</div>
<div class=""><br class="">
<div>
<blockquote type="cite" class="">
<div class="">On Jun 20, 2016, at 9:27 AM, Chuck Atkins &lt;<a href="mailto:chuck.atkins@kitware.com" class="">chuck.atkins@kitware.com</a>&gt; wrote:</div>
<br class="Apple-interchange-newline">
<div class="">
<div dir="ltr" class="">Doesn't this also need corresponding compiler flags in <a href="http://configure.ac/" class="">
configure.ac</a> to populate SWR_AVX512_CXXFLAGS?<br class="">
</div>
<div class="gmail_extra"><br clear="all" class="">
<div class="">
<div class="gmail_signature" data-smartmail="gmail_signature">
<div dir="ltr" class="">- Chuck<br class="">
</div>
</div>
</div>
<br class="">
<div class="gmail_quote">On Fri, Jun 17, 2016 at 3:25 PM, Tim Rowley <span dir="ltr" class="">
&lt;<a href="mailto:timothy.o.rowley@intel.com" target="_blank" class="">timothy.o.rowley@intel.com</a>&gt;</span> wrote:<br class="">
<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
Currently, most code paths between AVX2 and AVX512 are identical<br class="">
(see changes to knobs.h).<br class="">
---<br class="">
src/gallium/drivers/swr/rasterizer/common/simdintrin.h | 4 ++--<br class="">
src/gallium/drivers/swr/rasterizer/core/format_types.h | 8 ++++----<br class="">
src/gallium/drivers/swr/rasterizer/core/knobs.h | 15 ++++++++++-----<br class="">
src/gallium/drivers/swr/rasterizer/memory/Convert.h | 4 ++--<br class="">
src/gallium/drivers/swr/rasterizer/memory/StoreTile.cpp | 4 ++--<br class="">
5 files changed, 20 insertions(+), 15 deletions(-)<br class="">
<br class="">
diff --git a/src/gallium/drivers/swr/rasterizer/common/simdintrin.h b/src/gallium/drivers/swr/rasterizer/common/simdintrin.h<br class="">
index 5ec1f71..cc29b5d 100644<br class="">
--- a/src/gallium/drivers/swr/rasterizer/common/simdintrin.h<br class="">
+++ b/src/gallium/drivers/swr/rasterizer/common/simdintrin.h<br class="">
@@ -1002,7 +1002,7 @@ static INLINE simdscalar _simd_abs_ps(simdscalar a)<br class="">
INLINE<br class="">
UINT pdep_u32(UINT a, UINT mask)<br class="">
{<br class="">
-#if KNOB_ARCH==KNOB_ARCH_AVX2<br class="">
+#if KNOB_ARCH >= KNOB_ARCH_AVX2<br class="">
return _pdep_u32(a, mask);<br class="">
#else<br class="">
UINT result = 0;<br class="">
@@ -1035,7 +1035,7 @@ UINT pdep_u32(UINT a, UINT mask)<br class="">
INLINE<br class="">
UINT pext_u32(UINT a, UINT mask)<br class="">
{<br class="">
-#if KNOB_ARCH==KNOB_ARCH_AVX2<br class="">
+#if KNOB_ARCH >= KNOB_ARCH_AVX2<br class="">
return _pext_u32(a, mask);<br class="">
#else<br class="">
UINT result = 0;<br class="">
diff --git a/src/gallium/drivers/swr/rasterizer/core/format_types.h b/src/gallium/drivers/swr/rasterizer/core/format_types.h<br class="">
index afb6337..6612c83 100644<br class="">
--- a/src/gallium/drivers/swr/rasterizer/core/format_types.h<br class="">
+++ b/src/gallium/drivers/swr/rasterizer/core/format_types.h<br class="">
@@ -98,7 +98,7 @@ struct PackTraits&lt;8, false&gt;<br class="">
__m256i result = _mm256_castsi128_si256(resLo);<br class="">
result = _mm256_insertf128_si256(result, resHi, 1);<br class="">
return _mm256_castsi256_ps(result);<br class="">
-#elif KNOB_ARCH==KNOB_ARCH_AVX2<br class="">
+#elif KNOB_ARCH>=KNOB_ARCH_AVX2<br class="">
return _mm256_castsi256_ps(_mm256_cvtepu8_epi32(_mm_castps_si128(_mm256_castps256_ps128(in))));<br class="">
#endif<br class="">
#else<br class="">
@@ -161,7 +161,7 @@ struct PackTraits&lt;8, true&gt;<br class="">
__m256i result = _mm256_castsi128_si256(resLo);<br class="">
result = _mm256_insertf128_si256(result, resHi, 1);<br class="">
return _mm256_castsi256_ps(result);<br class="">
-#elif KNOB_ARCH==KNOB_ARCH_AVX2<br class="">
+#elif KNOB_ARCH>=KNOB_ARCH_AVX2<br class="">
return _mm256_castsi256_ps(_mm256_cvtepi8_epi32(_mm_castps_si128(_mm256_castps256_ps128(in))));<br class="">
#endif<br class="">
#else<br class="">
@@ -223,7 +223,7 @@ struct PackTraits&lt;16, false&gt;<br class="">
__m256i result = _mm256_castsi128_si256(resLo);<br class="">
result = _mm256_insertf128_si256(result, resHi, 1);<br class="">
return _mm256_castsi256_ps(result);<br class="">
-#elif KNOB_ARCH==KNOB_ARCH_AVX2<br class="">
+#elif KNOB_ARCH>=KNOB_ARCH_AVX2<br class="">
return _mm256_castsi256_ps(_mm256_cvtepu16_epi32(_mm_castps_si128(_mm256_castps256_ps128(in))));<br class="">
#endif<br class="">
#else<br class="">
@@ -285,7 +285,7 @@ struct PackTraits&lt;16, true&gt;<br class="">
__m256i result = _mm256_castsi128_si256(resLo);<br class="">
result = _mm256_insertf128_si256(result, resHi, 1);<br class="">
return _mm256_castsi256_ps(result);<br class="">
-#elif KNOB_ARCH==KNOB_ARCH_AVX2<br class="">
+#elif KNOB_ARCH>=KNOB_ARCH_AVX2<br class="">
return _mm256_castsi256_ps(_mm256_cvtepi16_epi32(_mm_castps_si128(_mm256_castps256_ps128(in))));<br class="">
#endif<br class="">
#else<br class="">
diff --git a/src/gallium/drivers/swr/rasterizer/core/knobs.h b/src/gallium/drivers/swr/rasterizer/core/knobs.h<br class="">
index 55a22a6..2629276 100644<br class="">
--- a/src/gallium/drivers/swr/rasterizer/core/knobs.h<br class="">
+++ b/src/gallium/drivers/swr/rasterizer/core/knobs.h<br class="">
@@ -52,11 +52,16 @@<br class="">
#define KNOB_SIMD_WIDTH 8<br class="">
#define KNOB_SIMD_BYTES 32<br class="">
#elif (KNOB_ARCH == KNOB_ARCH_AVX512)<br class="">
-#define KNOB_ARCH_ISA AVX512F<br class="">
-#define KNOB_ARCH_STR "AVX512"<br class="">
-#define KNOB_SIMD_WIDTH 16<br class="">
-#define KNOB_SIMD_BYTES 64<br class="">
-#error "AVX512 not yet supported"<br class="">
+#define KNOB_ARCH_ISA AVX2<br class="">
+#define KNOB_ARCH_STR "AVX2"<br class="">
+#define KNOB_SIMD_WIDTH 8<br class="">
+#define KNOB_SIMD_BYTES 32<br class="">
+// Disable AVX512 for now...<br class="">
+//#define KNOB_ARCH_ISA AVX512F<br class="">
+//#define KNOB_ARCH_STR "AVX512"<br class="">
+//#define KNOB_SIMD_WIDTH 16<br class="">
+//#define KNOB_SIMD_BYTES 64<br class="">
+//#error "AVX512 not yet supported"<br class="">
#else<br class="">
#error "Unknown architecture"<br class="">
#endif<br class="">
diff --git a/src/gallium/drivers/swr/rasterizer/memory/Convert.h b/src/gallium/drivers/swr/rasterizer/memory/Convert.h<br class="">
index 42b973c..b790d35 100644<br class="">
--- a/src/gallium/drivers/swr/rasterizer/memory/Convert.h<br class="">
+++ b/src/gallium/drivers/swr/rasterizer/memory/Convert.h<br class="">
@@ -336,7 +336,7 @@ static void ConvertPixelFromFloat(<br class="">
// Convert from 32-bit float to 16-bit float using _mm_cvtps_ph<br class="">
// @todo 16bit float instruction support is orthogonal to avx support. need to<br class="">
// add check for F16C support instead.<br class="">
-#if KNOB_ARCH == KNOB_ARCH_AVX2<br class="">
+#if KNOB_ARCH >= KNOB_ARCH_AVX2<br class="">
__m128 src128 = _mm_set1_ps(src);<br class="">
__m128i srci128 = _mm_cvtps_ph(src128, _MM_FROUND_TRUNC);<br class="">
UINT value = _mm_extract_epi16(srci128, 0);<br class="">
@@ -519,7 +519,7 @@ INLINE static void ConvertPixelToFloat(<br class="">
float dst;<br class="">
if (FormatTraits&lt;SrcFormat&gt;::GetBPC(comp) == 16)<br class="">
{<br class="">
-#if KNOB_ARCH == KNOB_ARCH_AVX2<br class="">
+#if KNOB_ARCH >= KNOB_ARCH_AVX2<br class="">
// Convert from 16-bit float to 32-bit float using _mm_cvtph_ps<br class="">
// @todo 16bit float instruction support is orthogonal to avx support. need to<br class="">
// add check for F16C support instead.<br class="">
diff --git a/src/gallium/drivers/swr/rasterizer/memory/StoreTile.cpp b/src/gallium/drivers/swr/rasterizer/memory/StoreTile.cpp<br class="">
index 2ab2936..8a26ff6 100644<br class="">
--- a/src/gallium/drivers/swr/rasterizer/memory/StoreTile.cpp<br class="">
+++ b/src/gallium/drivers/swr/rasterizer/memory/StoreTile.cpp<br class="">
@@ -454,7 +454,7 @@ INLINE static void FlatConvert(const uint8_t* pSrc, uint8_t* pDst, uint8_t* pDst<br class="">
__m256i final = _mm256_castsi128_si256(vRow00);<br class="">
final = _mm256_insertf128_si256(final, vRow10, 1);<br class="">
<br class="">
-#elif KNOB_ARCH == KNOB_ARCH_AVX2<br class="">
+#elif KNOB_ARCH >= KNOB_ARCH_AVX2<br class="">
<br class="">
// logic is as above, only wider<br class="">
src1 = _mm256_slli_si256(src1, 1);<br class="">
@@ -542,7 +542,7 @@ INLINE static void FlatConvertNoAlpha(const uint8_t* pSrc, uint8_t* pDst, uint8_<br class="">
__m256i final = _mm256_castsi128_si256(vRow00);<br class="">
final = _mm256_insertf128_si256(final, vRow10, 1);<br class="">
<br class="">
-#elif KNOB_ARCH == KNOB_ARCH_AVX2<br class="">
+#elif KNOB_ARCH >= KNOB_ARCH_AVX2<br class="">
<br class="">
// logic is as above, only wider<br class="">
src1 = _mm256_slli_si256(src1, 1);<br class="">
<span class="HOEnZb"><font color="#888888" class="">--<br class="">
1.9.1<br class="">
<br class="">
_______________________________________________<br class="">
mesa-dev mailing list<br class="">
<a href="mailto:mesa-dev@lists.freedesktop.org" class="">mesa-dev@lists.freedesktop.org</a><br class="">
<a href="https://lists.freedesktop.org/mailman/listinfo/mesa-dev" rel="noreferrer" target="_blank" class="">https://lists.freedesktop.org/mailman/listinfo/mesa-dev</a><br class="">
</font></span></blockquote>
</div>
<br class="">
</div>
</div>
</blockquote>
</div>
<br class="">
</div>
</body>
</html>