[Mesa-dev] [PATCH 4/6] swr/rast: SIMD16 Frontend - Fix USE_SIMD16_FRONTEND build
Tim Rowley
timothy.o.rowley at intel.com
Wed Jul 12 15:38:29 UTC 2017
Previous check-ins without testing with USE_SIMD16_FRONTEND have
introduced regressions. This fixes the build, not the regressions.
---
.../drivers/swr/rasterizer/common/simd16intrin.h | 14 +++++++++++++-
.../swr/rasterizer/common/simdlib_512_avx512.inl | 2 +-
src/gallium/drivers/swr/rasterizer/core/binner.cpp | 19 ++++++++++---------
src/gallium/drivers/swr/rasterizer/core/clip.h | 2 +-
4 files changed, 25 insertions(+), 12 deletions(-)
diff --git a/src/gallium/drivers/swr/rasterizer/common/simd16intrin.h b/src/gallium/drivers/swr/rasterizer/common/simd16intrin.h
index 2915168..a160ca2 100644
--- a/src/gallium/drivers/swr/rasterizer/common/simd16intrin.h
+++ b/src/gallium/drivers/swr/rasterizer/common/simd16intrin.h
@@ -126,7 +126,7 @@ typedef SIMD512 SIMD16;
#define _simd16_add_epi8 SIMD16::add_epi8
#define _simd16_shuffle_epi8 SIMD16::shuffle_epi8
-#define _simd16_i32gather_ps(m, index, scale) SIMD16::i32gather_ps<SIMD16::ScaleFactor(scale)>(index, m)
+#define _simd16_i32gather_ps(m, index, scale) SIMD16::i32gather_ps<SIMD16::ScaleFactor(scale)>(m, index)
#define _simd16_mask_i32gather_ps(a, m, index, mask, scale) SIMD16::mask_i32gather_ps<SIMD16::ScaleFactor(scale)>(a, m, index, mask)
#define _simd16_abs_epi32 SIMD16::abs_epi32
@@ -162,6 +162,18 @@ typedef SIMD512 SIMD16;
#define _simd16_int2mask(mask) simd16mask(mask)
#define _simd16_mask2int(mask) int(mask)
+// convert bitmask to vector mask
+SIMDINLINE simd16scalar vMask16(int32_t mask)
+{
+ simd16scalari temp = _simd16_set1_epi32(mask);
+
+ simd16scalari bits = _simd16_set_epi32(0x8000, 0x4000, 0x2000, 0x1000, 0x0800, 0x0400, 0x0200, 0x0100, 0x0080, 0x0040, 0x0020, 0x0010, 0x0008, 0x0004, 0x0002, 0x0001);
+
+ simd16scalari result = _simd16_cmplt_epi32(_simd16_setzero_si(), _simd16_and_si(temp, bits));
+
+ return _simd16_castsi_ps(result);
+}
+
#endif//ENABLE_AVX512_SIMD16
#endif//__SWR_SIMD16INTRIN_H_
diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl
index 226952e..535e4ed 100644
--- a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl
+++ b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl
@@ -543,7 +543,7 @@ static SIMDINLINE Float SIMDCALL mask_i32gather_ps(Float old, float const* p, In
{
__mmask16 k = _mm512_cmpneq_ps_mask(mask, setzero_ps());
- return _mm512_mask_i32gather_ps(old, k, idx, p, ScaleT);
+ return _mm512_mask_i32gather_ps(old, k, idx, p, static_cast<int>(ScaleT));
}
static SIMDINLINE void SIMDCALL maskstore_ps(float *p, Integer mask, Float src)
diff --git a/src/gallium/drivers/swr/rasterizer/core/binner.cpp b/src/gallium/drivers/swr/rasterizer/core/binner.cpp
index 34789cf..de6691b 100644
--- a/src/gallium/drivers/swr/rasterizer/core/binner.cpp
+++ b/src/gallium/drivers/swr/rasterizer/core/binner.cpp
@@ -1076,13 +1076,14 @@ void SIMDCALL BinTriangles_simd16(
(SWR_INPUT_COVERAGE)pDC->pState->state.psState.inputCoverage, EdgeValToEdgeState(ALL_EDGES_VALID), (state.scissorsTileAligned == false));
}
+ simd16BBox bbox;
+
if (!triMask)
{
goto endBinTriangles;
}
// Calc bounding box of triangles
- simd16BBox bbox;
calcBoundingBoxIntVertical<CT>(tri, vXi, vYi, bbox);
// determine if triangle falls between pixel centers and discard
@@ -2102,7 +2103,7 @@ void SIMDCALL BinPoints_simd16(
// OOB indices => forced to zero.
simd16scalari vpai = _simd16_castps_si(vpiAttrib[0][VERTEX_SGV_VAI_COMP]);
- vpai = _simd16_max_epi32(_simd16_setzero_si(), vpai)
+ vpai = _simd16_max_epi32(_simd16_setzero_si(), vpai);
simd16scalari vNumViewports = _simd16_set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
simd16scalari vClearMask = _simd16_cmplt_epi32(vpai, vNumViewports);
viewportIdx = _simd16_and_si(vClearMask, vpai);
@@ -2461,6 +2462,13 @@ void BinPostSetupLines_simd16(
const simdscalar unused = _simd_setzero_ps();
+ // transpose verts needed for backend
+ /// @todo modify BE to take non-transformed verts
+ simd4scalar vHorizX[2][KNOB_SIMD_WIDTH]; // KNOB_SIMD16_WIDTH
+ simd4scalar vHorizY[2][KNOB_SIMD_WIDTH]; // KNOB_SIMD16_WIDTH
+ simd4scalar vHorizZ[2][KNOB_SIMD_WIDTH]; // KNOB_SIMD16_WIDTH
+ simd4scalar vHorizW[2][KNOB_SIMD_WIDTH]; // KNOB_SIMD16_WIDTH
+
if (!primMask)
{
goto endBinLines;
@@ -2479,13 +2487,6 @@ void BinPostSetupLines_simd16(
_simd16_store_si(reinterpret_cast<simd16scalari *>(aMTTop), bbox.ymin);
_simd16_store_si(reinterpret_cast<simd16scalari *>(aMTBottom), bbox.ymax);
- // transpose verts needed for backend
- /// @todo modify BE to take non-transformed verts
- simd4scalar vHorizX[2][KNOB_SIMD_WIDTH]; // KNOB_SIMD16_WIDTH
- simd4scalar vHorizY[2][KNOB_SIMD_WIDTH]; // KNOB_SIMD16_WIDTH
- simd4scalar vHorizZ[2][KNOB_SIMD_WIDTH]; // KNOB_SIMD16_WIDTH
- simd4scalar vHorizW[2][KNOB_SIMD_WIDTH]; // KNOB_SIMD16_WIDTH
-
vTranspose3x8(vHorizX[0], _simd16_extract_ps(prim[0].x, 0), _simd16_extract_ps(prim[1].x, 0), unused);
vTranspose3x8(vHorizY[0], _simd16_extract_ps(prim[0].y, 0), _simd16_extract_ps(prim[1].y, 0), unused);
vTranspose3x8(vHorizZ[0], _simd16_extract_ps(prim[0].z, 0), _simd16_extract_ps(prim[1].z, 0), unused);
diff --git a/src/gallium/drivers/swr/rasterizer/core/clip.h b/src/gallium/drivers/swr/rasterizer/core/clip.h
index 8a4fe6d..36c8402 100644
--- a/src/gallium/drivers/swr/rasterizer/core/clip.h
+++ b/src/gallium/drivers/swr/rasterizer/core/clip.h
@@ -1095,7 +1095,7 @@ public:
AR_BEGIN(FEGuardbandClip, pa.pDC->drawId);
// we have to clip tris, execute the clipper, which will also
// call the binner
- ClipSimd(_simd16_vmask_ps(primMask), _simd16_vmask_ps(clipMask), pa, primId);
+ ClipSimd(vMask(primMask), vMask(clipMask), pa, primId);
AR_END(FEGuardbandClip, 1);
}
else if (validMask)
--
2.7.4
More information about the mesa-dev
mailing list