[Mesa-dev] [PATCH 06/20] swr/rast: Rewrite Shuffle8bpcGatherd using shuffle
Tim Rowley
timothy.o.rowley at intel.com
Thu Dec 14 21:34:46 UTC 2017
Ease future code maintenance and prepare for folding the simd8 and simd16 versions.
---
.../drivers/swr/rasterizer/jitter/fetch_jit.cpp | 244 ++++++---------------
1 file changed, 62 insertions(+), 182 deletions(-)
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
index 67a4a04072..a847cb74da 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
@@ -2014,206 +2014,86 @@ void FetchJit::Shuffle8bpcGatherd(Shuffle8bpcArgs &args)
const uint32_t (&swizzle)[4] = std::get<9>(args);
// cast types
- Type* vGatherTy = mSimdInt32Ty;
Type* v32x8Ty = VectorType::get(mInt8Ty, mVWidth * 4 ); // vwidth is units of 32 bits
- // have to do extra work for sign extending
- if ((extendType == Instruction::CastOps::SExt) || (extendType == Instruction::CastOps::SIToFP)){
- Type* v16x8Ty = VectorType::get(mInt8Ty, mVWidth * 2); // 8x16bit ints in a 128bit lane
- Type* v128Ty = VectorType::get(IntegerType::getIntNTy(JM()->mContext, 128), mVWidth / 4); // vwidth is units of 32 bits
-
- // shuffle mask, including any swizzling
- const char x = (char)swizzle[0]; const char y = (char)swizzle[1];
- const char z = (char)swizzle[2]; const char w = (char)swizzle[3];
- Value* vConstMask = C<char>({char(x), char(x+4), char(x+8), char(x+12),
- char(y), char(y+4), char(y+8), char(y+12),
- char(z), char(z+4), char(z+8), char(z+12),
- char(w), char(w+4), char(w+8), char(w+12),
- char(x), char(x+4), char(x+8), char(x+12),
- char(y), char(y+4), char(y+8), char(y+12),
- char(z), char(z+4), char(z+8), char(z+12),
- char(w), char(w+4), char(w+8), char(w+12)});
-
- Value* vShufResult = BITCAST(PSHUFB(BITCAST(vGatherResult, v32x8Ty), vConstMask), vGatherTy);
- // after pshufb: group components together in each 128bit lane
- // 256i - 0 1 2 3 4 5 6 7
- // xxxx yyyy zzzz wwww xxxx yyyy zzzz wwww
-
- Value* vi128XY = nullptr;
- if(isComponentEnabled(compMask, 0) || isComponentEnabled(compMask, 1)){
- vi128XY = BITCAST(PERMD(vShufResult, C<int32_t>({0, 4, 0, 0, 1, 5, 0, 0})), v128Ty);
- // after PERMD: move and pack xy and zw components in low 64 bits of each 128bit lane
- // 256i - 0 1 2 3 4 5 6 7
- // xxxx xxxx dcdc dcdc yyyy yyyy dcdc dcdc (dc - don't care)
- }
-
- // do the same for zw components
- Value* vi128ZW = nullptr;
- if(isComponentEnabled(compMask, 2) || isComponentEnabled(compMask, 3)){
- vi128ZW = BITCAST(PERMD(vShufResult, C<int32_t>({2, 6, 0, 0, 3, 7, 0, 0})), v128Ty);
- }
-
- // init denormalize variables if needed
- Instruction::CastOps fpCast;
- Value* conversionFactor;
-
- switch (conversionType)
- {
- case CONVERT_NORMALIZED:
- fpCast = Instruction::CastOps::SIToFP;
- conversionFactor = VIMMED1((float)(1.0 / 127.0));
- break;
- case CONVERT_SSCALED:
- fpCast = Instruction::CastOps::SIToFP;
- conversionFactor = VIMMED1((float)(1.0));
- break;
- case CONVERT_USCALED:
- SWR_INVALID("Type should not be sign extended!");
- conversionFactor = nullptr;
- break;
- default:
- SWR_ASSERT(conversionType == CONVERT_NONE);
- conversionFactor = nullptr;
- break;
- }
+ for (uint32_t i = 0; i < 4; i++)
+ {
+ if (!isComponentEnabled(compMask, i))
+ continue;
- // sign extend all enabled components. If we have a fill vVertexElements, output to current simdvertex
- for (uint32_t i = 0; i < 4; i++)
+ if (compCtrl[i] == ComponentControl::StoreSrc)
{
- if (isComponentEnabled(compMask, i))
- {
- if (compCtrl[i] == ComponentControl::StoreSrc)
- {
- // if x or z, extract 128bits from lane 0, else for y or w, extract from lane 1
- uint32_t lane = ((i == 0) || (i == 2)) ? 0 : 1;
- // if x or y, use vi128XY permute result, else use vi128ZW
- Value* selectedPermute = (i < 2) ? vi128XY : vi128ZW;
-
- // sign extend
- vVertexElements[currentVertexElement] = PMOVSXBD(BITCAST(VEXTRACT(selectedPermute, C(lane)), v16x8Ty));
-
- // denormalize if needed
- if (conversionType != CONVERT_NONE)
- {
- vVertexElements[currentVertexElement] = FMUL(CAST(fpCast, vVertexElements[currentVertexElement], mSimdFP32Ty), conversionFactor);
- }
- currentVertexElement++;
- }
- else
+ std::vector<uint32_t> vShuffleMasks[4] = {
+ { 0, 4, 8, 12, 16, 20, 24, 28 }, // x
+ { 1, 5, 9, 13, 17, 21, 25, 29 }, // y
+ { 2, 6, 10, 14, 18, 22, 26, 30 }, // z
+ { 3, 7, 11, 15, 19, 23, 27, 31 }, // w
+ };
+
+ Value *val = VSHUFFLE(BITCAST(vGatherResult, v32x8Ty),
+ UndefValue::get(v32x8Ty),
+ vShuffleMasks[swizzle[i]]);
+
+ if ((extendType == Instruction::CastOps::SExt) ||
+ (extendType == Instruction::CastOps::SIToFP)) {
+ switch (conversionType)
{
-#if USE_SIMD16_SHADERS
- vVertexElements[currentVertexElement++] = GenerateCompCtrlVector(compCtrl[i], useVertexID2);
-#else
- vVertexElements[currentVertexElement++] = GenerateCompCtrlVector(compCtrl[i]);
-#endif
+ case CONVERT_NORMALIZED:
+ val = FMUL(SI_TO_FP(val, mSimdFP32Ty), VIMMED1((float)(1.0 / 127.0)));
+ break;
+ case CONVERT_SSCALED:
+ val = SI_TO_FP(val, mSimdFP32Ty);
+ break;
+ case CONVERT_USCALED:
+ SWR_INVALID("Type should not be sign extended!");
+ break;
+ default:
+ SWR_ASSERT(conversionType == CONVERT_NONE);
+ val = S_EXT(val, mSimdInt32Ty);
+ break;
}
-
- if (currentVertexElement > 3)
+ } else if ((extendType == Instruction::CastOps::ZExt) ||
+ (extendType == Instruction::CastOps::UIToFP)) {
+ switch (conversionType)
{
- StoreVertexElements(pVtxOut, outputElt++, 4, vVertexElements);
- // reset to the next vVertexElement to output
- currentVertexElement = 0;
+ case CONVERT_NORMALIZED:
+ val = FMUL(UI_TO_FP(val, mSimdFP32Ty), VIMMED1((float)(1.0 / 255.0)));
+ break;
+ case CONVERT_SSCALED:
+ SWR_INVALID("Type should not be zero extended!");
+ break;
+ case CONVERT_USCALED:
+ val = UI_TO_FP(val, mSimdFP32Ty);
+ break;
+ default:
+ SWR_ASSERT(conversionType == CONVERT_NONE);
+ val = Z_EXT(val, mSimdInt32Ty);
+ break;
}
}
- }
- }
- // else zero extend
- else if ((extendType == Instruction::CastOps::ZExt) || (extendType == Instruction::CastOps::UIToFP))
- {
- // init denormalize variables if needed
- Instruction::CastOps fpCast;
- Value* conversionFactor;
+ else
+ {
+ SWR_INVALID("Unsupported conversion type");
+ }
- switch (conversionType)
- {
- case CONVERT_NORMALIZED:
- fpCast = Instruction::CastOps::UIToFP;
- conversionFactor = VIMMED1((float)(1.0 / 255.0));
- break;
- case CONVERT_USCALED:
- fpCast = Instruction::CastOps::UIToFP;
- conversionFactor = VIMMED1((float)(1.0));
- break;
- case CONVERT_SSCALED:
- SWR_INVALID("Type should not be zero extended!");
- conversionFactor = nullptr;
- break;
- default:
- SWR_ASSERT(conversionType == CONVERT_NONE);
- conversionFactor = nullptr;
- break;
+ vVertexElements[currentVertexElement++] = val;
}
-
- // shuffle enabled components into lower byte of each 32bit lane, 0 extending to 32 bits
- for (uint32_t i = 0; i < 4; i++)
+ else
{
- if (isComponentEnabled(compMask, i))
- {
- if (compCtrl[i] == ComponentControl::StoreSrc)
- {
- // pshufb masks for each component
- Value* vConstMask;
- switch (swizzle[i])
- {
- case 0:
- // x shuffle mask
- vConstMask = C<char>({ 0, -1, -1, -1, 4, -1, -1, -1, 8, -1, -1, -1, 12, -1, -1, -1,
- 0, -1, -1, -1, 4, -1, -1, -1, 8, -1, -1, -1, 12, -1, -1, -1 });
- break;
- case 1:
- // y shuffle mask
- vConstMask = C<char>({ 1, -1, -1, -1, 5, -1, -1, -1, 9, -1, -1, -1, 13, -1, -1, -1,
- 1, -1, -1, -1, 5, -1, -1, -1, 9, -1, -1, -1, 13, -1, -1, -1 });
- break;
- case 2:
- // z shuffle mask
- vConstMask = C<char>({ 2, -1, -1, -1, 6, -1, -1, -1, 10, -1, -1, -1, 14, -1, -1, -1,
- 2, -1, -1, -1, 6, -1, -1, -1, 10, -1, -1, -1, 14, -1, -1, -1 });
- break;
- case 3:
- // w shuffle mask
- vConstMask = C<char>({ 3, -1, -1, -1, 7, -1, -1, -1, 11, -1, -1, -1, 15, -1, -1, -1,
- 3, -1, -1, -1, 7, -1, -1, -1, 11, -1, -1, -1, 15, -1, -1, -1 });
- break;
- default:
- vConstMask = nullptr;
- break;
- }
-
- vVertexElements[currentVertexElement] = BITCAST(PSHUFB(BITCAST(vGatherResult, v32x8Ty), vConstMask), vGatherTy);
- // after pshufb for x channel
- // 256i - 0 1 2 3 4 5 6 7
- // x000 x000 x000 x000 x000 x000 x000 x000
-
- // denormalize if needed
- if (conversionType != CONVERT_NONE)
- {
- vVertexElements[currentVertexElement] = FMUL(CAST(fpCast, vVertexElements[currentVertexElement], mSimdFP32Ty), conversionFactor);
- }
- currentVertexElement++;
- }
- else
- {
#if USE_SIMD16_SHADERS
- vVertexElements[currentVertexElement++] = GenerateCompCtrlVector(compCtrl[i], useVertexID2);
+ vVertexElements[currentVertexElement++] = GenerateCompCtrlVector(compCtrl[i], useVertexID2);
#else
- vVertexElements[currentVertexElement++] = GenerateCompCtrlVector(compCtrl[i]);
+ vVertexElements[currentVertexElement++] = GenerateCompCtrlVector(compCtrl[i]);
#endif
- }
+ }
- if (currentVertexElement > 3)
- {
- StoreVertexElements(pVtxOut, outputElt++, 4, vVertexElements);
- // reset to the next vVertexElement to output
- currentVertexElement = 0;
- }
- }
+ if (currentVertexElement > 3)
+ {
+ StoreVertexElements(pVtxOut, outputElt++, 4, vVertexElements);
+ // reset to the next vVertexElement to output
+ currentVertexElement = 0;
}
}
- else
- {
- SWR_INVALID("Unsupported conversion type");
- }
}
#if USE_SIMD16_BUILDER
--
2.14.1
More information about the mesa-dev
mailing list