[Mesa-dev] [PATCH 17/20] swr/rast: Replace VPSRL with LSHR
Tim Rowley
timothy.o.rowley at intel.com
Thu Dec 14 21:34:57 UTC 2017
Replace use of the x86 VPSRLI intrinsics (AVX2 and AVX-512 variants) with the generic LLVM IR LSHR instruction.
Generates the same final assembly.
---
.../swr/rasterizer/codegen/gen_llvm_ir_macros.py | 2 --
.../drivers/swr/rasterizer/jitter/builder_misc.cpp | 30 ----------------------
.../drivers/swr/rasterizer/jitter/builder_misc.h | 5 ----
.../drivers/swr/rasterizer/jitter/fetch_jit.cpp | 8 +++---
4 files changed, 4 insertions(+), 41 deletions(-)
diff --git a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py b/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py
index 8bbf36d9b8..9544353eb9 100644
--- a/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py
+++ b/src/gallium/drivers/swr/rasterizer/codegen/gen_llvm_ir_macros.py
@@ -47,8 +47,6 @@ intrinsics = [
['VGATHERPS_16', 'x86_avx512_gather_dps_512', ['src', 'pBase', 'indices', 'mask', 'scale']],
['VGATHERDD', 'x86_avx2_gather_d_d_256', ['src', 'pBase', 'indices', 'mask', 'scale']],
['VGATHERDD_16', 'x86_avx512_gather_dpi_512', ['src', 'pBase', 'indices', 'mask', 'scale']],
- ['VPSRLI', 'x86_avx2_psrli_d', ['src', 'imm']],
- ['VPSRLI_16', 'x86_avx512_psrli_d_512', ['src', 'imm']],
['VSQRTPS', 'x86_avx_sqrt_ps_256', ['a']],
['VRSQRTPS', 'x86_avx_rsqrt_ps_256', ['a']],
['VRCPPS', 'x86_avx_rcp_ps_256', ['a']],
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp
index 684c9fac54..bdcafd28a3 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.cpp
@@ -808,36 +808,6 @@ namespace SwrJit
return vGather;
}
-#if USE_SIMD16_BUILDER
- Value *Builder::PSRLI(Value *a, Value *imm)
- {
- return VPSRLI(a, imm);
- }
-
- Value *Builder::PSRLI_16(Value *a, Value *imm)
- {
- Value *result = VUNDEF2_I();
-
- // use avx512 shift right instruction if available
- if (JM()->mArch.AVX512F())
- {
- result = VPSRLI_16(a, imm);
- }
- else
- {
- Value *a0 = EXTRACT2_I(a, 0);
- Value *a1 = EXTRACT2_I(a, 1);
-
- Value *result0 = PSRLI(a0, imm);
- Value *result1 = PSRLI(a1, imm);
-
- result = JOIN2(result0, result1);
- }
-
- return result;
- }
-
-#endif
#if USE_SIMD16_BUILDER
//////////////////////////////////////////////////////////////////////////
/// @brief
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h
index 6c883d8f52..98bc563351 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h
+++ b/src/gallium/drivers/swr/rasterizer/jitter/builder_misc.h
@@ -143,11 +143,6 @@ void GATHER4DD(const SWR_FORMAT_INFO &info, Value* pSrcBase, Value* byteOffsets,
Value *GATHERPD(Value* src, Value* pBase, Value* indices, Value* mask, uint8_t scale = 1);
-#if USE_SIMD16_BUILDER
-Value *PSRLI(Value *a, Value *imm);
-Value *PSRLI_16(Value *a, Value *imm);
-
-#endif
void SCATTERPS(Value* pDst, Value* vSrc, Value* vOffsets, Value* vMask);
void Shuffle8bpcGather4(const SWR_FORMAT_INFO &info, Value* vGatherInput, Value* vGatherOutput[], bool bPackedOutput);
diff --git a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
index 1312ac0009..8d97ddfdc9 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
@@ -1422,12 +1422,12 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState,
// But, we know that elements must be aligned for FETCH. :)
// Right shift the offset by a bit and then scale by 2 to remove the sign extension.
#if USE_SIMD16_BUILDER
- Value *shiftedOffsets = VPSRLI_16(vOffsets16, C(1));
+ Value *shiftedOffsets = LSHR(vOffsets16, 1);
pVtxSrc2[currentVertexElement] = GATHERPS_16(gatherSrc16, pStreamBase, shiftedOffsets, vGatherMask16, 2);
#else
- Value *vShiftedOffsets = VPSRLI(vOffsets, C(1));
- Value *vShiftedOffsets2 = VPSRLI(vOffsets2, C(1));
+ Value *vShiftedOffsets = LSHR(vOffsets, 1);
+ Value *vShiftedOffsets2 = LSHR(vOffsets2, 1);
vVertexElements[currentVertexElement] = GATHERPS(gatherSrc, pStreamBase, vShiftedOffsets, vGatherMask, 2);
vVertexElements2[currentVertexElement] = GATHERPS(gatherSrc2, pStreamBase, vShiftedOffsets2, vGatherMask2, 2);
@@ -1492,7 +1492,7 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState,
// However, GATHERPS uses signed 32-bit offsets, so only a 2GB range :(
// But, we know that elements must be aligned for FETCH. :)
// Right shift the offset by a bit and then scale by 2 to remove the sign extension.
- Value* vShiftedOffsets = VPSRLI(vOffsets, C(1));
+ Value* vShiftedOffsets = LSHR(vOffsets, 1);
vVertexElements[currentVertexElement++] = GATHERPS(gatherSrc, pStreamBase, vShiftedOffsets, vGatherMask, 2);
}
else
--
2.14.1
More information about the mesa-dev mailing list