[Mesa-dev] [PATCH 3/5] swr/rast: fix VCVTPD2PS generation for AVX512

Alok Hota alok.hota at intel.com
Wed May 16 16:14:19 UTC 2018


---
 .../swr/rasterizer/jitter/functionpasses/lower_x86.cpp       | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp b/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp
index 3caea67..e0296f6 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/functionpasses/lower_x86.cpp
@@ -265,8 +265,16 @@ namespace SwrJit
                 // Assuming the intrinsics are consistent and place the src operand and mask last in the argument list.
                 if (mTarget == AVX512)
                 {
-                    args.push_back(GetZeroVec(vecWidth, pElemTy));
-                    args.push_back(GetMask(vecWidth));
+                    if (pFunc->getName().equals("meta.intrinsic.VCVTPD2PS")) {
+                        args.push_back(GetZeroVec(W256, pCallInst->getType()->getScalarType()));
+                        args.push_back(GetMask(W256));
+                        // For AVX512 VCVTPD2PS, we also have to append the rounding-mode argument.
+                        args.push_back(B->C(_MM_FROUND_TO_NEAREST_INT |
+                                            _MM_FROUND_NO_EXC));
+                    } else {
+                        args.push_back(GetZeroVec(vecWidth, pElemTy));
+                        args.push_back(GetMask(vecWidth));
+                    }
                 }
 
                 return B->CALLA(pIntrin, args);
-- 
2.7.4



More information about the mesa-dev mailing list