[Mesa-dev] [PATCH v2 16/17] swr/rast: use gather instruction for odd format fetch

Tim Rowley timothy.o.rowley at intel.com
Wed Apr 26 18:33:17 UTC 2017


Small fetch performance optimization - use gather instruction
for odd format fetch instead of slow emulated code.
---
 .../drivers/swr/rasterizer/jitter/fetch_jit.cpp    | 55 ++++------------------
 1 file changed, 9 insertions(+), 46 deletions(-)

diff --git a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
index 8fc31ae..50c2e9b 100644
--- a/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
+++ b/src/gallium/drivers/swr/rasterizer/jitter/fetch_jit.cpp
@@ -590,64 +590,27 @@ void FetchJit::UnpackComponents(SWR_FORMAT format, Value* vInput, Value* result[
 // gather for odd component size formats
 // gather SIMD full pixels per lane then shift/mask to move each component to their
 // own vector
-void FetchJit::CreateGatherOddFormats(SWR_FORMAT format, Value* pMask, Value* pBase, Value* offsets, Value* result[4])
+void FetchJit::CreateGatherOddFormats(SWR_FORMAT format, Value* pMask, Value* pBase, Value* pOffsets, Value* pResult[4])
 {
     const SWR_FORMAT_INFO &info = GetFormatInfo(format);
 
     // only works if pixel size is <= 32bits
     SWR_ASSERT(info.bpp <= 32);
 
-    Value* gather = VUNDEF_I();
+	Value* pGather = GATHERDD(VIMMED1(0), pBase, pOffsets, pMask, C((char)1));
 
-    // assign defaults
     for (uint32_t comp = 0; comp < 4; ++comp)
     {
-        result[comp] = VIMMED1((int)info.defaults[comp]);
+        pResult[comp] = VIMMED1((int)info.defaults[comp]);
     }
 
-    // load the proper amount of data based on component size
-    PointerType* pLoadTy = nullptr;
-    switch (info.bpp)
-    {
-    case 8: pLoadTy = Type::getInt8PtrTy(JM()->mContext); break;
-    case 16: pLoadTy = Type::getInt16PtrTy(JM()->mContext); break;
-    case 24:
-    case 32: pLoadTy = Type::getInt32PtrTy(JM()->mContext); break;
-    default: SWR_INVALID("Invalid bpp: %d", info.bpp);
-    }
-
-    // allocate temporary memory for masked off lanes
-    Value* pTmp = ALLOCA(pLoadTy->getElementType());
-
-    // gather SIMD pixels
-    for (uint32_t e = 0; e < JM()->mVWidth; ++e)
-    {
-        Value* pElemOffset = VEXTRACT(offsets, C(e));
-        Value* pLoad = GEP(pBase, pElemOffset);
-        Value* pLaneMask = VEXTRACT(pMask, C(e));
-
-        pLoad = POINTER_CAST(pLoad, pLoadTy);
-
-        // mask in tmp pointer for disabled lanes
-        pLoad = SELECT(pLaneMask, pLoad, pTmp);
-
-        // load pixel
-        Value *val = LOAD(pLoad);
-
-        // zero extend to 32bit integer
-        val = INT_CAST(val, mInt32Ty, false);
-
-        // store in simd lane
-        gather = VINSERT(gather, val, C(e));
-    }
-
-    UnpackComponents(format, gather, result);
+    UnpackComponents(format, pGather, pResult);
 
     // cast to fp32
-    result[0] = BITCAST(result[0], mSimdFP32Ty);
-    result[1] = BITCAST(result[1], mSimdFP32Ty);
-    result[2] = BITCAST(result[2], mSimdFP32Ty);
-    result[3] = BITCAST(result[3], mSimdFP32Ty);
+    pResult[0] = BITCAST(pResult[0], mSimdFP32Ty);
+    pResult[1] = BITCAST(pResult[1], mSimdFP32Ty);
+    pResult[2] = BITCAST(pResult[2], mSimdFP32Ty);
+    pResult[3] = BITCAST(pResult[3], mSimdFP32Ty);
 }
 
 void FetchJit::ConvertFormat(SWR_FORMAT format, Value *texels[4])
@@ -860,7 +823,7 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState,
         if (IsOddFormat((SWR_FORMAT)ied.Format))
         {
             Value* pResults[4];
-            CreateGatherOddFormats((SWR_FORMAT)ied.Format, pMask, pStreamBase, vOffsets, pResults);
+            CreateGatherOddFormats((SWR_FORMAT)ied.Format, vGatherMask, pStreamBase, vOffsets, pResults);
             ConvertFormat((SWR_FORMAT)ied.Format, pResults);
 
             for (uint32_t c = 0; c < 4; ++c)
-- 
2.7.4



More information about the mesa-dev mailing list