[Beignet] [PATCH] GBE: Optimize byte gather read using untyped read.

Ruiling Song ruiling.song at intel.com
Tue Apr 22 19:56:50 PDT 2014


Untyped read appears to perform better than byte gather read.
Some OpenCV performance tests showed doubled performance after the patch.

Signed-off-by: Ruiling Song <ruiling.song at intel.com>
---
 backend/src/backend/gen_insn_selection.cpp |   25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index bcbf115..8c7ac09 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -2594,19 +2594,22 @@ namespace gbe
       } else {
         GBE_ASSERT(insn.getValueNum() == 1);
         const GenRegister value = sel.selReg(insn.getValue(0));
-        // We need a temporary register if we read bytes or words
-        Register dst = Register(value.value.reg);
-        if (elemSize == GEN_BYTE_SCATTER_WORD ||
-            elemSize == GEN_BYTE_SCATTER_BYTE) {
-          dst = sel.reg(FAMILY_DWORD);
-          sel.BYTE_GATHER(GenRegister::fxgrf(simdWidth, dst), address, elemSize, bti);
-        }
-
-        // Repack bytes or words using a converting mov instruction
+        GBE_ASSERT(elemSize == GEN_BYTE_SCATTER_WORD || elemSize == GEN_BYTE_SCATTER_BYTE);
+
+        Register tmpReg = sel.reg(FAMILY_DWORD);
+        GenRegister tmpAddr = GenRegister::udxgrf(simdWidth, sel.reg(FAMILY_DWORD));
+        GenRegister tmpData = GenRegister::udxgrf(simdWidth, tmpReg);
+        // Get dword aligned addr
+        sel.AND(tmpAddr, GenRegister::retype(address,GEN_TYPE_UD), GenRegister::immud(0xfffffffc));
+        sel.UNTYPED_READ(tmpAddr, &tmpData, 1, bti);
+        // Get the remaining offset from aligned addr
+        sel.AND(tmpAddr, GenRegister::retype(address,GEN_TYPE_UD), GenRegister::immud(0x3));
+        sel.SHL(tmpAddr, tmpAddr, GenRegister::immud(0x3));
+        sel.SHR(tmpData, tmpData, tmpAddr);
         if (elemSize == GEN_BYTE_SCATTER_WORD)
-          sel.MOV(GenRegister::retype(value, GEN_TYPE_UW), GenRegister::unpacked_uw(dst));
+          sel.MOV(GenRegister::retype(value, GEN_TYPE_UW), GenRegister::unpacked_uw(tmpReg));
         else if (elemSize == GEN_BYTE_SCATTER_BYTE)
-          sel.MOV(GenRegister::retype(value, GEN_TYPE_UB), GenRegister::unpacked_ub(dst));
+          sel.MOV(GenRegister::retype(value, GEN_TYPE_UB), GenRegister::unpacked_ub(tmpReg));
       }
     }
 
-- 
1.7.10.4



More information about the Beignet mailing list