[Beignet] [PATCH] GBE: Optimize byte gather read using untyped read.
Ruiling Song
ruiling.song at intel.com
Tue Apr 22 19:56:50 PDT 2014
Untyped read seems to perform better than byte gather read.
Some OpenCV performance tests showed doubled performance after the patch.
Signed-off-by: Ruiling Song <ruiling.song at intel.com>
---
backend/src/backend/gen_insn_selection.cpp | 25 ++++++++++++++-----------
1 file changed, 14 insertions(+), 11 deletions(-)
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index bcbf115..8c7ac09 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -2594,19 +2594,22 @@ namespace gbe
} else {
GBE_ASSERT(insn.getValueNum() == 1);
const GenRegister value = sel.selReg(insn.getValue(0));
- // We need a temporary register if we read bytes or words
- Register dst = Register(value.value.reg);
- if (elemSize == GEN_BYTE_SCATTER_WORD ||
- elemSize == GEN_BYTE_SCATTER_BYTE) {
- dst = sel.reg(FAMILY_DWORD);
- sel.BYTE_GATHER(GenRegister::fxgrf(simdWidth, dst), address, elemSize, bti);
- }
-
- // Repack bytes or words using a converting mov instruction
+ GBE_ASSERT(elemSize == GEN_BYTE_SCATTER_WORD || elemSize == GEN_BYTE_SCATTER_BYTE);
+
+ Register tmpReg = sel.reg(FAMILY_DWORD);
+ GenRegister tmpAddr = GenRegister::udxgrf(simdWidth, sel.reg(FAMILY_DWORD));
+ GenRegister tmpData = GenRegister::udxgrf(simdWidth, tmpReg);
+ // Get dword aligned addr
+ sel.AND(tmpAddr, GenRegister::retype(address,GEN_TYPE_UD), GenRegister::immud(0xfffffffc));
+ sel.UNTYPED_READ(tmpAddr, &tmpData, 1, bti);
+ // Get the remaining offset from aligned addr
+ sel.AND(tmpAddr, GenRegister::retype(address,GEN_TYPE_UD), GenRegister::immud(0x3));
+ sel.SHL(tmpAddr, tmpAddr, GenRegister::immud(0x3));
+ sel.SHR(tmpData, tmpData, tmpAddr);
if (elemSize == GEN_BYTE_SCATTER_WORD)
- sel.MOV(GenRegister::retype(value, GEN_TYPE_UW), GenRegister::unpacked_uw(dst));
+ sel.MOV(GenRegister::retype(value, GEN_TYPE_UW), GenRegister::unpacked_uw(tmpReg));
else if (elemSize == GEN_BYTE_SCATTER_BYTE)
- sel.MOV(GenRegister::retype(value, GEN_TYPE_UB), GenRegister::unpacked_ub(dst));
+ sel.MOV(GenRegister::retype(value, GEN_TYPE_UB), GenRegister::unpacked_ub(tmpReg));
}
}
--
1.7.10.4
More information about the Beignet
mailing list