[Beignet] [PATCH 2/2] Use the Byte Gather after HSW when read byte/shor.

Yang Rong rong.r.yang at intel.com
Sun Jun 14 23:45:35 PDT 2015


After HSW, the byte gather's performance issue has gone, so needn't read
dword and extract.
But for multi dst load, the combine reduce the
address calc, but need the extract the dst, maybe performance is
approximate, so still use the old logic.

Signed-off-by: Yang Rong <rong.r.yang at intel.com>
---
 backend/src/backend/gen_insn_selection.cpp | 36 ++++++++++++++++++++++++++++--
 1 file changed, 34 insertions(+), 2 deletions(-)

diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index d63c7e3..d289e8e 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -365,6 +365,8 @@ namespace gbe
     void setLongRegRestrict(bool b) { bLongRegRestrict = b; }
     void setLdMsgOrder(uint32_t type)  { ldMsgOrder = type; }
     uint32_t getLdMsgOrder()  const { return ldMsgOrder; }
+    void setSlowByteGather(bool b) { slowByteGather = b; }
+    bool getSlowByteGather() { return slowByteGather; }
     /*! indicate whether a register is a scalar/uniform register. */
     INLINE bool isPartialWrite(const ir::Register &reg) const {
       return partialWriteRegs.find(reg.value()) != partialWriteRegs.end();
@@ -740,6 +742,7 @@ namespace gbe
     bool bHasLongType;
     bool bLongRegRestrict;
     uint32_t ldMsgOrder;
+    bool slowByteGather;
     INLINE ir::LabelIndex newAuxLabel()
     {
       currAuxLabel++;
@@ -779,7 +782,8 @@ namespace gbe
     curr(ctx.getSimdWidth()), file(ctx.getFunction().getRegisterFile()),
     maxInsnNum(ctx.getFunction().getLargestBlockSize()), dagPool(maxInsnNum),
     stateNum(0), vectorNum(0), bwdCodeGeneration(false), currAuxLabel(ctx.getFunction().labelNum()),
-    bHas32X32Mul(false), bHasLongType(false), bLongRegRestrict(false), ldMsgOrder(LD_MSG_ORDER_IVB)
+    bHas32X32Mul(false), bHasLongType(false), bLongRegRestrict(false), ldMsgOrder(LD_MSG_ORDER_IVB),
+    slowByteGather(false)
   {
     const ir::Function &fn = ctx.getFunction();
     this->regNum = fn.regNum();
@@ -2025,26 +2029,31 @@ namespace gbe
   Selection::Selection(GenContext &ctx) {
     this->blockList = NULL;
     this->opaque = GBE_NEW(Selection::Opaque, ctx);
+    this->opaque->setSlowByteGather(true);
   }
 
   Selection75::Selection75(GenContext &ctx) : Selection(ctx) {
+    this->opaque->setSlowByteGather(false);
   }
 
   Selection8::Selection8(GenContext &ctx) : Selection(ctx) {
     this->opaque->setHas32X32Mul(true);
     this->opaque->setHasLongType(true);
+    this->opaque->setSlowByteGather(false);
   }
 
   SelectionChv::SelectionChv(GenContext &ctx) : Selection(ctx) {
     this->opaque->setHas32X32Mul(true);
     this->opaque->setHasLongType(true);
     this->opaque->setLongRegRestrict(true);
+    this->opaque->setSlowByteGather(false);
   }
 
   Selection9::Selection9(GenContext &ctx) : Selection(ctx) {
     this->opaque->setHas32X32Mul(true);
     this->opaque->setHasLongType(true);
     this->opaque->setLdMsgOrder(LD_MSG_ORDER_SKL);
+    this->opaque->setSlowByteGather(false);
   }
 
   void Selection::Opaque::TYPED_WRITE(GenRegister *msgs, uint32_t msgNum,
@@ -3519,8 +3528,31 @@ namespace gbe
         GBE_ASSERT(insn.getValueNum() == 1);
         const GenRegister value = sel.selReg(insn.getValue(0), insn.getValueType());
         GBE_ASSERT(elemSize == GEN_BYTE_SCATTER_WORD || elemSize == GEN_BYTE_SCATTER_BYTE);
+        if(sel.getSlowByteGather())
+          readByteAsDWord(sel, elemSize, address, value, isUniform, bti);
+        else {
+          GenRegister b = bti.isConst ? GenRegister::immud(bti.imm) : sel.selReg(bti.reg, ir::TYPE_U32);
+          GenRegister tmpFlag = sel.selReg(sel.reg(ir::FAMILY_WORD, true), ir::TYPE_U16);
+
+          // We need a temporary register if we read bytes or words
+          Register dst = sel.reg(FAMILY_DWORD, isUniform);
+          sel.push();
+            if (isUniform)
+              sel.curr.noMask = 1;
+            sel.BYTE_GATHER(sel.selReg(dst, ir::TYPE_U32), address, elemSize, b, bti.isConst ? NULL : & tmpFlag);
+          sel.pop();
 
-        readByteAsDWord(sel, elemSize, address, value, isUniform, bti);
+          sel.push();
+            if (isUniform) {
+              sel.curr.noMask = 1;
+              sel.curr.execWidth = 1;
+            }
+            if (elemSize == GEN_BYTE_SCATTER_WORD)
+              sel.MOV(GenRegister::retype(value, GEN_TYPE_UW), GenRegister::unpacked_uw(dst));
+            else if (elemSize == GEN_BYTE_SCATTER_BYTE)
+              sel.MOV(GenRegister::retype(value, GEN_TYPE_UB), GenRegister::unpacked_ub(dst));
+          sel.pop();
+        }
       }
     }
 
-- 
1.8.3.2



More information about the Beignet mailing list