[Beignet] [PATCH 12/27] Overload the READ64 and WRITE64 function for Gen8

junyan.he at inbox.com junyan.he at inbox.com
Tue Jan 6 02:01:27 PST 2015


From: Junyan He <junyan.he at linux.intel.com>

We still use the old approach of reading/writing long
type data as 2 DW elements. After a read, or before a
write, we use the pack/unpack functions to convert
the data.

Signed-off-by: Junyan He <junyan.he at linux.intel.com>
---
 backend/src/backend/gen8_context.cpp       |  38 +++++++++++
 backend/src/backend/gen8_context.hpp       |   2 +
 backend/src/backend/gen_context.hpp        |   4 +-
 backend/src/backend/gen_insn_selection.cpp | 105 ++++++++++++++++++++++-------
 4 files changed, 124 insertions(+), 25 deletions(-)

diff --git a/backend/src/backend/gen8_context.cpp b/backend/src/backend/gen8_context.cpp
index 85896df..276b8c5 100644
--- a/backend/src/backend/gen8_context.cpp
+++ b/backend/src/backend/gen8_context.cpp
@@ -109,4 +109,42 @@ namespace gbe
     p->pop();
   }
 
+  void Gen8Context::emitRead64Instruction(const SelectionInstruction &insn)
+  {
+    const uint32_t bti = insn.getbti();
+    const uint32_t elemNum = insn.extra.elem;
+    GBE_ASSERT(elemNum == 1); // only one 64-bit element per instruction is supported here
+
+    const GenRegister addr = ra->genReg(insn.src(0));
+    const GenRegister tmp_dst = ra->genReg(insn.dst(0)); // dst(0) is the temporary that receives the raw DWs
+
+    /* BDW's 64-bit load/store send instructions require the bti to be surfaceless,
+       which we cannot accept, so we fall back to an untyped read of 2 DWs per element here. */
+    p->UNTYPED_READ(tmp_dst, addr, bti, elemNum*2);
+
+    for (uint32_t elemID = 0; elemID < elemNum; elemID++) { // dst(elemID) is the temporary, dst(elemID + elemNum) the real long destination
+      GenRegister long_tmp = ra->genReg(insn.dst(elemID));
+      GenRegister the_long = ra->genReg(insn.dst(elemID + elemNum));
+      this->packLongVec(long_tmp, the_long, p->curr.execWidth); // presumably packs the DW pair held in long_tmp into the_long
+    }
+  }
+
+  void Gen8Context::emitWrite64Instruction(const SelectionInstruction &insn)
+  {
+    const uint32_t bti = insn.getbti();
+    const uint32_t elemNum = insn.extra.elem;
+    GBE_ASSERT(elemNum == 1); // only one 64-bit element per instruction is supported here
+
+    const GenRegister addr = ra->genReg(insn.src(elemNum)); // the address follows the elemNum long values in the source list
+
+    /* BDW's 64-bit load/store send instructions require the bti to be surfaceless,
+       which we cannot accept, so we fall back to an untyped write of 2 DWs per element here. */
+    for (uint32_t elemID = 0; elemID < elemNum; elemID++) { // src(elemID) is the long value, src(elemNum + 1 + elemID) its temporary
+      GenRegister the_long = ra->genReg(insn.src(elemID));
+      GenRegister long_tmp = ra->genReg(insn.src(elemNum + 1 + elemID));
+      this->unpackLongVec(the_long, long_tmp, p->curr.execWidth); // presumably splits the_long into the DW pair held in long_tmp
+    }
+
+    p->UNTYPED_WRITE(addr, bti, elemNum*2); // NOTE(review): the data presumably comes from the temporaries that follow addr in the selection vector - confirm
+  }
 }
diff --git a/backend/src/backend/gen8_context.hpp b/backend/src/backend/gen8_context.hpp
index 1b9125b..d9e547b 100644
--- a/backend/src/backend/gen8_context.hpp
+++ b/backend/src/backend/gen8_context.hpp
@@ -48,6 +48,8 @@ namespace gbe
     /*! Get the pointer argument size for curbe alloc */
     virtual uint32_t getPointerSize(void) { return 8; }
 
+    virtual void emitWrite64Instruction(const SelectionInstruction &insn);
+    virtual void emitRead64Instruction(const SelectionInstruction &insn);
   protected:
     virtual GenEncoder* generateEncoder(void) {
       return GBE_NEW(Gen8Encoder, this->simdWidth, 8, deviceID);
diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp
index 45347b9..317257b 100644
--- a/backend/src/backend/gen_context.hpp
+++ b/backend/src/backend/gen_context.hpp
@@ -148,8 +148,8 @@ namespace gbe
     void emitBarrierInstruction(const SelectionInstruction &insn);
     void emitFenceInstruction(const SelectionInstruction &insn);
     void emitMathInstruction(const SelectionInstruction &insn);
-    void emitRead64Instruction(const SelectionInstruction &insn);
-    void emitWrite64Instruction(const SelectionInstruction &insn);
+    virtual void emitRead64Instruction(const SelectionInstruction &insn);
+    virtual void emitWrite64Instruction(const SelectionInstruction &insn);
     void emitUntypedReadInstruction(const SelectionInstruction &insn);
     void emitUntypedWriteInstruction(const SelectionInstruction &insn);
     void emitAtomicInstruction(const SelectionInstruction &insn);
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index bf3613d..f78b049 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -569,9 +569,9 @@ namespace gbe
     /*! Atomic instruction */
     void ATOMIC(Reg dst, uint32_t function, uint32_t srcNum, Reg src0, Reg src1, Reg src2, uint32_t bti);
     /*! Read 64 bits float/int array */
-    void READ64(Reg addr, const GenRegister *dst, uint32_t elemNum, uint32_t bti);
+    void READ64(Reg addr, const GenRegister *dst, const GenRegister *tmp, uint32_t elemNum, uint32_t bti, bool native_long);
     /*! Write 64 bits float/int array */
-    void WRITE64(Reg addr, const GenRegister *src, uint32_t srcNum, uint32_t bti);
+    void WRITE64(Reg addr, const GenRegister *src, const GenRegister *tmp, uint32_t srcNum, uint32_t bti, bool native_long);
     /*! Untyped read (up to 4 elements) */
     void UNTYPED_READ(Reg addr, const GenRegister *dst, uint32_t elemNum, uint32_t bti);
     /*! Untyped write (up to 4 elements) */
@@ -1127,16 +1127,29 @@ namespace gbe
 
   void Selection::Opaque::READ64(Reg addr,
                                  const GenRegister *dst,
+                                 const GenRegister *tmp,
                                  uint32_t elemNum,
-                                 uint32_t bti)
+                                 uint32_t bti,
+                                 bool native_long)
   {
-    SelectionInstruction *insn = this->appendInsn(SEL_OP_READ64, elemNum, 1);
+    SelectionInstruction *insn = NULL;
     SelectionVector *srcVector = this->appendVector();
     SelectionVector *dstVector = this->appendVector();
 
-    // Regular instruction to encode
-    for (uint32_t elemID = 0; elemID < elemNum; ++elemID)
-      insn->dst(elemID) = dst[elemID];
+    if (!native_long) {
+      insn = this->appendInsn(SEL_OP_READ64, elemNum, 1);
+      // Regular instruction to encode
+      for (uint32_t elemID = 0; elemID < elemNum; ++elemID)
+        insn->dst(elemID) = dst[elemID];
+    } else {
+      insn = this->appendInsn(SEL_OP_READ64, elemNum*2, 1);
+      for (uint32_t elemID = 0; elemID < elemNum; ++elemID)
+        insn->dst(elemID) = tmp[elemID];
+
+      for (uint32_t elemID = 0; elemID < elemNum; ++elemID)
+        insn->dst(elemID + elemNum) = dst[elemID];
+    }
+
     insn->src(0) = addr;
     insn->setbti(bti);
     insn->extra.elem = elemNum;
@@ -1179,23 +1192,50 @@ namespace gbe
 
   void Selection::Opaque::WRITE64(Reg addr,
                                   const GenRegister *src,
+                                  const GenRegister *tmp,
                                   uint32_t srcNum,
-                                  uint32_t bti)
+                                  uint32_t bti,
+                                  bool native_long)
   {
-    SelectionInstruction *insn = this->appendInsn(SEL_OP_WRITE64, 0, srcNum + 1);
     SelectionVector *vector = this->appendVector();
-
-    // Regular instruction to encode
-    insn->src(0) = addr;
-    for (uint32_t elemID = 0; elemID < srcNum; ++elemID)
-      insn->src(elemID + 1) = src[elemID];
-
-    insn->setbti(bti);
-    insn->extra.elem = srcNum;
-
-    vector->regNum = srcNum + 1;
-    vector->reg = &insn->src(0);
-    vector->isSrc = 1;
+    SelectionInstruction *insn = NULL;
+    // Appends one SEL_OP_WRITE64; native_long selects which operand layout is built (tmp is only read when it is true).
+    if (!native_long) {
+      insn = this->appendInsn(SEL_OP_WRITE64, 0, srcNum + 1);
+      // Regular instruction to encode: sources are [addr, values...], no destinations
+      insn->src(0) = addr;
+      for (uint32_t elemID = 0; elemID < srcNum; ++elemID)
+        insn->src(elemID + 1) = src[elemID];
+
+      insn->setbti(bti);
+      insn->extra.elem = srcNum;
+
+      vector->regNum = srcNum + 1;
+      vector->reg = &insn->src(0);
+      vector->isSrc = 1;
+    } else { // handle the native long case: sources are [values..., addr, tmps...]
+      insn = this->appendInsn(SEL_OP_WRITE64, srcNum, srcNum*2 + 1);
+
+      // The long values come first; the source vector registered below starts at addr
+      for (uint32_t elemID = 0; elemID < srcNum; ++elemID)
+        insn->src(elemID) = src[elemID];
+
+      insn->src(srcNum) = addr;
+      for (uint32_t elemID = 0; elemID < srcNum; ++elemID)
+        insn->src(srcNum + 1 + elemID) = tmp[elemID];
+
+      /* We also need to add the tmp registers to dst, in order
+         to avoid the post schedule error. Each value gets its own tmp. */
+      for (uint32_t elemID = 0; elemID < srcNum; ++elemID)
+        insn->dst(elemID) = tmp[elemID];
+
+      insn->setbti(bti);
+      insn->extra.elem = srcNum;
+
+      vector->regNum = srcNum + 1;
+      vector->reg = &insn->src(srcNum);
+      vector->isSrc = 1;
+    }
   }
 
   void Selection::Opaque::UNTYPED_WRITE(Reg addr,
@@ -2931,7 +2971,17 @@ namespace gbe
       GenRegister tmpAddr = getRelativeAddress(sel, addr, bti.bti[0]);
       for ( uint32_t dstID = 0; dstID < valueNum; ++dstID)
         dst[dstID] = sel.selReg(insn.getValue(dstID), ir::TYPE_U64);
-      sel.READ64(tmpAddr, dst.data(), valueNum, bti.bti[0]);
+
+      if (sel.hasLongType()) {
+        vector<GenRegister> tmp(valueNum);
+        for (uint32_t valueID = 0; valueID < valueNum; ++valueID) {
+          tmp[valueID] = GenRegister::retype(sel.selReg(sel.reg(ir::FAMILY_QWORD), ir::TYPE_U64), GEN_TYPE_UL);
+        }
+
+        sel.READ64(tmpAddr, dst.data(), tmp.data(), valueNum, bti.bti[0], true);
+      } else {
+        sel.READ64(tmpAddr, dst.data(), NULL, valueNum, bti.bti[0], false);
+      }
     }
 
     void readByteAsDWord(Selection::Opaque &sel,
@@ -3246,7 +3296,16 @@ namespace gbe
 
       for (uint32_t valueID = 0; valueID < valueNum; ++valueID)
         src[valueID] = sel.selReg(insn.getValue(valueID), ir::TYPE_U64);
-      sel.WRITE64(addr, src.data(), valueNum, bti);
+
+      if (sel.hasLongType()) {
+        vector<GenRegister> tmp(valueNum);
+        for (uint32_t valueID = 0; valueID < valueNum; ++valueID) {
+          tmp[valueID] = GenRegister::retype(sel.selReg(sel.reg(ir::FAMILY_QWORD), ir::TYPE_U64), GEN_TYPE_UL);
+        }
+        sel.WRITE64(addr, src.data(), tmp.data(), valueNum, bti, true);
+      } else {
+        sel.WRITE64(addr, src.data(), NULL, valueNum, bti, false);
+      }
     }
 
     void emitByteScatter(Selection::Opaque &sel,
-- 
1.9.1



More information about the Beignet mailing list