[Beignet] [PATCH 1/2] GBE: fix insntruction scheduling related bugs in read64/write64.

Song, Ruiling ruiling.song at intel.com
Fri Aug 9 00:43:12 PDT 2013


Patch 2 LGTM.
For patch 1, could you explain a little bit more? Why the post instruction scheduler behave incorrect when temporary registers in src array but correct in dst array?

-----Original Message-----
From: beignet-bounces+ruiling.song=intel.com at lists.freedesktop.org [mailto:beignet-bounces+ruiling.song=intel.com at lists.freedesktop.org] On Behalf Of Zhigang Gong
Sent: Thursday, August 08, 2013 3:16 PM
To: beignet at lists.freedesktop.org
Cc: Zhigang Gong
Subject: [Beignet] [PATCH 1/2] GBE: fix insntruction scheduling related bugs in read64/write64.

In read64 and write64, we allocate some temporary registers and we should put all of those temporary registers may be modified to the instruction's dst array. Otherwise, the latter post instruction scheduling may rearrange the instruction incorrectly.

Signed-off-by: Zhigang Gong <zhigang.gong at linux.intel.com>
---
 backend/src/backend/gen_context.cpp        |   13 ++++---
 backend/src/backend/gen_encoder.cpp        |    6 ---
 backend/src/backend/gen_insn_selection.cpp |   57 ++++++++++++++++------------
 3 files changed, 39 insertions(+), 37 deletions(-)

diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index 621e7be..6de6cf1 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -601,10 +601,10 @@ namespace gbe
   void GenContext::emitRead64Instruction(const SelectionInstruction &insn) {
     const uint32_t elemNum = insn.extra.elem;
     const uint32_t tmpRegSize = (p->curr.execWidth == 8) ? elemNum * 2 : elemNum;
-    const GenRegister dst = ra->genReg(insn.dst(tmpRegSize));
-    const GenRegister tmp = ra->genReg(insn.dst(0));
+    const GenRegister tempAddr = ra->genReg(insn.dst(0));
+    const GenRegister dst = ra->genReg(insn.dst(tmpRegSize + 1));
+    const GenRegister tmp = ra->genReg(insn.dst(1));
     const GenRegister src = ra->genReg(insn.src(0));
-    const GenRegister tempAddr = ra->genReg(insn.src(1));
     const uint32_t bti = insn.extra.function;
     p->READ64(dst, tmp, tempAddr, src, bti, elemNum);
   }
@@ -621,11 +621,12 @@ namespace gbe
   //  then follow the real destination registers.
   //  For SIMD16, we allocate elemNum temporary registers from dst(0).
   void GenContext::emitWrite64Instruction(const SelectionInstruction &insn) {
-    const GenRegister src = ra->genReg(insn.src(0));
+    const GenRegister src = ra->genReg(insn.dst(0));
     const uint32_t elemNum = insn.extra.elem;
-    const uint32_t tmpRegSize = (p->curr.execWidth == 8) ? elemNum * 2 : elemNum;
-    const GenRegister data = ra->genReg(insn.src(tmpRegSize + 1));
+    const GenRegister addr = ra->genReg(insn.src(0)); //tmpRegSize + 1));
+    const GenRegister data = ra->genReg(insn.src(1));
     const uint32_t bti = insn.extra.function;
+    p->MOV(src, addr);
     p->WRITE64(src, data, bti, elemNum);
   }
 
diff --git a/backend/src/backend/gen_encoder.cpp b/backend/src/backend/gen_encoder.cpp
index 4d6aa34..1a459e1 100644
--- a/backend/src/backend/gen_encoder.cpp
+++ b/backend/src/backend/gen_encoder.cpp
@@ -412,12 +412,6 @@ namespace gbe
       curr.noMask = originMask;
       this->UNTYPED_WRITE(msg, bti, elemNum);
     }
-    /* Let's restore the original message(addr) register. */
-    /* XXX could be optimized if we don't allocate the address to the header
-       position of the message. */
-    curr.predicate = GEN_PREDICATE_NONE;
-    curr.noMask = GEN_MASK_DISABLE;
-    ADD(msg, GenRegister::retype(msg, GEN_TYPE_UD), GenRegister::immd(-4));
     pop();
   }
 
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 1a3af68..a361ab3 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -479,7 +479,7 @@ namespace gbe
     /*! Read 64 bits float/int array */
     void READ64(Reg addr, Reg tempAddr, const GenRegister *dst, uint32_t elemNum, uint32_t valueNum, uint32_t bti);
     /*! Write 64 bits float/int array */
-    void WRITE64(Reg addr, const GenRegister *src, uint32_t elemNum, uint32_t valueNum, uint32_t bti);
+    void WRITE64(Reg addr, const GenRegister *src, uint32_t srcNum, 
+ const GenRegister *dst, uint32_t dstNum, uint32_t bti);
     /*! Untyped read (up to 4 elements) */
     void UNTYPED_READ(Reg addr, const GenRegister *dst, uint32_t elemNum, uint32_t bti);
     /*! Untyped write (up to 4 elements) */ @@ -825,28 +825,29 @@ namespace gbe
   /* elemNum contains all the temporary register and the
      real destination registers.*/
   void Selection::Opaque::READ64(Reg addr,
-                                       Reg tempAddr,
-                                       const GenRegister *dst,
-                                       uint32_t elemNum,
-                                       uint32_t valueNum,
-                                       uint32_t bti)
+                                 Reg tempAddr,
+                                 const GenRegister *dst,
+                                 uint32_t elemNum,
+                                 uint32_t valueNum,
+                                 uint32_t bti)
   {
-    SelectionInstruction *insn = this->appendInsn(SEL_OP_READ64, elemNum, 2);
+    SelectionInstruction *insn = this->appendInsn(SEL_OP_READ64, 
+ elemNum + 1, 1);
     SelectionVector *srcVector = this->appendVector();
     SelectionVector *dstVector = this->appendVector();
 
+    /* temporary addr register is to be modified, set it to dst registers.*/
+    insn->dst(0) = tempAddr;
     // Regular instruction to encode
     for (uint32_t elemID = 0; elemID < elemNum; ++elemID)
-      insn->dst(elemID) = dst[elemID];
+      insn->dst(elemID + 1) = dst[elemID];
     insn->src(0) = addr;
-    insn->src(1) = tempAddr;
     insn->extra.function = bti;
     insn->extra.elem = valueNum;
 
     // Only the temporary registers need contiguous allocation
     dstVector->regNum = elemNum - valueNum;
     dstVector->isSrc = 0;
-    dstVector->reg = &insn->dst(0);
+    dstVector->reg = &insn->dst(1);
 
     // Source cannot be scalar (yet)
     srcVector->regNum = 1;
@@ -883,24 +884,27 @@ namespace gbe
   /* elemNum contains all the temporary register and the
      real data registers.*/
   void Selection::Opaque::WRITE64(Reg addr,
-                                        const GenRegister *src,
-                                        uint32_t elemNum,
-                                        uint32_t valueNum,
-                                        uint32_t bti)
+                                  const GenRegister *src,
+                                  uint32_t srcNum,
+                                  const GenRegister *dst,
+                                  uint32_t dstNum,
+                                  uint32_t bti)
   {
-    SelectionInstruction *insn = this->appendInsn(SEL_OP_WRITE64, 0, elemNum+1);
+    SelectionInstruction *insn = this->appendInsn(SEL_OP_WRITE64, 
+ dstNum, srcNum + 1);
     SelectionVector *vector = this->appendVector();
 
     // Regular instruction to encode
     insn->src(0) = addr;
-    for (uint32_t elemID = 0; elemID < elemNum; ++elemID)
-      insn->src(elemID+1) = src[elemID];
+    for (uint32_t elemID = 0; elemID < srcNum; ++elemID)
+      insn->src(elemID + 1) = src[elemID];
+    for (uint32_t elemID = 0; elemID < dstNum; ++elemID)
+      insn->dst(elemID) = dst[elemID];
     insn->extra.function = bti;
-    insn->extra.elem = valueNum;
+    insn->extra.elem = srcNum;
 
     // Only the addr + temporary registers need to be contiguous.
-    vector->regNum = (elemNum - valueNum) + 1;
-    vector->reg = &insn->src(0);
+    vector->regNum = dstNum;
+    vector->reg = &insn->dst(0);
     vector->isSrc = 1;
   }
 
@@ -2085,13 +2089,16 @@ namespace gbe
       addr = GenRegister::retype(sel.selReg(insn.getSrc(addrID)), GEN_TYPE_F);
       // The first 16 DWORD register space is for temporary usage at encode stage.
       uint32_t tmpRegNum = (sel.ctx.getSimdWidth() == 8) ? valueNum * 2 : valueNum;
-      GenRegister src[valueNum + tmpRegNum];
+      GenRegister src[valueNum];
+      GenRegister dst[tmpRegNum + 1];
+      /* dst 0 is for the temporary address register. */
+      dst[0] = sel.selReg(sel.reg(FAMILY_DWORD));
       for (srcID = 0; srcID < tmpRegNum; ++srcID)
-        src[srcID] = sel.selReg(sel.reg(FAMILY_DWORD));
+        dst[srcID + 1] = sel.selReg(sel.reg(FAMILY_DWORD));
 
-      for (uint32_t valueID = 0; valueID < valueNum; ++srcID, ++valueID)
-        src[srcID] = sel.selReg(insn.getValue(valueID));
-      sel.WRITE64(addr, src, valueNum + tmpRegNum, valueNum, bti);
+      for (uint32_t valueID = 0; valueID < valueNum; ++valueID)
+        src[valueID] = sel.selReg(insn.getValue(valueID));
+      sel.WRITE64(addr, src, valueNum, dst, tmpRegNum + 1, bti);
     }
 
     void emitByteScatter(Selection::Opaque &sel,
--
1.7.9.5

_______________________________________________
Beignet mailing list
Beignet at lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list