[Beignet] [PATCH] enable sends to write SLM for workgroup op

Guo, Yejun yejun.guo at intel.com
Fri Dec 23 09:43:27 UTC 2016


Signed-off-by: Guo, Yejun <yejun.guo at intel.com>
---
 backend/src/backend/gen8_context.cpp       | 12 +++----
 backend/src/backend/gen_context.cpp        |  8 ++---
 backend/src/backend/gen_insn_selection.cpp | 50 +++++++++++++++++++++---------
 backend/src/backend/gen_insn_selection.hpp |  5 ++-
 4 files changed, 49 insertions(+), 26 deletions(-)

diff --git a/backend/src/backend/gen8_context.cpp b/backend/src/backend/gen8_context.cpp
index a3045ce..eede52c 100644
--- a/backend/src/backend/gen8_context.cpp
+++ b/backend/src/backend/gen8_context.cpp
@@ -1738,7 +1738,7 @@ namespace gbe
     GenRegister barrierId = ra->genReg(GenRegister::ud1grf(ir::ocl::barrierid));
     GenRegister localBarrier = ra->genReg(insn.src(5));
 
-    uint32_t wg_op = insn.extra.workgroupOp;
+    uint32_t wg_op = insn.extra.wgop.workgroupOp;
     uint32_t simd = p->curr.execWidth;
     int32_t jip0, jip1;
 
@@ -1757,8 +1757,8 @@ namespace gbe
     /* use of continuous GRF allocation from insn selection */
     GenRegister msg = GenRegister::retype(ra->genReg(insn.dst(2)), dst.type);
     GenRegister msgSlmOff = GenRegister::retype(ra->genReg(insn.src(4)), GEN_TYPE_UD);
-    GenRegister msgAddr = GenRegister::retype(GenRegister::offset(msg, 0), GEN_TYPE_UD);
-    GenRegister msgData = GenRegister::retype(GenRegister::offset(msg, 1), dst.type);
+    GenRegister msgAddr = GenRegister::retype(msg, GEN_TYPE_UD);
+    GenRegister msgData = GenRegister::retype(ra->genReg(insn.dst(3)), dst.type);
 
     /* do some calculation within each thread */
     wgOpPerformThread(dst, theVal, threadData, tmp, simd, wg_op, p);
@@ -1799,7 +1799,7 @@ namespace gbe
       p->curr.execWidth = 8;
       p->MUL(msgAddr, threadId, GenRegister::immd(0x8));
       p->ADD(msgAddr, msgAddr, msgSlmOff);
-      p->UNTYPED_WRITE(msg, msg, GenRegister::immw(0xFE), 2, false);
+      p->UNTYPED_WRITE(msgAddr, msgData, GenRegister::immw(0xFE), 2, insn.extra.wgop.splitSend);
     }
     else
     {
@@ -1807,7 +1807,7 @@ namespace gbe
       p->MOV(msgData, threadData);
       p->MUL(msgAddr, threadId, GenRegister::immd(0x4));
       p->ADD(msgAddr, msgAddr, msgSlmOff);
-      p->UNTYPED_WRITE(msg, msg, GenRegister::immw(0xFE), 1, false);
+      p->UNTYPED_WRITE(msgAddr, msgData, GenRegister::immw(0xFE), 1, insn.extra.wgop.splitSend);
     }
 
     /* init partialData register, it will hold the final result */
@@ -1945,7 +1945,7 @@ namespace gbe
     const GenRegister theVal = GenRegister::retype(ra->genReg(insn.src(0)), dst.type);
     GenRegister threadData = ra->genReg(insn.src(1));
 
-    uint32_t wg_op = insn.extra.workgroupOp;
+    uint32_t wg_op = insn.extra.wgop.workgroupOp;
     uint32_t simd = p->curr.execWidth;
 
     /* masked elements should be properly set to init value */
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index c8019e3..5d8861b 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -3252,7 +3252,7 @@ namespace gbe
     GenRegister barrierId = ra->genReg(GenRegister::ud1grf(ir::ocl::barrierid));
     GenRegister localBarrier = ra->genReg(insn.src(5));
 
-    uint32_t wg_op = insn.extra.workgroupOp;
+    uint32_t wg_op = insn.extra.wgop.workgroupOp;
     uint32_t simd = p->curr.execWidth;
     int32_t jip0, jip1;
 
@@ -3271,8 +3271,8 @@ namespace gbe
     /* use of continuous GRF allocation from insn selection */
     GenRegister msg = GenRegister::retype(ra->genReg(insn.dst(2)), dst.type);
     GenRegister msgSlmOff = GenRegister::retype(ra->genReg(insn.src(4)), GEN_TYPE_UD);
-    GenRegister msgAddr = GenRegister::retype(GenRegister::offset(msg, 0), GEN_TYPE_UD);
-    GenRegister msgData = GenRegister::retype(GenRegister::offset(msg, 1), dst.type);
+    GenRegister msgAddr = GenRegister::retype(msg, GEN_TYPE_UD);
+    GenRegister msgData = GenRegister::retype(ra->genReg(insn.dst(3)), dst.type);
 
     /* do some calculation within each thread */
     wgOpPerformThread(dst, theVal, threadData, tmp, simd, wg_op, p);
@@ -3459,7 +3459,7 @@ namespace gbe
     const GenRegister theVal = GenRegister::retype(ra->genReg(insn.src(0)), dst.type);
     GenRegister threadData = ra->genReg(insn.src(1));
 
-    uint32_t wg_op = insn.extra.workgroupOp;
+    uint32_t wg_op = insn.extra.wgop.workgroupOp;
     uint32_t simd = p->curr.execWidth;
 
     /* masked elements should be properly set to init value */
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 128c2bc..bcdba12 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -753,7 +753,7 @@ namespace gbe
                       GenRegister tmpData1,
                       GenRegister localThreadID, GenRegister localThreadNUM,
                       GenRegister tmpData2, GenRegister slmOff,
-                      vector<GenRegister> msg, uint32_t msgSizeReq,
+                      vector<GenRegister> msg,
                       GenRegister localBarrier);
     /*! Sub Group Operations */
     void SUBGROUP_OP(uint32_t wg_op, Reg dst, GenRegister src,
@@ -2255,19 +2255,11 @@ namespace gbe
                                        GenRegister tmpData2,
                                        GenRegister slmOff,
                                        vector<GenRegister> msg,
-                                       uint32_t msgSizeReq,
                                        GenRegister localBarrier)
   {
     SelectionInstruction *insn = this->appendInsn(SEL_OP_WORKGROUP_OP, 2 + msg.size(), 6);
-    SelectionVector *vector = this->appendVector();
 
-    /* allocate continuous GRF registers for READ/WRITE to SLM */
-    GBE_ASSERT(msg.size() >= msgSizeReq);
-    vector->regNum = msg.size();
-    vector->offsetID = 0;
-    vector->reg = &insn->dst(2);
-    vector->isSrc = 0;
-    insn->extra.workgroupOp = wg_op;
+    insn->extra.wgop.workgroupOp = wg_op;
 
     insn->dst(0) = dst;
     insn->dst(1) = tmpData1;
@@ -2280,6 +2272,29 @@ namespace gbe
     insn->src(3) = tmpData2;
     insn->src(4) = slmOff;
     insn->src(5) = localBarrier;
+
+    if (hasSends()) {
+      insn->extra.wgop.splitSend = 1;
+      SelectionVector *vector = this->appendVector();
+
+      vector->regNum = 1;
+      vector->offsetID = 2;
+      vector->reg = &insn->dst(2);
+      vector->isSrc = 0;
+
+      vector = this->appendVector();
+      vector->regNum = msg.size() - 1;
+      vector->offsetID = 3;
+      vector->reg = &insn->dst(3);
+      vector->isSrc = 0;
+    } else {
+      /* allocate contiguous GRF registers for READ/WRITE to SLM */
+      SelectionVector *vector = this->appendVector();
+      vector->regNum = msg.size();
+      vector->offsetID = 2;
+      vector->reg = &insn->dst(2);
+      vector->isSrc = 0;
+    }
   }
 
   void Selection::Opaque::SUBGROUP_OP(uint32_t wg_op,
@@ -2290,7 +2305,7 @@ namespace gbe
   {
     SelectionInstruction *insn = this->appendInsn(SEL_OP_SUBGROUP_OP, 2, 2);
 
-    insn->extra.workgroupOp = wg_op;
+    insn->extra.wgop.workgroupOp = wg_op;
 
     insn->dst(0) = dst;
     insn->dst(1) = tmpData1;
@@ -7451,10 +7466,15 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp
       GenRegister localBarrier = GenRegister::ud8grf(sel.reg(FAMILY_DWORD));
 
       /* Allocate registers for message sending
-       * (read/write to shared local memory) */
+       * (read/write to shared local memory).
+       * Only one data element (ud/ul) is needed for thread communication,
+       * and we always use SIMD8 to do the read/write.
+       */
       vector<GenRegister> msg;
-      for(uint32_t i = 0; i < 6; i++)
-        msg.push_back(sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32));
+      msg.push_back(GenRegister::ud8grf(sel.reg(ir::FAMILY_REG)));  //address
+      msg.push_back(GenRegister::ud8grf(sel.reg(ir::FAMILY_REG)));  //data
+      if(dst.type == GEN_TYPE_UL || dst.type == GEN_TYPE_L)
+        msg.push_back(GenRegister::ud8grf(sel.reg(ir::FAMILY_REG)));  //data (extra register, 64-bit types only)
 
       /* Insert a barrier to make sure all the var we are interested in
          have been assigned the final value. */
@@ -7466,7 +7486,7 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp
 
       /* Perform workgroup op */
       sel.WORKGROUP_OP(workGroupOp, dst, src, tmpData1,
-                       localThreadID, localThreadNUM, tmpData2, slmOff, msg, 6,
+                       localThreadID, localThreadNUM, tmpData2, slmOff, msg,
                        localBarrier);
 
       return true;
diff --git a/backend/src/backend/gen_insn_selection.hpp b/backend/src/backend/gen_insn_selection.hpp
index 01999a2..8846372 100644
--- a/backend/src/backend/gen_insn_selection.hpp
+++ b/backend/src/backend/gen_insn_selection.hpp
@@ -159,7 +159,10 @@ namespace gbe
         uint32_t continueFlag:8;
         uint16_t printfSize;
       };
-      uint32_t workgroupOp;
+      struct {
+        uint16_t workgroupOp;
+        uint16_t splitSend:1;
+      }wgop;
     } extra;
     /*! Gen opcode */
     uint8_t opcode;
-- 
1.9.1



More information about the Beignet mailing list