[Beignet] [PATCH V2 04/17] Backend: Fix barrier placement in workgroup functions

Grigore Lupescu grigore.lupescu at intel.com
Mon Apr 11 14:35:52 UTC 2016


From: Grigore Lupescu <grigore.lupescu at intel.com>

Signed-off-by: Grigore Lupescu <grigore.lupescu at intel.com>
---
 backend/src/backend/gen_context.cpp        | 15 +++++++++++----
 backend/src/backend/gen_insn_selection.cpp | 13 ++++++++-----
 2 files changed, 19 insertions(+), 9 deletions(-)

diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index 7a1c3cc..056fe3a 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -2625,6 +2625,7 @@ namespace gbe
     GenRegister partialData = GenRegister::toUniform(threadData, dst.type);
     GenRegister threadId = ra->genReg(insn.src(0));
     GenRegister threadLoop = ra->genReg(insn.src(1));
+    GenRegister barrierId = ra->genReg(GenRegister::ud1grf(ir::ocl::barrierid));
 
     uint32_t wg_op = insn.extra.workgroupOp;
     uint32_t simd = p->curr.execWidth;
@@ -2693,10 +2694,16 @@ namespace gbe
     /* Init partialData register, it will hold the final result */
     initValue(p, partialData, wg_op);
 
-    p->FENCE(msgData);
-    p->MOV(msgData, msgData);
-    p->FENCE(msgData);
-    p->MOV(msgData, msgData);
+    /* Add call to barrier */
+    p->push();
+      p->curr.execWidth = 8;
+      p->curr.physicalFlag = 0;
+      p->curr.noMask = 1;
+      p->AND(msgData, barrierId, GenRegister::immud(0x0f000000));
+      p->BARRIER(msgData);
+      p->curr.execWidth = 1;
+      p->WAIT();
+    p->pop();
 
     /* Perform a loop, based on thread count (which is now multiple of 4) */
     p->push();{
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 3fe0465..cffb016 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -6470,11 +6470,14 @@ namespace gbe
       for(uint32_t i = 0; i < 6; i++)
         msg.push_back(sel.selReg(sel.reg(FAMILY_DWORD), type));
 
+      /* Insert a barrier to make sure all the variables we are interested in
+         have been assigned their final values. */
+      sel.BARRIER(GenRegister::ud8grf(sel.reg(FAMILY_DWORD)), sel.selReg(sel.reg(FAMILY_DWORD)), syncLocalBarrier);
+
       /* compute individual slice of workitems, (e.g. 0->16 workitems) */
       sel.MOV(slmOff, GenRegister::immud(insn.getSlmAddr()));
 
       /* barrier for syn prior to workgroup */
-      sel.BARRIER(GenRegister::ud8grf(sel.reg(FAMILY_DWORD)), sel.selReg(sel.reg(FAMILY_DWORD)), syncLocalBarrier);
       sel.WORKGROUP_OP(workGroupOp, dst, src, data, threadId, threadN, tmp, slmOff, msg);
 
       return true;
@@ -6496,10 +6499,6 @@ namespace gbe
       GenRegister addr = sel.selReg(sel.reg(FAMILY_DWORD), ir::TYPE_U32);
       vector<GenRegister> fakeTemps;
 
-      /* Then we insert a barrier to make sure all the var we are interested in
-         have been assigned the final value. */
-      sel.BARRIER(GenRegister::ud8grf(sel.reg(FAMILY_DWORD)), sel.selReg(sel.reg(FAMILY_DWORD)), syncLocalBarrier);
-
       GBE_ASSERT(srcNum >= 2);
       GenRegister coords[3];
       for (uint32_t i = 1; i < srcNum; i++) {
@@ -6512,6 +6511,10 @@ namespace gbe
         sel.MOV(addr, GenRegister::immud(slmAddr));
       } sel.pop();
 
+      /* Insert a barrier to make sure all the variables we are interested in
+         have been assigned their final values. */
+      sel.BARRIER(GenRegister::ud8grf(sel.reg(FAMILY_DWORD)), sel.selReg(sel.reg(FAMILY_DWORD)), syncLocalBarrier);
+
       sel.push(); {
         sel.curr.flag = 0;
         sel.curr.subFlag = 1;
-- 
2.5.0



More information about the Beignet mailing list