[Beignet] [PATCH V2 04/17] Backend: Fix barrier placement in workgroup functions
Grigore Lupescu
grigore.lupescu at intel.com
Mon Apr 11 14:35:52 UTC 2016
From: Grigore Lupescu <grigore.lupescu at intel.com>
Signed-off-by: Grigore Lupescu <grigore.lupescu at intel.com>
---
backend/src/backend/gen_context.cpp | 15 +++++++++++----
backend/src/backend/gen_insn_selection.cpp | 13 ++++++++-----
2 files changed, 19 insertions(+), 9 deletions(-)
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index 7a1c3cc..056fe3a 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -2625,6 +2625,7 @@ namespace gbe
GenRegister partialData = GenRegister::toUniform(threadData, dst.type);
GenRegister threadId = ra->genReg(insn.src(0));
GenRegister threadLoop = ra->genReg(insn.src(1));
+ GenRegister barrierId = ra->genReg(GenRegister::ud1grf(ir::ocl::barrierid));
uint32_t wg_op = insn.extra.workgroupOp;
uint32_t simd = p->curr.execWidth;
@@ -2693,10 +2694,16 @@ namespace gbe
/* Init partialData register, it will hold the final result */
initValue(p, partialData, wg_op);
- p->FENCE(msgData);
- p->MOV(msgData, msgData);
- p->FENCE(msgData);
- p->MOV(msgData, msgData);
+ /* Add call to barrier */
+ p->push();
+ p->curr.execWidth = 8;
+ p->curr.physicalFlag = 0;
+ p->curr.noMask = 1;
+ p->AND(msgData, barrierId, GenRegister::immud(0x0f000000));
+ p->BARRIER(msgData);
+ p->curr.execWidth = 1;
+ p->WAIT();
+ p->pop();
/* Perform a loop, based on thread count (which is now multiple of 4) */
p->push();{
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 3fe0465..cffb016 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -6470,11 +6470,14 @@ namespace gbe
for(uint32_t i = 0; i < 6; i++)
msg.push_back(sel.selReg(sel.reg(FAMILY_DWORD), type));
+ /* insert a barrier to make sure all the var we are interested in
+ have been assigned the final value. */
+ sel.BARRIER(GenRegister::ud8grf(sel.reg(FAMILY_DWORD)), sel.selReg(sel.reg(FAMILY_DWORD)), syncLocalBarrier);
+
/* compute individual slice of workitems, (e.g. 0->16 workitems) */
sel.MOV(slmOff, GenRegister::immud(insn.getSlmAddr()));
/* barrier for syn prior to workgroup */
- sel.BARRIER(GenRegister::ud8grf(sel.reg(FAMILY_DWORD)), sel.selReg(sel.reg(FAMILY_DWORD)), syncLocalBarrier);
sel.WORKGROUP_OP(workGroupOp, dst, src, data, threadId, threadN, tmp, slmOff, msg);
return true;
@@ -6496,10 +6499,6 @@ namespace gbe
GenRegister addr = sel.selReg(sel.reg(FAMILY_DWORD), ir::TYPE_U32);
vector<GenRegister> fakeTemps;
- /* Then we insert a barrier to make sure all the var we are interested in
- have been assigned the final value. */
- sel.BARRIER(GenRegister::ud8grf(sel.reg(FAMILY_DWORD)), sel.selReg(sel.reg(FAMILY_DWORD)), syncLocalBarrier);
-
GBE_ASSERT(srcNum >= 2);
GenRegister coords[3];
for (uint32_t i = 1; i < srcNum; i++) {
@@ -6512,6 +6511,10 @@ namespace gbe
sel.MOV(addr, GenRegister::immud(slmAddr));
} sel.pop();
+ /* insert a barrier to make sure all the var we are interested in
+ have been assigned the final value. */
+ sel.BARRIER(GenRegister::ud8grf(sel.reg(FAMILY_DWORD)), sel.selReg(sel.reg(FAMILY_DWORD)), syncLocalBarrier);
+
sel.push(); {
sel.curr.flag = 0;
sel.curr.subFlag = 1;
--
2.5.0
More information about the Beignet mailing list