[Beignet] [PATCH] enable sends to write SLM for workgroup op
Pan, Xiuli
xiuli.pan at intel.com
Wed Dec 28 07:54:36 UTC 2016
LGTM.
-----Original Message-----
From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of Guo, Yejun
Sent: Friday, December 23, 2016 5:43 PM
To: beignet at lists.freedesktop.org
Cc: Guo, Yejun <yejun.guo at intel.com>
Subject: [Beignet] [PATCH] enable sends to write SLM for workgroup op
Signed-off-by: Guo, Yejun <yejun.guo at intel.com>
---
backend/src/backend/gen8_context.cpp | 12 +++----
backend/src/backend/gen_context.cpp | 8 ++---
backend/src/backend/gen_insn_selection.cpp | 50 +++++++++++++++++++++---------
backend/src/backend/gen_insn_selection.hpp | 5 ++-
4 files changed, 49 insertions(+), 26 deletions(-)
diff --git a/backend/src/backend/gen8_context.cpp b/backend/src/backend/gen8_context.cpp
index a3045ce..eede52c 100644
--- a/backend/src/backend/gen8_context.cpp
+++ b/backend/src/backend/gen8_context.cpp
@@ -1738,7 +1738,7 @@ namespace gbe
GenRegister barrierId = ra->genReg(GenRegister::ud1grf(ir::ocl::barrierid));
GenRegister localBarrier = ra->genReg(insn.src(5));
- uint32_t wg_op = insn.extra.workgroupOp;
+ uint32_t wg_op = insn.extra.wgop.workgroupOp;
uint32_t simd = p->curr.execWidth;
int32_t jip0, jip1;
@@ -1757,8 +1757,8 @@ namespace gbe
/* use of continuous GRF allocation from insn selection */
GenRegister msg = GenRegister::retype(ra->genReg(insn.dst(2)), dst.type);
GenRegister msgSlmOff = GenRegister::retype(ra->genReg(insn.src(4)), GEN_TYPE_UD);
- GenRegister msgAddr = GenRegister::retype(GenRegister::offset(msg, 0), GEN_TYPE_UD);
- GenRegister msgData = GenRegister::retype(GenRegister::offset(msg, 1), dst.type);
+ GenRegister msgAddr = GenRegister::retype(msg, GEN_TYPE_UD);
+ GenRegister msgData = GenRegister::retype(ra->genReg(insn.dst(3)),
+ dst.type);
/* do some calculation within each thread */
wgOpPerformThread(dst, theVal, threadData, tmp, simd, wg_op, p);
@@ -1799,7 +1799,7 @@ namespace gbe
p->curr.execWidth = 8;
p->MUL(msgAddr, threadId, GenRegister::immd(0x8));
p->ADD(msgAddr, msgAddr, msgSlmOff);
- p->UNTYPED_WRITE(msg, msg, GenRegister::immw(0xFE), 2, false);
+ p->UNTYPED_WRITE(msgAddr, msgData, GenRegister::immw(0xFE), 2,
+ insn.extra.wgop.splitSend);
}
else
{
@@ -1807,7 +1807,7 @@ namespace gbe
p->MOV(msgData, threadData);
p->MUL(msgAddr, threadId, GenRegister::immd(0x4));
p->ADD(msgAddr, msgAddr, msgSlmOff);
- p->UNTYPED_WRITE(msg, msg, GenRegister::immw(0xFE), 1, false);
+ p->UNTYPED_WRITE(msgAddr, msgData, GenRegister::immw(0xFE), 1,
+ insn.extra.wgop.splitSend);
}
/* init partialData register, it will hold the final result */
@@ -1945,7 +1945,7 @@ namespace gbe
const GenRegister theVal = GenRegister::retype(ra->genReg(insn.src(0)), dst.type);
GenRegister threadData = ra->genReg(insn.src(1));
- uint32_t wg_op = insn.extra.workgroupOp;
+ uint32_t wg_op = insn.extra.wgop.workgroupOp;
uint32_t simd = p->curr.execWidth;
/* masked elements should be properly set to init value */
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index c8019e3..5d8861b 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -3252,7 +3252,7 @@ namespace gbe
GenRegister barrierId = ra->genReg(GenRegister::ud1grf(ir::ocl::barrierid));
GenRegister localBarrier = ra->genReg(insn.src(5));
- uint32_t wg_op = insn.extra.workgroupOp;
+ uint32_t wg_op = insn.extra.wgop.workgroupOp;
uint32_t simd = p->curr.execWidth;
int32_t jip0, jip1;
@@ -3271,8 +3271,8 @@ namespace gbe
/* use of continuous GRF allocation from insn selection */
GenRegister msg = GenRegister::retype(ra->genReg(insn.dst(2)), dst.type);
GenRegister msgSlmOff = GenRegister::retype(ra->genReg(insn.src(4)), GEN_TYPE_UD);
- GenRegister msgAddr = GenRegister::retype(GenRegister::offset(msg, 0), GEN_TYPE_UD);
- GenRegister msgData = GenRegister::retype(GenRegister::offset(msg, 1), dst.type);
+ GenRegister msgAddr = GenRegister::retype(msg, GEN_TYPE_UD);
+ GenRegister msgData = GenRegister::retype(ra->genReg(insn.dst(3)),
+ dst.type);
/* do some calculation within each thread */
wgOpPerformThread(dst, theVal, threadData, tmp, simd, wg_op, p);
@@ -3459,7 +3459,7 @@ namespace gbe
const GenRegister theVal = GenRegister::retype(ra->genReg(insn.src(0)), dst.type);
GenRegister threadData = ra->genReg(insn.src(1));
- uint32_t wg_op = insn.extra.workgroupOp;
+ uint32_t wg_op = insn.extra.wgop.workgroupOp;
uint32_t simd = p->curr.execWidth;
/* masked elements should be properly set to init value */
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 128c2bc..bcdba12 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -753,7 +753,7 @@ namespace gbe
GenRegister tmpData1,
GenRegister localThreadID, GenRegister localThreadNUM,
GenRegister tmpData2, GenRegister slmOff,
- vector<GenRegister> msg, uint32_t msgSizeReq,
+ vector<GenRegister> msg,
GenRegister localBarrier);
/*! Sub Group Operations */
void SUBGROUP_OP(uint32_t wg_op, Reg dst, GenRegister src,
@@ -2255,19 +2255,11 @@ namespace gbe
GenRegister tmpData2,
GenRegister slmOff,
vector<GenRegister> msg,
- uint32_t msgSizeReq,
GenRegister localBarrier)
{
SelectionInstruction *insn = this->appendInsn(SEL_OP_WORKGROUP_OP, 2 + msg.size(), 6);
- SelectionVector *vector = this->appendVector();
- /* allocate continuous GRF registers for READ/WRITE to SLM */
- GBE_ASSERT(msg.size() >= msgSizeReq);
- vector->regNum = msg.size();
- vector->offsetID = 0;
- vector->reg = &insn->dst(2);
- vector->isSrc = 0;
- insn->extra.workgroupOp = wg_op;
+ insn->extra.wgop.workgroupOp = wg_op;
insn->dst(0) = dst;
insn->dst(1) = tmpData1;
@@ -2280,6 +2272,29 @@ namespace gbe
insn->src(3) = tmpData2;
insn->src(4) = slmOff;
insn->src(5) = localBarrier;
+
+ if (hasSends()) {
+ insn->extra.wgop.splitSend = 1;
+ SelectionVector *vector = this->appendVector();
+
+ vector->regNum = 1;
+ vector->offsetID = 2;
+ vector->reg = &insn->dst(2);
+ vector->isSrc = 0;
+
+ vector = this->appendVector();
+ vector->regNum = msg.size() - 1;
+ vector->offsetID = 3;
+ vector->reg = &insn->dst(3);
+ vector->isSrc = 0;
+ } else {
+ /* allocate continuous GRF registers for READ/WRITE to SLM */
+ SelectionVector *vector = this->appendVector();
+ vector->regNum = msg.size();
+ vector->offsetID = 2;
+ vector->reg = &insn->dst(2);
+ vector->isSrc = 0;
+ }
}
void Selection::Opaque::SUBGROUP_OP(uint32_t wg_op,
@@ -2290,7 +2305,7 @@ namespace gbe
{
SelectionInstruction *insn = this->appendInsn(SEL_OP_SUBGROUP_OP, 2, 2);
- insn->extra.workgroupOp = wg_op;
+ insn->extra.wgop.workgroupOp = wg_op;
insn->dst(0) = dst;
insn->dst(1) = tmpData1;
@@ -7451,10 +7466,15 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp
GenRegister localBarrier = GenRegister::ud8grf(sel.reg(FAMILY_DWORD));
/* Allocate registers for message sending
- * (read/write to shared local memory) */
+ * (read/write to shared local memory),
+ * only one data (ud/ul) is needed for thread communication,
+ * we will always use SIMD8 to do the read/write
+ */
vector<GenRegister> msg;
- for(uint32_t i = 0; i < 6; i++)
- msg.push_back(sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32));
+ msg.push_back(GenRegister::ud8grf(sel.reg(ir::FAMILY_REG))); //address
+ msg.push_back(GenRegister::ud8grf(sel.reg(ir::FAMILY_REG))); //data
+ if(dst.type == GEN_TYPE_UL || dst.type == GEN_TYPE_L)
+ msg.push_back(GenRegister::ud8grf(sel.reg(ir::FAMILY_REG)));
+ //data
/* Insert a barrier to make sure all the var we are interested in
have been assigned the final value. */
@@ -7466,7 +7486,7 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp
/* Perform workgroup op */
sel.WORKGROUP_OP(workGroupOp, dst, src, tmpData1,
- localThreadID, localThreadNUM, tmpData2, slmOff, msg, 6,
+ localThreadID, localThreadNUM, tmpData2, slmOff,
+ msg,
localBarrier);
return true;
diff --git a/backend/src/backend/gen_insn_selection.hpp b/backend/src/backend/gen_insn_selection.hpp
index 01999a2..8846372 100644
--- a/backend/src/backend/gen_insn_selection.hpp
+++ b/backend/src/backend/gen_insn_selection.hpp
@@ -159,7 +159,10 @@ namespace gbe
uint32_t continueFlag:8;
uint16_t printfSize;
};
- uint32_t workgroupOp;
+ struct {
+ uint16_t workgroupOp;
+ uint16_t splitSend:1;
+ }wgop;
} extra;
/*! Gen opcode */
uint8_t opcode;
--
1.9.1
_______________________________________________
Beignet mailing list
Beignet at lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list