[Beignet] [PATCH 10/10] Backend: Initial support for long/ulong types in workgroup ops
grigore.lupescu at intel.com
grigore.lupescu at intel.com
Thu Mar 31 15:28:40 UTC 2016
From: Grigore Lupescu <grigore.lupescu at intel.com>
Signed-off-by: Grigore Lupescu <grigore.lupescu at intel.com>
---
backend/src/backend/gen_context.cpp | 71 ++++++++++++++++++++----------
backend/src/backend/gen_insn_selection.cpp | 6 +--
2 files changed, 51 insertions(+), 26 deletions(-)
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index 31232dd..c5c27c6 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -2663,22 +2663,27 @@ namespace gbe
p->MOV(threadLoop, ra->genReg(GenRegister::ud1grf(ir::ocl::threadid)));
}
- /* TODO implement communication for DW types */
- if(dst.type == GEN_TYPE_UL ||
- dst.type == GEN_TYPE_L ||
- dst.type == GEN_TYPE_DF_IMM)
+ /* All threads write the partial results to SLM memory */
+ if(dst.type == GEN_TYPE_UL || dst.type == GEN_TYPE_L)
{
- p->curr.execWidth = 16;
- p->MOV(dst, threadData);
- return;
- }
+ GenRegister threadDataL = GenRegister::retype(threadData, GEN_TYPE_D);
+ GenRegister threadDataH = threadDataL.offset(threadDataL, 0, 4);
+ p->MOV(msgData.offset(msgData, 0), threadDataL);
+ p->MOV(msgData.offset(msgData, 1), threadDataH);
- /* All threads write the partial results to SLM memory */
- p->curr.execWidth = 8;
- p->MOV(msgData, threadData);
- p->MUL(msgAddr, threadId, GenRegister::immd(0x4));
- p->ADD(msgAddr, msgAddr, msgSlmOff);
- p->UNTYPED_WRITE(msg, GenRegister::immw(0xFE), 1);
+ p->curr.execWidth = 8;
+ p->MUL(msgAddr, threadId, GenRegister::immd(0x8));
+ p->ADD(msgAddr, msgAddr, msgSlmOff);
+ p->UNTYPED_WRITE(msg, GenRegister::immw(0xFE), 2);
+ }
+ else
+ {
+ p->curr.execWidth = 8;
+ p->MOV(msgData, threadData);
+ p->MUL(msgAddr, threadId, GenRegister::immd(0x4));
+ p->ADD(msgAddr, msgAddr, msgSlmOff);
+ p->UNTYPED_WRITE(msg, GenRegister::immw(0xFE), 1);
+ }
/* Init partialData register, it will hold the final result */
initValue(p, partialData, wg_op);
@@ -2692,17 +2697,37 @@ namespace gbe
p->push();{
jip0 = p->n_instruction();
- p->curr.execWidth = 8;
- p->curr.predicate = GEN_PREDICATE_NONE;
-
/* Read in chunks of 4 to optimize SLM reads and reduce SEND messages */
- p->ADD(threadLoop, threadLoop, GenRegister::immd(-1));
- p->MUL(msgAddr, threadLoop, GenRegister::immd(0x4));
- p->ADD(msgAddr, msgAddr, msgSlmOff);
- p->UNTYPED_READ(msgData, msgAddr, GenRegister::immw(0xFE), 1);
+ if(dst.type == GEN_TYPE_UL || dst.type == GEN_TYPE_L)
+ {
+ p->curr.execWidth = 8;
+ p->curr.predicate = GEN_PREDICATE_NONE;
+ p->ADD(threadLoop, threadLoop, GenRegister::immd(-1));
+ p->MUL(msgAddr, threadLoop, GenRegister::immd(0x8));
+ p->ADD(msgAddr, msgAddr, msgSlmOff);
+ p->UNTYPED_READ(msgData, msgAddr, GenRegister::immw(0xFE), 2);
+
+ GenRegister msgDataL = msgData.retype(msgData.offset(msgData, 0, 4), GEN_TYPE_D);
+ GenRegister msgDataH = msgData.retype(msgData.offset(msgData, 1, 4), GEN_TYPE_D);
+ msgDataL.hstride = 2;
+ msgDataH.hstride = 2;
+ p->MOV(msgDataL, msgDataH);
+
+ /* Perform operation, partialData will hold result */
+ workgroupOp(partialData, partialData, msgData.offset(msgData, 0), wg_op, p);
+ }
+ else
+ {
+ p->curr.execWidth = 8;
+ p->curr.predicate = GEN_PREDICATE_NONE;
+ p->ADD(threadLoop, threadLoop, GenRegister::immd(-1));
+ p->MUL(msgAddr, threadLoop, GenRegister::immd(0x4));
+ p->ADD(msgAddr, msgAddr, msgSlmOff);
+ p->UNTYPED_READ(msgData, msgAddr, GenRegister::immw(0xFE), 1);
- /* Perform operation, process 4 elements, partialData will hold result */
- workgroupOp(partialData, partialData, msgData.offset(msgData, 0), wg_op, p);
+ /* Perform operation, partialData will hold result */
+ workgroupOp(partialData, partialData, msgData.offset(msgData, 0), wg_op, p);
+ }
/* While threadN is not 0, cycle read SLM / update value */
p->curr.noMask = 1;
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 12a0cf4..3fe0465 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -6462,9 +6462,9 @@ namespace gbe
GBE_ASSERT(srcNum == 3);
GBE_ASSERT(insn.getSrc(0) == ir::ocl::threadn);
GBE_ASSERT(insn.getSrc(1) == ir::ocl::threadid);
- GenRegister tmp = GenRegister::retype(sel.selReg(sel.reg(FAMILY_DWORD)), type);
- GenRegister data = sel.selReg(sel.reg(FAMILY_DWORD), type);
- GenRegister slmOff = sel.selReg(sel.reg(FAMILY_DWORD), ir::TYPE_U32);
+ GenRegister tmp = GenRegister::retype(sel.selReg(sel.reg(FAMILY_QWORD)), type);
+ GenRegister data = sel.selReg(sel.reg(FAMILY_QWORD), type);
+ GenRegister slmOff = sel.selReg(sel.reg(FAMILY_QWORD), ir::TYPE_U32);
vector<GenRegister> msg;
for(uint32_t i = 0; i < 6; i++)
--
2.5.0
More information about the Beignet mailing list