[Beignet] [PATCH] Backend: Fix workgroup broadcast on QWORD, 1D works
Grigore Lupescu
grigore.lupescu at intel.com
Thu Apr 7 18:18:10 UTC 2016
From: Grigore Lupescu <grigore.lupescu at intel.com>
Signed-off-by: Grigore Lupescu <grigore.lupescu at intel.com>
---
backend/src/backend/gen_insn_selection.cpp | 62 +++++++++++++++++++++---------
1 file changed, 43 insertions(+), 19 deletions(-)
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 103a70c..c872ad9 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -6494,6 +6494,8 @@ namespace gbe
const uint32_t slmAddr = insn.getSlmAddr();
GenRegister addr = sel.selReg(sel.reg(FAMILY_DWORD), ir::TYPE_U32);
vector<GenRegister> fakeTemps;
+ fakeTemps.push_back(sel.selReg(sel.reg(FAMILY_DWORD), type));
+ fakeTemps.push_back(sel.selReg(sel.reg(FAMILY_DWORD), type));
/* Then we insert a barrier to make sure all the var we are interested in
have been assigned the final value. */
@@ -6511,8 +6513,6 @@ namespace gbe
sel.MOV(addr, GenRegister::immud(slmAddr));
} sel.pop();
- sel.MOV(dst, GenRegister::immd(0x0));
-
sel.push(); {
sel.curr.flag = 0;
sel.curr.subFlag = 1;
@@ -6531,42 +6531,66 @@ namespace gbe
if (dim >= 3)
sel.CMP(GEN_CONDITIONAL_EQ, coords[2], lid2, GenRegister::retype(GenRegister::null(), GEN_TYPE_UD));
+ /* write to SLM for BYTE/WORD/DWORD types */
if (typeSize(src.type) <= 4) {
GenRegister _addr = GenRegister::retype(addr, GEN_TYPE_UD);
GenRegister _src = GenRegister::retype(src, GEN_TYPE_UD);
sel.UNTYPED_WRITE(_addr, &_src, 1, GenRegister::immw(0xfe), fakeTemps);
}
- /* TODO: work in progress QWORD */
+ /* write to SLM for QWORD types */
else if (typeSize(src.type) == 8) {
- GenRegister _addr = GenRegister::retype(addr, GEN_TYPE_UD);
- vector<GenRegister> _src;
- _src.push_back(sel.selReg(sel.reg(FAMILY_DWORD), ir::TYPE_U32));
- _src.push_back(sel.selReg(sel.reg(FAMILY_DWORD), ir::TYPE_U32));
- _src[0] = GenRegister::retype(src, GEN_TYPE_UD);
- _src[1] = src.offset(src, 0, 4);
- sel.UNTYPED_WRITE(_addr, &_src[0], 2, GenRegister::immw(0xfe), fakeTemps);
+ sel.push(); {
+ /* arrange data in QWORD */
+ GenRegister _addr = GenRegister::retype(addr, GEN_TYPE_UD);
+ GenRegister srcQW = sel.selReg(sel.reg(FAMILY_QWORD), ir::TYPE_U64);
+ GenRegister srcQW_p1 = src.retype(srcQW, GEN_TYPE_UD);
+ GenRegister srcQW_p2 = src.retype(src.offset(srcQW, 2, 0), GEN_TYPE_UD);
+ vector<GenRegister> srcVec;
+ srcVec.push_back(srcQW_p1);
+ srcVec.push_back(srcQW_p2);
+
+ /* unpack into 2 DWORDs */
+ sel.UNPACK_LONG(srcQW, src);
+
+ /* perform write to SLM */
+ sel.UNTYPED_WRITE(_addr, srcVec.data(), 2, GenRegister::immw(0xfe), fakeTemps);
+ }sel.pop();
}
else
GBE_ASSERT(0);
} sel.pop();
- /* Make sure the slm var have the valid value now */
+ /* make sure the SLM variable holds a valid value now */
sel.BARRIER(GenRegister::ud8grf(sel.reg(FAMILY_DWORD)), sel.selReg(sel.reg(FAMILY_DWORD)), syncLocalBarrier);
+ /* read from SLM for BYTE/WORD/DWORD types */
if (typeSize(src.type) <= 4) {
GenRegister _addr = GenRegister::retype(addr, GEN_TYPE_UD);
GenRegister _dst = GenRegister::retype(dst, GEN_TYPE_UD);
sel.UNTYPED_READ(_addr, &_dst, 1, GenRegister::immw(0xfe), fakeTemps);
}
- /* TODO: work in progress QWORD */
+ /* read from SLM for QWORD types */
else if (typeSize(src.type) == 8) {
- GenRegister _addr = GenRegister::retype(addr, GEN_TYPE_UD);
- vector<GenRegister> _dst;
- _dst.push_back(sel.selReg(sel.reg(FAMILY_DWORD), ir::TYPE_U32));
- _dst.push_back(sel.selReg(sel.reg(FAMILY_DWORD), ir::TYPE_U32));
- _dst[0] = dst.retype(dst.offset(dst, 0, 0), GEN_TYPE_UD);
- _dst[1] = dst.retype(dst.offset(dst, 1, 0), GEN_TYPE_UD);
- sel.UNTYPED_READ(_addr, &_dst[0], 2, GenRegister::immw(0xfe), fakeTemps);
+ GenRegister _addr = GenRegister::retype(addr, GEN_TYPE_UD);
+ vector<GenRegister> _dst;
+ _dst.push_back(sel.selReg(sel.reg(FAMILY_WORD), ir::TYPE_U32));
+ _dst.push_back(sel.selReg(sel.reg(FAMILY_WORD), ir::TYPE_U32));
+ GenRegister _dstQ = dst.toUniform(_dst[0], GEN_TYPE_UL);
+
+ sel.push(); {
+ /* read from SLM */
+ sel.curr.execWidth = 8;
+ sel.UNTYPED_READ(_addr, _dst.data(), 2, GenRegister::immw(0xfe), fakeTemps);
+
+ /* reconstruct QWORD type */
+ _dst[0] = dst.toUniform(dst.offset(_dst[0], 0, 4), GEN_TYPE_UD);
+ _dst[1] = dst.toUniform(_dst[1], GEN_TYPE_UD);
+ sel.curr.execWidth = 1;
+ sel.MOV(_dst[0], _dst[1]);
+ } sel.pop();
+
+ /* copy the reconstructed QWORD into all elements of dst assigned to this thread */
+ sel.MOV(dst, _dstQ);
}
else
GBE_ASSERT(0);
--
2.5.0
More information about the Beignet
mailing list