[Beignet] [PATCH 2/2] Backend: Fix workgroup broadcast, add initial qword support
Grigore Lupescu
grigore.lupescu at intel.com
Tue Apr 5 18:07:27 UTC 2016
From: Grigore Lupescu <grigore.lupescu at intel.com>
Signed-off-by: Grigore Lupescu <grigore.lupescu at intel.com>
---
backend/src/backend/gen_insn_selection.cpp | 54 +++++++++++++++++++++++-------
1 file changed, 42 insertions(+), 12 deletions(-)
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 96cc215..103a70c 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -6482,10 +6482,10 @@ namespace gbe
INLINE bool emitWGBroadcast(Selection::Opaque &sel, const ir::WorkGroupInstruction &insn) const {
/* 1. BARRIER Ensure all the threads have set the correct value for the var which will be broadcasted.
2. CMP IDs Compare the local IDs with the specified ones in the function call.
- 3. STORE Use flag to control the store of the var. Only the specified item will execute the store.
+ 3. STORE Use flag to control the store of the var. Only the specified item will execute the store.
4. BARRIER Ensure the specified value has been stored.
- 5. LOAD Load the stored value to all the dst value, the dst of all the items will have same value,
- so broadcasted. */
+ 5. LOAD Load the stored value to all the dst value, the dst of all the items will have same value,
+ so broadcasted. */
using namespace ir;
const Type type = insn.getType();
const GenRegister src = sel.selReg(insn.getSrc(0), type);
@@ -6502,7 +6502,7 @@ namespace gbe
GBE_ASSERT(srcNum >= 2);
GenRegister coords[3];
for (uint32_t i = 1; i < srcNum; i++) {
- coords[i - 1] = sel.selReg(insn.getSrc(i), TYPE_U32);
+ coords[i - 1] = GenRegister::toUniform(sel.selReg(insn.getSrc(i), TYPE_U32), GEN_TYPE_UD);
}
sel.push(); {
@@ -6511,6 +6511,8 @@ namespace gbe
sel.MOV(addr, GenRegister::immud(slmAddr));
} sel.pop();
+ sel.MOV(dst, GenRegister::immd(0x0));
+
sel.push(); {
sel.curr.flag = 0;
sel.curr.subFlag = 1;
@@ -6518,9 +6520,9 @@ namespace gbe
sel.curr.noMask = 1;
GenRegister lid0, lid1, lid2;
uint32_t dim = srcNum - 1;
- lid0 = sel.selReg(ir::ocl::lid0);
- lid1 = sel.selReg(ir::ocl::lid1);
- lid2 = sel.selReg(ir::ocl::lid2);
+ lid0 = GenRegister::retype(sel.selReg(ir::ocl::lid0, TYPE_U32), GEN_TYPE_UD);
+ lid1 = GenRegister::retype(sel.selReg(ir::ocl::lid1, TYPE_U32), GEN_TYPE_UD);
+ lid2 = GenRegister::retype(sel.selReg(ir::ocl::lid2, TYPE_U32), GEN_TYPE_UD);
sel.CMP(GEN_CONDITIONAL_EQ, coords[0], lid0, GenRegister::retype(GenRegister::null(), GEN_TYPE_UD));
sel.curr.predicate = GEN_PREDICATE_NORMAL;
@@ -6529,22 +6531,50 @@ namespace gbe
if (dim >= 3)
sel.CMP(GEN_CONDITIONAL_EQ, coords[2], lid2, GenRegister::retype(GenRegister::null(), GEN_TYPE_UD));
- if (typeSize(src.type) == 4) {
- GenRegister _addr = GenRegister::retype(addr, GEN_TYPE_F);
- GenRegister _src = GenRegister::retype(src, GEN_TYPE_F);
+ if (typeSize(src.type) <= 4) {
+ GenRegister _addr = GenRegister::retype(addr, GEN_TYPE_UD);
+ GenRegister _src = GenRegister::retype(src, GEN_TYPE_UD);
sel.UNTYPED_WRITE(_addr, &_src, 1, GenRegister::immw(0xfe), fakeTemps);
}
+ /* TODO: work in progress QWORD */
+ else if (typeSize(src.type) == 8) {
+ GenRegister _addr = GenRegister::retype(addr, GEN_TYPE_UD);
+ vector<GenRegister> _src;
+ _src.push_back(sel.selReg(sel.reg(FAMILY_DWORD), ir::TYPE_U32));
+ _src.push_back(sel.selReg(sel.reg(FAMILY_DWORD), ir::TYPE_U32));
+ _src[0] = GenRegister::retype(src, GEN_TYPE_UD);
+ _src[1] = src.offset(src, 0, 4);
+ sel.UNTYPED_WRITE(_addr, &_src[0], 2, GenRegister::immw(0xfe), fakeTemps);
+ }
+ else
+ GBE_ASSERT(0);
+
} sel.pop();
/* Make sure the slm var have the valid value now */
sel.BARRIER(GenRegister::ud8grf(sel.reg(FAMILY_DWORD)), sel.selReg(sel.reg(FAMILY_DWORD)), syncLocalBarrier);
- if (typeSize(src.type) == 4) {
- sel.UNTYPED_READ(addr, &dst, 1, GenRegister::immw(0xfe), fakeTemps);
+ if (typeSize(src.type) <= 4) {
+ GenRegister _addr = GenRegister::retype(addr, GEN_TYPE_UD);
+ GenRegister _dst = GenRegister::retype(dst, GEN_TYPE_UD);
+ sel.UNTYPED_READ(_addr, &_dst, 1, GenRegister::immw(0xfe), fakeTemps);
}
+ /* TODO: work in progress QWORD */
+ else if (typeSize(src.type) == 8) {
+ GenRegister _addr = GenRegister::retype(addr, GEN_TYPE_UD);
+ vector<GenRegister> _dst;
+ _dst.push_back(sel.selReg(sel.reg(FAMILY_DWORD), ir::TYPE_U32));
+ _dst.push_back(sel.selReg(sel.reg(FAMILY_DWORD), ir::TYPE_U32));
+ _dst[0] = dst.retype(dst.offset(dst, 0, 0), GEN_TYPE_UD);
+ _dst[1] = dst.retype(dst.offset(dst, 1, 0), GEN_TYPE_UD);
+ sel.UNTYPED_READ(_addr, &_dst[0], 2, GenRegister::immw(0xfe), fakeTemps);
+ }
+ else
+ GBE_ASSERT(0);
return true;
}
+
INLINE bool emitOne(Selection::Opaque &sel, const ir::WorkGroupInstruction &insn, bool &markChildren) const
{
using namespace ir;
--
2.5.0
More information about the Beignet mailing list