[Beignet] [PATCH V2 06/17] Backend: Fix workgroup broadcast, support for qword

Grigore Lupescu grigore.lupescu at intel.com
Mon Apr 11 14:37:02 UTC 2016


From: Grigore Lupescu <grigore.lupescu at intel.com>

Signed-off-by: Grigore Lupescu <grigore.lupescu at intel.com>
---
 backend/src/backend/gen_insn_selection.cpp | 80 +++++++++++++++++++++++++-----
 1 file changed, 67 insertions(+), 13 deletions(-)

diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index cffb016..07bdef8 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -6486,10 +6486,10 @@ namespace gbe
     INLINE bool emitWGBroadcast(Selection::Opaque &sel, const ir::WorkGroupInstruction &insn) const {
       /*  1. BARRIER    Ensure all the threads have set the correct value for the var which will be broadcasted.
           2. CMP IDs    Compare the local IDs with the specified ones in the function call.
-          3. STORE	   Use flag to control the store of the var. Only the specified item will execute the store.
+          3. STORE         Use flag to control the store of the var. Only the specified item will execute the store.
           4. BARRIER    Ensure the specified value has been stored.
-          5. LOAD	   Load the stored value to all the dst value, the dst of all the items will have same value,
-          so broadcasted.	*/
+          5. LOAD          Load the stored value into the dst of every item, so the dst of all the items holds the same value,
+          i.e. the value is broadcast.       */
       using namespace ir;
       const Type type = insn.getType();
       const GenRegister src = sel.selReg(insn.getSrc(0), type);
@@ -6498,11 +6498,13 @@ namespace gbe
       const uint32_t slmAddr = insn.getSlmAddr();
       GenRegister addr = sel.selReg(sel.reg(FAMILY_DWORD), ir::TYPE_U32);
       vector<GenRegister> fakeTemps;
+      fakeTemps.push_back(sel.selReg(sel.reg(FAMILY_DWORD), type));
+      fakeTemps.push_back(sel.selReg(sel.reg(FAMILY_DWORD), type));
 
       GBE_ASSERT(srcNum >= 2);
       GenRegister coords[3];
       for (uint32_t i = 1; i < srcNum; i++) {
-        coords[i - 1] = sel.selReg(insn.getSrc(i), TYPE_U32);
+        coords[i - 1] = GenRegister::toUniform(sel.selReg(insn.getSrc(i), TYPE_U32), GEN_TYPE_UD);
       }
 
       sel.push(); {
@@ -6522,9 +6524,9 @@ namespace gbe
         sel.curr.noMask = 1;
         GenRegister lid0, lid1, lid2;
         uint32_t dim = srcNum - 1;
-        lid0 = sel.selReg(ir::ocl::lid0);
-        lid1 = sel.selReg(ir::ocl::lid1);
-        lid2 = sel.selReg(ir::ocl::lid2);
+        lid0 = GenRegister::retype(sel.selReg(ir::ocl::lid0, TYPE_U32), GEN_TYPE_UD);
+        lid1 = GenRegister::retype(sel.selReg(ir::ocl::lid1, TYPE_U32), GEN_TYPE_UD);
+        lid2 = GenRegister::retype(sel.selReg(ir::ocl::lid2, TYPE_U32), GEN_TYPE_UD);
 
         sel.CMP(GEN_CONDITIONAL_EQ, coords[0], lid0, GenRegister::retype(GenRegister::null(), GEN_TYPE_UD));
         sel.curr.predicate = GEN_PREDICATE_NORMAL;
@@ -6533,22 +6535,74 @@ namespace gbe
         if (dim >= 3)
           sel.CMP(GEN_CONDITIONAL_EQ, coords[2], lid2, GenRegister::retype(GenRegister::null(), GEN_TYPE_UD));
 
-        if (typeSize(src.type) == 4) {
-          GenRegister _addr = GenRegister::retype(addr, GEN_TYPE_F);
-          GenRegister _src = GenRegister::retype(src, GEN_TYPE_F);
+        /* write to SLM for BYTE/WORD/DWORD types */
+        if (typeSize(src.type) <= 4) {
+          GenRegister _addr = GenRegister::retype(addr, GEN_TYPE_UD);
+          GenRegister _src = GenRegister::retype(src, GEN_TYPE_UD);
           sel.UNTYPED_WRITE(_addr, &_src, 1, GenRegister::immw(0xfe), fakeTemps);
         }
+        /* write to SLM for QWORD types */
+        else if (typeSize(src.type) == 8) {
+          sel.push(); {
+          /* arrange data in QWORD */
+          GenRegister _addr = GenRegister::retype(addr, GEN_TYPE_UD);
+          GenRegister srcQW = sel.selReg(sel.reg(FAMILY_QWORD), ir::TYPE_U64);
+          GenRegister srcQW_p1 = src.retype(srcQW, GEN_TYPE_UD);
+          GenRegister srcQW_p2 = src.retype(src.offset(srcQW, 2, 0), GEN_TYPE_UD);
+          vector<GenRegister> srcVec;
+          srcVec.push_back(srcQW_p1);
+          srcVec.push_back(srcQW_p2);
+
+          /* unpack into 2 DWORD */
+          sel.UNPACK_LONG(srcQW, src);
+
+          /* perform write to SLM */
+          sel.UNTYPED_WRITE(_addr, srcVec.data(), 2, GenRegister::immw(0xfe), fakeTemps);
+          }sel.pop();
+        }
+        else
+          GBE_ASSERT(0);
+
       } sel.pop();
-      /* Make sure the slm var have the valid value now */
+      /* make sure the SLM variable holds a valid value now */
       sel.BARRIER(GenRegister::ud8grf(sel.reg(FAMILY_DWORD)), sel.selReg(sel.reg(FAMILY_DWORD)), syncLocalBarrier);
 
-      if (typeSize(src.type) == 4) {
-        sel.UNTYPED_READ(addr, &dst, 1, GenRegister::immw(0xfe), fakeTemps);
+      /* read from SLM for BYTE/WORD/DWORD types */
+      if (typeSize(src.type) <= 4) {
+        GenRegister _addr = GenRegister::retype(addr, GEN_TYPE_UD);
+        GenRegister _dst = GenRegister::retype(dst, GEN_TYPE_UD);
+        sel.UNTYPED_READ(_addr, &_dst, 1, GenRegister::immw(0xfe), fakeTemps);
+      }
+      /* read from SLM for QWORD types */
+      else if (typeSize(src.type) == 8) {
+        GenRegister _addr = GenRegister::retype(addr, GEN_TYPE_UD);
+        vector<GenRegister> _dst;
+        _dst.push_back(sel.selReg(sel.reg(FAMILY_WORD), ir::TYPE_U32));
+        _dst.push_back(sel.selReg(sel.reg(FAMILY_WORD), ir::TYPE_U32));
+        GenRegister _dstQ = dst.toUniform(_dst[0], GEN_TYPE_UL);
+
+        sel.push(); {
+        /* read from SLM */
+        sel.curr.execWidth = 8;
+        sel.UNTYPED_READ(_addr, _dst.data(), 2, GenRegister::immw(0xfe), fakeTemps);
+
+        /* reconstruct QWORD type */
+        _dst[0] = dst.toUniform(dst.offset(_dst[0], 0, 4), GEN_TYPE_UD);
+        _dst[1] = dst.toUniform(_dst[1], GEN_TYPE_UD);
+        sel.curr.execWidth = 1;
+        sel.MOV(_dst[0], _dst[1]);
+        } sel.pop();
+
+        /* set all elements assigned to thread */
+        sel.MOV(dst, _dstQ);
       }
+      else
+        GBE_ASSERT(0);
 
       return true;
     }
 
+
     INLINE bool emitOne(Selection::Opaque &sel, const ir::WorkGroupInstruction &insn, bool &markChildren) const
     {
       using namespace ir;
-- 
2.5.0



More information about the Beignet mailing list