[Beignet] [PATCH] Backend: Fix workgroup broadcast on QWORD (1D case works)

Grigore Lupescu grigore.lupescu at intel.com
Thu Apr 7 18:18:10 UTC 2016


From: Grigore Lupescu <grigore.lupescu at intel.com>

Signed-off-by: Grigore Lupescu <grigore.lupescu at intel.com>
---
 backend/src/backend/gen_insn_selection.cpp | 62 +++++++++++++++++++++---------
 1 file changed, 43 insertions(+), 19 deletions(-)

diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 103a70c..c872ad9 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -6494,6 +6494,8 @@ namespace gbe
       const uint32_t slmAddr = insn.getSlmAddr();
       GenRegister addr = sel.selReg(sel.reg(FAMILY_DWORD), ir::TYPE_U32);
       vector<GenRegister> fakeTemps;
+      fakeTemps.push_back(sel.selReg(sel.reg(FAMILY_DWORD), type));
+      fakeTemps.push_back(sel.selReg(sel.reg(FAMILY_DWORD), type));
 
       /* Then we insert a barrier to make sure all the var we are interested in
          have been assigned the final value. */
@@ -6511,8 +6513,6 @@ namespace gbe
         sel.MOV(addr, GenRegister::immud(slmAddr));
       } sel.pop();
 
-      sel.MOV(dst, GenRegister::immd(0x0));
-
       sel.push(); {
         sel.curr.flag = 0;
         sel.curr.subFlag = 1;
@@ -6531,42 +6531,66 @@ namespace gbe
         if (dim >= 3)
           sel.CMP(GEN_CONDITIONAL_EQ, coords[2], lid2, GenRegister::retype(GenRegister::null(), GEN_TYPE_UD));
 
+        /* write to SLM for BYTE/WORD/DWORD types */
         if (typeSize(src.type) <= 4) {
           GenRegister _addr = GenRegister::retype(addr, GEN_TYPE_UD);
           GenRegister _src = GenRegister::retype(src, GEN_TYPE_UD);
           sel.UNTYPED_WRITE(_addr, &_src, 1, GenRegister::immw(0xfe), fakeTemps);
         }
-        /* TODO: work in progress QWORD */
+        /* write to SLM for QWORD types */
         else if (typeSize(src.type) == 8) {
-            GenRegister _addr = GenRegister::retype(addr, GEN_TYPE_UD);
-            vector<GenRegister> _src;
-            _src.push_back(sel.selReg(sel.reg(FAMILY_DWORD), ir::TYPE_U32));
-            _src.push_back(sel.selReg(sel.reg(FAMILY_DWORD), ir::TYPE_U32));
-            _src[0] = GenRegister::retype(src, GEN_TYPE_UD);
-            _src[1] = src.offset(src, 0, 4);
-            sel.UNTYPED_WRITE(_addr, &_src[0], 2, GenRegister::immw(0xfe), fakeTemps);
+          sel.push(); {
+          /* arrange data in QWORD */
+          GenRegister _addr = GenRegister::retype(addr, GEN_TYPE_UD);
+          GenRegister srcQW = sel.selReg(sel.reg(FAMILY_QWORD), ir::TYPE_U64);
+          GenRegister srcQW_p1 = src.retype(srcQW, GEN_TYPE_UD);
+          GenRegister srcQW_p2 = src.retype(src.offset(srcQW, 2, 0), GEN_TYPE_UD);
+          vector<GenRegister> srcVec;
+          srcVec.push_back(srcQW_p1);
+          srcVec.push_back(srcQW_p2);
+
+          /* unpack into 2 DWORD */
+          sel.UNPACK_LONG(srcQW, src);
+
+          /* perform write to SLM */
+          sel.UNTYPED_WRITE(_addr, srcVec.data(), 2, GenRegister::immw(0xfe), fakeTemps);
+          }sel.pop();
         }
         else
           GBE_ASSERT(0);
 
       } sel.pop();
-      /* Make sure the slm var have the valid value now */
+      /* make sure the SLM variable holds a valid value now */
       sel.BARRIER(GenRegister::ud8grf(sel.reg(FAMILY_DWORD)), sel.selReg(sel.reg(FAMILY_DWORD)), syncLocalBarrier);
 
+      /* read from SLM for BYTE/WORD/DWORD types */
       if (typeSize(src.type) <= 4) {
         GenRegister _addr = GenRegister::retype(addr, GEN_TYPE_UD);
         GenRegister _dst = GenRegister::retype(dst, GEN_TYPE_UD);
         sel.UNTYPED_READ(_addr, &_dst, 1, GenRegister::immw(0xfe), fakeTemps);
       }
-      /* TODO: work in progress QWORD */
+      /* read from SLM for QWORD types */
       else if (typeSize(src.type) == 8) {
-          GenRegister _addr = GenRegister::retype(addr, GEN_TYPE_UD);
-          vector<GenRegister> _dst;
-          _dst.push_back(sel.selReg(sel.reg(FAMILY_DWORD), ir::TYPE_U32));
-          _dst.push_back(sel.selReg(sel.reg(FAMILY_DWORD), ir::TYPE_U32));
-          _dst[0] = dst.retype(dst.offset(dst, 0, 0), GEN_TYPE_UD);
-          _dst[1] = dst.retype(dst.offset(dst, 1, 0), GEN_TYPE_UD);
-          sel.UNTYPED_READ(_addr, &_dst[0], 2, GenRegister::immw(0xfe), fakeTemps);
+        GenRegister _addr = GenRegister::retype(addr, GEN_TYPE_UD);
+        vector<GenRegister> _dst;
+        _dst.push_back(sel.selReg(sel.reg(FAMILY_WORD), ir::TYPE_U32));
+        _dst.push_back(sel.selReg(sel.reg(FAMILY_WORD), ir::TYPE_U32));
+        GenRegister _dstQ = dst.toUniform(_dst[0], GEN_TYPE_UL);
+
+        sel.push(); {
+        /* read from SLM */
+        sel.curr.execWidth = 8;
+        sel.UNTYPED_READ(_addr, _dst.data(), 2, GenRegister::immw(0xfe), fakeTemps);
+
+        /* reconstruct QWORD type */
+        _dst[0] = dst.toUniform(dst.offset(_dst[0], 0, 4), GEN_TYPE_UD);
+        _dst[1] = dst.toUniform(_dst[1], GEN_TYPE_UD);
+        sel.curr.execWidth = 1;
+        sel.MOV(_dst[0], _dst[1]);
+        } sel.pop();
+
+        /* set all elements assigned to thread */
+        sel.MOV(dst, _dstQ);
       }
       else
         GBE_ASSERT(0);
-- 
2.5.0



More information about the Beignet mailing list