[Beignet] [PATCH 2/2] Backend: Fix workgroup broadcast, add initial qword support

Grigore Lupescu grigore.lupescu at intel.com
Tue Apr 5 18:07:27 UTC 2016


From: Grigore Lupescu <grigore.lupescu at intel.com>

Signed-off-by: Grigore Lupescu <grigore.lupescu at intel.com>
---
 backend/src/backend/gen_insn_selection.cpp | 54 +++++++++++++++++++++++-------
 1 file changed, 42 insertions(+), 12 deletions(-)

diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 96cc215..103a70c 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -6482,10 +6482,10 @@ namespace gbe
     INLINE bool emitWGBroadcast(Selection::Opaque &sel, const ir::WorkGroupInstruction &insn) const {
       /*  1. BARRIER    Ensure all the threads have set the correct value for the var which will be broadcasted.
           2. CMP IDs    Compare the local IDs with the specified ones in the function call.
-          3. STORE	   Use flag to control the store of the var. Only the specified item will execute the store.
+          3. STORE         Use flag to control the store of the var. Only the specified item will execute the store.
           4. BARRIER    Ensure the specified value has been stored.
-          5. LOAD	   Load the stored value to all the dst value, the dst of all the items will have same value,
-          so broadcasted.	*/
+          5. LOAD          Load the stored value to all the dst value, the dst of all the items will have same value,
+          so broadcasted.       */
       using namespace ir;
       const Type type = insn.getType();
       const GenRegister src = sel.selReg(insn.getSrc(0), type);
@@ -6502,7 +6502,7 @@ namespace gbe
       GBE_ASSERT(srcNum >= 2);
       GenRegister coords[3];
       for (uint32_t i = 1; i < srcNum; i++) {
-        coords[i - 1] = sel.selReg(insn.getSrc(i), TYPE_U32);
+        coords[i - 1] = GenRegister::toUniform(sel.selReg(insn.getSrc(i), TYPE_U32), GEN_TYPE_UD);
       }
 
       sel.push(); {
@@ -6511,6 +6511,8 @@ namespace gbe
         sel.MOV(addr, GenRegister::immud(slmAddr));
       } sel.pop();
 
+      sel.MOV(dst, GenRegister::immd(0x0));
+
       sel.push(); {
         sel.curr.flag = 0;
         sel.curr.subFlag = 1;
@@ -6518,9 +6520,9 @@ namespace gbe
         sel.curr.noMask = 1;
         GenRegister lid0, lid1, lid2;
         uint32_t dim = srcNum - 1;
-        lid0 = sel.selReg(ir::ocl::lid0);
-        lid1 = sel.selReg(ir::ocl::lid1);
-        lid2 = sel.selReg(ir::ocl::lid2);
+        lid0 = GenRegister::retype(sel.selReg(ir::ocl::lid0, TYPE_U32), GEN_TYPE_UD);
+        lid1 = GenRegister::retype(sel.selReg(ir::ocl::lid1, TYPE_U32), GEN_TYPE_UD);
+        lid2 = GenRegister::retype(sel.selReg(ir::ocl::lid2, TYPE_U32), GEN_TYPE_UD);
 
         sel.CMP(GEN_CONDITIONAL_EQ, coords[0], lid0, GenRegister::retype(GenRegister::null(), GEN_TYPE_UD));
         sel.curr.predicate = GEN_PREDICATE_NORMAL;
@@ -6529,22 +6531,50 @@ namespace gbe
         if (dim >= 3)
           sel.CMP(GEN_CONDITIONAL_EQ, coords[2], lid2, GenRegister::retype(GenRegister::null(), GEN_TYPE_UD));
 
-        if (typeSize(src.type) == 4) {
-          GenRegister _addr = GenRegister::retype(addr, GEN_TYPE_F);
-          GenRegister _src = GenRegister::retype(src, GEN_TYPE_F);
+        if (typeSize(src.type) <= 4) {
+          GenRegister _addr = GenRegister::retype(addr, GEN_TYPE_UD);
+          GenRegister _src = GenRegister::retype(src, GEN_TYPE_UD);
           sel.UNTYPED_WRITE(_addr, &_src, 1, GenRegister::immw(0xfe), fakeTemps);
         }
+        /* TODO: work in progress QWORD */
+        else if (typeSize(src.type) == 8) {
+            GenRegister _addr = GenRegister::retype(addr, GEN_TYPE_UD);
+            vector<GenRegister> _src;
+            _src.push_back(sel.selReg(sel.reg(FAMILY_DWORD), ir::TYPE_U32));
+            _src.push_back(sel.selReg(sel.reg(FAMILY_DWORD), ir::TYPE_U32));
+            _src[0] = GenRegister::retype(src, GEN_TYPE_UD);
+            _src[1] = src.offset(src, 0, 4);
+            sel.UNTYPED_WRITE(_addr, &_src[0], 2, GenRegister::immw(0xfe), fakeTemps);
+        }
+        else
+          GBE_ASSERT(0);
+
       } sel.pop();
       /* Make sure the slm var have the valid value now */
       sel.BARRIER(GenRegister::ud8grf(sel.reg(FAMILY_DWORD)), sel.selReg(sel.reg(FAMILY_DWORD)), syncLocalBarrier);
 
-      if (typeSize(src.type) == 4) {
-        sel.UNTYPED_READ(addr, &dst, 1, GenRegister::immw(0xfe), fakeTemps);
+      if (typeSize(src.type) <= 4) {
+        GenRegister _addr = GenRegister::retype(addr, GEN_TYPE_UD);
+        GenRegister _dst = GenRegister::retype(dst, GEN_TYPE_UD);
+        sel.UNTYPED_READ(_addr, &_dst, 1, GenRegister::immw(0xfe), fakeTemps);
       }
+      /* TODO: work in progress QWORD */
+      else if (typeSize(src.type) == 8) {
+          GenRegister _addr = GenRegister::retype(addr, GEN_TYPE_UD);
+          vector<GenRegister> _dst;
+          _dst.push_back(sel.selReg(sel.reg(FAMILY_DWORD), ir::TYPE_U32));
+          _dst.push_back(sel.selReg(sel.reg(FAMILY_DWORD), ir::TYPE_U32));
+          _dst[0] = dst.retype(dst.offset(dst, 0, 0), GEN_TYPE_UD);
+          _dst[1] = dst.retype(dst.offset(dst, 1, 0), GEN_TYPE_UD);
+          sel.UNTYPED_READ(_addr, &_dst[0], 2, GenRegister::immw(0xfe), fakeTemps);
+      }
+      else
+        GBE_ASSERT(0);
 
       return true;
     }
 
+
     INLINE bool emitOne(Selection::Opaque &sel, const ir::WorkGroupInstruction &insn, bool &markChildren) const
     {
       using namespace ir;
-- 
2.5.0



More information about the Beignet mailing list