[Beignet] [PATCH 4/5 OpenCL-2.0] Handle the WorkGroup_Broadcast logic in insn_selection.

junyan.he at inbox.com junyan.he at inbox.com
Mon Apr 20 23:11:40 PDT 2015


From: Junyan He <junyan.he at linux.intel.com>

We use slm to store the value which will be broadcasted
to the whole work group.

Signed-off-by: Junyan He <junyan.he at linux.intel.com>
---
 backend/src/backend/gen_insn_selection.cpp |   92 ++++++++++++++++++++++++++++
 1 file changed, 92 insertions(+)

diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index c240261..f8f1d29 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -4107,6 +4107,97 @@ namespace gbe
     DECL_CTOR(AtomicInstruction, 1, 1);
   };
 
+  /*! WorkGroup instruction pattern */
+  DECL_PATTERN(WorkGroupInstruction)
+  {
+    INLINE bool emitWGBroadcast(Selection::Opaque &sel, const ir::WorkGroupInstruction &insn) const {
+      /*  1. BARRIER    Ensure all the threads have set the correct value for the var which will be broadcasted.
+          2. CMP IDs    Compare the local IDs with the specified ones in the function call.
+          3. STORE      Use flag to control the store of the var. Only the specified item will execute the store.
+          4. BARRIER    Ensure the specified value has been stored.
+          5. LOAD       Load the stored value to all the dst value, the dst of all the items will have same value,
+                        so broadcasted.  */
+      using namespace ir;
+      const Type type = insn.getType();
+      const GenRegister src = sel.selReg(insn.getSrc(0), type);
+      const GenRegister dst = sel.selReg(insn.getDst(0), type);
+      const uint32_t srcNum = insn.getSrcNum();
+      const uint32_t simdWidth = sel.ctx.getSimdWidth();
+      const uint32_t slmAddr = insn.getSlmAddr();
+      GenRegister addr = sel.selReg(sel.reg(FAMILY_DWORD), ir::TYPE_U32);
+
+      /* Then we insert a barrier to make sure all the var we are interested in
+         have been assigned the final value. */
+      sel.BARRIER(GenRegister::ud8grf(sel.reg(FAMILY_DWORD)), sel.selReg(sel.reg(FAMILY_DWORD)), syncLocalBarrier);
+
+      GBE_ASSERT(srcNum >= 2);
+      GenRegister coords[3];
+      for (uint32_t i = 1; i < srcNum; i++) {
+        coords[i - 1] = sel.selReg(insn.getSrc(i), TYPE_U32);
+      }
+
+      sel.push(); {
+        sel.curr.predicate = GEN_PREDICATE_NONE;
+        sel.curr.noMask = 1;
+        sel.MOV(addr, GenRegister::immud(slmAddr));
+      } sel.pop();
+
+      sel.push(); {
+        sel.curr.flag = 0;
+        sel.curr.subFlag = 1;
+        sel.curr.predicate = GEN_PREDICATE_NONE;
+        sel.curr.noMask = 1;
+        GenRegister lid0, lid1, lid2;
+        uint32_t dim = srcNum - 1;
+        if (simdWidth == 16) {
+          lid0 = GenRegister::ud16grf(ir::ocl::lid0);
+          lid1 = GenRegister::ud16grf(ir::ocl::lid1);
+          lid2 = GenRegister::ud16grf(ir::ocl::lid2);
+        } else {
+          lid0 = GenRegister::ud8grf(ir::ocl::lid0);
+          lid1 = GenRegister::ud8grf(ir::ocl::lid1);
+          lid2 = GenRegister::ud8grf(ir::ocl::lid2);
+        }
+
+        sel.CMP(GEN_CONDITIONAL_EQ, coords[0], lid0, GenRegister::retype(GenRegister::null(), GEN_TYPE_UD));
+        sel.curr.predicate = GEN_PREDICATE_NORMAL;
+        if (dim >= 2)
+          sel.CMP(GEN_CONDITIONAL_EQ, coords[1], lid1, GenRegister::retype(GenRegister::null(), GEN_TYPE_UD));
+        if (dim >= 3)
+          sel.CMP(GEN_CONDITIONAL_EQ, coords[2], lid2, GenRegister::retype(GenRegister::null(), GEN_TYPE_UD));
+
+        if (typeSize(src.type) == 4) {
+          GenRegister _addr = GenRegister::retype(addr, GEN_TYPE_F);
+          GenRegister _src = GenRegister::retype(src, GEN_TYPE_F);
+          sel.UNTYPED_WRITE(_addr, &_src, 1, 0xfe);
+        }
+      } sel.pop();
+
+      /* Make sure the slm var have the valid value now */
+      sel.BARRIER(GenRegister::ud8grf(sel.reg(FAMILY_DWORD)), sel.selReg(sel.reg(FAMILY_DWORD)), syncLocalBarrier);
+
+      if (typeSize(src.type) == 4) {
+        sel.UNTYPED_READ(addr, &dst, 1, 0xfe);
+      }
+
+      return true;
+    }
+
+    INLINE bool emitOne(Selection::Opaque &sel, const ir::WorkGroupInstruction &insn, bool &markChildren) const
+    {
+      using namespace ir;
+      const WorkGroupOps workGroupOp = insn.getWorkGroupOpcode();
+
+      if (workGroupOp == WORKGROUP_OP_BROADCAST) {
+        return emitWGBroadcast(sel, insn);
+      } else {
+        GBE_ASSERT(0);
+      }
+      return true;
+    }
+    DECL_CTOR(WorkGroupInstruction, 1, 1);
+  };
+
   /*! Select instruction pattern */
   class SelectInstructionPattern : public SelectionPattern
   {
@@ -4789,6 +4880,7 @@ namespace gbe
     this->insert<GetImageInfoInstructionPattern>();
     this->insert<ReadARFInstructionPattern>();
     this->insert<RegionInstructionPattern>();
+    this->insert<WorkGroupInstructionPattern>();
 
     // Sort all the patterns with the number of instructions they output
     for (uint32_t op = 0; op < ir::OP_INVALID; ++op)
-- 
1.7.9.5



More information about the Beignet mailing list