[Beignet] [PATCH] Backend: Fix workgroup all operation by selective mask

Grigore Lupescu grigore.lupescu at intel.com
Sun Apr 10 14:41:58 UTC 2016


From: Grigore Lupescu <grigore.lupescu at intel.com>

Signed-off-by: Grigore Lupescu <grigore.lupescu at intel.com>
---
 backend/src/backend/gen_context.cpp        |  3 +--
 backend/src/backend/gen_insn_selection.cpp | 32 ++++++++++++++++++++++++++++++
 2 files changed, 33 insertions(+), 2 deletions(-)

diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index bf66295..9098a3f 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -2390,8 +2390,7 @@ namespace gbe
     if (wg_op == ir::WORKGROUP_OP_ALL)
     {
       if (dataReg.type == GEN_TYPE_D
-          || dataReg.type == GEN_TYPE_UD
-          || dataReg.type == GEN_TYPE_F)
+          || dataReg.type == GEN_TYPE_UD)
         p->MOV(dataReg, GenRegister::immd(0xFFFFFFFF));
       else if(dataReg.type == GEN_TYPE_L ||
           dataReg.type == GEN_TYPE_UL)
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 07bdef8..52871b1 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -6474,6 +6474,38 @@ namespace gbe
          have been assigned the final value. */
       sel.BARRIER(GenRegister::ud8grf(sel.reg(FAMILY_DWORD)), sel.selReg(sel.reg(FAMILY_DWORD)), syncLocalBarrier);
 
+      /* FIX: make the workgroup "all" operation mask work-items correctly.
+       * Temporary fix: this assumes an execution size of 16. */
+      if(workGroupOp == ir::WORKGROUP_OP_ALL)
+      {
+        GenRegister lsize = sel.selReg(sel.reg(FAMILY_DWORD), ir::TYPE_U32);
+        GenRegister lsizelw = sel.selReg(sel.reg(FAMILY_DWORD), ir::TYPE_U32);
+        sel.MOV(lsize, sel.selReg(ir::ocl::lsize0, TYPE_U32));
+
+        /* round lsize down to a multiple of 16 (clear its low 4 bits) */
+        sel.SHR(lsizelw, lsize, GenRegister::immd(0x4));
+        sel.SHL(lsizelw, lsizelw, GenRegister::immd(0x4));
+
+        /* keep only the low 4 bits of lsize */
+        sel.AND(lsize, lsize, GenRegister::immd(0xF));
+
+        sel.push(); {
+          sel.curr.flag = 0;
+          sel.curr.subFlag = 1;
+          sel.curr.predicate = GEN_PREDICATE_NONE;
+          sel.curr.noMask = 1;
+
+          /* mask execution on the selected work-items;
+           * local size must be a multiple of the execution width (16) */
+          sel.CMP(GEN_CONDITIONAL_EQ, lsize, lsizelw,
+                  GenRegister::retype(GenRegister::null(), GEN_TYPE_UD));
+          sel.curr.predicate = GEN_PREDICATE_NORMAL;
+          sel.CMP(GEN_CONDITIONAL_L, lsize, GenRegister::immd(0xF),
+                  GenRegister::retype(GenRegister::null(), GEN_TYPE_UD));
+          sel.MOV(src, GenRegister::immd(0xFFFFFFFF));
+        }sel.pop();
+      }
+
       /* compute individual slice of workitems, (e.g. 0->16 workitems) */
       sel.MOV(slmOff, GenRegister::immud(insn.getSlmAddr()));
 
-- 
2.5.0



More information about the Beignet mailing list