[Beignet] [PATCH 3/4] GBE: fix the potential issue when there are inactive lanes.

Song, Ruiling ruiling.song at intel.com
Mon Dec 30 21:42:02 PST 2013


Only one minor typo in your comment, all other parts looks good to me. Thanks!

-----Original Message-----
From: beignet-bounces at lists.freedesktop.org [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of Zhigang Gong
Sent: Monday, December 30, 2013 7:07 PM
To: beignet at lists.freedesktop.org
Cc: Gong, Zhigang
Subject: [Beignet] [PATCH 3/4] GBE: fix the potential issue when there are inactive lanes.

If there are some inactive lanes, then the JMPI with all16h may fail to jum even all the active lanes are in false condition.

Then it may execute a BB with all zero flag, and when the BB has some noMask/noPredication instructions, it may bring unexpected result. this patch fixes this problem by the following method.
It use two UW register to fixup the flag result before each JMP. Before the ALL16/8H JMPI, it set the inactive lane to 1s.
Before the ANY16/8 JMPI, it clear all the inactive lane to 0s.

It introduces one extra instruction before each predicatable JMPI instruction. It causes a little bit overhead.

Signed-off-by: Zhigang Gong <zhigang.gong at intel.com>
---
 backend/src/backend/context.cpp            |    9 ++++----
 backend/src/backend/gen_context.cpp        |   16 ++++++++++++--
 backend/src/backend/gen_insn_selection.cpp |   33 +++++++++++++++++++++++-----
 backend/src/backend/gen_reg_allocation.cpp |    7 ++++++
 backend/src/backend/program.h              |    4 +++-
 backend/src/ir/function.cpp                |    2 +-
 backend/src/ir/profile.cpp                 |    4 +++-
 backend/src/ir/profile.hpp                 |    6 +++--
 8 files changed, 64 insertions(+), 17 deletions(-)

diff --git a/backend/src/backend/context.cpp b/backend/src/backend/context.cpp index 1bbe700..88375ad 100644
--- a/backend/src/backend/context.cpp
+++ b/backend/src/backend/context.cpp
@@ -430,6 +430,8 @@ namespace gbe
 
     // We insert the block IP mask first
     this->insertCurbeReg(ir::ocl::blockip, this->newCurbeEntry(GBE_CURBE_BLOCK_IP, 0, this->simdWidth*sizeof(uint16_t)));
+    this->insertCurbeReg(ir::ocl::emask, this->newCurbeEntry(GBE_CURBE_EMASK, 0,  sizeof(uint32_t)));
+    this->insertCurbeReg(ir::ocl::notemask, 
+ this->newCurbeEntry(GBE_CURBE_NOT_EMASK, 0, sizeof(uint32_t)));
 
     // Go over the arguments and find the related patch locations
     const uint32_t argNum = fn.argNum(); @@ -512,9 +514,6 @@ namespace gbe
     });
 #undef INSERT_REG
 
-    // Insert the number of threads
-    insertCurbeReg(ir::ocl::threadn, this->newCurbeEntry(GBE_CURBE_THREAD_NUM, 0, sizeof(uint32_t)));
-
     // Insert the stack buffer if used
     if (useStackPtr)
       insertCurbeReg(ir::ocl::stackptr, this->newCurbeEntry(GBE_CURBE_EXTRA_ARGUMENT, GBE_STACK_BUFFER, ptrSize)); @@ -686,7 +685,9 @@ namespace gbe
         reg == ir::ocl::goffset0  ||
         reg == ir::ocl::goffset1  ||
         reg == ir::ocl::goffset2  ||
-        reg == ir::ocl::workdim)
+        reg == ir::ocl::workdim   ||
+        reg == ir::ocl::emask     ||
+        reg == ir::ocl::notemask)
       return true;
     return false;
   }
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index 6fd2dea..97ae2b3 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -91,12 +91,24 @@ namespace gbe
   void GenContext::clearFlagRegister(void) {
     // when group size not aligned to simdWidth, flag register need clear to
     // make prediction(any8/16h) work correctly
+    const GenRegister emaskReg = ra->genReg(GenRegister::uw1grf(ir::ocl::emask));
+    const GenRegister notEmaskReg = ra->genReg(GenRegister::uw1grf(ir::ocl::notemask));
+    uint32_t execWidth = p->curr.execWidth;
     p->push();
     p->curr.predicate = GEN_PREDICATE_NONE;
     p->curr.noMask = 1;
+    /* clear all the bit in f0.0. */
     p->curr.execWidth = 1;
-    p->MOV(GenRegister::retype(GenRegister::flag(0,0), GEN_TYPE_UD), GenRegister::immud(0x0));
-    p->MOV(GenRegister::retype(GenRegister::flag(1,0), GEN_TYPE_UD), GenRegister::immud(0x0));
+    p->MOV(GenRegister::retype(GenRegister::flag(0, 0), GEN_TYPE_UW), GenRegister::immud(0x0000));
+    p->curr.noMask = 0;
+    p->curr.useFlag(0, 0);
+    p->curr.execWidth = execWidth;
+    /* set all the active lane to 1. Inactive lane remains 0. */
+    p->CMP(GEN_CONDITIONAL_EQ, GenRegister::ud16grf(126, 0), GenRegister::ud16grf(126, 0));
+    p->curr.noMask = 1;
+    p->curr.execWidth = 1;
+    p->MOV(emaskReg, GenRegister::retype(GenRegister::flag(0, 0), GEN_TYPE_UW));
+    p->XOR(notEmaskReg, emaskReg, GenRegister::immud(0xFFFF));
     p->pop();
   }
 
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 23e9da7..ecff0f2 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -2882,6 +2882,14 @@ namespace gbe
       if (sel.ctx.hasJIP(&insn)) {
         const LabelIndex jip = sel.ctx.getLabelIndex(&insn);
         sel.push();
+
+          sel.curr.noMask = 1;
+          sel.curr.execWidth = 1;
+          sel.curr.predicate = GEN_PREDICATE_NONE;
+          GenRegister emaskReg = GenRegister::uw1grf(ocl::emask);
+          GenRegister flagReg = GenRegister::flag(0, 0);
+          sel.AND(flagReg, flagReg, emaskReg);
+
           if (simdWidth == 8)
             sel.curr.predicate = GEN_PREDICATE_ALIGN1_ANY8H;
           else if (simdWidth == 16)
@@ -2889,10 +2897,8 @@ namespace gbe
           else
             NOT_IMPLEMENTED;
           sel.curr.inversePredicate = 1;
-          sel.curr.execWidth = 1;
           sel.curr.flag = 0;
           sel.curr.subFlag = 0;
-          sel.curr.noMask = 1;
           sel.JMPI(GenRegister::immd(0), jip);
         sel.pop();
       }
@@ -3032,6 +3038,9 @@ namespace gbe
         // It is slightly more complicated than for backward jump. We check that
         // all PcIPs are greater than the next block IP to be sure that we can
         // jump
+        // We set all the inactive channel to 1 as the GEN_PREDICATE_ALIGN1_ALL8/16
+        // will check those bits as well.
+
         sel.push();
           sel.curr.physicalFlag = 0;
           sel.curr.flagIndex = uint16_t(pred); @@ -3042,14 +3051,19 @@ namespace gbe
           // XXX TODO: For group size not aligned to simdWidth, ALL8/16h may not
           // work correct, as flag register bits mapped to non-active lanes tend
           // to be zero.
+
+          sel.curr.execWidth = 1;
+          sel.curr.noMask = 1;
+          GenRegister notEmaskReg = GenRegister::uw1grf(ocl::notemask);
+          sel.OR(sel.selReg(pred, TYPE_U16), sel.selReg(pred, 
+ TYPE_U16), notEmaskReg);
+
           if (simdWidth == 8)
             sel.curr.predicate = GEN_PREDICATE_ALIGN1_ALL8H;
           else if (simdWidth == 16)
             sel.curr.predicate = GEN_PREDICATE_ALIGN1_ALL16H;
           else
             NOT_SUPPORTED;
-          sel.curr.execWidth = 1;
-          sel.curr.noMask = 1;
+
           sel.JMPI(GenRegister::immd(0), jip);
         sel.pop();
 
@@ -3084,6 +3098,7 @@ namespace gbe
       if (insn.isPredicated() == true) {
         const Register pred = insn.getPredicateIndex();
 
+
         // Update the PcIPs for all the branches. Just put the IPs of the next
         // block. Next instruction will properly reupdate the IPs of the lanes
         // that actually take the branch @@ -3096,6 +3111,14 @@ namespace gbe
           sel.curr.flagIndex = uint16_t(pred);
           sel.MOV(ip, GenRegister::immuw(uint16_t(dst)));
 
+        // We clear all the inactive channel to 0 as the GEN_PREDICATE_ALIGN1_ALL8/16
Here it is "clear all the inactive channel to 0 as the GEN_PREDICATE_ALIGN1_ANY8/16"
+        // will check those bits as well.
+          sel.curr.predicate = GEN_PREDICATE_NONE;
+          sel.curr.execWidth = 1;
+          sel.curr.noMask = 1;
+          GenRegister emaskReg = GenRegister::uw1grf(ocl::emask);
+          sel.AND(sel.selReg(pred, TYPE_U16), sel.selReg(pred, 
+ TYPE_U16), emaskReg);
+
           // Branch to the jump target
           if (simdWidth == 8)
             sel.curr.predicate = GEN_PREDICATE_ALIGN1_ANY8H; @@ -3103,8 +3126,6 @@ namespace gbe
             sel.curr.predicate = GEN_PREDICATE_ALIGN1_ANY16H;
           else
             NOT_SUPPORTED;
-          sel.curr.execWidth = 1;
-          sel.curr.noMask = 1;
           sel.JMPI(GenRegister::immd(0), jip);
         sel.pop();
 
diff --git a/backend/src/backend/gen_reg_allocation.cpp b/backend/src/backend/gen_reg_allocation.cpp
index 2bb0d19..02fc534 100644
--- a/backend/src/backend/gen_reg_allocation.cpp
+++ b/backend/src/backend/gen_reg_allocation.cpp
@@ -634,6 +634,13 @@ namespace gbe
         this->intervals[reg].maxID = std::max(this->intervals[reg].maxID, lastID);
     }
 
+    this->intervals[ocl::emask].minID = 0;
+    this->intervals[ocl::emask].maxID = INT_MAX;
+    this->intervals[ocl::notemask].minID = 0;
+    this->intervals[ocl::notemask].maxID = INT_MAX;
+    this->intervals[ocl::retVal].minID = INT_MAX;
+    this->intervals[ocl::retVal].maxID = -INT_MAX;
+
     // Sort both intervals in starting point and ending point increasing orders
     const uint32_t regNum = ctx.sel->getRegNum();
     this->starting.resize(regNum);
diff --git a/backend/src/backend/program.h b/backend/src/backend/program.h index e574764..9a3570e 100644
--- a/backend/src/backend/program.h
+++ b/backend/src/backend/program.h
@@ -76,7 +76,9 @@ enum gbe_curbe_type {
   GBE_CURBE_KERNEL_ARGUMENT,
   GBE_CURBE_EXTRA_ARGUMENT,
   GBE_CURBE_BLOCK_IP,
-  GBE_CURBE_THREAD_NUM
+  GBE_CURBE_THREAD_NUM,
+  GBE_CURBE_EMASK,
+  GBE_CURBE_NOT_EMASK,
 };
 
 /*! Extra arguments use the negative range of sub-values */ diff --git a/backend/src/ir/function.cpp b/backend/src/ir/function.cpp index c15c292..4273f66 100644
--- a/backend/src/ir/function.cpp
+++ b/backend/src/ir/function.cpp
@@ -227,7 +227,7 @@ namespace ir {
         GBE_ASSERT(target != NULL);
         target->predecessors.insert(&bb);
         bb.successors.insert(target);
-        if (insn.isPredicated() == true) jumpToNext = &bb;
+        if ( insn.isPredicated() == true) jumpToNext = &bb;
       }
     });
   }
diff --git a/backend/src/ir/profile.cpp b/backend/src/ir/profile.cpp index b693c2b..9c3f333 100644
--- a/backend/src/ir/profile.cpp
+++ b/backend/src/ir/profile.cpp
@@ -40,7 +40,7 @@ namespace ir {
         "stack_pointer",
         "block_ip",
         "barrier_id", "thread_number",
-        "work_dimension", "sampler_info", "retVal"
+        "work_dimension", "sampler_info", "emask", "notemask", "retVal"
     };
 
 #if GBE_DEBUG
@@ -77,6 +77,8 @@ namespace ir {
       DECL_NEW_REG(FAMILY_DWORD, threadn);
       DECL_NEW_REG(FAMILY_DWORD, workdim);
       DECL_NEW_REG(FAMILY_WORD, samplerinfo);
+      DECL_NEW_REG(FAMILY_WORD, emask);
+      DECL_NEW_REG(FAMILY_WORD, notemask);
       DECL_NEW_REG(FAMILY_WORD, retVal);
     }
 #undef DECL_NEW_REG
diff --git a/backend/src/ir/profile.hpp b/backend/src/ir/profile.hpp index 2f16741..90c504f 100644
--- a/backend/src/ir/profile.hpp
+++ b/backend/src/ir/profile.hpp
@@ -65,8 +65,10 @@ namespace ir {
     static const Register threadn = Register(21);  // number of threads
     static const Register workdim = Register(22);  // work dimention.
     static const Register samplerinfo = Register(23); // store sampler info.
-    static const Register retVal = Register(24);   // helper register to do data flow analysis.
-    static const uint32_t regNum = 25;             // number of special registers
+    static const Register emask = Register(24);    // store the emask bits for the branching fix.
+    static const Register notemask = Register(25); // store the !emask bits for the branching fix.
+    static const Register retVal = Register(26);   // helper register to do data flow analysis.
+    static const uint32_t regNum = 27;             // number of special registers
     extern const char *specialRegMean[];           // special register name.
   } /* namespace ocl */
 
--
1.7.9.5

_______________________________________________
Beignet mailing list
Beignet at lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list