[Beignet] [PATCH V4 2/2] Backend: add debugwait function

Pan Xiuli xiuli.pan at intel.com
Mon Nov 23 23:51:41 PST 2015


Use the wait function to add a debug function:
    void debugwait(void)
This function can hang the GPU until the GPU is reset
or the host sends something to let it go.
EXTREMELY DANGEROUS on machines that have hangcheck turned off.

v2:
Fix some bugs, set the predicate and execwidth,
and also modify some instruction scheduling.

v3:
Add push and pop in instruction selection, and set nomask
together with execwidth.

v4:
Fix barrier predicate setting bugs, and rebase the patch

Signed-off-by: Pan Xiuli <xiuli.pan at intel.com>
---
 backend/src/backend/gen_context.cpp         |  3 ++-
 backend/src/backend/gen_encoder.cpp         |  1 +
 backend/src/backend/gen_insn_scheduling.cpp |  3 ++-
 backend/src/backend/gen_insn_selection.cpp  | 28 +++++++++++++++++++++++--
 backend/src/backend/gen_insn_selection.hpp  |  1 +
 backend/src/ir/instruction.cpp              | 32 ++++++++++++++++++++++++++++-
 backend/src/ir/instruction.hpp              | 10 +++++++++
 backend/src/ir/instruction.hxx              |  1 +
 backend/src/libocl/include/ocl_sync.h       |  1 +
 backend/src/libocl/src/ocl_barrier.ll       |  6 ++++++
 backend/src/libocl/src/ocl_sync.cl          |  1 +
 backend/src/llvm/llvm_gen_backend.cpp       |  6 ++++++
 backend/src/llvm/llvm_gen_ocl_function.hxx  |  3 +++
 13 files changed, 91 insertions(+), 5 deletions(-)

diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index 41fe72d..43fa7fa 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -1804,7 +1804,7 @@ namespace gbe
   }
 
   void GenContext::emitWaitInstruction(const SelectionInstruction &insn) {
-    p->WAIT();
+    p->WAIT(insn.extra.waitType);
   }
 
   void GenContext::emitBarrierInstruction(const SelectionInstruction &insn) {
@@ -1829,6 +1829,7 @@ namespace gbe
       p->BARRIER(src);
       p->curr.execWidth = 1;
       // Now we wait for the other threads
+      p->curr.predicate = GEN_PREDICATE_NONE;
       p->WAIT();
     p->pop();
   }
diff --git a/backend/src/backend/gen_encoder.cpp b/backend/src/backend/gen_encoder.cpp
index 1ad4f01..7c4357a 100644
--- a/backend/src/backend/gen_encoder.cpp
+++ b/backend/src/backend/gen_encoder.cpp
@@ -996,6 +996,7 @@ namespace gbe
 
   void GenEncoder::WAIT(uint32_t n) {
      GenNativeInstruction *insn = this->next(GEN_OPCODE_WAIT);
+     GBE_ASSERT(curr.predicate == GEN_PREDICATE_NONE);
      GenRegister src = GenRegister::notification0(n);
      this->setDst(insn, GenRegister::null());
      this->setSrc0(insn, src);
diff --git a/backend/src/backend/gen_insn_scheduling.cpp b/backend/src/backend/gen_insn_scheduling.cpp
index 43f67c9..8111e0c 100644
--- a/backend/src/backend/gen_insn_scheduling.cpp
+++ b/backend/src/backend/gen_insn_scheduling.cpp
@@ -591,7 +591,8 @@ namespace gbe
           || node->insn.opcode == SEL_OP_READ_ARF
           || node->insn.opcode == SEL_OP_BARRIER
           || node->insn.opcode == SEL_OP_CALC_TIMESTAMP
-          || node->insn.opcode == SEL_OP_STORE_PROFILING)
+          || node->insn.opcode == SEL_OP_STORE_PROFILING
+          || node->insn.opcode == SEL_OP_WAIT)
         tracker.makeBarrier(insnID, insnNum);
     }
 
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index e1cf6f7..ed7514c 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -619,7 +619,7 @@ namespace gbe
     /*! No-op */
     void NOP(void);
     /*! Wait instruction (used for the barrier) */
-    void WAIT(void);
+    void WAIT(uint32_t n = 0);
     /*! Atomic instruction */
     void ATOMIC(Reg dst, uint32_t function, uint32_t srcNum, Reg src0, Reg src1, Reg src2, GenRegister bti, vector<GenRegister> temps);
     /*! Read 64 bits float/int array */
@@ -1293,7 +1293,11 @@ namespace gbe
 
   void Selection::Opaque::EOT(void) { this->appendInsn(SEL_OP_EOT, 0, 0); }
   void Selection::Opaque::NOP(void) { this->appendInsn(SEL_OP_NOP, 0, 0); }
-  void Selection::Opaque::WAIT(void) { this->appendInsn(SEL_OP_WAIT, 0, 0); }
+  void Selection::Opaque::WAIT(uint32_t n)
+  {
+    SelectionInstruction *insn = this->appendInsn(SEL_OP_WAIT, 0, 0);
+    insn->extra.waitType = n;
+  }
 
   void Selection::Opaque::READ64(Reg addr,
                                  const GenRegister *dst,
@@ -3465,6 +3469,25 @@ namespace gbe
     DECL_CTOR(SyncInstruction, 1,1);
   };
 
+  /*! Wait instruction */
+  DECL_PATTERN(WaitInstruction)
+  {
+    INLINE bool emitOne(Selection::Opaque &sel, const ir::WaitInstruction &insn, bool &markChildren) const
+    {
+      using namespace ir;
+      // Debugwait will use reg 1, which is different from barrier
+      sel.push();
+        sel.curr.noMask = 1;
+        sel.curr.execWidth = 1;
+        sel.curr.predicate = GEN_PREDICATE_NONE;
+        sel.WAIT(1);
+      sel.pop();
+      return true;
+    }
+
+    DECL_CTOR(WaitInstruction, 1,1);
+  };
+
   INLINE uint32_t getByteScatterGatherSize(Selection::Opaque &sel, ir::Type type) {
     using namespace ir;
     switch (type) {
@@ -5978,6 +6001,7 @@ namespace gbe
     this->insert<CalcTimestampInstructionPattern>();
     this->insert<StoreProfilingInstructionPattern>();
     this->insert<NullaryInstructionPattern>();
+    this->insert<WaitInstructionPattern>();
 
     // Sort all the patterns with the number of instructions they output
     for (uint32_t op = 0; op < ir::OP_INVALID; ++op)
diff --git a/backend/src/backend/gen_insn_selection.hpp b/backend/src/backend/gen_insn_selection.hpp
index 32e5ce2..0070ac2 100644
--- a/backend/src/backend/gen_insn_selection.hpp
+++ b/backend/src/backend/gen_insn_selection.hpp
@@ -136,6 +136,7 @@ namespace gbe
         uint16_t lut_sub:2;
       };
       uint32_t barrierType;
+      uint32_t waitType;
       bool longjmp;
       uint32_t indirect_offset;
       struct {
diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp
index 6ed0b89..c7facfb 100644
--- a/backend/src/ir/instruction.cpp
+++ b/backend/src/ir/instruction.cpp
@@ -949,6 +949,21 @@ namespace ir {
       Register dst[0], src[0];
     };
 
+    /*! Wait instructions */
+    class ALIGNED_INSTRUCTION WaitInstruction :
+      public BasePolicy,
+      public NSrcPolicy<WaitInstruction, 0>,
+      public NDstPolicy<WaitInstruction, 0>
+    {
+    public:
+      INLINE WaitInstruction() {
+        this->opcode = OP_WAIT;
+      }
+      INLINE bool wellFormed(const Function &fn, std::string &why) const;
+      INLINE void out(std::ostream &out, const Function &fn) const;
+      Register dst[0], src[0];
+    };
+
 #undef ALIGNED_INSTRUCTION
 
     /////////////////////////////////////////////////////////////////////////
@@ -1247,6 +1262,8 @@ namespace ir {
     { return true; }
     INLINE bool GetImageInfoInstruction::wellFormed(const Function &fn, std::string &why) const
     { return true; }
+    INLINE bool WaitInstruction::wellFormed(const Function &fn, std::string &why) const
+    { return true; }
 
 
     // Ensure that types and register family match
@@ -1531,6 +1548,9 @@ namespace ir {
           out << "." << syncStr[field];
     }
 
+    INLINE void WaitInstruction::out(std::ostream &out, const Function &fn) const {
+      this->outOpcode(out);
+    }
 
   } /* namespace internal */
 
@@ -1680,6 +1700,10 @@ START_INTROSPECTION(LabelInstruction)
 #include "ir/instruction.hxx"
 END_INTROSPECTION(LabelInstruction)
 
+START_INTROSPECTION(WaitInstruction)
+#include "ir/instruction.hxx"
+END_INTROSPECTION(WaitInstruction)
+
 START_INTROSPECTION(VmeInstruction)
 #include "ir/instruction.hxx"
 END_INTROSPECTION(VmeInstruction)
@@ -1829,7 +1853,8 @@ END_FUNCTION(Instruction, Register)
            opcode == OP_SYNC ||
            opcode == OP_ATOMIC ||
            opcode == OP_CALC_TIMESTAMP ||
-           opcode == OP_STORE_PROFILING;
+           opcode == OP_STORE_PROFILING ||
+           opcode == OP_WAIT;
   }
 
 #define DECL_MEM_FN(CLASS, RET, PROTOTYPE, CALL) \
@@ -2174,6 +2199,11 @@ DECL_MEM_FN(MemInstruction, void,     setBtiReg(Register reg), setBtiReg(reg))
     return internal::StoreProfilingInstruction(bti, profilingType).convert();
   }
 
+  // WAIT
+  Instruction WAIT(void) {
+    return internal::WaitInstruction().convert();
+  }
+
   std::ostream &operator<< (std::ostream &out, const Instruction &insn) {
     const Function &fn = insn.getFunction();
     const BasicBlock *bb = insn.getParent();
diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp
index 7862bbf..76ffd77 100644
--- a/backend/src/ir/instruction.hpp
+++ b/backend/src/ir/instruction.hpp
@@ -576,6 +576,13 @@ namespace ir {
     static bool isClassOf(const Instruction &insn);
   };
 
+  /*! Wait instruction */
+  class WaitInstruction : public Instruction {
+  public:
+    /*! Return true if the given instruction is an instance of this class */
+    static bool isClassOf(const Instruction &insn);
+  };
+
   /*! Specialize the instruction. Also performs typechecking first based on the
    *  opcode. Crashes if it fails
    */
@@ -797,6 +804,9 @@ namespace ir {
   Instruction CALC_TIMESTAMP(uint32_t pointNum, uint32_t tsType);
   /*! calculate the execute timestamp for profiling */
   Instruction STORE_PROFILING(uint32_t bti, uint32_t Type);
+  /*! wait */
+  Instruction WAIT(void);
+
 } /* namespace ir */
 } /* namespace gbe */
 
diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx
index 1282747..efdd4c5 100644
--- a/backend/src/ir/instruction.hxx
+++ b/backend/src/ir/instruction.hxx
@@ -109,3 +109,4 @@ DECL_INSN(ELSE, BranchInstruction)
 DECL_INSN(WHILE, BranchInstruction)
 DECL_INSN(CALC_TIMESTAMP, CalcTimestampInstruction)
 DECL_INSN(STORE_PROFILING, StoreProfilingInstruction)
+DECL_INSN(WAIT, WaitInstruction)
diff --git a/backend/src/libocl/include/ocl_sync.h b/backend/src/libocl/include/ocl_sync.h
index 18090d5..1d90cae 100644
--- a/backend/src/libocl/include/ocl_sync.h
+++ b/backend/src/libocl/include/ocl_sync.h
@@ -31,5 +31,6 @@ OVERLOADABLE void barrier(cl_mem_fence_flags flags);
 void mem_fence(cl_mem_fence_flags flags);
 void read_mem_fence(cl_mem_fence_flags flags);
 void write_mem_fence(cl_mem_fence_flags flags);
+OVERLOADABLE void debugwait(void);
 
 #endif  /* __OCL_SYNC_H__ */
diff --git a/backend/src/libocl/src/ocl_barrier.ll b/backend/src/libocl/src/ocl_barrier.ll
index 2765a71..9416f80 100644
--- a/backend/src/libocl/src/ocl_barrier.ll
+++ b/backend/src/libocl/src/ocl_barrier.ll
@@ -12,6 +12,7 @@ declare i32 @_get_global_mem_fence() nounwind alwaysinline
 declare void @__gen_ocl_barrier_local() nounwind alwaysinline noduplicate
 declare void @__gen_ocl_barrier_global() nounwind alwaysinline noduplicate
 declare void @__gen_ocl_barrier_local_and_global() nounwind alwaysinline noduplicate
+declare void @__gen_ocl_debugwait() nounwind alwaysinline noduplicate
 
 define void @_Z7barrierj(i32 %flags) nounwind noduplicate alwaysinline {
   %1 = icmp eq i32 %flags, 3
@@ -40,3 +41,8 @@ barrier_global:
 done:
   ret void
 }
+
+define void @_Z9debugwaitv() nounwind noduplicate alwaysinline {
+  call void @__gen_ocl_debugwait()
+  ret void
+}
diff --git a/backend/src/libocl/src/ocl_sync.cl b/backend/src/libocl/src/ocl_sync.cl
index d008639..70d6f26 100644
--- a/backend/src/libocl/src/ocl_sync.cl
+++ b/backend/src/libocl/src/ocl_sync.cl
@@ -20,6 +20,7 @@
 void __gen_ocl_barrier_local(void);
 void __gen_ocl_barrier_global(void);
 void __gen_ocl_barrier_local_and_global(void);
+void __gen_ocl_debugwait(void);
 
 void mem_fence(cl_mem_fence_flags flags) {
 }
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index d1b6f98..a0b2262 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -3601,6 +3601,7 @@ namespace gbe
       case GEN_OCL_PRINTF:
       case GEN_OCL_CALC_TIMESTAMP:
       case GEN_OCL_STORE_PROFILING:
+      case GEN_OCL_DEBUGWAIT:
         break;
       case GEN_OCL_NOT_FOUND:
       default:
@@ -4394,6 +4395,11 @@ namespace gbe
             ctx.SIMD_SHUFFLE(getType(ctx, I.getType()), dst, src0, src1);
             break;
           }
+          case GEN_OCL_DEBUGWAIT:
+          {
+            ctx.WAIT();
+            break;
+          }
           default: break;
         }
       }
diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx b/backend/src/llvm/llvm_gen_ocl_function.hxx
index 65bf0c1..d0e3614 100644
--- a/backend/src/llvm/llvm_gen_ocl_function.hxx
+++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
@@ -177,3 +177,6 @@ DECL_LLVM_GEN_FUNCTION(PRINTF, __gen_ocl_printf)
 DECL_LLVM_GEN_FUNCTION(CALC_TIMESTAMP, __gen_ocl_calc_timestamp)
 // store profiling info to the mem.
 DECL_LLVM_GEN_FUNCTION(STORE_PROFILING, __gen_ocl_store_profiling)
+
+// debug wait function
+DECL_LLVM_GEN_FUNCTION(DEBUGWAIT, __gen_ocl_debugwait)
-- 
2.1.4



More information about the Beignet mailing list