[Beignet] [PATCH V4 2/2] Backend: add debugwait function

Yang, Rong R rong.r.yang at intel.com
Tue Nov 24 00:36:19 PST 2015


LGTM, pushed, thanks.

> -----Original Message-----
> From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of
> Pan Xiuli
> Sent: Tuesday, November 24, 2015 15:52
> To: beignet at lists.freedesktop.org
> Cc: Pan, Xiuli
> Subject: [Beignet] [PATCH V4 2/2] Backend: add debugwait function
> 
> Use wait function to extend a debug function:
>     void debugwait(void)
> This function can hang the GPU until the GPU is reset or the host sends
> something to let it go.
> EXTREMELY DANGEROUS for machines that have hangcheck turned off.
> 
> v2:
> Fix some bugs, add predicate and execwidth settings, and modify some
> instruction scheduling.
> 
> v3:
> Add push and pop in instruction selection, and set nomask with execwidth.
> 
> v4:
> Fix barrier predicate setting bugs, and rebase the patch
> 
> Signed-off-by: Pan Xiuli <xiuli.pan at intel.com>
> ---
>  backend/src/backend/gen_context.cpp         |  3 ++-
>  backend/src/backend/gen_encoder.cpp         |  1 +
>  backend/src/backend/gen_insn_scheduling.cpp |  3 ++-
>  backend/src/backend/gen_insn_selection.cpp  | 28 +++++++++++++++++++++++--
>  backend/src/backend/gen_insn_selection.hpp  |  1 +
>  backend/src/ir/instruction.cpp              | 32 ++++++++++++++++++++++++++++-
>  backend/src/ir/instruction.hpp              | 10 +++++++++
>  backend/src/ir/instruction.hxx              |  1 +
>  backend/src/libocl/include/ocl_sync.h       |  1 +
>  backend/src/libocl/src/ocl_barrier.ll       |  6 ++++++
>  backend/src/libocl/src/ocl_sync.cl          |  1 +
>  backend/src/llvm/llvm_gen_backend.cpp       |  6 ++++++
>  backend/src/llvm/llvm_gen_ocl_function.hxx  |  3 +++
>  13 files changed, 91 insertions(+), 5 deletions(-)
> 
> diff --git a/backend/src/backend/gen_context.cpp
> b/backend/src/backend/gen_context.cpp
> index 41fe72d..43fa7fa 100644
> --- a/backend/src/backend/gen_context.cpp
> +++ b/backend/src/backend/gen_context.cpp
> @@ -1804,7 +1804,7 @@ namespace gbe
>    }
> 
>    void GenContext::emitWaitInstruction(const SelectionInstruction &insn) {
> -    p->WAIT();
> +    p->WAIT(insn.extra.waitType);
>    }
> 
>    void GenContext::emitBarrierInstruction(const SelectionInstruction &insn) {
> @@ -1829,6 +1829,7 @@ namespace gbe
>        p->BARRIER(src);
>        p->curr.execWidth = 1;
>        // Now we wait for the other threads
> +      p->curr.predicate = GEN_PREDICATE_NONE;
>        p->WAIT();
>      p->pop();
>    }
> diff --git a/backend/src/backend/gen_encoder.cpp
> b/backend/src/backend/gen_encoder.cpp
> index 1ad4f01..7c4357a 100644
> --- a/backend/src/backend/gen_encoder.cpp
> +++ b/backend/src/backend/gen_encoder.cpp
> @@ -996,6 +996,7 @@ namespace gbe
> 
>    void GenEncoder::WAIT(uint32_t n) {
>       GenNativeInstruction *insn = this->next(GEN_OPCODE_WAIT);
> +     GBE_ASSERT(curr.predicate == GEN_PREDICATE_NONE);
>       GenRegister src = GenRegister::notification0(n);
>       this->setDst(insn, GenRegister::null());
>       this->setSrc0(insn, src);
> diff --git a/backend/src/backend/gen_insn_scheduling.cpp
> b/backend/src/backend/gen_insn_scheduling.cpp
> index 43f67c9..8111e0c 100644
> --- a/backend/src/backend/gen_insn_scheduling.cpp
> +++ b/backend/src/backend/gen_insn_scheduling.cpp
> @@ -591,7 +591,8 @@ namespace gbe
>            || node->insn.opcode == SEL_OP_READ_ARF
>            || node->insn.opcode == SEL_OP_BARRIER
>            || node->insn.opcode == SEL_OP_CALC_TIMESTAMP
> -          || node->insn.opcode == SEL_OP_STORE_PROFILING)
> +          || node->insn.opcode == SEL_OP_STORE_PROFILING
> +          || node->insn.opcode == SEL_OP_WAIT)
>          tracker.makeBarrier(insnID, insnNum);
>      }
> 
> diff --git a/backend/src/backend/gen_insn_selection.cpp
> b/backend/src/backend/gen_insn_selection.cpp
> index e1cf6f7..ed7514c 100644
> --- a/backend/src/backend/gen_insn_selection.cpp
> +++ b/backend/src/backend/gen_insn_selection.cpp
> @@ -619,7 +619,7 @@ namespace gbe
>      /*! No-op */
>      void NOP(void);
>      /*! Wait instruction (used for the barrier) */
> -    void WAIT(void);
> +    void WAIT(uint32_t n = 0);
>      /*! Atomic instruction */
>      void ATOMIC(Reg dst, uint32_t function, uint32_t srcNum, Reg src0, Reg
> src1, Reg src2, GenRegister bti, vector<GenRegister> temps);
>      /*! Read 64 bits float/int array */
> @@ -1293,7 +1293,11 @@ namespace gbe
> 
>    void Selection::Opaque::EOT(void) { this->appendInsn(SEL_OP_EOT, 0, 0); }
>    void Selection::Opaque::NOP(void) { this->appendInsn(SEL_OP_NOP, 0, 0); }
> -  void Selection::Opaque::WAIT(void) { this->appendInsn(SEL_OP_WAIT, 0,
> 0); }
> +  void Selection::Opaque::WAIT(uint32_t n)  {
> +    SelectionInstruction *insn = this->appendInsn(SEL_OP_WAIT, 0, 0);
> +    insn->extra.waitType = n;
> +  }
> 
>    void Selection::Opaque::READ64(Reg addr,
>                                   const GenRegister *dst,
> @@ -3465,6 +3469,25 @@ namespace gbe
>      DECL_CTOR(SyncInstruction, 1,1);
>    };
> 
> +  /*! Wait instruction */
> +  DECL_PATTERN(WaitInstruction)
> +  {
> +    INLINE bool emitOne(Selection::Opaque &sel, const ir::WaitInstruction
> &insn, bool &markChildren) const
> +    {
> +      using namespace ir;
> +      // Debugwait will use reg 1, which is different from barrier
> +      sel.push();
> +        sel.curr.noMask = 1;
> +        sel.curr.execWidth = 1;
> +        sel.curr.predicate = GEN_PREDICATE_NONE;
> +        sel.WAIT(1);
> +      sel.pop();
> +      return true;
> +    }
> +
> +    DECL_CTOR(WaitInstruction, 1,1);
> +  };
> +
>    INLINE uint32_t getByteScatterGatherSize(Selection::Opaque &sel, ir::Type
> type) {
>      using namespace ir;
>      switch (type) {
> @@ -5978,6 +6001,7 @@ namespace gbe
>      this->insert<CalcTimestampInstructionPattern>();
>      this->insert<StoreProfilingInstructionPattern>();
>      this->insert<NullaryInstructionPattern>();
> +    this->insert<WaitInstructionPattern>();
> 
>      // Sort all the patterns with the number of instructions they output
>      for (uint32_t op = 0; op < ir::OP_INVALID; ++op)
> diff --git a/backend/src/backend/gen_insn_selection.hpp
> b/backend/src/backend/gen_insn_selection.hpp
> index 32e5ce2..0070ac2 100644
> --- a/backend/src/backend/gen_insn_selection.hpp
> +++ b/backend/src/backend/gen_insn_selection.hpp
> @@ -136,6 +136,7 @@ namespace gbe
>          uint16_t lut_sub:2;
>        };
>        uint32_t barrierType;
> +      uint32_t waitType;
>        bool longjmp;
>        uint32_t indirect_offset;
>        struct {
> diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp
> index 6ed0b89..c7facfb 100644
> --- a/backend/src/ir/instruction.cpp
> +++ b/backend/src/ir/instruction.cpp
> @@ -949,6 +949,21 @@ namespace ir {
>        Register dst[0], src[0];
>      };
> 
> +    /*! Wait instructions */
> +    class ALIGNED_INSTRUCTION WaitInstruction :
> +      public BasePolicy,
> +      public NSrcPolicy<WaitInstruction, 0>,
> +      public NDstPolicy<WaitInstruction, 0>
> +    {
> +    public:
> +      INLINE WaitInstruction() {
> +        this->opcode = OP_WAIT;
> +      }
> +      INLINE bool wellFormed(const Function &fn, std::string &why) const;
> +      INLINE void out(std::ostream &out, const Function &fn) const;
> +      Register dst[0], src[0];
> +    };
> +
>  #undef ALIGNED_INSTRUCTION
> 
>      /////////////////////////////////////////////////////////////////////////
> @@ -1247,6 +1262,8 @@ namespace ir {
>      { return true; }
>      INLINE bool GetImageInfoInstruction::wellFormed(const Function &fn,
> std::string &why) const
>      { return true; }
> +    INLINE bool WaitInstruction::wellFormed(const Function &fn, std::string
> &why) const
> +    { return true; }
> 
> 
>      // Ensure that types and register family match
> @@ -1531,6 +1548,9 @@ namespace ir {
>            out << "." << syncStr[field];
>      }
> 
> +    INLINE void WaitInstruction::out(std::ostream &out, const Function &fn)
> const {
> +      this->outOpcode(out);
> +    }
> 
>    } /* namespace internal */
> 
> @@ -1680,6 +1700,10 @@ START_INTROSPECTION(LabelInstruction)
>  #include "ir/instruction.hxx"
>  END_INTROSPECTION(LabelInstruction)
> 
> +START_INTROSPECTION(WaitInstruction)
> +#include "ir/instruction.hxx"
> +END_INTROSPECTION(WaitInstruction)
> +
>  START_INTROSPECTION(VmeInstruction)
>  #include "ir/instruction.hxx"
>  END_INTROSPECTION(VmeInstruction)
> @@ -1829,7 +1853,8 @@ END_FUNCTION(Instruction, Register)
>             opcode == OP_SYNC ||
>             opcode == OP_ATOMIC ||
>             opcode == OP_CALC_TIMESTAMP ||
> -           opcode == OP_STORE_PROFILING;
> +           opcode == OP_STORE_PROFILING ||
> +           opcode == OP_WAIT;
>    }
> 
>  #define DECL_MEM_FN(CLASS, RET, PROTOTYPE, CALL) \
> @@ -2174,6 +2199,11 @@ DECL_MEM_FN(MemInstruction, void,
> setBtiReg(Register reg), setBtiReg(reg))
>      return internal::StoreProfilingInstruction(bti, profilingType).convert();
>    }
> 
> +  // WAIT
> +  Instruction WAIT(void) {
> +    return internal::WaitInstruction().convert();
> +  }
> +
>    std::ostream &operator<< (std::ostream &out, const Instruction &insn) {
>      const Function &fn = insn.getFunction();
>      const BasicBlock *bb = insn.getParent();
> diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp
> index 7862bbf..76ffd77 100644
> --- a/backend/src/ir/instruction.hpp
> +++ b/backend/src/ir/instruction.hpp
> @@ -576,6 +576,13 @@ namespace ir {
>      static bool isClassOf(const Instruction &insn);
>    };
> 
> +  /*! Wait instruction */
> +  class WaitInstruction : public Instruction {
> +  public:
> +    /*! Return true if the given instruction is an instance of this class */
> +    static bool isClassOf(const Instruction &insn);
> +  };
> +
>    /*! Specialize the instruction. Also performs typechecking first based on the
>     *  opcode. Crashes if it fails
>     */
> @@ -797,6 +804,9 @@ namespace ir {
>    Instruction CALC_TIMESTAMP(uint32_t pointNum, uint32_t tsType);
>    /*! calculate the execute timestamp for profiling */
>    Instruction STORE_PROFILING(uint32_t bti, uint32_t Type);
> +  /*! wait */
> +  Instruction WAIT(void);
> +
>  } /* namespace ir */
>  } /* namespace gbe */
> 
> diff --git a/backend/src/ir/instruction.hxx b/backend/src/ir/instruction.hxx
> index 1282747..efdd4c5 100644
> --- a/backend/src/ir/instruction.hxx
> +++ b/backend/src/ir/instruction.hxx
> @@ -109,3 +109,4 @@ DECL_INSN(ELSE, BranchInstruction)
>  DECL_INSN(WHILE, BranchInstruction)
>  DECL_INSN(CALC_TIMESTAMP, CalcTimestampInstruction)
>  DECL_INSN(STORE_PROFILING, StoreProfilingInstruction)
> +DECL_INSN(WAIT, WaitInstruction)
> diff --git a/backend/src/libocl/include/ocl_sync.h
> b/backend/src/libocl/include/ocl_sync.h
> index 18090d5..1d90cae 100644
> --- a/backend/src/libocl/include/ocl_sync.h
> +++ b/backend/src/libocl/include/ocl_sync.h
> @@ -31,5 +31,6 @@ OVERLOADABLE void barrier(cl_mem_fence_flags flags);
>  void mem_fence(cl_mem_fence_flags flags);
>  void read_mem_fence(cl_mem_fence_flags flags);
>  void write_mem_fence(cl_mem_fence_flags flags);
> +OVERLOADABLE void debugwait(void);
> 
>  #endif  /* __OCL_SYNC_H__ */
> diff --git a/backend/src/libocl/src/ocl_barrier.ll
> b/backend/src/libocl/src/ocl_barrier.ll
> index 2765a71..9416f80 100644
> --- a/backend/src/libocl/src/ocl_barrier.ll
> +++ b/backend/src/libocl/src/ocl_barrier.ll
> @@ -12,6 +12,7 @@ declare i32 @_get_global_mem_fence() nounwind alwaysinline
>  declare void @__gen_ocl_barrier_local() nounwind alwaysinline noduplicate
>  declare void @__gen_ocl_barrier_global() nounwind alwaysinline noduplicate
>  declare void @__gen_ocl_barrier_local_and_global() nounwind alwaysinline noduplicate
> +declare void @__gen_ocl_debugwait() nounwind alwaysinline noduplicate
> 
>  define void @_Z7barrierj(i32 %flags) nounwind noduplicate alwaysinline {
>    %1 = icmp eq i32 %flags, 3
> @@ -40,3 +41,8 @@ barrier_global:
>  done:
>    ret void
>  }
> +
> +define void @_Z9debugwaitv() nounwind noduplicate alwaysinline {
> +  call void @__gen_ocl_debugwait()
> +  ret void
> +}
> diff --git a/backend/src/libocl/src/ocl_sync.cl
> b/backend/src/libocl/src/ocl_sync.cl
> index d008639..70d6f26 100644
> --- a/backend/src/libocl/src/ocl_sync.cl
> +++ b/backend/src/libocl/src/ocl_sync.cl
> @@ -20,6 +20,7 @@
>  void __gen_ocl_barrier_local(void);
>  void __gen_ocl_barrier_global(void);
>  void __gen_ocl_barrier_local_and_global(void);
> +void __gen_ocl_debugwait(void);
> 
>  void mem_fence(cl_mem_fence_flags flags) {
>  }
> diff --git a/backend/src/llvm/llvm_gen_backend.cpp
> b/backend/src/llvm/llvm_gen_backend.cpp
> index d1b6f98..a0b2262 100644
> --- a/backend/src/llvm/llvm_gen_backend.cpp
> +++ b/backend/src/llvm/llvm_gen_backend.cpp
> @@ -3601,6 +3601,7 @@ namespace gbe
>        case GEN_OCL_PRINTF:
>        case GEN_OCL_CALC_TIMESTAMP:
>        case GEN_OCL_STORE_PROFILING:
> +      case GEN_OCL_DEBUGWAIT:
>          break;
>        case GEN_OCL_NOT_FOUND:
>        default:
> @@ -4394,6 +4395,11 @@ namespace gbe
>              ctx.SIMD_SHUFFLE(getType(ctx, I.getType()), dst, src0, src1);
>              break;
>            }
> +          case GEN_OCL_DEBUGWAIT:
> +          {
> +            ctx.WAIT();
> +            break;
> +          }
>            default: break;
>          }
>        }
> diff --git a/backend/src/llvm/llvm_gen_ocl_function.hxx
> b/backend/src/llvm/llvm_gen_ocl_function.hxx
> index 65bf0c1..d0e3614 100644
> --- a/backend/src/llvm/llvm_gen_ocl_function.hxx
> +++ b/backend/src/llvm/llvm_gen_ocl_function.hxx
> @@ -177,3 +177,6 @@ DECL_LLVM_GEN_FUNCTION(PRINTF, __gen_ocl_printf)
>  DECL_LLVM_GEN_FUNCTION(CALC_TIMESTAMP, __gen_ocl_calc_timestamp)
>  // store profiling info to the mem.
>  DECL_LLVM_GEN_FUNCTION(STORE_PROFILING, __gen_ocl_store_profiling)
> +
> +// debug wait function
> +DECL_LLVM_GEN_FUNCTION(DEBUGWAIT, __gen_ocl_debugwait)
> --
> 2.1.4
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list