[Beignet] [PATCH v3 2/4] Add constant pointer as argument support in kernel.

Zhigang Gong zhigang.gong at linux.intel.com
Mon Apr 22 02:21:44 PDT 2013


On Mon, Apr 22, 2013 at 01:11:50PM +0800, Yang Rong wrote:
Two minor problem, please check it as below. No need for v4 patch.
I will fix them. Thanks for the patchset.

> 
> Signed-off-by: Yang Rong <rong.r.yang at intel.com>
> ---
>  backend/src/backend/context.cpp                    |   25 ++++++++-
>  backend/src/backend/context.hpp                    |    2 +
>  backend/src/backend/gen_context.cpp                |   28 +++++++++-
>  backend/src/backend/gen_context.hpp                |    1 +
>  .../src/backend/gen_insn_gen7_schedule_info.hxx    |    1 +
>  backend/src/backend/gen_insn_selection.cpp         |   58 ++++++++++++++++++--
>  backend/src/backend/gen_insn_selection.hxx         |    1 +
>  backend/src/backend/gen_program.cpp                |    5 +-
>  backend/src/backend/gen_register.hpp               |   10 ++++
>  backend/src/backend/program.cpp                    |   13 ++++-
>  backend/src/backend/program.h                      |    7 ++-
>  backend/src/backend/program.hpp                    |   15 ++++-
>  12 files changed, 152 insertions(+), 14 deletions(-)
> 
> diff --git a/backend/src/backend/context.cpp b/backend/src/backend/context.cpp
> index c3ddb59..4e16fc0 100644
> --- a/backend/src/backend/context.cpp
> +++ b/backend/src/backend/context.cpp
> @@ -53,7 +53,7 @@ namespace gbe
>       *  the hardware. Note that we always use the left most block when
>       *  allocating, so it makes sense for constant pushing
>       */
> -    int16_t allocate(int16_t size, int16_t alignment, bool bFwd=0);
> +    int16_t allocate(int16_t size, int16_t alignment, bool bFwd=false);
>  
>      /*! Free the given register file piece */
>      void deallocate(int16_t offset);
> @@ -299,6 +299,8 @@ namespace gbe
>        GBE_DELETE(this->kernel);
>        this->kernel = NULL;
>      }
> +    if(this->kernel != NULL)
> +      this->kernel->cxt = this;
>      return this->kernel;
>    }
>  
> @@ -308,6 +310,27 @@ namespace gbe
>  
>    void Context::deallocate(int16_t offset) { partitioner->deallocate(offset); }
>  
> +  int32_t Context::allocConstBuf(uint32_t argID) {
> +     GBE_ASSERT(kernel->args[argID].type == GBE_ARG_CONSTANT_PTR);
> +
> +    //free previous
> +    int32_t offset = kernel->getCurbeOffset(GBE_CURBE_EXTRA_ARGUMENT, argID+GBE_CONSTANT_BUFFER);
> +    if(offset >= 0)
> +        deallocate(offset+GEN_REG_SIZE);
> +
> +    if(kernel->args[argID].bufSize > 0) {
> +      //use 32 alignment here as GEN_REG_SIZE, need dynamic by type?
> +      newCurbeEntry(GBE_CURBE_EXTRA_ARGUMENT, GBE_CONSTANT_BUFFER+argID, kernel->args[argID].bufSize, 32);
> +    }
> +
> +    std::sort(kernel->patches.begin(), kernel->patches.end());
> +    offset = kernel->getCurbeOffset(GBE_CURBE_EXTRA_ARGUMENT, argID+GBE_CONSTANT_BUFFER);
> +    GBE_ASSERT(offset>=0);
> +
> +    kernel->curbeSize = ALIGN(kernel->curbeSize, GEN_REG_SIZE);
> +    return offset + GEN_REG_SIZE;
> +  }
> +
>    void Context::buildStack(void) {
>      const auto &stackUse = dag->getUse(ir::ocl::stackptr);
>      if (stackUse.size() == 0)  // no stack is used if stackptr is unused
> diff --git a/backend/src/backend/context.hpp b/backend/src/backend/context.hpp
> index 55a63a7..245ad01 100644
> --- a/backend/src/backend/context.hpp
> +++ b/backend/src/backend/context.hpp
> @@ -86,6 +86,8 @@ namespace gbe
>      int16_t allocate(int16_t size, int16_t alignment);
>      /*! Deallocate previously allocated memory */
>      void deallocate(int16_t offset);
> +    /* allocate curbe for constant ptr argument */
> +    int32_t allocConstBuf(uint32_t argID);
>    protected:
>      /*! Build the instruction stream. Return false if failed */
>      virtual bool emitCode(void) = 0;
> diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
> index b3d385b..1f867b8 100644
> --- a/backend/src/backend/gen_context.cpp
> +++ b/backend/src/backend/gen_context.cpp
> @@ -144,7 +144,7 @@ namespace gbe
>      }
>    }
>  
> -  void GenContext::emitBinaryInstruction(const SelectionInstruction &insn) { 
> +  void GenContext::emitBinaryInstruction(const SelectionInstruction &insn) {
>      const GenRegister dst = ra->genReg(insn.dst(0));
>      const GenRegister src0 = ra->genReg(insn.src(0));
>      const GenRegister src1 = ra->genReg(insn.src(1));
> @@ -212,6 +212,32 @@ namespace gbe
>      }
>    }
>  
> +  void GenContext::emitCBMoveInstruction(const SelectionInstruction &insn) {
> +    const GenRegister src = GenRegister::unpacked_uw(ra->genReg(insn.src(0)).nr, 0);
> +    const GenRegister dst = ra->genReg(insn.dst(0));
> +    const GenRegister a0 = GenRegister::addr8(0);
> +    uint32_t simdWidth = p->curr.execWidth;
> +
> +    p->push();
> +      p->curr.execWidth = 8;
> +      p->curr.quarterControl = GEN_COMPRESSION_Q1;
> +      p->MOV(a0, src);
> +      p->MOV(dst, GenRegister::indirect(dst.type, 0, GEN_WIDTH_8));
> +    p->pop();
> +
> +    if (simdWidth == 16) {
> +      p->push();
> +        p->curr.execWidth = 8;
> +        p->curr.quarterControl = GEN_COMPRESSION_Q2;
> +
> +        const GenRegister nextDst = GenRegister::Qn(dst, 1);
> +        const GenRegister nextSrc = GenRegister::Qn(src, 1);
> +        p->MOV(a0, nextSrc);
> +        p->MOV(nextDst, GenRegister::indirect(dst.type, 0, GEN_WIDTH_8));
> +      p->pop();
> +    }
> +  }
> +
>    void GenContext::emitJumpInstruction(const SelectionInstruction &insn) {
>      const ir::LabelIndex label(insn.index);
>      const GenRegister src = ra->genReg(insn.src(0));
> diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp
> index 6af174f..33258f8 100644
> --- a/backend/src/backend/gen_context.hpp
> +++ b/backend/src/backend/gen_context.hpp
> @@ -80,6 +80,7 @@ namespace gbe
>      void emitTernaryInstruction(const SelectionInstruction &insn);
>      void emitCompareInstruction(const SelectionInstruction &insn);
>      void emitJumpInstruction(const SelectionInstruction &insn);
> +    void emitCBMoveInstruction(const SelectionInstruction &insn);
>      void emitEotInstruction(const SelectionInstruction &insn);
>      void emitNoOpInstruction(const SelectionInstruction &insn);
>      void emitWaitInstruction(const SelectionInstruction &insn);
> diff --git a/backend/src/backend/gen_insn_gen7_schedule_info.hxx b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
> index 969ec82..ce8769f 100644
> --- a/backend/src/backend/gen_insn_gen7_schedule_info.hxx
> +++ b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
> @@ -5,6 +5,7 @@ DECL_GEN7_SCHEDULE(Binary,          20,        4,        2)
>  DECL_GEN7_SCHEDULE(Ternary,         20,        4,        2)
>  DECL_GEN7_SCHEDULE(Compare,         20,        4,        2)
>  DECL_GEN7_SCHEDULE(Jump,            14,        1,        1)
> +DECL_GEN7_SCHEDULE(CBMove,          20,        2,        2)
>  DECL_GEN7_SCHEDULE(Eot,             20,        1,        1)
>  DECL_GEN7_SCHEDULE(NoOp,            20,        2,        2)
>  DECL_GEN7_SCHEDULE(Wait,            20,        2,        2)
> diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
> index e0e8920..34aba5b 100644
> --- a/backend/src/backend/gen_insn_selection.cpp
> +++ b/backend/src/backend/gen_insn_selection.cpp
> @@ -1,4 +1,4 @@
> -/* 
> +/*
>   * Copyright © 2012 Intel Corporation
>   *
>   * This library is free software; you can redistribute it and/or
> @@ -25,7 +25,7 @@
>  /* This is the instruction selection code. First of all, this is a bunch of c++
>   * crap. Sorry if this is not that readable. Anyway, the goal here is to take
>   * GenIR code (i.e. the very regular, very RISC IR) and to produce GenISA with
> - * virtual registers (i.e. regular GenIR registers). 
> + * virtual registers (i.e. regular GenIR registers).
>   *
>   * Overall idea:
>   * =============
> @@ -72,7 +72,7 @@
>   * *same* flag register for the predicates (used for masking) and the
>   * conditional modifier (used as a destination for CMP). This leads to extra
>   * complications with compare instructions and select instructions. Basically,
> - * we need to insert extra MOVs. 
> + * we need to insert extra MOVs.
>   *
>   * Also, there is some extra kludge to handle the predicates for JMPI.
>   *
> @@ -439,6 +439,8 @@ namespace gbe
>      void CMP(uint32_t conditional, Reg src0, Reg src1);
>      /*! Select instruction with embedded comparison */
>      void SEL_CMP(uint32_t conditional, Reg dst, Reg src0, Reg src1);
> +    /* Constant buffer move instruction */
> +    void CB_MOVE(Reg dst, Reg src);
>      /*! EOT is used to finish GPGPU threads */
>      void EOT(void);
>      /*! No-op */
> @@ -481,7 +483,7 @@ namespace gbe
>    static void markAllChildren(SelectionDAG &dag) {
>      // Do not merge anything, so all sources become roots
>      for (uint32_t childID = 0; childID < dag.childNum; ++childID)
> -      if (dag.child[childID]) 
> +      if (dag.child[childID])
>          dag.child[childID]->isRoot = 1;
>    }
>  
> @@ -698,6 +700,11 @@ namespace gbe
>      insn->src(1) = src1;
>      insn->extra.function = conditional;
>    }
> +  void Selection::Opaque::CB_MOVE(Reg dst, Reg src) {
> +    SelectionInstruction *insn = this->appendInsn(SEL_OP_CB_MOVE, 1, 1);
> +    insn->dst(0) = dst;
> +    insn->src(0) = src;
> +  }
>  
>    void Selection::Opaque::EOT(void) { this->appendInsn(SEL_OP_EOT, 0, 0); }
>    void Selection::Opaque::NOP(void) { this->appendInsn(SEL_OP_NOP, 0, 0); }
> @@ -1057,7 +1064,7 @@ namespace gbe
>    // Implementation of all patterns
>    ///////////////////////////////////////////////////////////////////////////
>  
> -  GenRegister getRegisterFromImmediate(ir::Immediate imm) 
> +  GenRegister getRegisterFromImmediate(ir::Immediate imm)
>    {
>      using namespace ir;
>      switch (imm.type) {
> @@ -1654,15 +1661,54 @@ namespace gbe
>          sel.MOV(GenRegister::retype(value, GEN_TYPE_UB), GenRegister::unpacked_ub(dst));
>      }
>  
> +    void emitCBMove(Selection::Opaque &sel,
> +                         const ir::LoadInstruction &insn,
> +                         GenRegister address) const
> +    {
> +      using namespace ir;
> +      GBE_ASSERT(insn.getValueNum() == 1);   //todo: handle vec later
> +
> +      const GenRegister dst = sel.selReg(insn.getValue(0), insn.getValueType());
> +      const GenRegister src = address;
> +      sel.CB_MOVE(dst, src);
> +
> +      //need to implement adress register's schedule if use ia move here
> +      /*
> +      const GenRegister a0 = GenRegister::addr8(0);
> +      const uint32_t simdWidth = sel.curr.execWidth;
> +
> +      sel.push();
> +        sel.curr.execWidth = 8;
> +        sel.curr.quarterControl = GEN_COMPRESSION_Q1;
> +        sel.MOV(a0, src);
> +        sel.MOV(dst, GenRegister::indirect(dst.type, 0, GEN_WIDTH_8));
> +      sel.pop();
> +
> +      if (simdWidth == 16) {
> +        sel.push();
> +          sel.curr.execWidth = 8;
> +          sel.curr.quarterControl = GEN_COMPRESSION_Q2;
> +
> +          const GenRegister nextDst = GenRegister::Qn(dst, 1);
> +          const GenRegister nextSrc = GenRegister::Qn(src, 1);
> +          sel.MOV(a0, nextSrc);
> +          sel.MOV(nextDst, GenRegister::indirect(dst.type, 0, GEN_WIDTH_8));
> +        sel.pop();
> +        } */
       The above code should be removed as you already commentted them out.
> +    }
> +
>      INLINE bool emitOne(Selection::Opaque &sel, const ir::LoadInstruction &insn) const {
>        using namespace ir;
>        const GenRegister address = sel.selReg(insn.getAddress());
>        const AddressSpace space = insn.getAddressSpace();
>        GBE_ASSERT(insn.getAddressSpace() == MEM_GLOBAL ||
> +                 insn.getAddressSpace() == MEM_CONSTANT ||
>                   insn.getAddressSpace() == MEM_PRIVATE ||
>                   insn.getAddressSpace() == MEM_LOCAL);
>        GBE_ASSERT(sel.ctx.isScalarReg(insn.getValue(0)) == false);
> -      if (insn.isAligned() == true)
> +      if (insn.getAddressSpace() == MEM_CONSTANT)
> +        this->emitCBMove(sel, insn, address);
> +      else if (insn.isAligned() == true)
>          this->emitUntypedRead(sel, insn, address, space == MEM_LOCAL ? 0xfe : 0x00);
>        else {
>          const GenRegister value = sel.selReg(insn.getValue(0));
> diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx
> index 691100b..f89ad4c 100644
> --- a/backend/src/backend/gen_insn_selection.hxx
> +++ b/backend/src/backend/gen_insn_selection.hxx
> @@ -24,6 +24,7 @@ DECL_SELECTION_IR(SEL_CMP, CompareInstruction)
>  DECL_SELECTION_IR(MAD, TernaryInstruction)
>  DECL_SELECTION_IR(JMPI, JumpInstruction)
>  DECL_SELECTION_IR(EOT, EotInstruction)
> +DECL_SELECTION_IR(CB_MOVE, CBMoveInstruction)
>  DECL_SELECTION_IR(NOP, NoOpInstruction)
>  DECL_SELECTION_IR(WAIT, WaitInstruction)
>  DECL_SELECTION_IR(MATH, MathInstruction)
> diff --git a/backend/src/backend/gen_program.cpp b/backend/src/backend/gen_program.cpp
> index 8a7efdb..3d7bedd 100644
> --- a/backend/src/backend/gen_program.cpp
> +++ b/backend/src/backend/gen_program.cpp
> @@ -76,9 +76,10 @@ namespace gbe {
>        unit.getFunction(name)->setSimdWidth(simdWidth);
>        Context *ctx = GBE_NEW(GenContext, unit, name, limitRegisterPressure);
>        kernel = ctx->compileKernel();
> -      GBE_DELETE(ctx);
> -      if (kernel != NULL)
> +      if (kernel != NULL) {
>          break;
> +      }
> +      GBE_DELETE(ctx);
>      }
>  
>      // XXX spill must be implemented
> diff --git a/backend/src/backend/gen_register.hpp b/backend/src/backend/gen_register.hpp
> index 92122a6..d772b0d 100644
> --- a/backend/src/backend/gen_register.hpp
> +++ b/backend/src/backend/gen_register.hpp
> @@ -725,6 +725,16 @@ namespace gbe
>        return ub16(GEN_GENERAL_REGISTER_FILE, nr, subnr);
>      }
>  
> +    static INLINE GenRegister unpacked_uw(uint32_t nr, uint32_t subnr) {
> +      return GenRegister(GEN_GENERAL_REGISTER_FILE,
> +                         nr,
> +                         subnr,
> +                         GEN_TYPE_UW,
> +                         GEN_VERTICAL_STRIDE_16,
> +                         GEN_WIDTH_8,
> +                         GEN_HORIZONTAL_STRIDE_2);
> +    }
> +
>      static INLINE GenRegister mask(uint32_t subnr) {
>        return uw1(GEN_ARCHITECTURE_REGISTER_FILE, GEN_ARF_MASK, subnr);
>      }
> diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp
> index d33c533..4943a5d 100644
> --- a/backend/src/backend/program.cpp
> +++ b/backend/src/backend/program.cpp
> @@ -1,4 +1,4 @@
> -/* 
> +/*
>   * Copyright © 2012 Intel Corporation
>   *
>   * This library is free software; you can redistribute it and/or
> @@ -49,9 +49,10 @@
>  namespace gbe {
>  
>    Kernel::Kernel(const std::string &name) :
> -    name(name), args(NULL), argNum(0), curbeSize(0), stackSize(0), useSLM(false)
> +    name(name), args(NULL), argNum(0), curbeSize(0), stackSize(0), useSLM(false), cxt(NULL)
>    {}
>    Kernel::~Kernel(void) {
> +    if(cxt) GBE_DELETE(cxt);
>      GBE_SAFE_DELETE_ARRAY(args);
>    }
>    int32_t Kernel::getCurbeOffset(gbe_curbe_type type, uint32_t subType) const {
> @@ -229,6 +230,12 @@ namespace gbe {
>      return kernel->getUseSLM() ? 1 : 0;
>    }
>  
> +  static int32_t kernelSetConstBufSize(gbe_kernel genKernel, uint32_t argID, size_t sz) {
> +    if (genKernel == NULL) return -1;
> +    gbe::Kernel *kernel = (gbe::Kernel*) genKernel;
> +    return kernel->setConstBufSize(argID, sz);
> +  }
> +
>    static uint32_t kernelGetRequiredWorkGroupSize(gbe_kernel kernel, uint32_t dim) {
>      return 0u;
>    }
> @@ -251,6 +258,7 @@ GBE_EXPORT_SYMBOL gbe_kernel_get_simd_width_cb *gbe_kernel_get_simd_width = NULL
>  GBE_EXPORT_SYMBOL gbe_kernel_get_curbe_offset_cb *gbe_kernel_get_curbe_offset = NULL;
>  GBE_EXPORT_SYMBOL gbe_kernel_get_curbe_size_cb *gbe_kernel_get_curbe_size = NULL;
>  GBE_EXPORT_SYMBOL gbe_kernel_get_stack_size_cb *gbe_kernel_get_stack_size = NULL;
> +GBE_EXPORT_SYMBOL gbe_kernel_set_const_buffer_size_cb *gbe_kernel_set_const_buffer_size = NULL;
>  GBE_EXPORT_SYMBOL gbe_kernel_get_required_work_group_size_cb *gbe_kernel_get_required_work_group_size = NULL;
>  GBE_EXPORT_SYMBOL gbe_kernel_use_slm_cb *gbe_kernel_use_slm = NULL;
>  
> @@ -275,6 +283,7 @@ namespace gbe
>        gbe_kernel_get_curbe_offset = gbe::kernelGetCurbeOffset;
>        gbe_kernel_get_curbe_size = gbe::kernelGetCurbeSize;
>        gbe_kernel_get_stack_size = gbe::kernelGetStackSize;
> +      gbe_kernel_set_const_buffer_size = gbe::kernelSetConstBufSize;
>        gbe_kernel_get_required_work_group_size = gbe::kernelGetRequiredWorkGroupSize;
>        gbe_kernel_use_slm = gbe::kernelUseSLM;
>        genSetupCallBacks();
> diff --git a/backend/src/backend/program.h b/backend/src/backend/program.h
> index b90c1df..4273a77 100644
> --- a/backend/src/backend/program.h
> +++ b/backend/src/backend/program.h
> @@ -81,7 +81,8 @@ enum gbe_curbe_type {
>  
>  /*! Extra arguments use the negative range of sub-values */
>  enum gbe_extra_argument {
> -  GBE_STACK_BUFFER = 0 /* Give stack location in curbe */
> +  GBE_STACK_BUFFER = 0,   /* Give stack location in curbe */
> +  GBE_CONSTANT_BUFFER = 1 /* constant buffer argument location in curbe */
>  };
>  
>  /*! Create a new program from the given source code (zero terminated string) */
> @@ -159,6 +160,10 @@ extern gbe_kernel_get_stack_size_cb *gbe_kernel_get_stack_size;
>  typedef int32_t (gbe_kernel_get_curbe_offset_cb)(gbe_kernel, enum gbe_curbe_type type, uint32_t sub_type);
>  extern gbe_kernel_get_curbe_offset_cb *gbe_kernel_get_curbe_offset;
>  
> +/*! Set the constant pointer arg size and return the cb offset in curbe */
> +typedef int32_t (gbe_kernel_set_const_buffer_size_cb)(gbe_kernel, uint32_t argID, size_t sz);
> +extern gbe_kernel_set_const_buffer_size_cb *gbe_kernel_set_const_buffer_size;
> +
>  /*! Indicates if a work group size is required. Return the required width or 0
>   *  if none
>   */
> diff --git a/backend/src/backend/program.hpp b/backend/src/backend/program.hpp
> index e0f7dba..334c1b2 100644
> --- a/backend/src/backend/program.hpp
> +++ b/backend/src/backend/program.hpp
> @@ -1,4 +1,4 @@
> -/* 
> +/*
>   * Copyright © 2012 Intel Corporation
>   *
>   * This library is free software; you can redistribute it and/or
> @@ -26,6 +26,7 @@
>  #define __GBE_PROGRAM_HPP__
>  
>  #include "backend/program.h"
> +#include "backend/context.hpp"
>  #include "sys/hash_map.hpp"
>  #include "sys/vector.hpp"
>  #include <string>
> @@ -42,6 +43,7 @@ namespace gbe {
>    struct KernelArgument {
>      gbe_arg_type type; //!< Pointer, structure, image, regular value?
>      uint32_t size;     //!< Size of the argument
> +    uint32_t bufSize;  //!< Contant buffer size
>    };
>  
>    /*! Stores the offset where to patch where to patch */
> @@ -94,6 +96,16 @@ namespace gbe {
>      INLINE uint32_t getSIMDWidth(void) const { return this->simdWidth; }
>      /*! Says if SLM is needed for it */
>      INLINE bool getUseSLM(void) const { return this->useSLM; }
> +    /*! set constant buffer size and return the cb curbe offset */
> +    int32_t setConstBufSize(uint32_t argID, size_t sz) {
> +      if(argID >= argNum) return -1;
> +      if(args[argID].type != GBE_ARG_CONSTANT_PTR) return -1;
> +      if(args[argID].bufSize != sz) {
> +        args[argID].bufSize = sz;
> +        return cxt->allocConstBuf(argID);
> +      }
> +      return -1;
> +    }
>    protected:
>      friend class Context;      //!< Owns the kernels
>      const std::string name;    //!< Kernel name
> @@ -104,6 +116,7 @@ namespace gbe {
>      uint32_t simdWidth;        //!< SIMD size for the kernel (lane number)
>      uint32_t stackSize;        //!< Stack size (may be 0 if unused)
>      bool useSLM;               //!< SLM requires a special HW config
> +    Context *cxt;              //!< Save cxt after compiler to alloc constant buffer curbe
       Seems a typo here. should be ctx, right?
>      GBE_CLASS(Kernel);         //!< Use custom allocators
>    };
>  
> -- 
> 1.7.9.5
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list