[Beignet] [PATCH v3 2/4] Add constant pointer as argument support in kernel.

Yang Rong rong.r.yang at intel.com
Sun Apr 21 22:11:50 PDT 2013


Signed-off-by: Yang Rong <rong.r.yang at intel.com>
---
 backend/src/backend/context.cpp                    |   25 ++++++++-
 backend/src/backend/context.hpp                    |    2 +
 backend/src/backend/gen_context.cpp                |   28 +++++++++-
 backend/src/backend/gen_context.hpp                |    1 +
 .../src/backend/gen_insn_gen7_schedule_info.hxx    |    1 +
 backend/src/backend/gen_insn_selection.cpp         |   58 ++++++++++++++++++--
 backend/src/backend/gen_insn_selection.hxx         |    1 +
 backend/src/backend/gen_program.cpp                |    5 +-
 backend/src/backend/gen_register.hpp               |   10 ++++
 backend/src/backend/program.cpp                    |   13 ++++-
 backend/src/backend/program.h                      |    7 ++-
 backend/src/backend/program.hpp                    |   15 ++++-
 12 files changed, 152 insertions(+), 14 deletions(-)

diff --git a/backend/src/backend/context.cpp b/backend/src/backend/context.cpp
index c3ddb59..4e16fc0 100644
--- a/backend/src/backend/context.cpp
+++ b/backend/src/backend/context.cpp
@@ -53,7 +53,7 @@ namespace gbe
      *  the hardware. Note that we always use the left most block when
      *  allocating, so it makes sense for constant pushing
      */
-    int16_t allocate(int16_t size, int16_t alignment, bool bFwd=0);
+    int16_t allocate(int16_t size, int16_t alignment, bool bFwd=false);
 
     /*! Free the given register file piece */
     void deallocate(int16_t offset);
@@ -299,6 +299,8 @@ namespace gbe
       GBE_DELETE(this->kernel);
       this->kernel = NULL;
     }
+    if(this->kernel != NULL)
+      this->kernel->cxt = this;
     return this->kernel;
   }
 
@@ -308,6 +310,27 @@ namespace gbe
 
   void Context::deallocate(int16_t offset) { partitioner->deallocate(offset); }
 
+  int32_t Context::allocConstBuf(uint32_t argID) { // (re)allocate curbe space for constant pointer argument argID
+    GBE_ASSERT(kernel->args[argID].type == GBE_ARG_CONSTANT_PTR);
+    const uint32_t subType = argID + GBE_CONSTANT_BUFFER; // curbe sub-type used for this argument
+    // Release the space of a previous allocation, if any. NOTE(review): the old
+    // entry is not removed from kernel->patches here -- confirm the lookup below
+    // cannot return it once a new entry with the same sub-type has been added.
+    int32_t offset = kernel->getCurbeOffset(GBE_CURBE_EXTRA_ARGUMENT, subType);
+    if(offset >= 0)
+      deallocate(offset+GEN_REG_SIZE);
+    if(kernel->args[argID].bufSize > 0) {
+      //use 32 alignment here as GEN_REG_SIZE, need dynamic by type?
+      newCurbeEntry(GBE_CURBE_EXTRA_ARGUMENT, subType, kernel->args[argID].bufSize, 32);
+    }
+    std::sort(kernel->patches.begin(), kernel->patches.end()); // presumably the lookup needs sorted patches -- TODO confirm
+    offset = kernel->getCurbeOffset(GBE_CURBE_EXTRA_ARGUMENT, subType);
+    GBE_ASSERT(offset>=0);
+    kernel->curbeSize = ALIGN(kernel->curbeSize, GEN_REG_SIZE);
+    // Without an entry report -1 rather than -1+GEN_REG_SIZE (matters if GBE_ASSERT is a no-op)
+    return offset < 0 ? -1 : offset + GEN_REG_SIZE;
+  }
+
   void Context::buildStack(void) {
     const auto &stackUse = dag->getUse(ir::ocl::stackptr);
     if (stackUse.size() == 0)  // no stack is used if stackptr is unused
diff --git a/backend/src/backend/context.hpp b/backend/src/backend/context.hpp
index 55a63a7..245ad01 100644
--- a/backend/src/backend/context.hpp
+++ b/backend/src/backend/context.hpp
@@ -86,6 +86,8 @@ namespace gbe
     int16_t allocate(int16_t size, int16_t alignment);
     /*! Deallocate previously allocated memory */
     void deallocate(int16_t offset);
+    /* allocate curbe for constant ptr argument */
+    int32_t allocConstBuf(uint32_t argID);
   protected:
     /*! Build the instruction stream. Return false if failed */
     virtual bool emitCode(void) = 0;
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index b3d385b..1f867b8 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -144,7 +144,7 @@ namespace gbe
     }
   }
 
-  void GenContext::emitBinaryInstruction(const SelectionInstruction &insn) { 
+  void GenContext::emitBinaryInstruction(const SelectionInstruction &insn) {
     const GenRegister dst = ra->genReg(insn.dst(0));
     const GenRegister src0 = ra->genReg(insn.src(0));
     const GenRegister src1 = ra->genReg(insn.src(1));
@@ -212,6 +212,32 @@ namespace gbe
     }
   }
 
+  void GenContext::emitCBMoveInstruction(const SelectionInstruction &insn) { // emits the MOV pairs for a CB_MOVE selection instruction
+    const GenRegister src = GenRegister::unpacked_uw(ra->genReg(insn.src(0)).nr, 0); // uw view of the register allocated to src(0), sub-register 0
+    const GenRegister dst = ra->genReg(insn.dst(0));
+    const GenRegister a0 = GenRegister::addr8(0); // presumably the address register a0 -- TODO confirm
+    uint32_t simdWidth = p->curr.execWidth; // captured before execWidth is overridden below
+
+    p->push();
+      p->curr.execWidth = 8; // first group: 8 lanes, quarter Q1
+      p->curr.quarterControl = GEN_COMPRESSION_Q1;
+      p->MOV(a0, src); // copy the uw values of src into a0
+      p->MOV(dst, GenRegister::indirect(dst.type, 0, GEN_WIDTH_8)); // presumably an indirect read through a0 -- TODO confirm
+    p->pop();
+
+    if (simdWidth == 16) { // a 16-wide kernel needs the same two MOVs for the second group
+      p->push();
+        p->curr.execWidth = 8;
+        p->curr.quarterControl = GEN_COMPRESSION_Q2;
+
+        const GenRegister nextDst = GenRegister::Qn(dst, 1); // Qn(reg, 1): same registers as above, second quarter
+        const GenRegister nextSrc = GenRegister::Qn(src, 1);
+        p->MOV(a0, nextSrc);
+        p->MOV(nextDst, GenRegister::indirect(dst.type, 0, GEN_WIDTH_8));
+      p->pop();
+    }
+  }
+
   void GenContext::emitJumpInstruction(const SelectionInstruction &insn) {
     const ir::LabelIndex label(insn.index);
     const GenRegister src = ra->genReg(insn.src(0));
diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp
index 6af174f..33258f8 100644
--- a/backend/src/backend/gen_context.hpp
+++ b/backend/src/backend/gen_context.hpp
@@ -80,6 +80,7 @@ namespace gbe
     void emitTernaryInstruction(const SelectionInstruction &insn);
     void emitCompareInstruction(const SelectionInstruction &insn);
     void emitJumpInstruction(const SelectionInstruction &insn);
+    void emitCBMoveInstruction(const SelectionInstruction &insn);
     void emitEotInstruction(const SelectionInstruction &insn);
     void emitNoOpInstruction(const SelectionInstruction &insn);
     void emitWaitInstruction(const SelectionInstruction &insn);
diff --git a/backend/src/backend/gen_insn_gen7_schedule_info.hxx b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
index 969ec82..ce8769f 100644
--- a/backend/src/backend/gen_insn_gen7_schedule_info.hxx
+++ b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
@@ -5,6 +5,7 @@ DECL_GEN7_SCHEDULE(Binary,          20,        4,        2)
 DECL_GEN7_SCHEDULE(Ternary,         20,        4,        2)
 DECL_GEN7_SCHEDULE(Compare,         20,        4,        2)
 DECL_GEN7_SCHEDULE(Jump,            14,        1,        1)
+DECL_GEN7_SCHEDULE(CBMove,          20,        2,        2)
 DECL_GEN7_SCHEDULE(Eot,             20,        1,        1)
 DECL_GEN7_SCHEDULE(NoOp,            20,        2,        2)
 DECL_GEN7_SCHEDULE(Wait,            20,        2,        2)
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index e0e8920..34aba5b 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -1,4 +1,4 @@
-/* 
+/*
  * Copyright © 2012 Intel Corporation
  *
  * This library is free software; you can redistribute it and/or
@@ -25,7 +25,7 @@
 /* This is the instruction selection code. First of all, this is a bunch of c++
  * crap. Sorry if this is not that readable. Anyway, the goal here is to take
  * GenIR code (i.e. the very regular, very RISC IR) and to produce GenISA with
- * virtual registers (i.e. regular GenIR registers). 
+ * virtual registers (i.e. regular GenIR registers).
  *
  * Overall idea:
  * =============
@@ -72,7 +72,7 @@
  * *same* flag register for the predicates (used for masking) and the
  * conditional modifier (used as a destination for CMP). This leads to extra
  * complications with compare instructions and select instructions. Basically,
- * we need to insert extra MOVs. 
+ * we need to insert extra MOVs.
  *
  * Also, there is some extra kludge to handle the predicates for JMPI.
  *
@@ -439,6 +439,8 @@ namespace gbe
     void CMP(uint32_t conditional, Reg src0, Reg src1);
     /*! Select instruction with embedded comparison */
     void SEL_CMP(uint32_t conditional, Reg dst, Reg src0, Reg src1);
+    /* Constant buffer move instruction */
+    void CB_MOVE(Reg dst, Reg src);
     /*! EOT is used to finish GPGPU threads */
     void EOT(void);
     /*! No-op */
@@ -481,7 +483,7 @@ namespace gbe
   static void markAllChildren(SelectionDAG &dag) {
     // Do not merge anything, so all sources become roots
     for (uint32_t childID = 0; childID < dag.childNum; ++childID)
-      if (dag.child[childID]) 
+      if (dag.child[childID])
         dag.child[childID]->isRoot = 1;
   }
 
@@ -698,6 +700,11 @@ namespace gbe
     insn->src(1) = src1;
     insn->extra.function = conditional;
   }
+  void Selection::Opaque::CB_MOVE(Reg dst, Reg src) { // append a SEL_OP_CB_MOVE with one dst and one src
+    SelectionInstruction *const cbMove = this->appendInsn(SEL_OP_CB_MOVE, 1, 1);
+    cbMove->dst(0) = dst;
+    cbMove->src(0) = src;
+  }
 
   void Selection::Opaque::EOT(void) { this->appendInsn(SEL_OP_EOT, 0, 0); }
   void Selection::Opaque::NOP(void) { this->appendInsn(SEL_OP_NOP, 0, 0); }
@@ -1057,7 +1064,7 @@ namespace gbe
   // Implementation of all patterns
   ///////////////////////////////////////////////////////////////////////////
 
-  GenRegister getRegisterFromImmediate(ir::Immediate imm) 
+  GenRegister getRegisterFromImmediate(ir::Immediate imm)
   {
     using namespace ir;
     switch (imm.type) {
@@ -1654,15 +1661,54 @@ namespace gbe
         sel.MOV(GenRegister::retype(value, GEN_TYPE_UB), GenRegister::unpacked_ub(dst));
     }
 
+    void emitCBMove(Selection::Opaque &sel,
+                    const ir::LoadInstruction &insn,
+                    GenRegister address) const
+    {
+      using namespace ir;
+      // Loads from the constant buffer: only one value per load so far (todo: handle vec later)
+      GBE_ASSERT(insn.getValueNum() == 1);
+      const GenRegister value = sel.selReg(insn.getValue(0), insn.getValueType());
+      sel.CB_MOVE(value, address);
+
+      // The ia (indirect address) moves could be selected right here, as sketched
+      // below, but that first needs scheduling support for the address register.
+      /*
+      const GenRegister a0 = GenRegister::addr8(0);
+      const uint32_t simdWidth = sel.curr.execWidth;
+
+      sel.push();
+        sel.curr.execWidth = 8;
+        sel.curr.quarterControl = GEN_COMPRESSION_Q1;
+        sel.MOV(a0, address);
+        sel.MOV(value, GenRegister::indirect(value.type, 0, GEN_WIDTH_8));
+      sel.pop();
+
+      if (simdWidth == 16) {
+        sel.push();
+          sel.curr.execWidth = 8;
+          sel.curr.quarterControl = GEN_COMPRESSION_Q2;
+
+          const GenRegister nextValue = GenRegister::Qn(value, 1);
+          const GenRegister nextAddress = GenRegister::Qn(address, 1);
+          sel.MOV(a0, nextAddress);
+          sel.MOV(nextValue, GenRegister::indirect(value.type, 0, GEN_WIDTH_8));
+        sel.pop();
+        } */
+    }
+
     INLINE bool emitOne(Selection::Opaque &sel, const ir::LoadInstruction &insn) const {
       using namespace ir;
       const GenRegister address = sel.selReg(insn.getAddress());
       const AddressSpace space = insn.getAddressSpace();
       GBE_ASSERT(insn.getAddressSpace() == MEM_GLOBAL ||
+                 insn.getAddressSpace() == MEM_CONSTANT ||
                  insn.getAddressSpace() == MEM_PRIVATE ||
                  insn.getAddressSpace() == MEM_LOCAL);
       GBE_ASSERT(sel.ctx.isScalarReg(insn.getValue(0)) == false);
-      if (insn.isAligned() == true)
+      if (insn.getAddressSpace() == MEM_CONSTANT)
+        this->emitCBMove(sel, insn, address);
+      else if (insn.isAligned() == true)
         this->emitUntypedRead(sel, insn, address, space == MEM_LOCAL ? 0xfe : 0x00);
       else {
         const GenRegister value = sel.selReg(insn.getValue(0));
diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx
index 691100b..f89ad4c 100644
--- a/backend/src/backend/gen_insn_selection.hxx
+++ b/backend/src/backend/gen_insn_selection.hxx
@@ -24,6 +24,7 @@ DECL_SELECTION_IR(SEL_CMP, CompareInstruction)
 DECL_SELECTION_IR(MAD, TernaryInstruction)
 DECL_SELECTION_IR(JMPI, JumpInstruction)
 DECL_SELECTION_IR(EOT, EotInstruction)
+DECL_SELECTION_IR(CB_MOVE, CBMoveInstruction)
 DECL_SELECTION_IR(NOP, NoOpInstruction)
 DECL_SELECTION_IR(WAIT, WaitInstruction)
 DECL_SELECTION_IR(MATH, MathInstruction)
diff --git a/backend/src/backend/gen_program.cpp b/backend/src/backend/gen_program.cpp
index 8a7efdb..3d7bedd 100644
--- a/backend/src/backend/gen_program.cpp
+++ b/backend/src/backend/gen_program.cpp
@@ -76,9 +76,10 @@ namespace gbe {
       unit.getFunction(name)->setSimdWidth(simdWidth);
       Context *ctx = GBE_NEW(GenContext, unit, name, limitRegisterPressure);
       kernel = ctx->compileKernel();
-      GBE_DELETE(ctx);
-      if (kernel != NULL)
+      if (kernel != NULL) {
         break;
+      }
+      GBE_DELETE(ctx);
     }
 
     // XXX spill must be implemented
diff --git a/backend/src/backend/gen_register.hpp b/backend/src/backend/gen_register.hpp
index 92122a6..d772b0d 100644
--- a/backend/src/backend/gen_register.hpp
+++ b/backend/src/backend/gen_register.hpp
@@ -725,6 +725,16 @@ namespace gbe
       return ub16(GEN_GENERAL_REGISTER_FILE, nr, subnr);
     }
 
+    static INLINE GenRegister unpacked_uw(uint32_t nr, uint32_t subnr) {
+      // UW-typed view of general register nr.subnr with vertical stride 16,
+      // width 8 and horizontal stride 2 (presumably every second word is read).
+      const GenRegister wordView(GEN_GENERAL_REGISTER_FILE, nr, subnr,
+                                 GEN_TYPE_UW,
+                                 GEN_VERTICAL_STRIDE_16,
+                                 GEN_WIDTH_8, GEN_HORIZONTAL_STRIDE_2);
+      return wordView;
+    }
+
     static INLINE GenRegister mask(uint32_t subnr) {
       return uw1(GEN_ARCHITECTURE_REGISTER_FILE, GEN_ARF_MASK, subnr);
     }
diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp
index d33c533..4943a5d 100644
--- a/backend/src/backend/program.cpp
+++ b/backend/src/backend/program.cpp
@@ -1,4 +1,4 @@
-/* 
+/*
  * Copyright © 2012 Intel Corporation
  *
  * This library is free software; you can redistribute it and/or
@@ -49,9 +49,10 @@
 namespace gbe {
 
   Kernel::Kernel(const std::string &name) :
-    name(name), args(NULL), argNum(0), curbeSize(0), stackSize(0), useSLM(false)
+    name(name), args(NULL), argNum(0), curbeSize(0), stackSize(0), useSLM(false), cxt(NULL)
   {}
   Kernel::~Kernel(void) {
+    if(cxt) GBE_DELETE(cxt); // the kernel deletes the Context attached to it; cxt stays NULL when none was attached
     GBE_SAFE_DELETE_ARRAY(args);
   }
   int32_t Kernel::getCurbeOffset(gbe_curbe_type type, uint32_t subType) const {
@@ -229,6 +230,12 @@ namespace gbe {
     return kernel->getUseSLM() ? 1 : 0;
   }
 
+  static int32_t kernelSetConstBufSize(gbe_kernel genKernel, uint32_t argID, size_t sz) {
+    // Callback behind gbe_kernel_set_const_buffer_size: -1 for a NULL handle,
+    // otherwise whatever Kernel::setConstBufSize returns.
+    gbe::Kernel *const target = (gbe::Kernel*) genKernel;
+    return target == NULL ? -1 : target->setConstBufSize(argID, sz); }
+
   static uint32_t kernelGetRequiredWorkGroupSize(gbe_kernel kernel, uint32_t dim) {
     return 0u;
   }
@@ -251,6 +258,7 @@ GBE_EXPORT_SYMBOL gbe_kernel_get_simd_width_cb *gbe_kernel_get_simd_width = NULL
 GBE_EXPORT_SYMBOL gbe_kernel_get_curbe_offset_cb *gbe_kernel_get_curbe_offset = NULL;
 GBE_EXPORT_SYMBOL gbe_kernel_get_curbe_size_cb *gbe_kernel_get_curbe_size = NULL;
 GBE_EXPORT_SYMBOL gbe_kernel_get_stack_size_cb *gbe_kernel_get_stack_size = NULL;
+GBE_EXPORT_SYMBOL gbe_kernel_set_const_buffer_size_cb *gbe_kernel_set_const_buffer_size = NULL;
 GBE_EXPORT_SYMBOL gbe_kernel_get_required_work_group_size_cb *gbe_kernel_get_required_work_group_size = NULL;
 GBE_EXPORT_SYMBOL gbe_kernel_use_slm_cb *gbe_kernel_use_slm = NULL;
 
@@ -275,6 +283,7 @@ namespace gbe
       gbe_kernel_get_curbe_offset = gbe::kernelGetCurbeOffset;
       gbe_kernel_get_curbe_size = gbe::kernelGetCurbeSize;
       gbe_kernel_get_stack_size = gbe::kernelGetStackSize;
+      gbe_kernel_set_const_buffer_size = gbe::kernelSetConstBufSize;
       gbe_kernel_get_required_work_group_size = gbe::kernelGetRequiredWorkGroupSize;
       gbe_kernel_use_slm = gbe::kernelUseSLM;
       genSetupCallBacks();
diff --git a/backend/src/backend/program.h b/backend/src/backend/program.h
index b90c1df..4273a77 100644
--- a/backend/src/backend/program.h
+++ b/backend/src/backend/program.h
@@ -81,7 +81,8 @@ enum gbe_curbe_type {
 
 /*! Extra arguments use the negative range of sub-values */
 enum gbe_extra_argument {
-  GBE_STACK_BUFFER = 0 /* Give stack location in curbe */
+  GBE_STACK_BUFFER = 0,   /* Give stack location in curbe */
+  GBE_CONSTANT_BUFFER = 1 /* Base sub-type for constant buffer arguments: argument argID uses GBE_CONSTANT_BUFFER + argID */
 };
 
 /*! Create a new program from the given source code (zero terminated string) */
@@ -159,6 +160,10 @@ extern gbe_kernel_get_stack_size_cb *gbe_kernel_get_stack_size;
 typedef int32_t (gbe_kernel_get_curbe_offset_cb)(gbe_kernel, enum gbe_curbe_type type, uint32_t sub_type);
 extern gbe_kernel_get_curbe_offset_cb *gbe_kernel_get_curbe_offset;
 
+/*! Set the constant pointer arg size and return the cb offset in curbe */
+typedef int32_t (gbe_kernel_set_const_buffer_size_cb)(gbe_kernel, uint32_t argID, size_t sz);
+extern gbe_kernel_set_const_buffer_size_cb *gbe_kernel_set_const_buffer_size;
+
 /*! Indicates if a work group size is required. Return the required width or 0
  *  if none
  */
diff --git a/backend/src/backend/program.hpp b/backend/src/backend/program.hpp
index e0f7dba..334c1b2 100644
--- a/backend/src/backend/program.hpp
+++ b/backend/src/backend/program.hpp
@@ -1,4 +1,4 @@
-/* 
+/*
  * Copyright © 2012 Intel Corporation
  *
  * This library is free software; you can redistribute it and/or
@@ -26,6 +26,7 @@
 #define __GBE_PROGRAM_HPP__
 
 #include "backend/program.h"
+#include "backend/context.hpp"
 #include "sys/hash_map.hpp"
 #include "sys/vector.hpp"
 #include <string>
@@ -42,6 +43,7 @@ namespace gbe {
   struct KernelArgument {
     gbe_arg_type type; //!< Pointer, structure, image, regular value?
     uint32_t size;     //!< Size of the argument
+    uint32_t bufSize;  //!< Constant buffer size (written by Kernel::setConstBufSize)
   };
 
   /*! Stores the offset where to patch where to patch */
@@ -94,6 +96,16 @@ namespace gbe {
     INLINE uint32_t getSIMDWidth(void) const { return this->simdWidth; }
     /*! Says if SLM is needed for it */
     INLINE bool getUseSLM(void) const { return this->useSLM; }
+    /*! Set the constant buffer size of argument argID. Returns what
+     *  Context::allocConstBuf returns when the size changed, and -1 for a bad
+     *  argID, a non constant-pointer argument, no attached context or an unchanged size */
+    int32_t setConstBufSize(uint32_t argID, size_t sz) {
+      if(argID >= argNum || cxt == NULL) return -1; // cxt stays NULL until a Context attaches itself
+      if(args[argID].type != GBE_ARG_CONSTANT_PTR) return -1;
+      if(args[argID].bufSize == sz) return -1;      // size unchanged: nothing to re-allocate
+      args[argID].bufSize = sz;
+      return cxt->allocConstBuf(argID);
+    }
   protected:
     friend class Context;      //!< Owns the kernels
     const std::string name;    //!< Kernel name
@@ -104,6 +116,7 @@ namespace gbe {
     uint32_t simdWidth;        //!< SIMD size for the kernel (lane number)
     uint32_t stackSize;        //!< Stack size (may be 0 if unused)
     bool useSLM;               //!< SLM requires a special HW config
+    Context *cxt;              //!< Save cxt after compiler to alloc constant buffer curbe
     GBE_CLASS(Kernel);         //!< Use custom allocators
   };
 
-- 
1.7.9.5



More information about the Beignet mailing list