[Beignet] [PATCH v3 2/4] Add constant pointer as argument support in kernel.
Yang Rong
rong.r.yang at intel.com
Sun Apr 21 22:11:50 PDT 2013
Signed-off-by: Yang Rong <rong.r.yang at intel.com>
---
backend/src/backend/context.cpp | 25 ++++++++-
backend/src/backend/context.hpp | 2 +
backend/src/backend/gen_context.cpp | 28 +++++++++-
backend/src/backend/gen_context.hpp | 1 +
.../src/backend/gen_insn_gen7_schedule_info.hxx | 1 +
backend/src/backend/gen_insn_selection.cpp | 58 ++++++++++++++++++--
backend/src/backend/gen_insn_selection.hxx | 1 +
backend/src/backend/gen_program.cpp | 5 +-
backend/src/backend/gen_register.hpp | 10 ++++
backend/src/backend/program.cpp | 13 ++++-
backend/src/backend/program.h | 7 ++-
backend/src/backend/program.hpp | 15 ++++-
12 files changed, 152 insertions(+), 14 deletions(-)
diff --git a/backend/src/backend/context.cpp b/backend/src/backend/context.cpp
index c3ddb59..4e16fc0 100644
--- a/backend/src/backend/context.cpp
+++ b/backend/src/backend/context.cpp
@@ -53,7 +53,7 @@ namespace gbe
* the hardware. Note that we always use the left most block when
* allocating, so it makes sense for constant pushing
*/
- int16_t allocate(int16_t size, int16_t alignment, bool bFwd=0);
+ int16_t allocate(int16_t size, int16_t alignment, bool bFwd=false);
/*! Free the given register file piece */
void deallocate(int16_t offset);
@@ -299,6 +299,8 @@ namespace gbe
GBE_DELETE(this->kernel);
this->kernel = NULL;
}
+ if(this->kernel != NULL)
+ this->kernel->cxt = this;
return this->kernel;
}
@@ -308,6 +310,27 @@ namespace gbe
void Context::deallocate(int16_t offset) { partitioner->deallocate(offset); }
+ int32_t Context::allocConstBuf(uint32_t argID) { // (Re)allocate curbe space for constant-pointer argument argID; returns its curbe offset plus GEN_REG_SIZE
+ GBE_ASSERT(kernel->args[argID].type == GBE_ARG_CONSTANT_PTR);
+
+ // Release the range previously recorded for this argument, if there is one.
+ int32_t offset = kernel->getCurbeOffset(GBE_CURBE_EXTRA_ARGUMENT, argID+GBE_CONSTANT_BUFFER);
+ if(offset >= 0)
+ deallocate(offset+GEN_REG_SIZE); // same GEN_REG_SIZE bias as the value returned below
+
+ if(kernel->args[argID].bufSize > 0) {
+ // Alignment is hard-coded to 32 (intended to equal GEN_REG_SIZE); open question: should it depend on the element type?
+ newCurbeEntry(GBE_CURBE_EXTRA_ARGUMENT, GBE_CONSTANT_BUFFER+argID, kernel->args[argID].bufSize, 32);
+ }
+
+ std::sort(kernel->patches.begin(), kernel->patches.end()); // presumably getCurbeOffset needs the patches sorted -- TODO confirm
+ offset = kernel->getCurbeOffset(GBE_CURBE_EXTRA_ARGUMENT, argID+GBE_CONSTANT_BUFFER);
+ GBE_ASSERT(offset>=0); // NOTE(review): with bufSize == 0 no entry is created above, so this relies on an earlier entry still being found -- confirm
+
+ kernel->curbeSize = ALIGN(kernel->curbeSize, GEN_REG_SIZE);
+ return offset + GEN_REG_SIZE;
+ }
+
void Context::buildStack(void) {
const auto &stackUse = dag->getUse(ir::ocl::stackptr);
if (stackUse.size() == 0) // no stack is used if stackptr is unused
diff --git a/backend/src/backend/context.hpp b/backend/src/backend/context.hpp
index 55a63a7..245ad01 100644
--- a/backend/src/backend/context.hpp
+++ b/backend/src/backend/context.hpp
@@ -86,6 +86,8 @@ namespace gbe
int16_t allocate(int16_t size, int16_t alignment);
/*! Deallocate previously allocated memory */
void deallocate(int16_t offset);
+ /* allocate curbe for constant ptr argument */
+ int32_t allocConstBuf(uint32_t argID);
protected:
/*! Build the instruction stream. Return false if failed */
virtual bool emitCode(void) = 0;
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index b3d385b..1f867b8 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -144,7 +144,7 @@ namespace gbe
}
}
- void GenContext::emitBinaryInstruction(const SelectionInstruction &insn) {
+ void GenContext::emitBinaryInstruction(const SelectionInstruction &insn) {
const GenRegister dst = ra->genReg(insn.dst(0));
const GenRegister src0 = ra->genReg(insn.src(0));
const GenRegister src1 = ra->genReg(insn.src(1));
@@ -212,6 +212,32 @@ namespace gbe
}
}
+ void GenContext::emitCBMoveInstruction(const SelectionInstruction &insn) { // Emit a constant-buffer move: load a0 from src, then move the a0-indirect operand into dst, 8 lanes per pass
+ const GenRegister src = GenRegister::unpacked_uw(ra->genReg(insn.src(0)).nr, 0); // UW view (see unpacked_uw) of the source's register number, sub-register 0
+ const GenRegister dst = ra->genReg(insn.dst(0));
+ const GenRegister a0 = GenRegister::addr8(0);
+ uint32_t simdWidth = p->curr.execWidth; // execution width in effect before the overrides below
+ // First pass: execution width 8, quarter control Q1.
+ p->push();
+ p->curr.execWidth = 8;
+ p->curr.quarterControl = GEN_COMPRESSION_Q1;
+ p->MOV(a0, src); // a0 <- src
+ p->MOV(dst, GenRegister::indirect(dst.type, 0, GEN_WIDTH_8)); // dst <- indirect operand of dst's type
+ p->pop();
+ // Second pass, only when the outer width is 16: same sequence on the Qn(.., 1) halves with quarter control Q2.
+ if (simdWidth == 16) {
+ p->push();
+ p->curr.execWidth = 8;
+ p->curr.quarterControl = GEN_COMPRESSION_Q2;
+
+ const GenRegister nextDst = GenRegister::Qn(dst, 1);
+ const GenRegister nextSrc = GenRegister::Qn(src, 1);
+ p->MOV(a0, nextSrc);
+ p->MOV(nextDst, GenRegister::indirect(dst.type, 0, GEN_WIDTH_8));
+ p->pop();
+ }
+ }
+
void GenContext::emitJumpInstruction(const SelectionInstruction &insn) {
const ir::LabelIndex label(insn.index);
const GenRegister src = ra->genReg(insn.src(0));
diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp
index 6af174f..33258f8 100644
--- a/backend/src/backend/gen_context.hpp
+++ b/backend/src/backend/gen_context.hpp
@@ -80,6 +80,7 @@ namespace gbe
void emitTernaryInstruction(const SelectionInstruction &insn);
void emitCompareInstruction(const SelectionInstruction &insn);
void emitJumpInstruction(const SelectionInstruction &insn);
+ void emitCBMoveInstruction(const SelectionInstruction &insn);
void emitEotInstruction(const SelectionInstruction &insn);
void emitNoOpInstruction(const SelectionInstruction &insn);
void emitWaitInstruction(const SelectionInstruction &insn);
diff --git a/backend/src/backend/gen_insn_gen7_schedule_info.hxx b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
index 969ec82..ce8769f 100644
--- a/backend/src/backend/gen_insn_gen7_schedule_info.hxx
+++ b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
@@ -5,6 +5,7 @@ DECL_GEN7_SCHEDULE(Binary, 20, 4, 2)
DECL_GEN7_SCHEDULE(Ternary, 20, 4, 2)
DECL_GEN7_SCHEDULE(Compare, 20, 4, 2)
DECL_GEN7_SCHEDULE(Jump, 14, 1, 1)
+DECL_GEN7_SCHEDULE(CBMove, 20, 2, 2)
DECL_GEN7_SCHEDULE(Eot, 20, 1, 1)
DECL_GEN7_SCHEDULE(NoOp, 20, 2, 2)
DECL_GEN7_SCHEDULE(Wait, 20, 2, 2)
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index e0e8920..34aba5b 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -1,4 +1,4 @@
-/*
+/*
* Copyright © 2012 Intel Corporation
*
* This library is free software; you can redistribute it and/or
@@ -25,7 +25,7 @@
/* This is the instruction selection code. First of all, this is a bunch of c++
* crap. Sorry if this is not that readable. Anyway, the goal here is to take
* GenIR code (i.e. the very regular, very RISC IR) and to produce GenISA with
- * virtual registers (i.e. regular GenIR registers).
+ * virtual registers (i.e. regular GenIR registers).
*
* Overall idea:
* =============
@@ -72,7 +72,7 @@
* *same* flag register for the predicates (used for masking) and the
* conditional modifier (used as a destination for CMP). This leads to extra
* complications with compare instructions and select instructions. Basically,
- * we need to insert extra MOVs.
+ * we need to insert extra MOVs.
*
* Also, there is some extra kludge to handle the predicates for JMPI.
*
@@ -439,6 +439,8 @@ namespace gbe
void CMP(uint32_t conditional, Reg src0, Reg src1);
/*! Select instruction with embedded comparison */
void SEL_CMP(uint32_t conditional, Reg dst, Reg src0, Reg src1);
+ /* Constant buffer move instruction */
+ void CB_MOVE(Reg dst, Reg src);
/*! EOT is used to finish GPGPU threads */
void EOT(void);
/*! No-op */
@@ -481,7 +483,7 @@ namespace gbe
static void markAllChildren(SelectionDAG &dag) {
// Do not merge anything, so all sources become roots
for (uint32_t childID = 0; childID < dag.childNum; ++childID)
- if (dag.child[childID])
+ if (dag.child[childID])
dag.child[childID]->isRoot = 1;
}
@@ -698,6 +700,11 @@ namespace gbe
insn->src(1) = src1;
insn->extra.function = conditional;
}
+ void Selection::Opaque::CB_MOVE(Reg dst, Reg src) { // Append a SEL_OP_CB_MOVE selection instruction with one destination and one source
+ SelectionInstruction *insn = this->appendInsn(SEL_OP_CB_MOVE, 1, 1);
+ insn->dst(0) = dst;
+ insn->src(0) = src;
+ }
void Selection::Opaque::EOT(void) { this->appendInsn(SEL_OP_EOT, 0, 0); }
void Selection::Opaque::NOP(void) { this->appendInsn(SEL_OP_NOP, 0, 0); }
@@ -1057,7 +1064,7 @@ namespace gbe
// Implementation of all patterns
///////////////////////////////////////////////////////////////////////////
- GenRegister getRegisterFromImmediate(ir::Immediate imm)
+ GenRegister getRegisterFromImmediate(ir::Immediate imm)
{
using namespace ir;
switch (imm.type) {
@@ -1654,15 +1661,54 @@ namespace gbe
sel.MOV(GenRegister::retype(value, GEN_TYPE_UB), GenRegister::unpacked_ub(dst));
}
+ void emitCBMove(Selection::Opaque &sel,
+ const ir::LoadInstruction &insn,
+ GenRegister address) const
+ { // Select one CB_MOVE that takes `address` as source and the load's single value register as destination
+ using namespace ir;
+ GBE_ASSERT(insn.getValueNum() == 1); // TODO: loads with more than one value (vectors) are not handled yet
+
+ const GenRegister dst = sel.selReg(insn.getValue(0), insn.getValueType());
+ const GenRegister src = address;
+ sel.CB_MOVE(dst, src);
+
+ // Emitting the indirect-address ("ia") moves directly here would first need scheduling support for the address register, so the expanded sequence below stays disabled.
+ /*
+ const GenRegister a0 = GenRegister::addr8(0);
+ const uint32_t simdWidth = sel.curr.execWidth;
+
+ sel.push();
+ sel.curr.execWidth = 8;
+ sel.curr.quarterControl = GEN_COMPRESSION_Q1;
+ sel.MOV(a0, src);
+ sel.MOV(dst, GenRegister::indirect(dst.type, 0, GEN_WIDTH_8));
+ sel.pop();
+
+ if (simdWidth == 16) {
+ sel.push();
+ sel.curr.execWidth = 8;
+ sel.curr.quarterControl = GEN_COMPRESSION_Q2;
+
+ const GenRegister nextDst = GenRegister::Qn(dst, 1);
+ const GenRegister nextSrc = GenRegister::Qn(src, 1);
+ sel.MOV(a0, nextSrc);
+ sel.MOV(nextDst, GenRegister::indirect(dst.type, 0, GEN_WIDTH_8));
+ sel.pop();
+ } */
+ }
+
INLINE bool emitOne(Selection::Opaque &sel, const ir::LoadInstruction &insn) const {
using namespace ir;
const GenRegister address = sel.selReg(insn.getAddress());
const AddressSpace space = insn.getAddressSpace();
GBE_ASSERT(insn.getAddressSpace() == MEM_GLOBAL ||
+ insn.getAddressSpace() == MEM_CONSTANT ||
insn.getAddressSpace() == MEM_PRIVATE ||
insn.getAddressSpace() == MEM_LOCAL);
GBE_ASSERT(sel.ctx.isScalarReg(insn.getValue(0)) == false);
- if (insn.isAligned() == true)
+ if (insn.getAddressSpace() == MEM_CONSTANT)
+ this->emitCBMove(sel, insn, address);
+ else if (insn.isAligned() == true)
this->emitUntypedRead(sel, insn, address, space == MEM_LOCAL ? 0xfe : 0x00);
else {
const GenRegister value = sel.selReg(insn.getValue(0));
diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx
index 691100b..f89ad4c 100644
--- a/backend/src/backend/gen_insn_selection.hxx
+++ b/backend/src/backend/gen_insn_selection.hxx
@@ -24,6 +24,7 @@ DECL_SELECTION_IR(SEL_CMP, CompareInstruction)
DECL_SELECTION_IR(MAD, TernaryInstruction)
DECL_SELECTION_IR(JMPI, JumpInstruction)
DECL_SELECTION_IR(EOT, EotInstruction)
+DECL_SELECTION_IR(CB_MOVE, CBMoveInstruction)
DECL_SELECTION_IR(NOP, NoOpInstruction)
DECL_SELECTION_IR(WAIT, WaitInstruction)
DECL_SELECTION_IR(MATH, MathInstruction)
diff --git a/backend/src/backend/gen_program.cpp b/backend/src/backend/gen_program.cpp
index 8a7efdb..3d7bedd 100644
--- a/backend/src/backend/gen_program.cpp
+++ b/backend/src/backend/gen_program.cpp
@@ -76,9 +76,10 @@ namespace gbe {
unit.getFunction(name)->setSimdWidth(simdWidth);
Context *ctx = GBE_NEW(GenContext, unit, name, limitRegisterPressure);
kernel = ctx->compileKernel();
- GBE_DELETE(ctx);
- if (kernel != NULL)
+ if (kernel != NULL) {
break;
+ }
+ GBE_DELETE(ctx);
}
// XXX spill must be implemented
diff --git a/backend/src/backend/gen_register.hpp b/backend/src/backend/gen_register.hpp
index 92122a6..d772b0d 100644
--- a/backend/src/backend/gen_register.hpp
+++ b/backend/src/backend/gen_register.hpp
@@ -725,6 +725,16 @@ namespace gbe
return ub16(GEN_GENERAL_REGISTER_FILE, nr, subnr);
}
+ static INLINE GenRegister unpacked_uw(uint32_t nr, uint32_t subnr) { // General-register-file region of type UW at (nr, subnr): vertical stride 16, width 8, horizontal stride 2
+ return GenRegister(GEN_GENERAL_REGISTER_FILE,
+ nr,
+ subnr,
+ GEN_TYPE_UW,
+ GEN_VERTICAL_STRIDE_16,
+ GEN_WIDTH_8,
+ GEN_HORIZONTAL_STRIDE_2);
+ }
+
static INLINE GenRegister mask(uint32_t subnr) {
return uw1(GEN_ARCHITECTURE_REGISTER_FILE, GEN_ARF_MASK, subnr);
}
diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp
index d33c533..4943a5d 100644
--- a/backend/src/backend/program.cpp
+++ b/backend/src/backend/program.cpp
@@ -1,4 +1,4 @@
-/*
+/*
* Copyright © 2012 Intel Corporation
*
* This library is free software; you can redistribute it and/or
@@ -49,9 +49,10 @@
namespace gbe {
Kernel::Kernel(const std::string &name) :
- name(name), args(NULL), argNum(0), curbeSize(0), stackSize(0), useSLM(false)
+ name(name), args(NULL), argNum(0), curbeSize(0), stackSize(0), useSLM(false), cxt(NULL)
{}
Kernel::~Kernel(void) {
+ if(cxt) GBE_DELETE(cxt);
GBE_SAFE_DELETE_ARRAY(args);
}
int32_t Kernel::getCurbeOffset(gbe_curbe_type type, uint32_t subType) const {
@@ -229,6 +230,12 @@ namespace gbe {
return kernel->getUseSLM() ? 1 : 0;
}
+ static int32_t kernelSetConstBufSize(gbe_kernel genKernel, uint32_t argID, size_t sz) { // C callback: forwards to Kernel::setConstBufSize
+ if (genKernel == NULL) return -1; // no kernel handle: report failure
+ gbe::Kernel *kernel = (gbe::Kernel*) genKernel;
+ return kernel->setConstBufSize(argID, sz);
+ }
+
static uint32_t kernelGetRequiredWorkGroupSize(gbe_kernel kernel, uint32_t dim) {
return 0u;
}
@@ -251,6 +258,7 @@ GBE_EXPORT_SYMBOL gbe_kernel_get_simd_width_cb *gbe_kernel_get_simd_width = NULL
GBE_EXPORT_SYMBOL gbe_kernel_get_curbe_offset_cb *gbe_kernel_get_curbe_offset = NULL;
GBE_EXPORT_SYMBOL gbe_kernel_get_curbe_size_cb *gbe_kernel_get_curbe_size = NULL;
GBE_EXPORT_SYMBOL gbe_kernel_get_stack_size_cb *gbe_kernel_get_stack_size = NULL;
+GBE_EXPORT_SYMBOL gbe_kernel_set_const_buffer_size_cb *gbe_kernel_set_const_buffer_size = NULL;
GBE_EXPORT_SYMBOL gbe_kernel_get_required_work_group_size_cb *gbe_kernel_get_required_work_group_size = NULL;
GBE_EXPORT_SYMBOL gbe_kernel_use_slm_cb *gbe_kernel_use_slm = NULL;
@@ -275,6 +283,7 @@ namespace gbe
gbe_kernel_get_curbe_offset = gbe::kernelGetCurbeOffset;
gbe_kernel_get_curbe_size = gbe::kernelGetCurbeSize;
gbe_kernel_get_stack_size = gbe::kernelGetStackSize;
+ gbe_kernel_set_const_buffer_size = gbe::kernelSetConstBufSize;
gbe_kernel_get_required_work_group_size = gbe::kernelGetRequiredWorkGroupSize;
gbe_kernel_use_slm = gbe::kernelUseSLM;
genSetupCallBacks();
diff --git a/backend/src/backend/program.h b/backend/src/backend/program.h
index b90c1df..4273a77 100644
--- a/backend/src/backend/program.h
+++ b/backend/src/backend/program.h
@@ -81,7 +81,8 @@ enum gbe_curbe_type {
/*! Extra arguments use the negative range of sub-values */
enum gbe_extra_argument {
- GBE_STACK_BUFFER = 0 /* Give stack location in curbe */
+ GBE_STACK_BUFFER = 0, /* Give stack location in curbe */
+ GBE_CONSTANT_BUFFER = 1 /* constant buffer argument location in curbe */
};
/*! Create a new program from the given source code (zero terminated string) */
@@ -159,6 +160,10 @@ extern gbe_kernel_get_stack_size_cb *gbe_kernel_get_stack_size;
typedef int32_t (gbe_kernel_get_curbe_offset_cb)(gbe_kernel, enum gbe_curbe_type type, uint32_t sub_type);
extern gbe_kernel_get_curbe_offset_cb *gbe_kernel_get_curbe_offset;
+/*! Set the constant pointer arg size and return the cb offset in curbe */
+typedef int32_t (gbe_kernel_set_const_buffer_size_cb)(gbe_kernel, uint32_t argID, size_t sz);
+extern gbe_kernel_set_const_buffer_size_cb *gbe_kernel_set_const_buffer_size;
+
/*! Indicates if a work group size is required. Return the required width or 0
* if none
*/
diff --git a/backend/src/backend/program.hpp b/backend/src/backend/program.hpp
index e0f7dba..334c1b2 100644
--- a/backend/src/backend/program.hpp
+++ b/backend/src/backend/program.hpp
@@ -1,4 +1,4 @@
-/*
+/*
* Copyright © 2012 Intel Corporation
*
* This library is free software; you can redistribute it and/or
@@ -26,6 +26,7 @@
#define __GBE_PROGRAM_HPP__
#include "backend/program.h"
+#include "backend/context.hpp"
#include "sys/hash_map.hpp"
#include "sys/vector.hpp"
#include <string>
@@ -42,6 +43,7 @@ namespace gbe {
struct KernelArgument {
gbe_arg_type type; //!< Pointer, structure, image, regular value?
uint32_t size; //!< Size of the argument
+ uint32_t bufSize; //!< Constant buffer size
};
/*! Stores the offset where to patch where to patch */
@@ -94,6 +96,16 @@ namespace gbe {
INLINE uint32_t getSIMDWidth(void) const { return this->simdWidth; }
/*! Says if SLM is needed for it */
INLINE bool getUseSLM(void) const { return this->useSLM; }
+ /*! Record a new constant buffer size for argument argID and return the curbe offset given by Context::allocConstBuf. Returns -1 if argID is out of range, the argument is not a constant pointer, or the size is unchanged */
+ int32_t setConstBufSize(uint32_t argID, size_t sz) {
+ if(argID >= argNum) return -1;
+ if(args[argID].type != GBE_ARG_CONSTANT_PTR) return -1;
+ if(args[argID].bufSize != sz) {
+ args[argID].bufSize = sz; // NOTE(review): size_t is narrowed to the uint32_t bufSize field here
+ return cxt->allocConstBuf(argID); // NOTE(review): cxt starts as NULL in the constructor and is not checked here -- confirm it is always set before this call
+ }
+ return -1; // size unchanged: NOTE(review) callers cannot tell this apart from an error, and the existing offset is not returned
+ }
protected:
friend class Context; //!< Owns the kernels
const std::string name; //!< Kernel name
@@ -104,6 +116,7 @@ namespace gbe {
uint32_t simdWidth; //!< SIMD size for the kernel (lane number)
uint32_t stackSize; //!< Stack size (may be 0 if unused)
bool useSLM; //!< SLM requires a special HW config
+ Context *cxt; //!< Context kept after compilation so constant buffer curbe space can be allocated later; deleted by ~Kernel
GBE_CLASS(Kernel); //!< Use custom allocators
};
--
1.7.9.5
More information about the Beignet
mailing list