[Beignet] [PATCH v3 2/4] Add constant pointer as argument support in kernel.
Zhigang Gong
zhigang.gong at linux.intel.com
Mon Apr 22 02:21:44 PDT 2013
On Mon, Apr 22, 2013 at 01:11:50PM +0800, Yang Rong wrote:
Two minor problem, please check it as below. No need for v4 patch.
I will fix them. Thanks for the patchset.
>
> Signed-off-by: Yang Rong <rong.r.yang at intel.com>
> ---
> backend/src/backend/context.cpp | 25 ++++++++-
> backend/src/backend/context.hpp | 2 +
> backend/src/backend/gen_context.cpp | 28 +++++++++-
> backend/src/backend/gen_context.hpp | 1 +
> .../src/backend/gen_insn_gen7_schedule_info.hxx | 1 +
> backend/src/backend/gen_insn_selection.cpp | 58 ++++++++++++++++++--
> backend/src/backend/gen_insn_selection.hxx | 1 +
> backend/src/backend/gen_program.cpp | 5 +-
> backend/src/backend/gen_register.hpp | 10 ++++
> backend/src/backend/program.cpp | 13 ++++-
> backend/src/backend/program.h | 7 ++-
> backend/src/backend/program.hpp | 15 ++++-
> 12 files changed, 152 insertions(+), 14 deletions(-)
>
> diff --git a/backend/src/backend/context.cpp b/backend/src/backend/context.cpp
> index c3ddb59..4e16fc0 100644
> --- a/backend/src/backend/context.cpp
> +++ b/backend/src/backend/context.cpp
> @@ -53,7 +53,7 @@ namespace gbe
> * the hardware. Note that we always use the left most block when
> * allocating, so it makes sense for constant pushing
> */
> - int16_t allocate(int16_t size, int16_t alignment, bool bFwd=0);
> + int16_t allocate(int16_t size, int16_t alignment, bool bFwd=false);
>
> /*! Free the given register file piece */
> void deallocate(int16_t offset);
> @@ -299,6 +299,8 @@ namespace gbe
> GBE_DELETE(this->kernel);
> this->kernel = NULL;
> }
> + if(this->kernel != NULL)
> + this->kernel->cxt = this;
> return this->kernel;
> }
>
> @@ -308,6 +310,27 @@ namespace gbe
>
> void Context::deallocate(int16_t offset) { partitioner->deallocate(offset); }
>
> + int32_t Context::allocConstBuf(uint32_t argID) {
> + GBE_ASSERT(kernel->args[argID].type == GBE_ARG_CONSTANT_PTR);
> +
> + //free previous
> + int32_t offset = kernel->getCurbeOffset(GBE_CURBE_EXTRA_ARGUMENT, argID+GBE_CONSTANT_BUFFER);
> + if(offset >= 0)
> + deallocate(offset+GEN_REG_SIZE);
> +
> + if(kernel->args[argID].bufSize > 0) {
> + //use 32 alignment here as GEN_REG_SIZE, need dynamic by type?
> + newCurbeEntry(GBE_CURBE_EXTRA_ARGUMENT, GBE_CONSTANT_BUFFER+argID, kernel->args[argID].bufSize, 32);
> + }
> +
> + std::sort(kernel->patches.begin(), kernel->patches.end());
> + offset = kernel->getCurbeOffset(GBE_CURBE_EXTRA_ARGUMENT, argID+GBE_CONSTANT_BUFFER);
> + GBE_ASSERT(offset>=0);
> +
> + kernel->curbeSize = ALIGN(kernel->curbeSize, GEN_REG_SIZE);
> + return offset + GEN_REG_SIZE;
> + }
> +
> void Context::buildStack(void) {
> const auto &stackUse = dag->getUse(ir::ocl::stackptr);
> if (stackUse.size() == 0) // no stack is used if stackptr is unused
> diff --git a/backend/src/backend/context.hpp b/backend/src/backend/context.hpp
> index 55a63a7..245ad01 100644
> --- a/backend/src/backend/context.hpp
> +++ b/backend/src/backend/context.hpp
> @@ -86,6 +86,8 @@ namespace gbe
> int16_t allocate(int16_t size, int16_t alignment);
> /*! Deallocate previously allocated memory */
> void deallocate(int16_t offset);
> + /* allocate curbe for constant ptr argument */
> + int32_t allocConstBuf(uint32_t argID);
> protected:
> /*! Build the instruction stream. Return false if failed */
> virtual bool emitCode(void) = 0;
> diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
> index b3d385b..1f867b8 100644
> --- a/backend/src/backend/gen_context.cpp
> +++ b/backend/src/backend/gen_context.cpp
> @@ -144,7 +144,7 @@ namespace gbe
> }
> }
>
> - void GenContext::emitBinaryInstruction(const SelectionInstruction &insn) {
> + void GenContext::emitBinaryInstruction(const SelectionInstruction &insn) {
> const GenRegister dst = ra->genReg(insn.dst(0));
> const GenRegister src0 = ra->genReg(insn.src(0));
> const GenRegister src1 = ra->genReg(insn.src(1));
> @@ -212,6 +212,32 @@ namespace gbe
> }
> }
>
> + void GenContext::emitCBMoveInstruction(const SelectionInstruction &insn) {
> + const GenRegister src = GenRegister::unpacked_uw(ra->genReg(insn.src(0)).nr, 0);
> + const GenRegister dst = ra->genReg(insn.dst(0));
> + const GenRegister a0 = GenRegister::addr8(0);
> + uint32_t simdWidth = p->curr.execWidth;
> +
> + p->push();
> + p->curr.execWidth = 8;
> + p->curr.quarterControl = GEN_COMPRESSION_Q1;
> + p->MOV(a0, src);
> + p->MOV(dst, GenRegister::indirect(dst.type, 0, GEN_WIDTH_8));
> + p->pop();
> +
> + if (simdWidth == 16) {
> + p->push();
> + p->curr.execWidth = 8;
> + p->curr.quarterControl = GEN_COMPRESSION_Q2;
> +
> + const GenRegister nextDst = GenRegister::Qn(dst, 1);
> + const GenRegister nextSrc = GenRegister::Qn(src, 1);
> + p->MOV(a0, nextSrc);
> + p->MOV(nextDst, GenRegister::indirect(dst.type, 0, GEN_WIDTH_8));
> + p->pop();
> + }
> + }
> +
> void GenContext::emitJumpInstruction(const SelectionInstruction &insn) {
> const ir::LabelIndex label(insn.index);
> const GenRegister src = ra->genReg(insn.src(0));
> diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp
> index 6af174f..33258f8 100644
> --- a/backend/src/backend/gen_context.hpp
> +++ b/backend/src/backend/gen_context.hpp
> @@ -80,6 +80,7 @@ namespace gbe
> void emitTernaryInstruction(const SelectionInstruction &insn);
> void emitCompareInstruction(const SelectionInstruction &insn);
> void emitJumpInstruction(const SelectionInstruction &insn);
> + void emitCBMoveInstruction(const SelectionInstruction &insn);
> void emitEotInstruction(const SelectionInstruction &insn);
> void emitNoOpInstruction(const SelectionInstruction &insn);
> void emitWaitInstruction(const SelectionInstruction &insn);
> diff --git a/backend/src/backend/gen_insn_gen7_schedule_info.hxx b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
> index 969ec82..ce8769f 100644
> --- a/backend/src/backend/gen_insn_gen7_schedule_info.hxx
> +++ b/backend/src/backend/gen_insn_gen7_schedule_info.hxx
> @@ -5,6 +5,7 @@ DECL_GEN7_SCHEDULE(Binary, 20, 4, 2)
> DECL_GEN7_SCHEDULE(Ternary, 20, 4, 2)
> DECL_GEN7_SCHEDULE(Compare, 20, 4, 2)
> DECL_GEN7_SCHEDULE(Jump, 14, 1, 1)
> +DECL_GEN7_SCHEDULE(CBMove, 20, 2, 2)
> DECL_GEN7_SCHEDULE(Eot, 20, 1, 1)
> DECL_GEN7_SCHEDULE(NoOp, 20, 2, 2)
> DECL_GEN7_SCHEDULE(Wait, 20, 2, 2)
> diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
> index e0e8920..34aba5b 100644
> --- a/backend/src/backend/gen_insn_selection.cpp
> +++ b/backend/src/backend/gen_insn_selection.cpp
> @@ -1,4 +1,4 @@
> -/*
> +/*
> * Copyright © 2012 Intel Corporation
> *
> * This library is free software; you can redistribute it and/or
> @@ -25,7 +25,7 @@
> /* This is the instruction selection code. First of all, this is a bunch of c++
> * crap. Sorry if this is not that readable. Anyway, the goal here is to take
> * GenIR code (i.e. the very regular, very RISC IR) and to produce GenISA with
> - * virtual registers (i.e. regular GenIR registers).
> + * virtual registers (i.e. regular GenIR registers).
> *
> * Overall idea:
> * =============
> @@ -72,7 +72,7 @@
> * *same* flag register for the predicates (used for masking) and the
> * conditional modifier (used as a destination for CMP). This leads to extra
> * complications with compare instructions and select instructions. Basically,
> - * we need to insert extra MOVs.
> + * we need to insert extra MOVs.
> *
> * Also, there is some extra kludge to handle the predicates for JMPI.
> *
> @@ -439,6 +439,8 @@ namespace gbe
> void CMP(uint32_t conditional, Reg src0, Reg src1);
> /*! Select instruction with embedded comparison */
> void SEL_CMP(uint32_t conditional, Reg dst, Reg src0, Reg src1);
> + /* Constant buffer move instruction */
> + void CB_MOVE(Reg dst, Reg src);
> /*! EOT is used to finish GPGPU threads */
> void EOT(void);
> /*! No-op */
> @@ -481,7 +483,7 @@ namespace gbe
> static void markAllChildren(SelectionDAG &dag) {
> // Do not merge anything, so all sources become roots
> for (uint32_t childID = 0; childID < dag.childNum; ++childID)
> - if (dag.child[childID])
> + if (dag.child[childID])
> dag.child[childID]->isRoot = 1;
> }
>
> @@ -698,6 +700,11 @@ namespace gbe
> insn->src(1) = src1;
> insn->extra.function = conditional;
> }
> + void Selection::Opaque::CB_MOVE(Reg dst, Reg src) {
> + SelectionInstruction *insn = this->appendInsn(SEL_OP_CB_MOVE, 1, 1);
> + insn->dst(0) = dst;
> + insn->src(0) = src;
> + }
>
> void Selection::Opaque::EOT(void) { this->appendInsn(SEL_OP_EOT, 0, 0); }
> void Selection::Opaque::NOP(void) { this->appendInsn(SEL_OP_NOP, 0, 0); }
> @@ -1057,7 +1064,7 @@ namespace gbe
> // Implementation of all patterns
> ///////////////////////////////////////////////////////////////////////////
>
> - GenRegister getRegisterFromImmediate(ir::Immediate imm)
> + GenRegister getRegisterFromImmediate(ir::Immediate imm)
> {
> using namespace ir;
> switch (imm.type) {
> @@ -1654,15 +1661,54 @@ namespace gbe
> sel.MOV(GenRegister::retype(value, GEN_TYPE_UB), GenRegister::unpacked_ub(dst));
> }
>
> + void emitCBMove(Selection::Opaque &sel,
> + const ir::LoadInstruction &insn,
> + GenRegister address) const
> + {
> + using namespace ir;
> + GBE_ASSERT(insn.getValueNum() == 1); //todo: handle vec later
> +
> + const GenRegister dst = sel.selReg(insn.getValue(0), insn.getValueType());
> + const GenRegister src = address;
> + sel.CB_MOVE(dst, src);
> +
> + //need to implement adress register's schedule if use ia move here
> + /*
> + const GenRegister a0 = GenRegister::addr8(0);
> + const uint32_t simdWidth = sel.curr.execWidth;
> +
> + sel.push();
> + sel.curr.execWidth = 8;
> + sel.curr.quarterControl = GEN_COMPRESSION_Q1;
> + sel.MOV(a0, src);
> + sel.MOV(dst, GenRegister::indirect(dst.type, 0, GEN_WIDTH_8));
> + sel.pop();
> +
> + if (simdWidth == 16) {
> + sel.push();
> + sel.curr.execWidth = 8;
> + sel.curr.quarterControl = GEN_COMPRESSION_Q2;
> +
> + const GenRegister nextDst = GenRegister::Qn(dst, 1);
> + const GenRegister nextSrc = GenRegister::Qn(src, 1);
> + sel.MOV(a0, nextSrc);
> + sel.MOV(nextDst, GenRegister::indirect(dst.type, 0, GEN_WIDTH_8));
> + sel.pop();
> + } */
The above code should be removed as you already commentted them out.
> + }
> +
> INLINE bool emitOne(Selection::Opaque &sel, const ir::LoadInstruction &insn) const {
> using namespace ir;
> const GenRegister address = sel.selReg(insn.getAddress());
> const AddressSpace space = insn.getAddressSpace();
> GBE_ASSERT(insn.getAddressSpace() == MEM_GLOBAL ||
> + insn.getAddressSpace() == MEM_CONSTANT ||
> insn.getAddressSpace() == MEM_PRIVATE ||
> insn.getAddressSpace() == MEM_LOCAL);
> GBE_ASSERT(sel.ctx.isScalarReg(insn.getValue(0)) == false);
> - if (insn.isAligned() == true)
> + if (insn.getAddressSpace() == MEM_CONSTANT)
> + this->emitCBMove(sel, insn, address);
> + else if (insn.isAligned() == true)
> this->emitUntypedRead(sel, insn, address, space == MEM_LOCAL ? 0xfe : 0x00);
> else {
> const GenRegister value = sel.selReg(insn.getValue(0));
> diff --git a/backend/src/backend/gen_insn_selection.hxx b/backend/src/backend/gen_insn_selection.hxx
> index 691100b..f89ad4c 100644
> --- a/backend/src/backend/gen_insn_selection.hxx
> +++ b/backend/src/backend/gen_insn_selection.hxx
> @@ -24,6 +24,7 @@ DECL_SELECTION_IR(SEL_CMP, CompareInstruction)
> DECL_SELECTION_IR(MAD, TernaryInstruction)
> DECL_SELECTION_IR(JMPI, JumpInstruction)
> DECL_SELECTION_IR(EOT, EotInstruction)
> +DECL_SELECTION_IR(CB_MOVE, CBMoveInstruction)
> DECL_SELECTION_IR(NOP, NoOpInstruction)
> DECL_SELECTION_IR(WAIT, WaitInstruction)
> DECL_SELECTION_IR(MATH, MathInstruction)
> diff --git a/backend/src/backend/gen_program.cpp b/backend/src/backend/gen_program.cpp
> index 8a7efdb..3d7bedd 100644
> --- a/backend/src/backend/gen_program.cpp
> +++ b/backend/src/backend/gen_program.cpp
> @@ -76,9 +76,10 @@ namespace gbe {
> unit.getFunction(name)->setSimdWidth(simdWidth);
> Context *ctx = GBE_NEW(GenContext, unit, name, limitRegisterPressure);
> kernel = ctx->compileKernel();
> - GBE_DELETE(ctx);
> - if (kernel != NULL)
> + if (kernel != NULL) {
> break;
> + }
> + GBE_DELETE(ctx);
> }
>
> // XXX spill must be implemented
> diff --git a/backend/src/backend/gen_register.hpp b/backend/src/backend/gen_register.hpp
> index 92122a6..d772b0d 100644
> --- a/backend/src/backend/gen_register.hpp
> +++ b/backend/src/backend/gen_register.hpp
> @@ -725,6 +725,16 @@ namespace gbe
> return ub16(GEN_GENERAL_REGISTER_FILE, nr, subnr);
> }
>
> + static INLINE GenRegister unpacked_uw(uint32_t nr, uint32_t subnr) {
> + return GenRegister(GEN_GENERAL_REGISTER_FILE,
> + nr,
> + subnr,
> + GEN_TYPE_UW,
> + GEN_VERTICAL_STRIDE_16,
> + GEN_WIDTH_8,
> + GEN_HORIZONTAL_STRIDE_2);
> + }
> +
> static INLINE GenRegister mask(uint32_t subnr) {
> return uw1(GEN_ARCHITECTURE_REGISTER_FILE, GEN_ARF_MASK, subnr);
> }
> diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp
> index d33c533..4943a5d 100644
> --- a/backend/src/backend/program.cpp
> +++ b/backend/src/backend/program.cpp
> @@ -1,4 +1,4 @@
> -/*
> +/*
> * Copyright © 2012 Intel Corporation
> *
> * This library is free software; you can redistribute it and/or
> @@ -49,9 +49,10 @@
> namespace gbe {
>
> Kernel::Kernel(const std::string &name) :
> - name(name), args(NULL), argNum(0), curbeSize(0), stackSize(0), useSLM(false)
> + name(name), args(NULL), argNum(0), curbeSize(0), stackSize(0), useSLM(false), cxt(NULL)
> {}
> Kernel::~Kernel(void) {
> + if(cxt) GBE_DELETE(cxt);
> GBE_SAFE_DELETE_ARRAY(args);
> }
> int32_t Kernel::getCurbeOffset(gbe_curbe_type type, uint32_t subType) const {
> @@ -229,6 +230,12 @@ namespace gbe {
> return kernel->getUseSLM() ? 1 : 0;
> }
>
> + static int32_t kernelSetConstBufSize(gbe_kernel genKernel, uint32_t argID, size_t sz) {
> + if (genKernel == NULL) return -1;
> + gbe::Kernel *kernel = (gbe::Kernel*) genKernel;
> + return kernel->setConstBufSize(argID, sz);
> + }
> +
> static uint32_t kernelGetRequiredWorkGroupSize(gbe_kernel kernel, uint32_t dim) {
> return 0u;
> }
> @@ -251,6 +258,7 @@ GBE_EXPORT_SYMBOL gbe_kernel_get_simd_width_cb *gbe_kernel_get_simd_width = NULL
> GBE_EXPORT_SYMBOL gbe_kernel_get_curbe_offset_cb *gbe_kernel_get_curbe_offset = NULL;
> GBE_EXPORT_SYMBOL gbe_kernel_get_curbe_size_cb *gbe_kernel_get_curbe_size = NULL;
> GBE_EXPORT_SYMBOL gbe_kernel_get_stack_size_cb *gbe_kernel_get_stack_size = NULL;
> +GBE_EXPORT_SYMBOL gbe_kernel_set_const_buffer_size_cb *gbe_kernel_set_const_buffer_size = NULL;
> GBE_EXPORT_SYMBOL gbe_kernel_get_required_work_group_size_cb *gbe_kernel_get_required_work_group_size = NULL;
> GBE_EXPORT_SYMBOL gbe_kernel_use_slm_cb *gbe_kernel_use_slm = NULL;
>
> @@ -275,6 +283,7 @@ namespace gbe
> gbe_kernel_get_curbe_offset = gbe::kernelGetCurbeOffset;
> gbe_kernel_get_curbe_size = gbe::kernelGetCurbeSize;
> gbe_kernel_get_stack_size = gbe::kernelGetStackSize;
> + gbe_kernel_set_const_buffer_size = gbe::kernelSetConstBufSize;
> gbe_kernel_get_required_work_group_size = gbe::kernelGetRequiredWorkGroupSize;
> gbe_kernel_use_slm = gbe::kernelUseSLM;
> genSetupCallBacks();
> diff --git a/backend/src/backend/program.h b/backend/src/backend/program.h
> index b90c1df..4273a77 100644
> --- a/backend/src/backend/program.h
> +++ b/backend/src/backend/program.h
> @@ -81,7 +81,8 @@ enum gbe_curbe_type {
>
> /*! Extra arguments use the negative range of sub-values */
> enum gbe_extra_argument {
> - GBE_STACK_BUFFER = 0 /* Give stack location in curbe */
> + GBE_STACK_BUFFER = 0, /* Give stack location in curbe */
> + GBE_CONSTANT_BUFFER = 1 /* constant buffer argument location in curbe */
> };
>
> /*! Create a new program from the given source code (zero terminated string) */
> @@ -159,6 +160,10 @@ extern gbe_kernel_get_stack_size_cb *gbe_kernel_get_stack_size;
> typedef int32_t (gbe_kernel_get_curbe_offset_cb)(gbe_kernel, enum gbe_curbe_type type, uint32_t sub_type);
> extern gbe_kernel_get_curbe_offset_cb *gbe_kernel_get_curbe_offset;
>
> +/*! Set the constant pointer arg size and return the cb offset in curbe */
> +typedef int32_t (gbe_kernel_set_const_buffer_size_cb)(gbe_kernel, uint32_t argID, size_t sz);
> +extern gbe_kernel_set_const_buffer_size_cb *gbe_kernel_set_const_buffer_size;
> +
> /*! Indicates if a work group size is required. Return the required width or 0
> * if none
> */
> diff --git a/backend/src/backend/program.hpp b/backend/src/backend/program.hpp
> index e0f7dba..334c1b2 100644
> --- a/backend/src/backend/program.hpp
> +++ b/backend/src/backend/program.hpp
> @@ -1,4 +1,4 @@
> -/*
> +/*
> * Copyright © 2012 Intel Corporation
> *
> * This library is free software; you can redistribute it and/or
> @@ -26,6 +26,7 @@
> #define __GBE_PROGRAM_HPP__
>
> #include "backend/program.h"
> +#include "backend/context.hpp"
> #include "sys/hash_map.hpp"
> #include "sys/vector.hpp"
> #include <string>
> @@ -42,6 +43,7 @@ namespace gbe {
> struct KernelArgument {
> gbe_arg_type type; //!< Pointer, structure, image, regular value?
> uint32_t size; //!< Size of the argument
> + uint32_t bufSize; //!< Contant buffer size
> };
>
> /*! Stores the offset where to patch where to patch */
> @@ -94,6 +96,16 @@ namespace gbe {
> INLINE uint32_t getSIMDWidth(void) const { return this->simdWidth; }
> /*! Says if SLM is needed for it */
> INLINE bool getUseSLM(void) const { return this->useSLM; }
> + /*! set constant buffer size and return the cb curbe offset */
> + int32_t setConstBufSize(uint32_t argID, size_t sz) {
> + if(argID >= argNum) return -1;
> + if(args[argID].type != GBE_ARG_CONSTANT_PTR) return -1;
> + if(args[argID].bufSize != sz) {
> + args[argID].bufSize = sz;
> + return cxt->allocConstBuf(argID);
> + }
> + return -1;
> + }
> protected:
> friend class Context; //!< Owns the kernels
> const std::string name; //!< Kernel name
> @@ -104,6 +116,7 @@ namespace gbe {
> uint32_t simdWidth; //!< SIMD size for the kernel (lane number)
> uint32_t stackSize; //!< Stack size (may be 0 if unused)
> bool useSLM; //!< SLM requires a special HW config
> + Context *cxt; //!< Save cxt after compiler to alloc constant buffer curbe
Seems a typo here. should be ctx, right?
> GBE_CLASS(Kernel); //!< Use custom allocators
> };
>
> --
> 1.7.9.5
>
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list