[Beignet] [PATCH V2] remove GBE_CURBE_STACK_POINTER in payload
Guo Yejun
yejun.guo at intel.com
Mon Aug 24 13:03:51 PDT 2015
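Initialize the stack pointer's lane IDs inside the kernel with a packed integer vector, instead of uploading them from the host through the curbe payload (GBE_CURBE_STACK_POINTER).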
V2: call functions from ctx, instead of ctx.registerAllocator
Signed-off-by: Guo Yejun <yejun.guo at intel.com>
---
backend/src/backend/context.cpp | 10 ++++-----
backend/src/backend/context.hpp | 2 +-
backend/src/backend/gen75_context.cpp | 4 +++-
backend/src/backend/gen_context.cpp | 33 ++++++++++++++++++++++++++++--
backend/src/backend/gen_context.hpp | 2 ++
backend/src/backend/gen_reg_allocation.cpp | 27 +++++++++++++++---------
backend/src/backend/program.h | 1 -
backend/src/backend/program.hpp | 2 +-
src/cl_command_queue_gen7.c | 9 --------
9 files changed, 60 insertions(+), 30 deletions(-)
diff --git a/backend/src/backend/context.cpp b/backend/src/backend/context.cpp
index b8dfa8c..33b2409 100644
--- a/backend/src/backend/context.cpp
+++ b/backend/src/backend/context.cpp
@@ -373,8 +373,8 @@ namespace gbe
return this->kernel;
}
- int16_t Context::allocate(int16_t size, int16_t alignment) {
- return registerAllocator->allocate(size, alignment);
+ int16_t Context::allocate(int16_t size, int16_t alignment, bool bFwd) {
+ return registerAllocator->allocate(size, alignment, bFwd);
}
void Context::deallocate(int16_t offset) { registerAllocator->deallocate(offset); }
@@ -396,10 +396,10 @@ namespace gbe
void Context::buildStack(void) {
const auto &stackUse = dag->getUse(ir::ocl::stackptr);
- if (stackUse.size() == 0) // no stack is used if stackptr is unused
+ if (stackUse.size() == 0) { // no stack is used if stackptr is unused
+ this->kernel->stackSize = 0;
return;
- // Be sure that the stack pointer is set
- // GBE_ASSERT(this->kernel->getCurbeOffset(GBE_CURBE_STACK_POINTER, 0) >= 0);
+ }
uint32_t stackSize = 128;
while (stackSize < fn.getStackSize()) {
stackSize *= 3;
diff --git a/backend/src/backend/context.hpp b/backend/src/backend/context.hpp
index faa7c8a..079967d 100644
--- a/backend/src/backend/context.hpp
+++ b/backend/src/backend/context.hpp
@@ -85,7 +85,7 @@ namespace gbe
return JIPs.find(insn) != JIPs.end();
}
/*! Allocate some memory in the register file */
- int16_t allocate(int16_t size, int16_t alignment);
+ int16_t allocate(int16_t size, int16_t alignment, bool bFwd=0);
/*! Deallocate previously allocated memory */
void deallocate(int16_t offset);
/*! Spilt a block into 2 blocks, for some registers allocate together but deallocate seperate */
diff --git a/backend/src/backend/gen75_context.cpp b/backend/src/backend/gen75_context.cpp
index b9dfb18..7d407c3 100644
--- a/backend/src/backend/gen75_context.cpp
+++ b/backend/src/backend/gen75_context.cpp
@@ -67,7 +67,7 @@ namespace gbe
using namespace ir;
// Only emit stack pointer computation if we use a stack
- if (kernel->getCurbeOffset(GBE_CURBE_STACK_POINTER, 0) <= 0)
+ if (kernel->getStackSize() == 0)
return;
// Check that everything is consistent in the kernel code
@@ -80,6 +80,8 @@ namespace gbe
GenRegister::ud16grf(ir::ocl::stackptr);
const GenRegister stackptr = ra->genReg(selStatckPtr);
+ loadLaneID(stackptr);
+
// We compute the per-lane stack pointer here
// private address start from zero
p->push();
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index 0c301dd..25fdf08 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -176,11 +176,39 @@ namespace gbe
p->pop();
}
+ void GenContext::loadLaneID(GenRegister dst) {
+ const GenRegister laneID = GenRegister::immv(0x76543210);
+ GenRegister dst_;
+ if (dst.type == GEN_TYPE_UW)
+ dst_ = dst;
+ else
+ dst_ = GenRegister::uw16grf(126,0);
+
+ p->push();
+ uint32_t execWidth = p->curr.execWidth;
+ p->curr.predicate = GEN_PREDICATE_NONE;
+ p->curr.noMask = 1;
+ if (execWidth == 8)
+ p->MOV(dst_, laneID);
+ else {
+ p->curr.execWidth = 8;
+ p->MOV(dst_, laneID);
+ //Packed Unsigned Half-Byte Integer Vector does not work
+ //have to mock by adding 8 to the signed vector
+ const GenRegister eight = GenRegister::immuw(8);
+ p->ADD(GenRegister::offset(dst_, 0, 16), dst_, eight);
+ p->curr.execWidth = 16;
+ }
+ if (dst.type != GEN_TYPE_UW)
+ p->MOV(dst, dst_);
+ p->pop();
+ }
+
void GenContext::emitStackPointer(void) {
using namespace ir;
// Only emit stack pointer computation if we use a stack
- if (kernel->getCurbeOffset(GBE_CURBE_STACK_POINTER, 0) <= 0)
+ if (kernel->getStackSize() == 0)
return;
// Check that everything is consistent in the kernel code
@@ -193,6 +221,8 @@ namespace gbe
GenRegister::ud16grf(ir::ocl::stackptr);
const GenRegister stackptr = ra->genReg(selStatckPtr);
+ loadLaneID(stackptr);
+
// We compute the per-lane stack pointer here
// threadId * perThreadSize + laneId*perLaneSize
// let private address start from zero
@@ -2254,7 +2284,6 @@ namespace gbe
INSERT_REG(numgroup0, GROUP_NUM_X)
INSERT_REG(numgroup1, GROUP_NUM_Y)
INSERT_REG(numgroup2, GROUP_NUM_Z)
- INSERT_REG(stackptr, STACK_POINTER)
INSERT_REG(printfbptr, PRINTF_BUF_POINTER)
INSERT_REG(printfiptr, PRINTF_INDEX_POINTER)
do {} while(0);
diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp
index 8ef725f..34f9293 100644
--- a/backend/src/backend/gen_context.hpp
+++ b/backend/src/backend/gen_context.hpp
@@ -107,6 +107,8 @@ namespace gbe
return this->liveness->getLiveIn(bb);
}
+ void loadLaneID(GenRegister dst);
+
void collectShifter(GenRegister dest, GenRegister src);
void loadTopHalf(GenRegister dest, GenRegister src);
void storeTopHalf(GenRegister dest, GenRegister src);
diff --git a/backend/src/backend/gen_reg_allocation.cpp b/backend/src/backend/gen_reg_allocation.cpp
index 4cb88e9..39f1934 100644
--- a/backend/src/backend/gen_reg_allocation.cpp
+++ b/backend/src/backend/gen_reg_allocation.cpp
@@ -133,8 +133,8 @@ namespace gbe
void validateFlag(Selection &selection, SelectionInstruction &insn);
/*! Allocate the GRF registers */
bool allocateGRFs(Selection &selection);
- /*! Create gen registers for all preallocated curbe registers. */
- void allocatePayloadRegs(void);
+ /*! Create gen registers for all preallocated special registers. */
+ void allocateSpecialRegs(void);
/*! Create a Gen register from a register set in the payload */
void allocatePayloadReg(ir::Register, uint32_t offset, uint32_t subOffset = 0);
/*! Create the intervals for each register */
@@ -228,7 +228,7 @@ namespace gbe
this->intervals[reg].maxID = 0;
}
- INLINE void GenRegAllocator::Opaque::allocatePayloadRegs(void) {
+ INLINE void GenRegAllocator::Opaque::allocateSpecialRegs(void) {
using namespace ir;
for(auto &it : this->ctx.curbeRegs)
allocatePayloadReg(it.first, it.second);
@@ -248,6 +248,19 @@ namespace gbe
allocatePayloadReg(reg, it->second, subOffset);
ctx.splitBlock(it->second, subOffset);
}
+
+ if (RA.contains(ocl::stackbuffer)) {
+ uint32_t regSize = 0;
+ this->getRegAttrib(ocl::stackptr, regSize);
+ uint32_t offset = this->ctx.allocate(regSize, regSize, 1);
+ RA.insert(std::make_pair(ocl::stackptr, offset));
+ }
+
+ // Group and barrier IDs are always allocated by the hardware in r0
+ RA.insert(std::make_pair(ocl::groupid0, 1*sizeof(float))); // r0.1
+ RA.insert(std::make_pair(ocl::groupid1, 6*sizeof(float))); // r0.6
+ RA.insert(std::make_pair(ocl::groupid2, 7*sizeof(float))); // r0.7
+ RA.insert(std::make_pair(ocl::barrierid, 2*sizeof(float))); // r0.2
}
bool GenRegAllocator::Opaque::createGenReg(const Selection &selection, const GenRegInterval &interval) {
@@ -1001,13 +1014,7 @@ namespace gbe
this->intervals.push_back(ir::Register(regID));
// Allocate the special registers (only those which are actually used)
- this->allocatePayloadRegs();
-
- // Group and barrier IDs are always allocated by the hardware in r0
- RA.insert(std::make_pair(ocl::groupid0, 1*sizeof(float))); // r0.1
- RA.insert(std::make_pair(ocl::groupid1, 6*sizeof(float))); // r0.6
- RA.insert(std::make_pair(ocl::groupid2, 7*sizeof(float))); // r0.7
- RA.insert(std::make_pair(ocl::barrierid, 2*sizeof(float))); // r0.2
+ this->allocateSpecialRegs();
// block IP used to handle the mask in SW is always allocated
diff --git a/backend/src/backend/program.h b/backend/src/backend/program.h
index fa75052..af19732 100644
--- a/backend/src/backend/program.h
+++ b/backend/src/backend/program.h
@@ -91,7 +91,6 @@ enum gbe_curbe_type {
GBE_CURBE_GROUP_NUM_Z,
GBE_CURBE_WORK_DIM,
GBE_CURBE_IMAGE_INFO,
- GBE_CURBE_STACK_POINTER,
GBE_CURBE_PRINTF_BUF_POINTER,
GBE_CURBE_PRINTF_INDEX_POINTER,
GBE_CURBE_KERNEL_ARGUMENT,
diff --git a/backend/src/backend/program.hpp b/backend/src/backend/program.hpp
index cff2463..efe192f 100644
--- a/backend/src/backend/program.hpp
+++ b/backend/src/backend/program.hpp
@@ -223,7 +223,7 @@ namespace gbe {
uint32_t argNum; //!< Number of function arguments
uint32_t curbeSize; //!< Size of the data to push
uint32_t simdWidth; //!< SIMD size for the kernel (lane number)
- uint32_t stackSize; //!< Stack size (may be 0 if unused)
+ uint32_t stackSize; //!< Stack size (0 if unused)
uint32_t scratchSize; //!< Scratch memory size (may be 0 if unused)
bool useSLM; //!< SLM requires a special HW config
uint32_t slmSize; //!< slm size for kernel variable
diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c
index 4adbd2b..0e60528 100644
--- a/src/cl_command_queue_gen7.c
+++ b/src/cl_command_queue_gen7.c
@@ -210,15 +210,6 @@ cl_curbe_fill(cl_kernel ker,
UPLOAD(GBE_CURBE_WORK_DIM, work_dim);
#undef UPLOAD
- /* Write identity for the stack pointer. This is required by the stack pointer
- * computation in the kernel
- */
- if ((offset = interp_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_STACK_POINTER, 0)) >= 0) {
- const uint32_t simd_sz = interp_kernel_get_simd_width(ker->opaque);
- uint32_t *stackptr = (uint32_t *) (ker->curbe + offset);
- int32_t i;
- for (i = 0; i < (int32_t) simd_sz; ++i) stackptr[i] = i;
- }
/* Handle the various offsets to SLM */
const int32_t arg_n = interp_kernel_get_arg_num(ker->opaque);
int32_t arg, slm_offset = interp_kernel_get_slm_size(ker->opaque);
--
1.9.1
More information about the Beignet mailing list