[Beignet] [PATCH 05/18] GBE: use a uniform style to calculate register size for curbe allocation.
Zhigang Gong
zhigang.gong at intel.com
Fri Mar 28 00:10:43 PDT 2014
Concentrate the curbe register allocation in one place, and don't
use hard-coded sizes when doing curbe register allocation. All
register size calculations should use the same method.
Signed-off-by: Zhigang Gong <zhigang.gong at intel.com>
---
backend/src/backend/context.cpp | 93 +-----------------------
backend/src/backend/context.hpp | 3 +-
backend/src/backend/gen_context.cpp | 109 +++++++++++++++++++++++++++++
backend/src/backend/gen_context.hpp | 6 ++
backend/src/backend/gen_reg_allocation.cpp | 6 ++
backend/src/backend/gen_reg_allocation.hpp | 2 +
backend/src/backend/program.hpp | 1 +
7 files changed, 126 insertions(+), 94 deletions(-)
diff --git a/backend/src/backend/context.cpp b/backend/src/backend/context.cpp
index b8f4171..dc27d83 100644
--- a/backend/src/backend/context.cpp
+++ b/backend/src/backend/context.cpp
@@ -353,7 +353,6 @@ namespace gbe
Kernel *Context::compileKernel(void) {
this->kernel = this->allocateKernel();
this->kernel->simdWidth = this->simdWidth;
- this->buildPatchList();
this->buildArgList();
this->buildUsedLabels();
this->buildJIPs();
@@ -417,7 +416,7 @@ namespace gbe
if (stackUse.size() == 0) // no stack is used if stackptr is unused
return;
// Be sure that the stack pointer is set
- GBE_ASSERT(this->kernel->getCurbeOffset(GBE_CURBE_STACK_POINTER, 0) >= 0);
+ // GBE_ASSERT(this->kernel->getCurbeOffset(GBE_CURBE_STACK_POINTER, 0) >= 0);
uint32_t stackSize = 1*KB;
while (stackSize < fn.getStackSize()) {
stackSize <<= 1;
@@ -453,100 +452,10 @@ namespace gbe
return offset + GEN_REG_SIZE;
}
-
void Context::insertCurbeReg(ir::Register reg, uint32_t offset) {
curbeRegs.insert(std::make_pair(reg, offset));
}
- void Context::buildPatchList(void) {
- const uint32_t ptrSize = unit.getPointerSize() == ir::POINTER_32_BITS ? 4u : 8u;
- kernel->curbeSize = 0u;
-
- // We insert the block IP mask first
- this->insertCurbeReg(ir::ocl::blockip, this->newCurbeEntry(GBE_CURBE_BLOCK_IP, 0, this->simdWidth*sizeof(uint16_t)));
- this->insertCurbeReg(ir::ocl::emask, this->newCurbeEntry(GBE_CURBE_EMASK, 0, sizeof(uint16_t)));
- this->insertCurbeReg(ir::ocl::notemask, this->newCurbeEntry(GBE_CURBE_NOT_EMASK, 0, sizeof(uint16_t)));
- this->insertCurbeReg(ir::ocl::barriermask, this->newCurbeEntry(GBE_CURBE_BARRIER_MASK, 0, sizeof(uint16_t)));
-
- // Go over the arguments and find the related patch locations
- const uint32_t argNum = fn.argNum();
- for (uint32_t argID = 0u; argID < argNum; ++argID) {
- const ir::FunctionArgument &arg = fn.getArg(argID);
- // For pointers and values, we have nothing to do. We just push the values
- if (arg.type == ir::FunctionArgument::GLOBAL_POINTER ||
- arg.type == ir::FunctionArgument::LOCAL_POINTER ||
- arg.type == ir::FunctionArgument::CONSTANT_POINTER ||
- arg.type == ir::FunctionArgument::VALUE ||
- arg.type == ir::FunctionArgument::STRUCTURE ||
- arg.type == ir::FunctionArgument::IMAGE ||
- arg.type == ir::FunctionArgument::SAMPLER)
- this->insertCurbeReg(arg.reg, this->newCurbeEntry(GBE_CURBE_KERNEL_ARGUMENT, argID, arg.size, ptrSize));
- }
-
- // Already inserted registers go here
- const size_t localIDSize = sizeof(uint32_t) * this->simdWidth;
- insertCurbeReg(ir::ocl::lid0, this->newCurbeEntry(GBE_CURBE_LOCAL_ID_X, 0, localIDSize));
- insertCurbeReg(ir::ocl::lid1, this->newCurbeEntry(GBE_CURBE_LOCAL_ID_Y, 0, localIDSize));
- insertCurbeReg(ir::ocl::lid2, this->newCurbeEntry(GBE_CURBE_LOCAL_ID_Z, 0, localIDSize));
-
- // Go over all the instructions and find the special register we need
- // to push
-#define INSERT_REG(SPECIAL_REG, PATCH, WIDTH) \
- if (reg == ir::ocl::SPECIAL_REG) { \
- if (curbeRegs.find(reg) != curbeRegs.end()) continue; \
- insertCurbeReg(reg, this->newCurbeEntry(GBE_CURBE_##PATCH, 0, ptrSize * WIDTH)); \
- } else
-
- bool useStackPtr = false;
- fn.foreachInstruction([&](ir::Instruction &insn) {
- const uint32_t srcNum = insn.getSrcNum();
- for (uint32_t srcID = 0; srcID < srcNum; ++srcID) {
- const ir::Register reg = insn.getSrc(srcID);
- if (insn.getOpcode() == ir::OP_GET_IMAGE_INFO) {
- if (srcID != 0) continue;
- const unsigned char bti = ir::cast<ir::GetImageInfoInstruction>(insn).getImageIndex();
- const unsigned char type = ir::cast<ir::GetImageInfoInstruction>(insn).getInfoType();;
- ir::ImageInfoKey key(bti, type);
- const ir::Register imageInfo = insn.getSrc(0);
- if (curbeRegs.find(imageInfo) == curbeRegs.end()) {
- uint32_t offset = this->getImageInfoCurbeOffset(key, 4);
- insertCurbeReg(imageInfo, offset);
- }
- continue;
- }
- if (fn.isSpecialReg(reg) == false) continue;
- if (curbeRegs.find(reg) != curbeRegs.end()) continue;
- if (reg == ir::ocl::stackptr) useStackPtr = true;
- INSERT_REG(lsize0, LOCAL_SIZE_X, 1)
- INSERT_REG(lsize1, LOCAL_SIZE_Y, 1)
- INSERT_REG(lsize2, LOCAL_SIZE_Z, 1)
- INSERT_REG(gsize0, GLOBAL_SIZE_X, 1)
- INSERT_REG(gsize1, GLOBAL_SIZE_Y, 1)
- INSERT_REG(gsize2, GLOBAL_SIZE_Z, 1)
- INSERT_REG(goffset0, GLOBAL_OFFSET_X, 1)
- INSERT_REG(goffset1, GLOBAL_OFFSET_Y, 1)
- INSERT_REG(goffset2, GLOBAL_OFFSET_Z, 1)
- INSERT_REG(workdim, WORK_DIM, 1)
- INSERT_REG(numgroup0, GROUP_NUM_X, 1)
- INSERT_REG(numgroup1, GROUP_NUM_Y, 1)
- INSERT_REG(numgroup2, GROUP_NUM_Z, 1)
- INSERT_REG(stackptr, STACK_POINTER, this->simdWidth)
- do {} while(0);
- }
- });
-#undef INSERT_REG
-
- // Insert the stack buffer if used
- if (useStackPtr)
- insertCurbeReg(ir::ocl::stackbuffer, this->newCurbeEntry(GBE_CURBE_EXTRA_ARGUMENT, GBE_STACK_BUFFER, ptrSize));
-
- // After this point the vector is immutable. Sorting it will make
- // research faster
- std::sort(kernel->patches.begin(), kernel->patches.end());
-
- kernel->curbeSize = ALIGN(kernel->curbeSize, GEN_REG_SIZE);
- }
-
void Context::buildArgList(void) {
kernel->argNum = fn.argNum();
if (kernel->argNum)
diff --git a/backend/src/backend/context.hpp b/backend/src/backend/context.hpp
index 384a2fb..26167a0 100644
--- a/backend/src/backend/context.hpp
+++ b/backend/src/backend/context.hpp
@@ -105,8 +105,6 @@ namespace gbe
virtual Kernel *allocateKernel(void) = 0;
/*! Look if a stack is needed and allocate it */
void buildStack(void);
- /*! Build the curbe patch list for the given kernel */
- void buildPatchList(void);
/*! Build the list of arguments to set to launch the kernel */
void buildArgList(void);
/*! Build the sets of used labels */
@@ -121,6 +119,7 @@ namespace gbe
* of the entry
*/
void insertCurbeReg(ir::Register, uint32_t grfOffset);
+ /*! allocate a curbe entry. */
uint32_t newCurbeEntry(gbe_curbe_type value, uint32_t subValue, uint32_t size, uint32_t alignment = 0);
/*! Provide for each branch and label the label index target */
typedef map<const ir::Instruction*, ir::LabelIndex> JIPMap;
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index 8bcf454..51c6c97 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -31,6 +31,7 @@
#include "backend/gen_reg_allocation.hpp"
#include "backend/gen/gen_mesa_disasm.h"
#include "ir/function.hpp"
+#include "ir/value.hpp"
#include "sys/cvar.hpp"
#include <cstring>
#include <iostream>
@@ -1860,8 +1861,116 @@ namespace gbe
BVAR(OCL_OUTPUT_REG_ALLOC, false);
BVAR(OCL_OUTPUT_ASM, false);
+
+ void GenContext::allocCurbeReg(ir::Register reg, gbe_curbe_type value, uint32_t subValue) {
+ uint32_t regSize;
+ regSize = this->ra->getRegSize(reg);
+ insertCurbeReg(reg, newCurbeEntry(value, subValue, regSize));
+ }
+
+ void GenContext::buildPatchList(void) {
+ const uint32_t ptrSize = unit.getPointerSize() == ir::POINTER_32_BITS ? 4u : 8u;
+ kernel->curbeSize = 0u;
+ auto &stackUse = dag->getUse(ir::ocl::stackptr);
+
+ // We insert the block IP mask first
+#if 0
+ this->insertCurbeReg(ir::ocl::blockip, this->newCurbeEntry(GBE_CURBE_BLOCK_IP, 0, this->simdWidth * sizeof(uint16_t)));
+ this->insertCurbeReg(ir::ocl::emask, this->newCurbeEntry(GBE_CURBE_EMASK, 0, this->simdWidth * sizeof(uint16_t)));
+ this->insertCurbeReg(ir::ocl::notemask, this->newCurbeEntry(GBE_CURBE_NOT_EMASK, 0, sizeof(uint16_t)));
+ this->insertCurbeReg(ir::ocl::barriermask, this->newCurbeEntry(GBE_CURBE_BARRIER_MASK, 0, sizeof(uint16_t)));
+ // Already inserted registers go here
+ const size_t localIDSizde = sizeof(uint32_t) * this->simdWidth;
+ insertCurbeReg(ir::ocl::lid0, this->newCurbeEntry(GBE_CURBE_LOCAL_ID_X, 0, localIDSize));
+ insertCurbeReg(ir::ocl::lid1, this->newCurbeEntry(GBE_CURBE_LOCAL_ID_Y, 0, localIDSize));
+ insertCurbeReg(ir::ocl::lid2, this->newCurbeEntry(GBE_CURBE_LOCAL_ID_Z, 0, localIDSize));
+ // Insert the stack buffer if used
+ if (stackUse.size() != 0)
+ insertCurbeReg(ir::ocl::stackbuffer, this->newCurbeEntry(GBE_CURBE_EXTRA_ARGUMENT, GBE_STACK_BUFFER, ptrSize));
+#else
+ using namespace ir::ocl;
+ allocCurbeReg(blockip, GBE_CURBE_BLOCK_IP);
+ allocCurbeReg(emask, GBE_CURBE_EMASK);
+ allocCurbeReg(notemask, GBE_CURBE_NOT_EMASK);
+ allocCurbeReg(barriermask, GBE_CURBE_BARRIER_MASK);
+ allocCurbeReg(lid0, GBE_CURBE_LOCAL_ID_X);
+ allocCurbeReg(lid1, GBE_CURBE_LOCAL_ID_Y);
+ allocCurbeReg(lid2, GBE_CURBE_LOCAL_ID_Z);
+ if (stackUse.size() != 0)
+ allocCurbeReg(stackbuffer, GBE_CURBE_EXTRA_ARGUMENT, GBE_STACK_BUFFER);
+#endif
+ // Go over the arguments and find the related patch locations
+ const uint32_t argNum = fn.argNum();
+ for (uint32_t argID = 0u; argID < argNum; ++argID) {
+ const ir::FunctionArgument &arg = fn.getArg(argID);
+ // For pointers and values, we have nothing to do. We just push the values
+ if (arg.type == ir::FunctionArgument::GLOBAL_POINTER ||
+ arg.type == ir::FunctionArgument::LOCAL_POINTER ||
+ arg.type == ir::FunctionArgument::CONSTANT_POINTER ||
+ arg.type == ir::FunctionArgument::VALUE ||
+ arg.type == ir::FunctionArgument::STRUCTURE ||
+ arg.type == ir::FunctionArgument::IMAGE ||
+ arg.type == ir::FunctionArgument::SAMPLER)
+ this->insertCurbeReg(arg.reg, this->newCurbeEntry(GBE_CURBE_KERNEL_ARGUMENT, argID, arg.size, ptrSize));
+ }
+
+ // Go over all the instructions and find the special register we need
+ // to push
+ #define INSERT_REG(SPECIAL_REG, PATCH) \
+ if (reg == ir::ocl::SPECIAL_REG) { \
+ if (curbeRegs.find(reg) != curbeRegs.end()) continue; \
+ allocCurbeReg(reg, GBE_CURBE_##PATCH); \
+ } else
+
+ fn.foreachInstruction([&](ir::Instruction &insn) {
+ const uint32_t srcNum = insn.getSrcNum();
+ for (uint32_t srcID = 0; srcID < srcNum; ++srcID) {
+ const ir::Register reg = insn.getSrc(srcID);
+ if (insn.getOpcode() == ir::OP_GET_IMAGE_INFO) {
+ if (srcID != 0) continue;
+ const unsigned char bti = ir::cast<ir::GetImageInfoInstruction>(insn).getImageIndex();
+ const unsigned char type = ir::cast<ir::GetImageInfoInstruction>(insn).getInfoType();;
+ ir::ImageInfoKey key(bti, type);
+ const ir::Register imageInfo = insn.getSrc(0);
+ if (curbeRegs.find(imageInfo) == curbeRegs.end()) {
+ uint32_t offset = this->getImageInfoCurbeOffset(key, 4);
+ insertCurbeReg(imageInfo, offset);
+ }
+ continue;
+ }
+ if (fn.isSpecialReg(reg) == false) continue;
+ if (curbeRegs.find(reg) != curbeRegs.end()) continue;
+ if (reg == ir::ocl::stackptr) GBE_ASSERT(stackUse.size() > 0);
+ INSERT_REG(lsize0, LOCAL_SIZE_X)
+ INSERT_REG(lsize1, LOCAL_SIZE_Y)
+ INSERT_REG(lsize2, LOCAL_SIZE_Z)
+ INSERT_REG(gsize0, GLOBAL_SIZE_X)
+ INSERT_REG(gsize1, GLOBAL_SIZE_Y)
+ INSERT_REG(gsize2, GLOBAL_SIZE_Z)
+ INSERT_REG(goffset0, GLOBAL_OFFSET_X)
+ INSERT_REG(goffset1, GLOBAL_OFFSET_Y)
+ INSERT_REG(goffset2, GLOBAL_OFFSET_Z)
+ INSERT_REG(workdim, WORK_DIM)
+ INSERT_REG(numgroup0, GROUP_NUM_X)
+ INSERT_REG(numgroup1, GROUP_NUM_Y)
+ INSERT_REG(numgroup2, GROUP_NUM_Z)
+ INSERT_REG(stackptr, STACK_POINTER)
+ do {} while(0);
+ }
+ });
+#undef INSERT_REG
+
+
+ // After this point the vector is immutable. Sorting it will make
+ // lookups faster
+ std::sort(kernel->patches.begin(), kernel->patches.end());
+
+ kernel->curbeSize = ALIGN(kernel->curbeSize, GEN_REG_SIZE);
+ }
+
bool GenContext::emitCode(void) {
GenKernel *genKernel = static_cast<GenKernel*>(this->kernel);
+ buildPatchList();
sel->select();
schedulePreRegAllocation(*this, *this->sel);
if (UNLIKELY(ra->allocate(*this->sel) == false))
diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp
index 642301c..6ec43cc 100644
--- a/backend/src/backend/gen_context.hpp
+++ b/backend/src/backend/gen_context.hpp
@@ -162,6 +162,12 @@ namespace gbe
* regenerating the code
*/
bool limitRegisterPressure;
+ private:
+ /*! Build the curbe patch list for the given kernel */
+ void buildPatchList(void);
+ /*! Allocate a new curbe register and insert it into the curbe pool. */
+ void allocCurbeReg(ir::Register reg, gbe_curbe_type value, uint32_t subValue = 0);
+
};
} /* namespace gbe */
diff --git a/backend/src/backend/gen_reg_allocation.cpp b/backend/src/backend/gen_reg_allocation.cpp
index f446a5b..2ba9495 100644
--- a/backend/src/backend/gen_reg_allocation.cpp
+++ b/backend/src/backend/gen_reg_allocation.cpp
@@ -1001,5 +1001,11 @@ namespace gbe
this->opaque->outputAllocation();
}
+ uint32_t GenRegAllocator::getRegSize(ir::Register reg) {
+ uint32_t regSize;
+ this->opaque->getRegAttrib(reg, regSize);
+ return regSize;
+ }
+
} /* namespace gbe */
diff --git a/backend/src/backend/gen_reg_allocation.hpp b/backend/src/backend/gen_reg_allocation.hpp
index bccccc8..a2a1d40 100644
--- a/backend/src/backend/gen_reg_allocation.hpp
+++ b/backend/src/backend/gen_reg_allocation.hpp
@@ -57,6 +57,8 @@ namespace gbe
GenRegister genReg(const GenRegister &reg);
/*! Output the register allocation */
void outputAllocation(void);
+ /*! Get the register's actual size in bytes. */
+ uint32_t getRegSize(ir::Register reg);
private:
/*! Actual implementation of the register allocator (use Pimpl) */
class Opaque;
diff --git a/backend/src/backend/program.hpp b/backend/src/backend/program.hpp
index e6fc411..83fb0b4 100644
--- a/backend/src/backend/program.hpp
+++ b/backend/src/backend/program.hpp
@@ -180,6 +180,7 @@ namespace gbe {
protected:
friend class Context; //!< Owns the kernels
+ friend class GenContext;
std::string name; //!< Kernel name
KernelArgument *args; //!< Each argument
vector<PatchInfo> patches; //!< Indicates how to build the curbe
--
1.8.3.2
More information about the Beignet
mailing list