[Beignet] [PATCH v3] GBE: Avoid unnecessary dag/liveness computing at backend.
Zhigang Gong
zhigang.gong at intel.com
Sun Apr 27 22:25:47 PDT 2014
We don't need to recompute the DAG/liveness information in the backend
each time we switch to a new code gen strategy.
For the unit test cases, this patch could save 15% of the
overall execution time. For LuxMark in STRICT conformance
mode, it saves about 40% of the build time.
v3: fix some minor bugs.
Signed-off-by: Zhigang Gong <zhigang.gong at intel.com>
---
backend/src/backend/context.cpp | 28 ++++++++++++++++++++--------
backend/src/backend/context.hpp | 2 ++
backend/src/backend/gen_context.cpp | 26 +++++++++++++++++++-------
backend/src/backend/gen_context.hpp | 5 +++--
backend/src/backend/gen_insn_selection.cpp | 10 ++++++++--
backend/src/backend/gen_program.cpp | 20 ++++++++++++++++----
backend/src/backend/gen_reg_allocation.cpp | 5 +++--
7 files changed, 71 insertions(+), 25 deletions(-)
diff --git a/backend/src/backend/context.cpp b/backend/src/backend/context.cpp
index dc27d83..6a0bca2 100644
--- a/backend/src/backend/context.cpp
+++ b/backend/src/backend/context.cpp
@@ -335,12 +335,8 @@ namespace gbe
this->liveness = GBE_NEW(ir::Liveness, const_cast<ir::Function&>(fn));
this->dag = GBE_NEW(ir::FunctionDAG, *this->liveness);
// r0 (GEN_REG_SIZE) is always set by the HW and used at the end by EOT
- this->registerAllocator = GBE_NEW(RegisterAllocator, GEN_REG_SIZE, 4*KB - GEN_REG_SIZE);
- this->scratchAllocator = GBE_NEW(ScratchAllocator, 12*KB);
- if (fn.getSimdWidth() == 0 || OCL_SIMD_WIDTH != 15)
- this->simdWidth = nextHighestPowerOf2(OCL_SIMD_WIDTH);
- else
- this->simdWidth = fn.getSimdWidth();
+ this->registerAllocator = NULL; //GBE_NEW(RegisterAllocator, GEN_REG_SIZE, 4*KB - GEN_REG_SIZE);
+ this->scratchAllocator = NULL; //GBE_NEW(ScratchAllocator, 12*KB);
}
Context::~Context(void) {
@@ -350,12 +346,28 @@ namespace gbe
GBE_SAFE_DELETE(this->liveness);
}
+ void Context::startNewCG(uint32_t simdWidth) {
+ if (simdWidth == 0 || OCL_SIMD_WIDTH != 15)
+ this->simdWidth = nextHighestPowerOf2(OCL_SIMD_WIDTH);
+ else
+ this->simdWidth = simdWidth;
+ GBE_SAFE_DELETE(this->registerAllocator);
+ GBE_SAFE_DELETE(this->scratchAllocator);
+ GBE_ASSERT(dag != NULL && liveness != NULL);
+ this->registerAllocator = GBE_NEW(RegisterAllocator, GEN_REG_SIZE, 4*KB - GEN_REG_SIZE);
+ this->scratchAllocator = GBE_NEW(ScratchAllocator, 12*KB);
+ this->curbeRegs.clear();
+ this->JIPs.clear();
+ }
+
Kernel *Context::compileKernel(void) {
this->kernel = this->allocateKernel();
this->kernel->simdWidth = this->simdWidth;
this->buildArgList();
- this->buildUsedLabels();
- this->buildJIPs();
+ if (usedLabels.size() == 0)
+ this->buildUsedLabels();
+ if (JIPs.size() == 0)
+ this->buildJIPs();
this->buildStack();
this->handleSLM();
if (this->emitCode() == false) {
diff --git a/backend/src/backend/context.hpp b/backend/src/backend/context.hpp
index 26167a0..d4dcfca 100644
--- a/backend/src/backend/context.hpp
+++ b/backend/src/backend/context.hpp
@@ -56,6 +56,8 @@ namespace gbe
Context(const ir::Unit &unit, const std::string &name);
/*! Release everything needed */
virtual ~Context(void);
+ /*! start new code generation with specific simd width. */
+ void startNewCG(uint32_t simdWidth);
/*! Compile the code */
Kernel *compileKernel(void);
/*! Tells if the labels is used */
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index 34e3e61..4da47f8 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -46,15 +46,12 @@ namespace gbe
GenContext::GenContext(const ir::Unit &unit,
const std::string &name,
uint32_t deviceID,
- uint32_t reservedSpillRegs,
- bool limitRegisterPressure,
bool relaxMath) :
- Context(unit, name), deviceID(deviceID), reservedSpillRegs(reservedSpillRegs),
- limitRegisterPressure(limitRegisterPressure), relaxMath(relaxMath)
+ Context(unit, name), deviceID(deviceID), relaxMath(relaxMath)
{
- this->p = GBE_NEW(GenEncoder, simdWidth, 7, deviceID); // XXX handle more than Gen7
- this->sel = GBE_NEW(Selection, *this);
- this->ra = GBE_NEW(GenRegAllocator, *this);
+ this->p = NULL;
+ this->sel = NULL;
+ this->ra = NULL;
}
GenContext::~GenContext(void) {
@@ -63,6 +60,21 @@ namespace gbe
GBE_DELETE(this->p);
}
+ void GenContext::startNewCG(uint32_t simdWidth, uint32_t reservedSpillRegs, bool limitRegisterPressure) {
+ this->limitRegisterPressure = limitRegisterPressure;
+ this->reservedSpillRegs = reservedSpillRegs;
+ Context::startNewCG(simdWidth);
+ GBE_SAFE_DELETE(ra);
+ GBE_SAFE_DELETE(sel);
+ GBE_SAFE_DELETE(p);
+ this->p = GBE_NEW(GenEncoder, this->simdWidth, 7, deviceID); // XXX handle more than Gen7
+ this->sel = GBE_NEW(Selection, *this);
+ this->ra = GBE_NEW(GenRegAllocator, *this);
+ this->branchPos2.clear();
+ this->branchPos3.clear();
+ this->labelPos.clear();
+ }
+
void GenContext::emitInstructionStream(void) {
// Emit Gen ISA
for (auto &block : *sel->blockList)
diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp
index 12434f5..14ea719 100644
--- a/backend/src/backend/gen_context.hpp
+++ b/backend/src/backend/gen_context.hpp
@@ -52,10 +52,11 @@ namespace gbe
/*! Create a new context. name is the name of the function we want to
* compile
*/
- GenContext(const ir::Unit &unit, const std::string &name, uint32_t deviceID, uint32_t reservedSpillRegs = 0,
- bool limitRegisterPressure = false, bool relaxMath = false);
+ GenContext(const ir::Unit &unit, const std::string &name, uint32_t deviceID, bool relaxMath = false);
/*! Release everything needed */
~GenContext(void);
+ /*! Start new code generation with specific parameters */
+ void startNewCG(uint32_t simdWidth, uint32_t reservedSpillRegs, bool limitRegisterPressure);
/*! Target device ID*/
uint32_t deviceID;
/*! Implements base class */
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 32086d3..c05a97b 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -740,7 +740,10 @@ namespace gbe
}
if (poolOffset > ctx.reservedSpillRegs) {
- std::cerr << "Instruction (#" << (uint32_t)insn.opcode << ") src too large pooloffset " << (uint32_t)poolOffset << std::endl;
+ if (GBE_DEBUG)
+ std::cerr << "Instruction (#" << (uint32_t)insn.opcode
+ << ") src too large pooloffset "
+ << (uint32_t)poolOffset << std::endl;
return false;
}
while(!regSet.empty()) {
@@ -798,7 +801,10 @@ namespace gbe
}
if (poolOffset > ctx.reservedSpillRegs){
- std::cerr << "Instruction (#" << (uint32_t)insn.opcode << ") dst too large pooloffset " << (uint32_t)poolOffset << std::endl;
+ if (GBE_DEBUG)
+ std::cerr << "Instruction (#" << (uint32_t)insn.opcode
+ << ") dst too large pooloffset "
+ << (uint32_t)poolOffset << std::endl;
return false;
}
while(!regSet.empty()) {
diff --git a/backend/src/backend/gen_program.cpp b/backend/src/backend/gen_program.cpp
index 4dca79d..3a4421a 100644
--- a/backend/src/backend/gen_program.cpp
+++ b/backend/src/backend/gen_program.cpp
@@ -80,9 +80,12 @@ namespace gbe {
bool limitRegisterPressure;
} codeGenStrategy[] = {
{16, 0, false},
+ {16, 5, false},
{16, 10, false},
{8, 0, false},
+ {8, 4, false},
{8, 8, false},
+ {8, 16, false},
};
Kernel *GenProgram::compileKernel(const ir::Unit &unit, const std::string &name, bool relaxMath) {
@@ -91,11 +94,20 @@ namespace gbe {
// when the function already provides the simd width we need to use (i.e.
// non zero)
const ir::Function *fn = unit.getFunction(name);
- const uint32_t codeGenNum = fn->getSimdWidth() != 0 ? 2 : 4;
- uint32_t codeGen = fn->getSimdWidth() == 8 ? 2 : 0;
+ uint32_t codeGenNum = sizeof(codeGenStrategy) / sizeof(codeGenStrategy[0]);
+ uint32_t codeGen = 0;
+ if (fn->getSimdWidth() == 8) {
+ codeGen = 3;
+ } else if (fn->getSimdWidth() == 16) {
+ codeGenNum = 3;
+ } else if (fn->getSimdWidth() == 0) {
+ codeGen = 0;
+ } else
+ GBE_ASSERT(0);
Kernel *kernel = NULL;
// Stop when compilation is successful
+ GenContext *ctx = GBE_NEW(GenContext, unit, name, deviceID, relaxMath);
for (; codeGen < codeGenNum; ++codeGen) {
const uint32_t simdWidth = codeGenStrategy[codeGen].simdWidth;
const bool limitRegisterPressure = codeGenStrategy[codeGen].limitRegisterPressure;
@@ -103,14 +115,14 @@ namespace gbe {
// Force the SIMD width now and try to compile
unit.getFunction(name)->setSimdWidth(simdWidth);
- Context *ctx = GBE_NEW(GenContext, unit, name, deviceID, reservedSpillRegs, limitRegisterPressure, relaxMath);
+ ctx->startNewCG(simdWidth, reservedSpillRegs, limitRegisterPressure);
kernel = ctx->compileKernel();
if (kernel != NULL) {
break;
}
- GBE_DELETE(ctx);
fn->getImageSet()->clearInfo();
}
+ //GBE_DELETE(ctx);
GBE_ASSERTM(kernel != NULL, "Fail to compile kernel, may need to increase reserved registers for spilling.");
return kernel;
diff --git a/backend/src/backend/gen_reg_allocation.cpp b/backend/src/backend/gen_reg_allocation.cpp
index 32cd643..54b7cac 100644
--- a/backend/src/backend/gen_reg_allocation.cpp
+++ b/backend/src/backend/gen_reg_allocation.cpp
@@ -627,7 +627,6 @@ namespace gbe
const uint32_t grfOffset = allocateReg(interval, size, alignment);
if(grfOffset == 0) {
GBE_ASSERT(!(reservedReg && family != ir::FAMILY_DWORD));
- GBE_ASSERT(ctx.reservedSpillRegs == 0 || vector->regNum < ctx.reservedSpillRegs);
for(int i = vector->regNum-1; i >= 0; i--) {
if (!spillReg(vector->reg[i].reg()))
return false;
@@ -662,7 +661,8 @@ namespace gbe
allocateScratchForSpilled();
bool success = selection.spillRegs(spilledRegs, reservedReg);
if (!success) {
- std::cerr << "Fail to spill registers." << std::endl;
+ if (GBE_DEBUG)
+ std::cerr << "Fail to spill registers." << std::endl;
return false;
}
}
@@ -775,6 +775,7 @@ namespace gbe
// from the RA map.
bool success = expireReg(interval.reg);
GBE_ASSERT(success);
+ success = success;
RA.erase(interval.reg);
}
spilledRegs.insert(std::make_pair(interval.reg, spillTag));
--
1.8.3.2
More information about the Beignet
mailing list