[Beignet] [PATCH 6/8] GBE: Extend front label ip to 32 bit on demand.
Zhigang Gong
zhigang.gong at intel.com
Tue Mar 31 19:05:41 PDT 2015
If the number of front end labels exceeds 0xffff, the backend will
use a full DW (32 bit) to represent each block's IP address. This is
decided dynamically, according to the actual number of labels in the
front end function.
Signed-off-by: Zhigang Gong <zhigang.gong at intel.com>
---
backend/src/backend/context.cpp | 2 +
backend/src/backend/context.hpp | 8 +++
backend/src/backend/gen_context.cpp | 29 ++++++++--
backend/src/backend/gen_insn_selection.cpp | 89 ++++++++++++++++++++++++------
backend/src/backend/gen_insn_selection.hpp | 1 -
backend/src/backend/program.h | 1 +
backend/src/ir/function.cpp | 4 +-
backend/src/ir/profile.cpp | 2 +
backend/src/ir/profile.hpp | 5 +-
src/cl_command_queue_gen7.c | 16 ++++--
10 files changed, 128 insertions(+), 29 deletions(-)
diff --git a/backend/src/backend/context.cpp b/backend/src/backend/context.cpp
index 5e33ddd..59ccc79 100644
--- a/backend/src/backend/context.cpp
+++ b/backend/src/backend/context.cpp
@@ -354,6 +354,8 @@ namespace gbe
this->kernel = this->allocateKernel();
this->kernel->simdWidth = this->simdWidth;
this->buildArgList();
+ if (fn.labelNum() > 0xffff)
+ this->useDWLabel = true;
if (usedLabels.size() == 0)
this->buildUsedLabels();
if (JIPs.size() == 0)
diff --git a/backend/src/backend/context.hpp b/backend/src/backend/context.hpp
index 1b3744b..faa7c8a 100644
--- a/backend/src/backend/context.hpp
+++ b/backend/src/backend/context.hpp
@@ -100,6 +100,13 @@ namespace gbe
/*! Preallocated curbe register set including special registers. */
map<ir::Register, uint32_t> curbeRegs;
ir::Register getSurfaceBaseReg(unsigned char bti);
+ /* Return useDWLabel: true when the backend uses DW (u32) labels, false when it uses W (u16) labels. NOTE(review): confirm useDWLabel is initialised to false somewhere; the visible change only ever assigns true. */
+ bool isDWLabel(void) const {
+ return useDWLabel;
+ }
+ uint32_t getMaxLabel(void) const { /* largest label value for the current label width */
+ return this->isDWLabel() ? 0xffffffff : 0xffff; /* DW labels : W labels */
+ }
protected:
/*! Build the instruction stream. Return false if failed */
virtual bool emitCode(void) = 0;
@@ -140,6 +147,7 @@ namespace gbe
set<ir::LabelIndex> usedLabels; //!< Set of all used labels
JIPMap JIPs; //!< Where to jump all labels/branches
uint32_t simdWidth; //!< Number of lanes per HW threads
+ bool useDWLabel; //!< false means using u16 label, true means using u32 label.
map<unsigned char, ir::Register> btiRegMap;
GBE_CLASS(Context); //!< Use custom allocators
};
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index 3fab9c8..13c7664 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -133,18 +133,36 @@ namespace gbe
return true;
}
+ /* Return the allocated block IP register for the current label width: ocl::blockip viewed as uw8grf with W labels, ocl::dwblockip viewed as ud8grf with DW labels. */
+ static GenRegister getBlockIP(GenContext &ctx) {
+ GenRegister blockip;
+ if (!ctx.isDWLabel())
+ blockip = ctx.ra->genReg(GenRegister::uw8grf(ir::ocl::blockip));
+ else
+ blockip = ctx.ra->genReg(GenRegister::ud8grf(ir::ocl::dwblockip));
+ return blockip;
+ }
+
+ /* Emit a MOV of the constant label value into blockip, using a UW immediate with W labels and a UD immediate with DW labels. */
+ static void setBlockIP(GenContext &ctx, GenRegister blockip, uint32_t label) {
+ if (!ctx.isDWLabel())
+ ctx.p->MOV(blockip, GenRegister::immuw(label));
+ else
+ ctx.p->MOV(blockip, GenRegister::immud(label));
+ }
+
void GenContext::clearFlagRegister(void) {
// when group size not aligned to simdWidth, flag register need clear to
// make prediction(any8/16h) work correctly
- const GenRegister blockip = ra->genReg(GenRegister::uw8grf(ir::ocl::blockip));
+ const GenRegister blockip = getBlockIP(*this);
const GenRegister zero = ra->genReg(GenRegister::uw1grf(ir::ocl::zero));
const GenRegister one = ra->genReg(GenRegister::uw1grf(ir::ocl::one));
p->push();
p->curr.noMask = 1;
p->curr.predicate = GEN_PREDICATE_NONE;
- p->MOV(blockip, GenRegister::immuw(GEN_MAX_LABEL));
+ setBlockIP(*this, blockip, getMaxLabel());
p->curr.noMask = 0;
- p->MOV(blockip, GenRegister::immuw(0));
+ setBlockIP(*this, blockip, 0);
p->curr.execWidth = 1;
// FIXME, need to get the final use set of zero/one, if there is no user,
// no need to generate the following two instructions.
@@ -1808,7 +1826,10 @@ namespace gbe
// We insert the block IP mask first
using namespace ir::ocl;
- allocCurbeReg(blockip, GBE_CURBE_BLOCK_IP);
+ if (!isDWLabel())
+ allocCurbeReg(blockip, GBE_CURBE_BLOCK_IP);
+ else
+ allocCurbeReg(dwblockip, GBE_CURBE_DW_BLOCK_IP);
allocCurbeReg(lid0, GBE_CURBE_LOCAL_ID_X);
allocCurbeReg(lid1, GBE_CURBE_LOCAL_ID_Y);
allocCurbeReg(lid2, GBE_CURBE_LOCAL_ID_Z);
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index 27ed11b..e025698 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -633,6 +633,64 @@ namespace gbe
SelectionDAG *dag0, SelectionDAG *dag1,
GenRegister &src0, GenRegister &src1,
ir::Type type, bool &inverse);
+
+ /* Return the selection register holding the current block IP: ocl::dwblockip when DW labels are in use, ocl::blockip otherwise. */
+ GenRegister getBlockIP() {
+ return ctx.isDWLabel() ? selReg(ir::ocl::dwblockip) : selReg(ir::ocl::blockip);
+ }
+
+ /* Build the immediate register for a label value: a UD immediate when DW labels are in use, a UW immediate otherwise. */
+ GenRegister getLabelImmReg(uint32_t labelValue) {
+ return ctx.isDWLabel() ? GenRegister::immud(labelValue) : GenRegister::immuw(labelValue);
+ }
+
+ /* Convenience overload: build the label immediate register from label.value(). */
+ GenRegister getLabelImmReg(ir::LabelIndex label) {
+ return getLabelImmReg(label.value());
+ }
+
+ /* Emit a MOV of labelValue into blockip; blockip is retyped to UW (UW immediate) with W labels, or to UD (UD immediate) with DW labels. */
+ void setBlockIP(GenRegister blockip, uint32_t labelValue) {
+ if (!ctx.isDWLabel())
+ MOV(GenRegister::retype(blockip, GEN_TYPE_UW), GenRegister::immuw(labelValue));
+ else
+ MOV(GenRegister::retype(blockip, GEN_TYPE_UD), GenRegister::immud(labelValue));
+ }
+
+ /* Emit a CMP with condition cond between blockip and labelReg, writing to a null destination; blockip and the null register are retyped to UW with W labels or UD with DW labels, labelReg is used as passed. */
+ void cmpBlockIP(uint32_t cond,
+ GenRegister blockip,
+ GenRegister labelReg) {
+ if (!ctx.isDWLabel())
+ CMP(cond,
+ GenRegister::retype(blockip, GEN_TYPE_UW),
+ labelReg,
+ GenRegister::retype(GenRegister::null(),
+ GEN_TYPE_UW));
+ else
+ CMP(cond,
+ GenRegister::retype(blockip, GEN_TYPE_UD),
+ labelReg,
+ GenRegister::retype(GenRegister::null(),
+ GEN_TYPE_UD));
+ }
+
+ void cmpBlockIP(uint32_t cond, /* overload comparing blockip against a constant label value */
+ GenRegister blockip,
+ uint32_t labelValue) {
+ if (!ctx.isDWLabel()) /* W labels: compare as UW against a UW immediate */
+ CMP(cond,
+ GenRegister::retype(blockip, GEN_TYPE_UW),
+ GenRegister::immuw(labelValue),
+ GenRegister::retype(GenRegister::null(),
+ GEN_TYPE_UW));
+ else /* DW labels: compare as UD against a UD immediate */
+ CMP(cond,
+ GenRegister::retype(blockip, GEN_TYPE_UD),
+ GenRegister::immud(labelValue),
+ GenRegister::retype(GenRegister::null(), GEN_TYPE_UD));
+ }
+
/*! Use custom allocators */
GBE_CLASS(Opaque);
friend class SelectionBlock;
@@ -3860,10 +3918,10 @@ namespace gbe
{
using namespace ir;
const LabelIndex label = insn.getLabelIndex();
- const GenRegister src0 = sel.selReg(ocl::blockip);
- const GenRegister src1 = GenRegister::immuw(label);
+ const GenRegister src0 = sel.getBlockIP();
+ const GenRegister src1 = sel.getLabelImmReg(label);
const uint32_t simdWidth = sel.ctx.getSimdWidth();
- GBE_ASSERTM(label < GEN_MAX_LABEL, "We reached the maximum label number which is reserved for barrier handling");
+ GBE_ASSERTM(label < sel.ctx.getMaxLabel(), "We reached the maximum label number which is reserved for barrier handling");
sel.LABEL(label);
if(!insn.getParent()->needIf)
@@ -3884,8 +3942,7 @@ namespace gbe
sel.push();
sel.curr.noMask = 1;
sel.curr.predicate = GEN_PREDICATE_NONE;
- sel.CMP(GEN_CONDITIONAL_LE, GenRegister::retype(src0, GEN_TYPE_UW), src1,
- GenRegister::retype(GenRegister::null(), GEN_TYPE_UW));
+ sel.cmpBlockIP(GEN_CONDITIONAL_LE, src0, src1);
sel.pop();
if (sel.block->hasBarrier) {
@@ -3895,11 +3952,10 @@ namespace gbe
// this block, as it will always excute with all lanes activated.
sel.push();
sel.curr.predicate = GEN_PREDICATE_NORMAL;
- sel.MOV(GenRegister::retype(src0, GEN_TYPE_UW), GenRegister::immuw(GEN_MAX_LABEL));
+ sel.setBlockIP(src0, sel.ctx.getMaxLabel());
sel.curr.predicate = GEN_PREDICATE_NONE;
sel.curr.noMask = 1;
- sel.CMP(GEN_CONDITIONAL_EQ, GenRegister::retype(src0, GEN_TYPE_UW), GenRegister::immuw(GEN_MAX_LABEL),
- GenRegister::retype(GenRegister::null(), GEN_TYPE_UW));
+ sel.cmpBlockIP(GEN_CONDITIONAL_EQ, src0, sel.ctx.getMaxLabel());
if (simdWidth == 8)
sel.curr.predicate = GEN_PREDICATE_ALIGN1_ALL8H;
else if (simdWidth == 16)
@@ -3914,7 +3970,7 @@ namespace gbe
// FIXME, if the last BRA is unconditional jump, we don't need to update the label here.
sel.push();
sel.curr.predicate = GEN_PREDICATE_NORMAL;
- sel.MOV(GenRegister::retype(src0, GEN_TYPE_UW), GenRegister::immuw(label.value()));
+ sel.setBlockIP(src0, label.value());
sel.pop();
}
else {
@@ -4191,7 +4247,7 @@ namespace gbe
ir::LabelIndex src) const
{
using namespace ir;
- const GenRegister ip = sel.selReg(ocl::blockip, TYPE_U16);
+ const GenRegister ip = sel.getBlockIP();
// We will not emit any jump if we must go the next block anyway
const BasicBlock *curr = insn.getParent();
@@ -4206,7 +4262,7 @@ namespace gbe
sel.curr.physicalFlag = 0;
sel.curr.flagIndex = pred.value();
sel.curr.predicate = GEN_PREDICATE_NORMAL;
- sel.MOV(ip, GenRegister::immuw(dst.value()));
+ sel.setBlockIP(ip, dst.value());
sel.curr.predicate = GEN_PREDICATE_NONE;
if (!sel.block->hasBarrier && !sel.block->removeSimpleIfEndif)
sel.ENDIF(GenRegister::immd(0), nextLabel);
@@ -4216,7 +4272,7 @@ namespace gbe
// Update the PcIPs
const LabelIndex jip = sel.ctx.getLabelIndex(&insn);
if(insn.getParent()->needEndif)
- sel.MOV(ip, GenRegister::immuw(dst.value()));
+ sel.setBlockIP(ip, dst.value());
if (!sel.block->hasBarrier && !sel.block->removeSimpleIfEndif) {
if(insn.getParent()->needEndif && !insn.getParent()->needIf)
@@ -4242,7 +4298,8 @@ namespace gbe
ir::LabelIndex src) const
{
using namespace ir;
- const GenRegister ip = sel.selReg(ocl::blockip, TYPE_U16);
+ //const GenRegister ip = sel.selReg(ocl::blockip, TYPE_U16);
+ const GenRegister ip = sel.getBlockIP();
const Function &fn = sel.ctx.getFunction();
const BasicBlock &bb = fn.getBlock(src);
const LabelIndex jip = sel.ctx.getLabelIndex(&insn);
@@ -4257,13 +4314,13 @@ namespace gbe
// block. Next instruction will properly update the IPs of the lanes
// that actually take the branch
const LabelIndex next = bb.getNextBlock()->getLabelIndex();
- sel.MOV(ip, GenRegister::immuw(next.value()));
+ sel.setBlockIP(ip, next.value());
GBE_ASSERT(jip == dst);
sel.push();
sel.curr.physicalFlag = 0;
sel.curr.flagIndex = pred.value();
sel.curr.predicate = GEN_PREDICATE_NORMAL;
- sel.MOV(ip, GenRegister::immuw(dst.value()));
+ sel.setBlockIP(ip, dst.value());
sel.block->endifOffset = -1;
sel.curr.predicate = GEN_PREDICATE_NONE;
if (!sel.block->hasBarrier && !sel.block->removeSimpleIfEndif)
@@ -4280,7 +4337,7 @@ namespace gbe
const LabelIndex next = bb.getNextBlock()->getLabelIndex();
// Update the PcIPs
if(insn.getParent()->needEndif)
- sel.MOV(ip, GenRegister::immuw(dst.value()));
+ sel.setBlockIP(ip, dst.value());
sel.block->endifOffset = -1;
if (!sel.block->hasBarrier && !sel.block->removeSimpleIfEndif) {
if(insn.getParent()->needEndif && !insn.getParent()->needIf)
diff --git a/backend/src/backend/gen_insn_selection.hpp b/backend/src/backend/gen_insn_selection.hpp
index c2c4dae..d3f7363 100644
--- a/backend/src/backend/gen_insn_selection.hpp
+++ b/backend/src/backend/gen_insn_selection.hpp
@@ -44,7 +44,6 @@ namespace gbe
/*! Translate IR compare to Gen compare */
uint32_t getGenCompare(ir::Opcode opcode);
- #define GEN_MAX_LABEL 0xFFFF
/*! Selection opcodes properly encoded from 0 to n for fast jump tables
* generations
diff --git a/backend/src/backend/program.h b/backend/src/backend/program.h
index dc5662f..4065a17 100644
--- a/backend/src/backend/program.h
+++ b/backend/src/backend/program.h
@@ -96,6 +96,7 @@ enum gbe_curbe_type {
GBE_CURBE_KERNEL_ARGUMENT,
GBE_CURBE_EXTRA_ARGUMENT,
GBE_CURBE_BLOCK_IP,
+ GBE_CURBE_DW_BLOCK_IP,
GBE_CURBE_THREAD_NUM,
GBE_CURBE_ZERO,
GBE_CURBE_ONE,
diff --git a/backend/src/ir/function.cpp b/backend/src/ir/function.cpp
index 7983778..79dc997 100644
--- a/backend/src/ir/function.cpp
+++ b/backend/src/ir/function.cpp
@@ -136,8 +136,8 @@ namespace ir {
}
LabelIndex Function::newLabel(void) {
- GBE_ASSERTM(labels.size() < 0xffff,
- "Too many labels are defined (65536 only are supported)");
+ GBE_ASSERTM(labels.size() < 0xffffffffull,
+ "Too many labels are defined (4G only are supported)");
const LabelIndex index(labels.size());
labels.push_back(NULL);
return index;
diff --git a/backend/src/ir/profile.cpp b/backend/src/ir/profile.cpp
index 4c272bd..ec7ab94 100644
--- a/backend/src/ir/profile.cpp
+++ b/backend/src/ir/profile.cpp
@@ -43,6 +43,7 @@ namespace ir {
"zero", "one",
"retVal", "slm_offset",
"printf_buffer_pointer", "printf_index_buffer_pointer",
+ "dwblockip",
"invalid"
};
@@ -86,6 +87,7 @@ namespace ir {
DECL_NEW_REG(FAMILY_DWORD, slmoffset, 1);
DECL_NEW_REG(FAMILY_DWORD, printfbptr, 1);
DECL_NEW_REG(FAMILY_DWORD, printfiptr, 1);
+ DECL_NEW_REG(FAMILY_DWORD, dwblockip, 0);
DECL_NEW_REG(FAMILY_DWORD, invalid, 1);
}
#undef DECL_NEW_REG
diff --git a/backend/src/ir/profile.hpp b/backend/src/ir/profile.hpp
index 7259d9f..8f69320 100644
--- a/backend/src/ir/profile.hpp
+++ b/backend/src/ir/profile.hpp
@@ -71,8 +71,9 @@ namespace ir {
static const Register slmoffset = Register(27); // Group's SLM offset in total 64K SLM
static const Register printfbptr = Register(28); // printf buffer address .
static const Register printfiptr = Register(29); // printf index buffer address.
- static const Register invalid = Register(30); // used for valid comparation.
- static const uint32_t regNum = 31; // number of special registers
+ static const Register dwblockip = Register(30); // blockip
+ static const Register invalid = Register(31); // used for valid comparation.
+ static const uint32_t regNum = 32; // number of special registers
extern const char *specialRegMean[]; // special register name.
} /* namespace ocl */
diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c
index 253c4f2..4adbd2b 100644
--- a/src/cl_command_queue_gen7.c
+++ b/src/cl_command_queue_gen7.c
@@ -49,23 +49,27 @@ cl_set_varying_payload(const cl_kernel ker,
size_t i, j, k, curr = 0;
int32_t id_offset[3], ip_offset;
cl_int err = CL_SUCCESS;
+ int32_t dw_ip_offset = -1;
id_offset[0] = interp_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_LOCAL_ID_X, 0);
id_offset[1] = interp_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_LOCAL_ID_Y, 0);
id_offset[2] = interp_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_LOCAL_ID_Z, 0);
ip_offset = interp_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_BLOCK_IP, 0);
+ if (ip_offset < 0)
+ dw_ip_offset = interp_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_DW_BLOCK_IP, 0);
+ assert(ip_offset < 0 || dw_ip_offset < 0);
assert(id_offset[0] >= 0 &&
id_offset[1] >= 0 &&
id_offset[2] >= 0 &&
- ip_offset >= 0);
+ (ip_offset >= 0 || dw_ip_offset >= 0));
TRY_ALLOC(ids[0], (uint32_t*) alloca(sizeof(uint32_t)*thread_n*simd_sz));
TRY_ALLOC(ids[1], (uint32_t*) alloca(sizeof(uint32_t)*thread_n*simd_sz));
TRY_ALLOC(ids[2], (uint32_t*) alloca(sizeof(uint32_t)*thread_n*simd_sz));
TRY_ALLOC(block_ips, (uint16_t*) alloca(sizeof(uint16_t)*thread_n*simd_sz));
-
/* 0xffff means that the lane is inactivated */
- memset(block_ips, 0xff, sizeof(uint16_t)*thread_n*simd_sz);
+ memset(block_ips, 0xff, sizeof(int16_t)*thread_n*simd_sz);
+
/* Compute the IDs and the block IPs */
for (k = 0; k < local_wk_sz[2]; ++k)
@@ -84,11 +88,15 @@ cl_set_varying_payload(const cl_kernel ker,
uint32_t *ids1 = (uint32_t *) (data + id_offset[1]);
uint32_t *ids2 = (uint32_t *) (data + id_offset[2]);
uint16_t *ips = (uint16_t *) (data + ip_offset);
+ uint32_t *dw_ips = (uint32_t *) (data + dw_ip_offset);
for (j = 0; j < simd_sz; ++j, ++curr) {
ids0[j] = ids[0][curr];
ids1[j] = ids[1][curr];
ids2[j] = ids[2][curr];
- ips[j] = block_ips[curr];
+ if (ip_offset >= 0)
+ ips[j] = block_ips[curr];
+ if (dw_ip_offset >= 0)
+ dw_ips[j] = block_ips[curr];
}
}
--
1.9.1
More information about the Beignet
mailing list