[Beignet] [PATCH 11/19] Backend: Add profilingProlog function for GenContext.
junyan.he at inbox.com
junyan.he at inbox.com
Tue Sep 8 17:01:02 PDT 2015
From: Junyan He <junyan.he at linux.intel.com>
profilingProlog collects the information needed for
profiling: the global ID range in X, Y and Z, and the
prolog timestamp.
Signed-off-by: Junyan He <junyan.he at linux.intel.com>
---
backend/src/backend/gen_context.cpp | 116 +++++++++++++++++++++++++++++++++++
backend/src/backend/gen_context.hpp | 2 +
2 files changed, 118 insertions(+)
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index 435b224..696d86a 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -2185,6 +2185,119 @@ namespace gbe
p->TYPED_WRITE(header, true, bti);
}
+ void GenContext::calcGlobalXYZRange(GenRegister& reg, GenRegister& tmp, int flag, int subFlag)
+ { /* Emits code that, for dim 0..2, stores a start/end global-id pair into reg at sub-offsets 8+dim*8 and +4 (presumably bytes); tmp is scratch, flag/subFlag pick the flag register. */
+#define CALC_GID(dim) do {\
+ GenRegister g##dim##start = GenRegister::offset(reg, 0, 8 + dim*8); \
+ GenRegister g##dim##end = GenRegister::offset(g##dim##start, 0, 4); \
+ GenRegister id##dim = GenRegister::toUniform(ra->genReg(GenRegister::ud16grf(ir::ocl::lid##dim)), GEN_TYPE_UD); \
+ GenRegister localsz##dim = GenRegister::toUniform(ra->genReg(GenRegister::ud1grf(ir::ocl::lsize##dim)), GEN_TYPE_UD); \
+ GenRegister gid##dim = GenRegister::toUniform(ra->genReg(GenRegister::ud1grf(ir::ocl::groupid##dim)), GEN_TYPE_UD); \
+ GenRegister goffset##dim = GenRegister::toUniform(ra->genReg(GenRegister::ud1grf(ir::ocl::goffset##dim)), GEN_TYPE_UD); \
+ p->MUL(g##dim##start, localsz##dim, gid##dim); /* start = lsize * groupid */ \
+ p->ADD(g##dim##start, g##dim##start, id##dim); /* + uniform view of lid (presumably the first lane's local id) */ \
+ p->ADD(g##dim##start, g##dim##start, goffset##dim); /* + goffset */ \
+ GenRegister ip; \
+ p->MOV(flagReg, GenRegister::immuw(0x0)); /* clear the flag before the compare below */ \
+ p->curr.useFlag(flag, subFlag); \
+ p->curr.predicate = GEN_PREDICATE_NONE; \
+ if (this->simdWidth == 16) /* the compare runs at the kernel's SIMD width */ \
+ p->curr.execWidth = 16; \
+ else \
+ p->curr.execWidth = 8; \
+ if (!isDWLabel()) { /* block ip is 16 bits wide unless DW labels are in use */ \
+ ip = ra->genReg(GenRegister::uw16grf(ir::ocl::blockip)); \
+ p->CMP(GEN_CONDITIONAL_EQ, ip, GenRegister::immuw(0xffff)); /* flag lanes whose block ip is all-ones */ \
+ } else { \
+ ip = ra->genReg(GenRegister::ud16grf(ir::ocl::dwblockip)); \
+ p->CMP(GEN_CONDITIONAL_EQ, ip, GenRegister::immud(0xffffffff)); \
+ } \
+ p->curr.execWidth = 1; /* the rest of the sequence is scalar */ \
+ p->MOV(GenRegister::retype(tmp, GEN_TYPE_UW), flagReg); /* copy the compare result into tmp */ \
+ if (this->simdWidth == 16) /* force the bits above the SIMD width to 1 */ \
+ p->OR(tmp, tmp, GenRegister::immud(0xffff0000)); \
+ else \
+ p->OR(tmp, tmp, GenRegister::immud(0xffffff00)); \
+ p->FBL(tmp, tmp); /* NOTE(review): presumably index of the lowest set bit, i.e. first lane with all-ones ip - confirm */ \
+ p->ADD(tmp, tmp, GenRegister::negate(GenRegister::immud(0x1))); /* step back one lane */ \
+ p->MUL(tmp, tmp, GenRegister::immud(4)); /* lane index * 4, used as an address offset (UD elements) */ \
+ p->MOV(GenRegister::addr1(0), GenRegister::retype(tmp, GEN_TYPE_UW)); /* load the offset into the address register */ \
+ GenRegister dimEnd = GenRegister::to_indirect1xN(id##dim, 0); /* indirect view of the lid register */ \
+ p->MOV(tmp, dimEnd); /* tmp = lid read through the address register */ \
+ p->MUL(g##dim##end, localsz##dim, gid##dim); /* end = lsize * groupid */ \
+ p->ADD(g##dim##end, g##dim##end, tmp); /* + lid fetched above */ \
+ p->ADD(g##dim##end, g##dim##end, goffset##dim); /* + goffset */ \
+} while(0)
+
+ GenRegister flagReg = GenRegister::flag(flag, subFlag); // must be named flagReg: CALC_GID refers to it by name
+ p->push(); { // instruction state changed below is scoped by push/pop
+ p->curr.execWidth = 1;
+ p->curr.predicate = GEN_PREDICATE_NONE;
+ p->curr.noMask = 1;
+ CALC_GID(0); // one expansion per dimension: ir::ocl::lid0/lsize0/groupid0/goffset0, then 1, then 2
+ CALC_GID(1);
+ CALC_GID(2);
+ } p->pop();
+
+#undef CALC_GID
+ }
+
+ void GenContext::profilingProlog(void) {
+ // Record the prolog timestamp, the global X/Y/Z range and the last timestamp into the profiling registers at the very beginning of the kernel.
+ GenRegister profilingReg2, profilingReg3, profilingReg4;
+ GenRegister tmArf = GenRegister(GEN_ARCHITECTURE_REGISTER_FILE, // source of both MOVs at the end of this function
+ 0xc0, // NOTE(review): presumably the timestamp ARF ("arf_tm" in the comments below) - confirm
+ 0,
+ GEN_TYPE_UW,
+ GEN_VERTICAL_STRIDE_4,
+ GEN_WIDTH_4,
+ GEN_HORIZONTAL_STRIDE_1);
+ if (this->simdWidth == 16) {
+ profilingReg2 = ra->genReg(GenRegister::ud16grf(ir::ocl::profilingts1)); // NOTE(review): SIMD16 starts at profilingts1 where SIMD8 starts at profilingts2 - presumably due to ud16 register size; confirm
+ profilingReg3 = GenRegister::offset(profilingReg2, 1); // the register following profilingReg2
+ profilingReg4 = ra->genReg(GenRegister::ud16grf(ir::ocl::profilingts2));
+ } else {
+ GBE_ASSERT(this->simdWidth == 8); // only SIMD8 and SIMD16 are handled
+ profilingReg2 = ra->genReg(GenRegister::ud8grf(ir::ocl::profilingts2));
+ profilingReg3 = ra->genReg(GenRegister::ud8grf(ir::ocl::profilingts3));
+ profilingReg4 = ra->genReg(GenRegister::ud8grf(ir::ocl::profilingts4));
+ }
+
+ /* MOV(4) prolog<1>:UW arf_tm<4,4,1>:UW */
+ /* MOV(4) lastTsReg<1>:UW arf_tm<4,4,1>:UW -- NOTE(review): this comment used to name prolog as the source, but the code below reads tmArf a second time; confirm which is intended */
+ GenRegister prolog = profilingReg2; // view of profilingReg2 as 4 UW at sub-offset 4*sizeof(uint32_t)
+ prolog.type = GEN_TYPE_UW;
+ prolog.hstride = GEN_HORIZONTAL_STRIDE_1;
+ prolog.vstride = GEN_VERTICAL_STRIDE_4;
+ prolog.width = GEN_WIDTH_4;
+ prolog = GenRegister::offset(prolog, 0, 4*sizeof(uint32_t));
+
+ GenRegister lastTsReg = GenRegister::toUniform(profilingReg3, GEN_TYPE_UL); // view of profilingReg3 as 4 UW at sub-offset 2*sizeof(uint64_t)
+ lastTsReg = GenRegister::offset(lastTsReg, 0, 2*sizeof(uint64_t));
+ lastTsReg.type = GEN_TYPE_UW;
+ lastTsReg.hstride = GEN_HORIZONTAL_STRIDE_1;
+ lastTsReg.vstride = GEN_VERTICAL_STRIDE_4;
+ lastTsReg.width = GEN_WIDTH_4;
+
+ GenRegister gids = GenRegister::toUniform(profilingReg4, GEN_TYPE_UD); // destination base for the X/Y/Z ranges
+ GenRegister tmp = GenRegister::toUniform(profilingReg4, GEN_TYPE_UD); // scratch at the same base as gids; the ranges are written from sub-offset 8 upward by calcGlobalXYZRange
+
+ // Global X, Y and Z ranges, computed using flag register (0, 1).
+ this->calcGlobalXYZRange(gids, tmp, 0, 1);
+
+ p->push(); {
+ p->curr.execWidth = 4; // 4 UW elements per MOV
+ p->curr.predicate = GEN_PREDICATE_NONE;
+ p->curr.noMask = 1;
+ p->MOV(prolog, tmArf); // prolog timestamp
+ p->MOV(lastTsReg, tmArf); // initial value of the last-timestamp slot, read from tmArf again
+ } p->pop();
+
+ p->NOP(); // NOTE(review): the reason for the two trailing NOPs is not visible in this patch
+ p->NOP();
+ return;
+ }
+
void GenContext::emitCalcTimestampInstruction(const SelectionInstruction &insn) {
}
@@ -2317,6 +2430,9 @@ namespace gbe
schedulePostRegAllocation(*this, *this->sel);
if (OCL_OUTPUT_REG_ALLOC)
ra->outputAllocation();
+ if (inProfilingMode) { // add the profiling prolog before do anything.
+ this->profilingProlog();
+ }
this->clearFlagRegister();
this->emitStackPointer();
this->emitSLMOffset();
diff --git a/backend/src/backend/gen_context.hpp b/backend/src/backend/gen_context.hpp
index bbd48cf..e36c8e6 100644
--- a/backend/src/backend/gen_context.hpp
+++ b/backend/src/backend/gen_context.hpp
@@ -86,6 +86,7 @@ namespace gbe
/*! Simd width chosen for the current function */
INLINE uint32_t getSimdWidth(void) const { return simdWidth; }
void clearFlagRegister(void);
+ void profilingProlog(void);
/*! check the flag reg, if is grf, use f0.1 instead */
GenRegister checkFlagRegister(GenRegister flagReg);
/*! Emit the per-lane stack pointer computation */
@@ -221,6 +222,7 @@ namespace gbe
void allocCurbeReg(ir::Register reg, gbe_curbe_type value, uint32_t subValue = 0);
virtual void setA0Content(uint16_t new_a0[16], uint16_t max_offset = 0, int sz = 0);
+ void calcGlobalXYZRange(GenRegister& reg, GenRegister& tmp, int flag, int subFlag);
private:
CompileErrorCode errCode;
--
1.7.9.5
More information about the Beignet
mailing list