[Beignet] [PATCH 11/19] Backend: Add profilingProlog function for GenContext.
Yang, Rong R
rong.r.yang at intel.com
Tue Nov 3 05:19:10 PST 2015
> -----Original Message-----
> From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of
> junyan.he at inbox.com
> Sent: Wednesday, September 9, 2015 8:01
> To: beignet at lists.freedesktop.org
> Subject: [Beignet] [PATCH 11/19] Backend: Add profilingProlog function for
> GenContext.
>
> From: Junyan He <junyan.he at linux.intel.com>
>
> The profilingProlog will collect useful information for profiling, including XYZ
> global range and prolog timestamp.
>
> Signed-off-by: Junyan He <junyan.he at linux.intel.com>
> ---
> backend/src/backend/gen_context.cpp | 116
> +++++++++++++++++++++++++++++++++++
> backend/src/backend/gen_context.hpp | 2 +
> 2 files changed, 118 insertions(+)
>
> diff --git a/backend/src/backend/gen_context.cpp
> b/backend/src/backend/gen_context.cpp
> index 435b224..696d86a 100644
> --- a/backend/src/backend/gen_context.cpp
> +++ b/backend/src/backend/gen_context.cpp
> @@ -2185,6 +2185,119 @@ namespace gbe
> p->TYPED_WRITE(header, true, bti);
> }
>
> + void GenContext::calcGlobalXYZRange(GenRegister& reg, GenRegister&
> +tmp, int flag, int subFlag)
> + {
> +#define CALC_GID(dim) do {\
> + GenRegister g##dim##start = GenRegister::offset(reg, 0, 8 + dim*8); \
> + GenRegister g##dim##end = GenRegister::offset(g##dim##start, 0, 4);
> +\
> + GenRegister id##dim =
> +GenRegister::toUniform(ra->genReg(GenRegister::ud16grf(ir::ocl::lid##di
> +m)), GEN_TYPE_UD); \
> + GenRegister localsz##dim =
> +GenRegister::toUniform(ra->genReg(GenRegister::ud1grf(ir::ocl::lsize##d
> +im)), GEN_TYPE_UD); \
> + GenRegister gid##dim =
> +GenRegister::toUniform(ra->genReg(GenRegister::ud1grf(ir::ocl::groupid#
> +#dim)), GEN_TYPE_UD); \
> + GenRegister goffset##dim =
> +GenRegister::toUniform(ra->genReg(GenRegister::ud1grf(ir::ocl::goffset#
> +#dim)), GEN_TYPE_UD); \
> + p->MUL(g##dim##start, localsz##dim, gid##dim); \
> + p->ADD(g##dim##start, g##dim##start, id##dim); \
> + p->ADD(g##dim##start, g##dim##start, goffset##dim); \
> + GenRegister ip; \
> + p->MOV(flagReg, GenRegister::immuw(0x0)); \
> + p->curr.useFlag(flag, subFlag); \
> + p->curr.predicate = GEN_PREDICATE_NONE; \
> + if (this->simdWidth == 16) \
> + p->curr.execWidth = 16; \
> + else \
> + p->curr.execWidth = 8; \
> + if (!isDWLabel()) { \
> + ip = ra->genReg(GenRegister::uw16grf(ir::ocl::blockip)); \
> + p->CMP(GEN_CONDITIONAL_EQ, ip, GenRegister::immuw(0xffff)); \
> + } else { \
> + ip = ra->genReg(GenRegister::ud16grf(ir::ocl::dwblockip)); \
> + p->CMP(GEN_CONDITIONAL_EQ, ip, GenRegister::immud(0xffffffff)); \
> + } \
> + p->curr.execWidth = 1; \
> + p->MOV(GenRegister::retype(tmp, GEN_TYPE_UW), flagReg); \
> + if (this->simdWidth == 16) \
> + p->OR(tmp, tmp, GenRegister::immud(0xffff0000)); \
> + else \
> + p->OR(tmp, tmp, GenRegister::immud(0xffffff00)); \
> + p->FBL(tmp, tmp); \
> + p->ADD(tmp, tmp, GenRegister::negate(GenRegister::immud(0x1))); \
> + p->MUL(tmp, tmp, GenRegister::immud(4)); \
> + p->MOV(GenRegister::addr1(0), GenRegister::retype(tmp,
> GEN_TYPE_UW));
> +\
> + GenRegister dimEnd = GenRegister::to_indirect1xN(id##dim, 0); \
> + p->MOV(tmp, dimEnd); \
> + p->MUL(g##dim##end, localsz##dim, gid##dim); \
> + p->ADD(g##dim##end, g##dim##end, tmp); \
> + p->ADD(g##dim##end, g##dim##end, goffset##dim); \ } while(0)
> +
The macro CALC_GID is too long, which makes it hard to read and debug. The only part that actually needs a macro is
fetching the ir::ocl::lid, ir::ocl::lsize, ir::ocl::groupid and ir::ocl::goffset registers. Since there are only 3 dimensions,
could you use if/else for that instead, or define a macro only for these registers?
> + GenRegister flagReg = GenRegister::flag(flag, subFlag);
> + p->push(); {
> + p->curr.execWidth = 1;
> + p->curr.predicate = GEN_PREDICATE_NONE;
> + p->curr.noMask = 1;
> + CALC_GID(0);
> + CALC_GID(1);
> + CALC_GID(2);
> + } p->pop();
> +
> +#undef CALC_GID
> + }
> +
> + void GenContext::profilingProlog(void) {
> + // record the prolog, globalXYZ and lasttimestamp at the very beginning.
> + GenRegister profilingReg2, profilingReg3, profilingReg4;
> + GenRegister tmArf = GenRegister(GEN_ARCHITECTURE_REGISTER_FILE,
> + 0xc0,
> + 0,
> + GEN_TYPE_UW,
> + GEN_VERTICAL_STRIDE_4,
> + GEN_WIDTH_4,
> + GEN_HORIZONTAL_STRIDE_1);
> + if (this->simdWidth == 16) {
> + profilingReg2 = ra->genReg(GenRegister::ud16grf(ir::ocl::profilingts1));
> + profilingReg3 = GenRegister::offset(profilingReg2, 1);
> + profilingReg4 = ra->genReg(GenRegister::ud16grf(ir::ocl::profilingts2));
> + } else {
> + GBE_ASSERT(this->simdWidth == 8);
> + profilingReg2 = ra->genReg(GenRegister::ud8grf(ir::ocl::profilingts2));
> + profilingReg3 = ra->genReg(GenRegister::ud8grf(ir::ocl::profilingts3));
> + profilingReg4 = ra->genReg(GenRegister::ud8grf(ir::ocl::profilingts4));
> + }
> +
> + /* MOV(4) prolog<1>:UW arf_tm<4,4,1>:UW */
> + /* MOV(4) lastTsReg<1>:UW prolog<4,4,1>:UW */
> + GenRegister prolog = profilingReg2;
> + prolog.type = GEN_TYPE_UW;
> + prolog.hstride = GEN_HORIZONTAL_STRIDE_1;
> + prolog.vstride = GEN_VERTICAL_STRIDE_4;
> + prolog.width = GEN_WIDTH_4;
> + prolog = GenRegister::offset(prolog, 0, 4*sizeof(uint32_t));
> +
> + GenRegister lastTsReg = GenRegister::toUniform(profilingReg3,
> GEN_TYPE_UL);
> + lastTsReg = GenRegister::offset(lastTsReg, 0, 2*sizeof(uint64_t));
> + lastTsReg.type = GEN_TYPE_UW;
> + lastTsReg.hstride = GEN_HORIZONTAL_STRIDE_1;
> + lastTsReg.vstride = GEN_VERTICAL_STRIDE_4;
> + lastTsReg.width = GEN_WIDTH_4;
> +
> + GenRegister gids = GenRegister::toUniform(profilingReg4,
> GEN_TYPE_UD);
> + GenRegister tmp = GenRegister::toUniform(profilingReg4,
> + GEN_TYPE_UD);
> +
> + // X Y and Z
> + this->calcGlobalXYZRange(gids, tmp, 0, 1);
> +
> + p->push(); {
> + p->curr.execWidth = 4;
> + p->curr.predicate = GEN_PREDICATE_NONE;
> + p->curr.noMask = 1;
> + p->MOV(prolog, tmArf);
> + p->MOV(lastTsReg, tmArf);
> + } p->pop();
> +
> + p->NOP();
> + p->NOP();
> + return;
> + }
> +
> void GenContext::emitCalcTimestampInstruction(const SelectionInstruction
> &insn) {
>
> }
> @@ -2317,6 +2430,9 @@ namespace gbe
> schedulePostRegAllocation(*this, *this->sel);
> if (OCL_OUTPUT_REG_ALLOC)
> ra->outputAllocation();
> + if (inProfilingMode) { // add the profiling prolog before do anything.
> + this->profilingProlog();
> + }
> this->clearFlagRegister();
> this->emitStackPointer();
> this->emitSLMOffset();
> diff --git a/backend/src/backend/gen_context.hpp
> b/backend/src/backend/gen_context.hpp
> index bbd48cf..e36c8e6 100644
> --- a/backend/src/backend/gen_context.hpp
> +++ b/backend/src/backend/gen_context.hpp
> @@ -86,6 +86,7 @@ namespace gbe
> /*! Simd width chosen for the current function */
> INLINE uint32_t getSimdWidth(void) const { return simdWidth; }
> void clearFlagRegister(void);
> + void profilingProlog(void);
> /*! check the flag reg, if is grf, use f0.1 instead */
> GenRegister checkFlagRegister(GenRegister flagReg);
> /*! Emit the per-lane stack pointer computation */ @@ -221,6 +222,7 @@
> namespace gbe
> void allocCurbeReg(ir::Register reg, gbe_curbe_type value, uint32_t
> subValue = 0);
>
> virtual void setA0Content(uint16_t new_a0[16], uint16_t max_offset = 0,
> int sz = 0);
> + void calcGlobalXYZRange(GenRegister& reg, GenRegister& tmp, int
> + flag, int subFlag);
>
> private:
> CompileErrorCode errCode;
> --
> 1.7.9.5
>
>
>
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list