[Beignet] [PATCH 19/19] Backend: Implement StoreProfilingInstruction in GenContext.

Yang, Rong R rong.r.yang at intel.com
Tue Nov 3 06:06:56 PST 2015



> -----Original Message-----
> From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of
> junyan.he at inbox.com
> Sent: Wednesday, September 9, 2015 8:01
> To: beignet at lists.freedesktop.org
> Subject: [Beignet] [PATCH 19/19] Backend: Implement
> StoreProfilingInstruction in GenContext.
> 
> From: Junyan He <junyan.he at linux.intel.com>
> 
> The offset 0 of the profiling buffer contains the log number.
> We will use atomic instruction to inc it every time a log is generated.
> We will generate one log for each HW gpu thread. The log contains the XYZ
> range of global work items which are executed on this thread, the EU id, the
> Sub Slice id,  thread number, and 20 points' timestamp which we are
> interested in.
> 
> Signed-off-by: Junyan He <junyan.he at linux.intel.com>
> ---
>  backend/src/backend/gen_context.cpp |  173
> +++++++++++++++++++++++++++++++++++
>  1 file changed, 173 insertions(+)
> 
> diff --git a/backend/src/backend/gen_context.cpp
> b/backend/src/backend/gen_context.cpp
> index 26af4cd..df36e9a 100644
> --- a/backend/src/backend/gen_context.cpp
> +++ b/backend/src/backend/gen_context.cpp
> @@ -2469,6 +2469,179 @@ namespace gbe
>    }
> 
>    void GenContext::emitStoreProfilingInstruction(const SelectionInstruction
> &insn) {
> +    uint32_t simdType;
> +    if (this->simdWidth == 16) {
> +      simdType = ir::ProfilingInfo::ProfilingSimdType16;
> +    } else if (this->simdWidth == 8) {
> +      simdType = ir::ProfilingInfo::ProfilingSimdType8;
> +    } else {
> +      simdType = ir::ProfilingInfo::ProfilingSimdType1;
> +      GBE_ASSERT(0);
> +    }
> +
> +    p->NOP();
> +    p->NOP();
> +
> +    GenRegister tmArf = GenRegister(GEN_ARCHITECTURE_REGISTER_FILE,
> +        0xc0,
> +        0,
> +        GEN_TYPE_UW,
> +        GEN_VERTICAL_STRIDE_4,
> +        GEN_WIDTH_4,
> +        GEN_HORIZONTAL_STRIDE_1);
It would be better to add a tmArf function to the GenRegister class and obtain this register by calling that function, instead of hard-coding it in the patch.

> +    GenRegister profilingReg[5];
> +    if (p->curr.execWidth == 16) {
> +      profilingReg[0] = GenRegister::retype(ra->genReg(insn.src(0)),
> GEN_TYPE_UD);
> +      profilingReg[1] = GenRegister::offset(profilingReg[0], 1);
> +      profilingReg[2] = GenRegister::retype(ra->genReg(insn.src(1)),
> GEN_TYPE_UD);
> +      profilingReg[3] = GenRegister::offset(profilingReg[2], 1);
> +      profilingReg[4] = GenRegister::retype(ra->genReg(insn.src(2)),
> GEN_TYPE_UD);
> +    } else {
> +      GBE_ASSERT(p->curr.execWidth == 8);
> +      profilingReg[0] = GenRegister::retype(ra->genReg(insn.src(0)),
> GEN_TYPE_UD);
> +      profilingReg[1] = GenRegister::retype(ra->genReg(insn.src(1)),
> GEN_TYPE_UD);
> +      profilingReg[2] = GenRegister::retype(ra->genReg(insn.src(2)),
> GEN_TYPE_UD);
> +      profilingReg[3] = GenRegister::retype(ra->genReg(insn.src(3)),
> GEN_TYPE_UD);
> +      profilingReg[4] = GenRegister::retype(ra->genReg(insn.src(4)),
> GEN_TYPE_UD);
> +    }
> +    GenRegister tmp = ra->genReg(insn.dst(0));
> +    uint32_t profilingType = insn.extra.profilingType;
> +    uint32_t bti = insn.extra.profilingBTI;
> +    GBE_ASSERT(profilingType == 1);
> +    GenRegister flagReg = GenRegister::flag(insn.state.flag,
> insn.state.subFlag);
> +    GenRegister lastTsReg = GenRegister::toUniform(profilingReg[3],
> GEN_TYPE_UL);
> +    lastTsReg = GenRegister::offset(lastTsReg, 0, 2*sizeof(uint64_t));
> +    GenRegister realClock = GenRegister::offset(lastTsReg, 0,
> sizeof(uint64_t));
> +    GenRegister tmp0 = GenRegister::toUniform(profilingReg[3],
> + GEN_TYPE_UL);
> +
> +    /* MOV(4)   tmp0<1>:UW	 arf_tm<4,4,1>:UW  */
> +    p->push(); {
> +      p->curr.execWidth = 4;
> +      p->curr.predicate = GEN_PREDICATE_NONE;
> +      p->curr.noMask = 1;
> +      GenRegister _tmp0 = tmp0;
> +      _tmp0.type = GEN_TYPE_UW;
> +      _tmp0.hstride = GEN_HORIZONTAL_STRIDE_1;
> +      _tmp0.vstride = GEN_VERTICAL_STRIDE_4;
> +      _tmp0.width = GEN_WIDTH_4;
> +      p->MOV(_tmp0, tmArf);
> +    } p->pop();
> +
> +    /* Calc the time elapsed. */
> +    subTimestamps(tmp0, lastTsReg, tmp);
> +    /* Update the real clock */
> +    addTimestamps(realClock, tmp0, tmp);
> +
> +    //the epilog, record the last timestamp and return.
> +    /* MOV(1)   epilog<1>:UL   realclock<0,1,0>:UL  */
> +    /* ADD(1)   epilog<1>:UL   prolog<0,1,0>:UL  */
> +    GenRegister prolog = GenRegister::toUniform(profilingReg[2],
> GEN_TYPE_UD);
> +    prolog = GenRegister::offset(prolog, 0, 4*sizeof(uint32_t));
> +    GenRegister epilog = GenRegister::offset(prolog, 0, 2*sizeof(uint32_t));
> +    p->push(); {
> +      p->curr.execWidth = 1;
> +      p->curr.predicate = GEN_PREDICATE_NONE;
> +      p->curr.noMask = 1;
> +      p->MOV(epilog, GenRegister::retype(realClock, GEN_TYPE_UD));
> +      p->MOV(GenRegister::offset(epilog, 0, sizeof(uint32_t)),
> +          GenRegister::offset(GenRegister::retype(realClock, GEN_TYPE_UD), 0,
> sizeof(uint32_t)));
> +      addTimestamps(epilog, prolog, tmp);
> +    } p->pop();
> +
> +    /* Now, begin to write the results out. */
> +    // Inc the log items number.
> +    p->push(); {
> +      //ptr[0] is the total count of the log items.
> +      GenRegister sndMsg = GenRegister::retype(tmp, GEN_TYPE_UD);
> +      sndMsg.width = GEN_WIDTH_8;
> +      sndMsg.hstride = GEN_HORIZONTAL_STRIDE_1;
> +      sndMsg.vstride = GEN_VERTICAL_STRIDE_8;
> +      p->curr.execWidth = 8;
> +      p->curr.predicate = GEN_PREDICATE_NONE;
> +      p->curr.noMask = 1;
> +      p->MOV(sndMsg, GenRegister::immud(0x0));
> +
> +      GenRegister incRes = GenRegister::offset(sndMsg, 1);
> +      p->push();
> +      {
> +        p->curr.execWidth = 1;
> +        p->MOV(flagReg, GenRegister::immuw(0x01));
> +      }
> +      p->pop();
> +      p->curr.useFlag(insn.state.flag, insn.state.subFlag);
> +      p->curr.predicate = GEN_PREDICATE_NORMAL;
> +      p->ATOMIC(incRes, GEN_ATOMIC_OP_INC, sndMsg,
> GenRegister::immud(bti), 1);
> +    } p->pop();
> +
> +    // Calculate the final addr
> +    GenRegister addr = GenRegister::retype(tmp, GEN_TYPE_UD);
> +    addr.width = GEN_WIDTH_8;
> +    addr.hstride = GEN_HORIZONTAL_STRIDE_1;
> +    addr.vstride = GEN_VERTICAL_STRIDE_8;
> +    p->push(); {
> +      GenRegister offset = GenRegister::offset(addr, 1);
> +
> +      p->curr.execWidth = 8;
> +      p->curr.noMask = 1;
> +      p->curr.predicate = GEN_PREDICATE_NONE;
> +      p->MUL(addr, GenRegister::toUniform(offset, GEN_TYPE_UD),
> +          GenRegister::immud(sizeof(ir::ProfilingInfo::ProfilingReportItem)));
> +      p->ADD(addr, addr, GenRegister::immud(4)); // for the counter
> +      p->curr.execWidth = 1;
> +      for (int i = 1; i < 8; i++) {
> +        p->ADD(GenRegister::toUniform(GenRegister::offset(addr, 0,
> i*sizeof(uint32_t)), GEN_TYPE_UD),
> +            GenRegister::toUniform(GenRegister::offset(addr, 0,
> i*sizeof(uint32_t)), GEN_TYPE_UD),
> +            GenRegister::immud(i*sizeof(uint32_t)));
> +      }
> +    } p->pop();
> +
> +    GenRegister data = GenRegister::offset(addr, 1);
> +    p->push(); {
> +      p->curr.execWidth = 8;
> +      p->curr.noMask = 1;
> +      p->curr.predicate = GEN_PREDICATE_NONE;
> +      p->MOV(data, profilingReg[4]);
> +    } p->pop();
> +
> +    // Write the result out
> +    p->push(); {
> +      GenRegister ffid = GenRegister::toUniform(data, GEN_TYPE_UD);
> +      GenRegister tmp = GenRegister::toUniform(profilingReg[3],
> GEN_TYPE_UD);
> +      GenRegister stateReg =
> GenRegister(GEN_ARCHITECTURE_REGISTER_FILE, GEN_ARF_STATE, 0,
> +          GEN_TYPE_UD, GEN_VERTICAL_STRIDE_0, GEN_WIDTH_1,
> GEN_HORIZONTAL_STRIDE_1);
> +      p->curr.predicate = GEN_PREDICATE_NONE;
> +      p->curr.noMask = 1;
> +      p->curr.execWidth = 1;
> +      p->MOV(ffid, stateReg);
> +      p->SHR(ffid, ffid, GenRegister::immud(24));
> +      p->AND(ffid, ffid, GenRegister::immud(0x0ff));
> +      p->OR(ffid, ffid, GenRegister::immud(simdType << 4));
> +
> +      GenRegister genInfo = GenRegister::offset(ffid, 0, 4);
> +      p->MOV(genInfo, stateReg);
> +      p->AND(genInfo, genInfo, GenRegister::immud(0x0ff07));
> +      //The dispatch mask
> +      stateReg = GenRegister(GEN_ARCHITECTURE_REGISTER_FILE,
> GEN_ARF_STATE, 2,
> +          GEN_TYPE_UD, GEN_VERTICAL_STRIDE_0, GEN_WIDTH_1,
> GEN_HORIZONTAL_STRIDE_1);
> +      p->MOV(tmp, stateReg);
> +      p->AND(tmp, tmp, GenRegister::immud(0x0000ffff));
> +      p->SHL(tmp, tmp, GenRegister::immud(16));
> +      p->OR(genInfo, genInfo, tmp);
> +
> +      // Write it out.
> +      p->curr.execWidth = 8;
> +      p->curr.noMask = 1;
> +      p->UNTYPED_WRITE(addr, GenRegister::immud(bti), 1);
> +      p->ADD(addr, addr, GenRegister::immud(32));
> +
> +      // time stamps
> +      for (int i = 0; i < 3; i++) {
> +        p->curr.execWidth = 8;
> +        p->MOV(data, GenRegister::retype(profilingReg[i], GEN_TYPE_UD));
> +        p->UNTYPED_WRITE(addr, GenRegister::immud(bti), 1);
> +        p->ADD(addr, addr, GenRegister::immud(32));
> +      }
> +    } p->pop();
>    }
> 
>    void GenContext::setA0Content(uint16_t new_a0[16], uint16_t max_offset,
> int sz) {
> --
> 1.7.9.5
> 
> 
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list