[Beignet] [PATCH 4/5 OpenCL-2.0] Handle the WorkGroup_Broadcast logic in insn_selection.

Yang, Rong R rong.r.yang at intel.com
Tue May 19 01:51:43 PDT 2015


One comment, inline below in the quoted patch.

> -----Original Message-----
> From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of
> junyan.he at inbox.com
> Sent: Tuesday, April 21, 2015 14:12
> To: beignet at lists.freedesktop.org
> Cc: Junyan He
> Subject: [Beignet] [PATCH 4/5 OpenCL-2.0] Handle the
> WorkGroup_Broadcast logic in insn_selection.
> 
> From: Junyan He <junyan.he at linux.intel.com>
> 
> We use slm to store the value which will be broadcasted to the whole work
> group.
> 
> Signed-off-by: Junyan He <junyan.he at linux.intel.com>
> ---
>  backend/src/backend/gen_insn_selection.cpp |   92
> ++++++++++++++++++++++++++++
>  1 file changed, 92 insertions(+)
> 
> diff --git a/backend/src/backend/gen_insn_selection.cpp
> b/backend/src/backend/gen_insn_selection.cpp
> index c240261..f8f1d29 100644
> --- a/backend/src/backend/gen_insn_selection.cpp
> +++ b/backend/src/backend/gen_insn_selection.cpp
> @@ -4107,6 +4107,97 @@ namespace gbe
>      DECL_CTOR(AtomicInstruction, 1, 1);
>    };
> 
> +  /*! WorkGroup instruction pattern */
> +  DECL_PATTERN(WorkGroupInstruction)
> +  {
> +    INLINE bool emitWGBroadcast(Selection::Opaque &sel, const
> ir::WorkGroupInstruction &insn) const {
> +      /*  1. BARRIER    Ensure all the threads have set the correct value for the
> var which will be broadcasted.
> +          2. CMP IDs    Compare the local IDs with the specified ones in the
> function call.
> +          3. STORE      Use flag to control the store of the var. Only the specified
> item will execute the store.
> +          4. BARRIER    Ensure the specified value has been stored.
> +          5. LOAD       Load the stored value to all the dst value, the dst of all the
> items will have same value,
> +                        so broadcasted.  */
> +      using namespace ir;
> +      const Type type = insn.getType();
> +      const GenRegister src = sel.selReg(insn.getSrc(0), type);
> +      const GenRegister dst = sel.selReg(insn.getDst(0), type);
> +      const uint32_t srcNum = insn.getSrcNum();
> +      const uint32_t simdWidth = sel.ctx.getSimdWidth();
> +      const uint32_t slmAddr = insn.getSlmAddr();
> +      GenRegister addr = sel.selReg(sel.reg(FAMILY_DWORD),
> + ir::TYPE_U32);
> +
> +      /* Then we insert a barrier to make sure all the var we are interested in
> +         have been assigned the final value. */
> +      sel.BARRIER(GenRegister::ud8grf(sel.reg(FAMILY_DWORD)),
> + sel.selReg(sel.reg(FAMILY_DWORD)), syncLocalBarrier);
> +
> +      GBE_ASSERT(srcNum >= 2);
> +      GenRegister coords[3];
> +      for (uint32_t i = 1; i < srcNum; i++) {
> +        coords[i - 1] = sel.selReg(insn.getSrc(i), TYPE_U32);
> +      }
> +
> +      sel.push(); {
> +        sel.curr.predicate = GEN_PREDICATE_NONE;
> +        sel.curr.noMask = 1;
> +        sel.MOV(addr, GenRegister::immud(slmAddr));
> +      } sel.pop();
> +
> +      sel.push(); {
> +        sel.curr.flag = 0;
> +        sel.curr.subFlag = 1;
> +        sel.curr.predicate = GEN_PREDICATE_NONE;
> +        sel.curr.noMask = 1;
> +        GenRegister lid0, lid1, lid2;
> +        uint32_t dim = srcNum - 1;
> +        if (simdWidth == 16) {
> +          lid0 = GenRegister::ud16grf(ir::ocl::lid0);
> +          lid1 = GenRegister::ud16grf(ir::ocl::lid1);
> +          lid2 = GenRegister::ud16grf(ir::ocl::lid2);
> +        } else {
> +          lid0 = GenRegister::ud8grf(ir::ocl::lid0);
> +          lid1 = GenRegister::ud8grf(ir::ocl::lid1);
> +          lid2 = GenRegister::ud8grf(ir::ocl::lid2);
> +        }
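It's better to use sel.selReg here to get lid0/lid1/lid2, rather than choosing between ud16grf and ud8grf by hand based on the SIMD width.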

> +
> +        sel.CMP(GEN_CONDITIONAL_EQ, coords[0], lid0,
> GenRegister::retype(GenRegister::null(), GEN_TYPE_UD));
> +        sel.curr.predicate = GEN_PREDICATE_NORMAL;
> +        if (dim >= 2)
> +          sel.CMP(GEN_CONDITIONAL_EQ, coords[1], lid1,
> GenRegister::retype(GenRegister::null(), GEN_TYPE_UD));
> +        if (dim >= 3)
> +          sel.CMP(GEN_CONDITIONAL_EQ, coords[2], lid2,
> + GenRegister::retype(GenRegister::null(), GEN_TYPE_UD));
> +
> +        if (typeSize(src.type) == 4) {
> +          GenRegister _addr = GenRegister::retype(addr, GEN_TYPE_F);
> +          GenRegister _src = GenRegister::retype(src, GEN_TYPE_F);
> +          sel.UNTYPED_WRITE(_addr, &_src, 1, 0xfe);
> +        }
> +      } sel.pop();
> +
> +      /* Make sure the slm var have the valid value now */
> +      sel.BARRIER(GenRegister::ud8grf(sel.reg(FAMILY_DWORD)),
> + sel.selReg(sel.reg(FAMILY_DWORD)), syncLocalBarrier);
> +
> +      if (typeSize(src.type) == 4) {
> +        sel.UNTYPED_READ(addr, &dst, 1, 0xfe);
> +      }
> +
> +      return true;
> +    }
> +
> +    INLINE bool emitOne(Selection::Opaque &sel, const
> ir::WorkGroupInstruction &insn, bool &markChildren) const
> +    {
> +      using namespace ir;
> +      const WorkGroupOps workGroupOp = insn.getWorkGroupOpcode();
> +
> +      if (workGroupOp == WORKGROUP_OP_BROADCAST) {
> +        return emitWGBroadcast(sel, insn);
> +      } else {
> +        GBE_ASSERT(0);
> +      }
> +      return true;
> +    }
> +    DECL_CTOR(WorkGroupInstruction, 1, 1);  };
> +
>    /*! Select instruction pattern */
>    class SelectInstructionPattern : public SelectionPattern
>    {
> @@ -4789,6 +4880,7 @@ namespace gbe
>      this->insert<GetImageInfoInstructionPattern>();
>      this->insert<ReadARFInstructionPattern>();
>      this->insert<RegionInstructionPattern>();
> +    this->insert<WorkGroupInstructionPattern>();
> 
>      // Sort all the patterns with the number of instructions they output
>      for (uint32_t op = 0; op < ir::OP_INVALID; ++op)
> --
> 1.7.9.5
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list