[Beignet] [PATCH 3/5 OpenCL-2.0] Add WorkGroup functions to Gen IR logic in llvm_gen_backend.
Yang, Rong R
rong.r.yang at intel.com
Tue May 19 01:56:01 PDT 2015
One comment, which applies at two places in the patch (see inline below).
> -----Original Message-----
> From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of
> junyan.he at inbox.com
> Sent: Tuesday, April 21, 2015 14:11
> To: beignet at lists.freedesktop.org
> Cc: Junyan He
> Subject: [Beignet] [PATCH 3/5 OpenCL-2.0] Add WorkGroup functions to Gen
> IR logic in llvm_gen_backend.
>
> From: Junyan He <junyan.he at linux.intel.com>
>
> Signed-off-by: Junyan He <junyan.he at linux.intel.com>
> ---
> backend/src/llvm/llvm_gen_backend.cpp | 79
> +++++++++++++++++++++++++++-
> backend/src/llvm/llvm_gen_ocl_function.hxx | 18 +++++++
> 2 files changed, 96 insertions(+), 1 deletion(-)
>
> diff --git a/backend/src/llvm/llvm_gen_backend.cpp
> b/backend/src/llvm/llvm_gen_backend.cpp
> index bf03a13..21738e9 100644
> --- a/backend/src/llvm/llvm_gen_backend.cpp
> +++ b/backend/src/llvm/llvm_gen_backend.cpp
> @@ -490,6 +490,7 @@ namespace gbe
> LoopInfo *LI;
> const Module *TheModule;
> int btiBase;
> + int32_t wgBroadcastSLM;
> public:
> static char ID;
> explicit GenWriter(ir::Unit &unit)
> @@ -499,7 +500,8 @@ namespace gbe
> regTranslator(ctx),
> LI(0),
> TheModule(0),
> - btiBase(BTI_RESERVED_NUM)
> + btiBase(BTI_RESERVED_NUM),
> + wgBroadcastSLM(-1)
> {
> initializeLoopInfoPass(*PassRegistry::getPassRegistry());
> pass = PASS_EMIT_REGISTERS;
> @@ -624,6 +626,8 @@ namespace gbe
> void emitUnaryCallInst(CallInst &I, CallSite &CS, ir::Opcode opcode,
> ir::Type = ir::TYPE_FLOAT);
> // Emit unary instructions from gen native function
> void emitAtomicInst(CallInst &I, CallSite &CS, ir::AtomicOps opcode);
> + // Emit workgroup instructions
> + void emitWorkGroupInst(CallInst &I, CallSite &CS, ir::WorkGroupOps
> + opcode);
>
> uint8_t appendSampler(CallSite::arg_iterator AI);
> uint8_t getImageID(CallInst &I);
> @@ -2792,6 +2796,18 @@ namespace gbe
> case GEN_OCL_SIMD_ALL:
> case GEN_OCL_READ_TM:
> case GEN_OCL_REGION:
> + case GEN_OCL_WORK_GROUP_ALL:
> + case GEN_OCL_WORK_GROUP_ANY:
> + case GEN_OCL_WORK_GROUP_BROADCAST:
> + case GEN_OCL_WORK_GROUP_REDUCE_ADD:
> + case GEN_OCL_WORK_GROUP_REDUCE_MAX:
> + case GEN_OCL_WORK_GROUP_REDUCE_MIN:
> + case GEN_OCL_WORK_GROUP_SCAN_EXCLUSIVE_ADD:
> + case GEN_OCL_WORK_GROUP_SCAN_EXCLUSIVE_MAX:
> + case GEN_OCL_WORK_GROUP_SCAN_EXCLUSIVE_MIN:
> + case GEN_OCL_WORK_GROUP_SCAN_INCLUSIVE_ADD:
> + case GEN_OCL_WORK_GROUP_SCAN_INCLUSIVE_MAX:
> + case GEN_OCL_WORK_GROUP_SCAN_INCLUSIVE_MIN:
> this->newRegister(&I);
> break;
> case GEN_OCL_PRINTF:
> @@ -2833,6 +2849,44 @@ namespace gbe
> ctx.ATOMIC(opcode, dst, addrSpace, bti, srcTuple);
> }
>
> + void GenWriter::emitWorkGroupInst(CallInst &I, CallSite &CS,
> ir::WorkGroupOps opcode) {
> + if (wgBroadcastSLM < 0 && opcode == ir::WORKGROUP_OP_BROADCAST)
> {
> + ir::Function &f = ctx.getFunction();
> + uint32_t mapSize = 8;
> + f.setUseSLM(true);
> + uint32_t oldSlm = f.getSLMSize();
> + f.setSLMSize(oldSlm + mapSize);
> + wgBroadcastSLM = oldSlm;
> + GBE_ASSERT(wgBroadcastSLM >= 0);
> + }
> +
> + CallSite::arg_iterator AI = CS.arg_begin();
> + CallSite::arg_iterator AE = CS.arg_end();
> + GBE_ASSERT(AI != AE);
> +
> + if (opcode == ir::WORKGROUP_OP_ALL || opcode ==
> ir::WORKGROUP_OP_ANY) {
> + GBE_ASSERT(getType(ctx, (*AI)->getType()) == ir::TYPE_S32);
> + const ir::Register src = this->getRegister(*(AI++));
> + const ir::Tuple srcTuple = ctx.arrayTuple(&src, 1);
> + ctx.WORKGROUP(opcode, (uint32_t)0, getRegister(&I), srcTuple, 1,
> ir::TYPE_S32);
> + } else if (opcode == ir::WORKGROUP_OP_BROADCAST) {
> + int argNum = CS.arg_size();
> + ir::Register src[argNum];
> + for (int i = 0; i < argNum; i++) {
> + src[i] = this->getRegister(*(AI++));
> + }
> + const ir::Tuple srcTuple = ctx.arrayTuple(&src[0], argNum);
> + ctx.WORKGROUP(ir::WORKGROUP_OP_BROADCAST,
> (uint32_t)wgBroadcastSLM, getRegister(&I), srcTuple, argNum,
> + getType(ctx, (*AI)->getType()));
AI == AE here: the loop above has already advanced AI past the last argument, so dereferencing it in (*AI)->getType() is not safe.
> + } else {
> + const ir::Register src = this->getRegister(*(AI++));
> + const ir::Tuple srcTuple = ctx.arrayTuple(&src, 1);
> + ctx.WORKGROUP(opcode, (uint32_t)0, getRegister(&I), srcTuple, 1,
> getType(ctx, (*AI)->getType()));
AI == AE here as well: AI was already advanced by the AI++ in the getRegister call above, so dereferencing it in (*AI)->getType() is not safe.
> + }
> +
> + GBE_ASSERT(AI == AE);
> + }
> +
More information about the Beignet mailing list