[Beignet] [PATCH 03/10 OpenCL-2.0] Add the store and load for thread/TID-EUID map.
junyan.he at inbox.com
junyan.he at inbox.com
Wed Apr 22 20:25:48 PDT 2015
From: Junyan He <junyan.he at linux.intel.com>
Signed-off-by: Junyan He <junyan.he at linux.intel.com>
---
backend/src/backend/gen_insn_selection.cpp | 106 +++++++++++++++++++++++++++-
backend/src/backend/gen_register.hpp | 10 +++
2 files changed, 114 insertions(+), 2 deletions(-)
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index f8f1d29..797233e 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -474,6 +474,8 @@ namespace gbe
uint32_t vectorNum;
/*! If true, generate code backward */
bool bwdCodeGeneration;
+ /*! If true, the thread map has been stored */
+ bool storeThreadMap;
/*! To make function prototypes more readable */
typedef const GenRegister &Reg;
@@ -701,8 +703,9 @@ namespace gbe
ctx(ctx), block(NULL),
curr(ctx.getSimdWidth()), file(ctx.getFunction().getRegisterFile()),
maxInsnNum(ctx.getFunction().getLargestBlockSize()), dagPool(maxInsnNum),
- stateNum(0), vectorNum(0), bwdCodeGeneration(false), currAuxLabel(ctx.getFunction().labelNum()),
- bHas32X32Mul(false), bHasLongType(false), ldMsgOrder(LD_MSG_ORDER_IVB)
+ stateNum(0), vectorNum(0), bwdCodeGeneration(false), storeThreadMap(false),
+ currAuxLabel(ctx.getFunction().labelNum()), bHas32X32Mul(false), bHasLongType(false),
+ ldMsgOrder(LD_MSG_ORDER_IVB)
{
const ir::Function &fn = ctx.getFunction();
this->regNum = fn.regNum();
@@ -4110,6 +4113,105 @@ namespace gbe
/*! WorkGroup instruction pattern */
DECL_PATTERN(WorkGroupInstruction)
{
+ INLINE bool storeThreadID(Selection::Opaque &sel, uint32_t slmAddr) const
+ { /*! Emits instructions that copy sr0.0 into a temporary and BYTE_SCATTER it to address slmAddr + 2 * ocl::threadid. Always returns true. */
+ using namespace ir;
+ GenRegister sr0_0 = GenRegister::retype(GenRegister::sr(0), GEN_TYPE_UW); // state register 0, reinterpreted as unsigned words
+ const uint32_t simdWidth = sel.ctx.getSimdWidth();
+ GenRegister tmp; // receives the copy of sr0_0; source operand of the scatter
+ GenRegister addr; // receives the computed scatter address
+ if (simdWidth == 16) {
+ tmp = GenRegister::retype(sel.selReg(sel.reg(FAMILY_WORD), ir::TYPE_U16), GEN_TYPE_UD); // WORD-family register viewed as UD; presumably keeps the temporary the same size as in the SIMD8 path -- TODO confirm
+ addr = GenRegister::retype(sel.selReg(sel.reg(FAMILY_WORD), ir::TYPE_U16), GEN_TYPE_UD);
+ } else {
+ tmp = sel.selReg(sel.reg(FAMILY_DWORD), ir::TYPE_U32);
+ addr = sel.selReg(sel.reg(FAMILY_DWORD), ir::TYPE_U32);
+ }
+ // Give sr0_0 a <0;1,0> region (vstride 0, width 1, hstride 0).
+ sr0_0.vstride = GEN_VERTICAL_STRIDE_0;
+ sr0_0.hstride = GEN_HORIZONTAL_STRIDE_0;
+ sr0_0.width = GEN_WIDTH_1;
+ sel.push(); { // the settings below apply until the matching sel.pop()
+ sel.curr.predicate = GEN_PREDICATE_NONE;
+ sel.curr.noMask = 1;
+ sel.curr.execWidth = 8;
+ // tmp = sr0_0
+ sel.MOV(tmp, sr0_0);
+ // addr = ocl::threadid * 2 + slmAddr
+ sel.MUL(addr, sel.selReg(ocl::threadid, ir::TYPE_U32), GenRegister::immud(2));
+ sel.ADD(addr, addr, GenRegister::immud(slmAddr));
+ // Load flag f0.1 with 0x01, then issue the scatter predicated on that flag.
+ sel.push(); {
+ sel.curr.predicate = GEN_PREDICATE_NONE;
+ sel.curr.noMask = 1;
+ sel.push(); {
+ sel.curr.execWidth = 1; // the flag write itself is a single-channel MOV
+ sel.MOV(GenRegister::flag(0, 1), GenRegister::immuw(0x01)); // only bit 0 set; presumably enables just channel 0 for the scatter -- TODO confirm
+ } sel.pop();
+ sel.curr.flag = 0;
+ sel.curr.subFlag = 1; // predicate on f0.1, the flag written just above
+ sel.curr.predicate = GEN_PREDICATE_NORMAL;
+ sel.curr.noMask = 0;
+ sel.BYTE_SCATTER(addr, tmp, 1, 0xfe); // NOTE(review): 1 and 0xfe are presumably the element-size selector and the SLM binding-table index -- confirm against BYTE_SCATTER's declaration
+ } sel.pop();
+ } sel.pop();
+ return true; // no failure path exists in this function
+ }
+
+ INLINE GenRegister getNextThreadID(Selection::Opaque &sel, uint32_t slmAddr) const
+ { /*! Emits instructions that compute next = ocl::threadid + 1 (reset to 0 when it equals ocl::threadn), BYTE_GATHER from slmAddr + 2 * next, and return the register that receives the gathered value. */
+ using namespace ir;
+ const uint32_t simdWidth = sel.ctx.getSimdWidth();
+ GenRegister addr; // receives the computed gather address
+ GenRegister nextThread; // receives the wrapped successor of ocl::threadid
+ GenRegister tid; // destination of the gather; this is the return value
+ if (simdWidth == 16) {
+ addr = GenRegister::retype(sel.selReg(sel.reg(FAMILY_WORD), ir::TYPE_U16), GEN_TYPE_UD); // WORD-family register viewed as UD; presumably keeps the temporary the same size as in the SIMD8 path -- TODO confirm
+ nextThread = GenRegister::retype(sel.selReg(sel.reg(FAMILY_WORD), ir::TYPE_U16), GEN_TYPE_UD);
+ tid = GenRegister::retype(sel.selReg(sel.reg(FAMILY_WORD), ir::TYPE_U16), GEN_TYPE_UD);
+ } else {
+ addr = sel.selReg(sel.reg(FAMILY_DWORD), ir::TYPE_U32);
+ nextThread = sel.selReg(sel.reg(FAMILY_DWORD), ir::TYPE_U32);
+ tid = sel.selReg(sel.reg(FAMILY_DWORD), ir::TYPE_U32);
+ }
+ // Only execWidth is overridden at this level; predicate and noMask are inherited from the caller's state.
+ sel.push(); {
+ sel.curr.execWidth = 8;
+ sel.ADD(nextThread, sel.selReg(ocl::threadid, ir::TYPE_U32), GenRegister::immud(1)); // nextThread = ocl::threadid + 1
+ // The CMP below writes flag f0.1; the MOV after it is predicated on that flag.
+ /* Wrap the next thread id. */
+ sel.push(); {
+ sel.curr.predicate = GEN_PREDICATE_NONE;
+ sel.curr.noMask = 1;
+ sel.curr.flag = 0;
+ sel.curr.subFlag = 1;
+ sel.CMP(GEN_CONDITIONAL_EQ, nextThread, sel.selReg(ocl::threadn, ir::TYPE_U32), GenRegister::null()); // nextThread == ocl::threadn ? (null destination)
+ sel.curr.predicate = GEN_PREDICATE_NORMAL;
+ sel.curr.noMask = 0;
+ sel.MOV(nextThread, GenRegister::immud(0)); // predicated: zero nextThread where the comparison held
+ } sel.pop();
+ // addr = nextThread * 2 + slmAddr
+ sel.MUL(addr, nextThread, GenRegister::immud(2));
+ sel.ADD(addr, addr, GenRegister::immud(slmAddr));
+ // Load flag f0.1 with an immediate mask, then issue the gather predicated on that flag.
+ sel.push(); {
+ sel.curr.predicate = GEN_PREDICATE_NONE;
+ sel.curr.noMask = 1;
+ sel.push(); {
+ sel.curr.execWidth = 1; // the flag write itself is a single-channel MOV
+ sel.MOV(GenRegister::flag(0, 1), GenRegister::immuw(0x010)); // NOTE(review): 0x010 == 16 sets bit 4, whereas storeThreadID loads 0x01 (bit 0) -- possible typo for 0x01; confirm which channel is meant to gather
+ } sel.pop();
+ sel.curr.flag = 0;
+ sel.curr.subFlag = 1; // predicate on f0.1, the flag written just above
+ sel.curr.predicate = GEN_PREDICATE_NORMAL;
+ sel.curr.noMask = 0;
+ sel.BYTE_GATHER(tid, addr, 1, 0xfe); // NOTE(review): 1 and 0xfe are presumably the element-size selector and the SLM binding-table index -- confirm against BYTE_GATHER's declaration
+ } sel.pop();
+ // End of the execWidth = 8 scope.
+ } sel.pop();
+ return tid;
+ }
+
INLINE bool emitWGBroadcast(Selection::Opaque &sel, const ir::WorkGroupInstruction &insn) const {
/* 1. BARRIER Ensure all the threads have set the correct value for the var which will be broadcasted.
2. CMP IDs Compare the local IDs with the specified ones in the function call.
diff --git a/backend/src/backend/gen_register.hpp b/backend/src/backend/gen_register.hpp
index 3b40b67..6e5321c 100644
--- a/backend/src/backend/gen_register.hpp
+++ b/backend/src/backend/gen_register.hpp
@@ -797,6 +797,16 @@ namespace gbe
GEN_HORIZONTAL_STRIDE_0);
}
+ static INLINE GenRegister sr(uint32_t nr, uint32_t subnr = 0) { /*! Builds a GenRegister in the architecture register file for GEN_ARF_STATE | nr at sub-register subnr (default 0), typed UD with vstride 8, width 8, hstride 1. */
+ return GenRegister(GEN_ARCHITECTURE_REGISTER_FILE,
+ GEN_ARF_STATE | nr, // state-register base OR'ed with the requested register number
+ subnr,
+ GEN_TYPE_UD,
+ GEN_VERTICAL_STRIDE_8,
+ GEN_WIDTH_8,
+ GEN_HORIZONTAL_STRIDE_1);
+ }
+
static INLINE GenRegister notification1(void) {
return GenRegister(GEN_ARCHITECTURE_REGISTER_FILE,
GEN_ARF_NOTIFICATION_COUNT,
--
1.7.9.5
More information about the Beignet
mailing list