[Beignet] [Patch V2 2/3] OCL20/GBE: Change the pointer relative op's type.
Yang Rong
rong.r.yang at intel.com
Fri Dec 4 00:30:31 PST 2015
Can't use 32-bit ops on pointer-related instructions.
Prepare to enable SPIR64.
V2: Set the pointers' family to QWORD and remove useless code.
Signed-off-by: Ruiling Song <ruiling.song at intel.com>
Signed-off-by: Yang Rong <rong.r.yang at intel.com>
---
backend/src/backend/gen_context.cpp | 17 +++++++++++++++
backend/src/ir/lowering.cpp | 10 +++++++--
backend/src/ir/profile.cpp | 6 +++---
backend/src/llvm/llvm_gen_backend.cpp | 37 +++++++++++++++++++++------------
backend/src/llvm/llvm_printf_parser.cpp | 27 +++++++++++++++++-------
5 files changed, 71 insertions(+), 26 deletions(-)
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index 02d0bfd..cef4e4c 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -242,6 +242,23 @@ namespace gbe
p->MUL(tmpReg, tmpReg, GenRegister::immuw(perThreadSize));
p->curr.execWidth = this->simdWidth;
p->ADD(stackptr, stackptr, tmpReg);
+ if (fn.getPointerFamily() == ir::FAMILY_QWORD) {
+ const GenRegister selStatckPtr2 = this->simdWidth == 8 ?
+ GenRegister::ul8grf(ir::ocl::stackptr) :
+ GenRegister::ul16grf(ir::ocl::stackptr);
+ const GenRegister stackptr2 = ra->genReg(selStatckPtr2);
+ int simdWidth = p->curr.execWidth;
+ if (simdWidth == 16) {
+ // We need to do the second quarter first, because the dst type is QW
+ // while the src is DW. If we did the first quarter first, the 1st
+ // quarter's dst would overwrite the 2nd quarter's src.
+ p->curr.execWidth = 8;
+ p->curr.quarterControl = GEN_COMPRESSION_Q2;
+ p->MOV(GenRegister::Qn(stackptr2, 1), GenRegister::Qn(stackptr,1));
+ }
+ p->curr.quarterControl = GEN_COMPRESSION_Q1;
+ p->MOV(stackptr2, stackptr);
+ }
p->pop();
}
diff --git a/backend/src/ir/lowering.cpp b/backend/src/ir/lowering.cpp
index 66ced8c..9ae90ef 100644
--- a/backend/src/ir/lowering.cpp
+++ b/backend/src/ir/lowering.cpp
@@ -367,8 +367,14 @@ namespace ir {
const uint32_t offset = valueID * size;
const Register reg = load->getValue(valueID);
-
- Instruction mov = ir::INDIRECT_MOV(type, reg, arg, load->getAddressRegister(), offset);
+ Register addressReg = load->getAddressRegister();
+ if (fn->getPointerFamily() == FAMILY_QWORD) {
+ Register tmp = fn->newRegister(FAMILY_DWORD);
+ Instruction cvt = ir::CVT(ir::TYPE_U32, ir::TYPE_U64, tmp, load->getAddressRegister());
+ cvt.insert(ins_after, &ins_after);
+ addressReg = tmp;
+ }
+ Instruction mov = ir::INDIRECT_MOV(type, reg, arg, addressReg, offset);
mov.insert(ins_after, &ins_after);
replaced = true;
}
diff --git a/backend/src/ir/profile.cpp b/backend/src/ir/profile.cpp
index 4486863..3ead8a7 100644
--- a/backend/src/ir/profile.cpp
+++ b/backend/src/ir/profile.cpp
@@ -74,7 +74,7 @@ namespace ir {
DECL_NEW_REG(FAMILY_DWORD, goffset0, 1, GBE_CURBE_GLOBAL_OFFSET_X);
DECL_NEW_REG(FAMILY_DWORD, goffset1, 1, GBE_CURBE_GLOBAL_OFFSET_Y);
DECL_NEW_REG(FAMILY_DWORD, goffset2, 1, GBE_CURBE_GLOBAL_OFFSET_Z);
- DECL_NEW_REG(FAMILY_DWORD, stackptr, 0);
+ DECL_NEW_REG(FAMILY_QWORD, stackptr, 0);
DECL_NEW_REG(FAMILY_QWORD, stackbuffer, 1, GBE_CURBE_EXTRA_ARGUMENT, GBE_STACK_BUFFER);
DECL_NEW_REG(FAMILY_WORD, blockip, 0, GBE_CURBE_BLOCK_IP);
DECL_NEW_REG(FAMILY_DWORD, barrierid, 1);
@@ -83,8 +83,8 @@ namespace ir {
DECL_NEW_REG(FAMILY_DWORD, zero, 1);
DECL_NEW_REG(FAMILY_DWORD, one, 1);
DECL_NEW_REG(FAMILY_WORD, retVal, 1);
- DECL_NEW_REG(FAMILY_DWORD, printfbptr, 1, GBE_CURBE_PRINTF_BUF_POINTER);
- DECL_NEW_REG(FAMILY_DWORD, printfiptr, 1, GBE_CURBE_PRINTF_INDEX_POINTER);
+ DECL_NEW_REG(FAMILY_QWORD, printfbptr, 1, GBE_CURBE_PRINTF_BUF_POINTER);
+ DECL_NEW_REG(FAMILY_QWORD, printfiptr, 1, GBE_CURBE_PRINTF_INDEX_POINTER);
DECL_NEW_REG(FAMILY_DWORD, dwblockip, 0, GBE_CURBE_DW_BLOCK_IP);
}
#undef DECL_NEW_REG
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index 39665b8..2ea5400 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -1218,12 +1218,12 @@ namespace gbe
}
Builder.SetInsertPoint(cast<Instruction>(theUser));
- Type *int32Ty = Type::getInt32Ty(ptr->getContext());
- Value *v1 = Builder.CreatePtrToInt(pointerOp, int32Ty);
+ Type *ptyTy = IntegerType::get(ptr->getContext(), ptr->getType()->getIntegerBitWidth());
+ Value *v1 = Builder.CreatePtrToInt(pointerOp, ptyTy);
- Value *v2 = Builder.CreatePtrToInt(getSinglePointerOrigin(pointerOp), int32Ty);
- Value *v3 = Builder.CreatePtrToInt(base, int32Ty);
- Value *v4 = Builder.CreatePtrToInt(bti, int32Ty);
+ Value *v2 = Builder.CreatePtrToInt(getSinglePointerOrigin(pointerOp), ptyTy);
+ Value *v3 = Builder.CreatePtrToInt(base, ptyTy);
+ Value *v4 = Builder.CreatePtrToInt(bti, ptyTy);
// newLocBase = (pointer - origin) + base_start
Value *diff = Builder.CreateSub(v1, v2);
Value *newLocBase = Builder.CreateAdd(v3, diff);
@@ -1600,7 +1600,10 @@ namespace gbe
// NULL pointers
if(isa<ConstantPointerNull>(CPV)) {
- return ctx.newImmediate(uint32_t(0));
+ if (ctx.getPointerFamily() == ir::FAMILY_QWORD)
+ return ctx.newImmediate(uint64_t(0));
+ else
+ return ctx.newImmediate(uint32_t(0));
}
const Type::TypeID typeID = CPV->getType()->getTypeID();
@@ -2553,13 +2556,13 @@ namespace gbe
this->newRegister(const_cast<GlobalVariable*>(&v));
ir::Register reg = regTranslator.getScalar(const_cast<GlobalVariable*>(&v), 0);
- ctx.LOADI(ir::TYPE_S32, reg, ctx.newIntegerImmediate(oldSlm + padding/8, ir::TYPE_S32));
+ ctx.LOADI(getType(ctx, v.getType()), reg, ctx.newIntegerImmediate(oldSlm + padding/8, getType(ctx, v.getType())));
} else if(addrSpace == ir::MEM_CONSTANT || v.isConstant()) {
GBE_ASSERT(v.hasInitializer());
this->newRegister(const_cast<GlobalVariable*>(&v));
ir::Register reg = regTranslator.getScalar(const_cast<GlobalVariable*>(&v), 0);
ir::Constant &con = unit.getConstantSet().getConstant(v.getName());
- ctx.LOADI(ir::TYPE_S32, reg, ctx.newIntegerImmediate(con.getOffset(), ir::TYPE_S32));
+ ctx.LOADI(getType(ctx, v.getType()), reg, ctx.newIntegerImmediate(con.getOffset(), getType(ctx, v.getType())));
} else {
if(v.getName().equals(StringRef("__gen_ocl_printf_buf"))) {
ctx.getFunction().getPrintfSet()->setBufBTI(BtiMap.find(const_cast<GlobalVariable*>(&v))->second);
@@ -4342,15 +4345,23 @@ namespace gbe
uint32_t prevStackPtr = ctx.getFunction().getStackSize();
uint32_t step = ((prevStackPtr + (align - 1)) & ~(align - 1)) - prevStackPtr;
if (step != 0) {
- ir::ImmediateIndex stepImm = ctx.newIntegerImmediate(step, ir::TYPE_U32);
+ ir::ImmediateIndex stepImm;
+ ir::Type pointerTy = getType(pointerFamily);
+ if (ctx.getPointerSize() == ir::POINTER_32_BITS)
+ stepImm = ctx.newImmediate(uint32_t(step));
+ else
+ stepImm = ctx.newImmediate(uint64_t(step));
ir::Register stepReg = ctx.reg(ctx.getPointerFamily());
- ctx.LOADI(ir::TYPE_U32, stepReg, stepImm);
- ctx.ADD(ir::TYPE_U32, stack, stack, stepReg);
+ ctx.LOADI(pointerTy, stepReg, stepImm);
+ ctx.ADD(pointerTy, stack, stack, stepReg);
ctx.getFunction().pushStackSize(step);
}
}
// Set the destination register properly
- ctx.MOV(imm.getType(), dst, stack);
+ if (legacyMode)
+ ctx.MOV(imm.getType(), dst, stack);
+ else
+ ctx.ADD(imm.getType(), dst, stack, ir::ocl::stackbuffer);
ctx.LOADI(imm.getType(), reg, immIndex);
ctx.ADD(imm.getType(), stack, stack, reg);
@@ -4518,7 +4529,7 @@ namespace gbe
// but later ArgumentLower pass need to match exact load/addImm pattern
// so, I avoid subtracting zero base to satisfy ArgumentLower pass.
if (!zeroBase)
- ctx.SUB(ir::TYPE_U32, mPtr, pointer, baseReg);
+ ctx.SUB(getType(ctx, llvmPtr->getType()), mPtr, pointer, baseReg);
else
mPtr = pointer;
} else {
diff --git a/backend/src/llvm/llvm_printf_parser.cpp b/backend/src/llvm/llvm_printf_parser.cpp
index bdaed8a..7aa7b4e 100644
--- a/backend/src/llvm/llvm_printf_parser.cpp
+++ b/backend/src/llvm/llvm_printf_parser.cpp
@@ -350,17 +350,19 @@ error:
{
Value* op0 = NULL;
Value* val = NULL;
+ const DataLayout &DL = module->getDataLayout();
+ Type *ptrIntTy = IntegerType::get(module->getContext(), DL.getPointerSizeInBits());
/////////////////////////////////////////////////////
/* calculate index address.
index_addr = (index_offset + wg_offset )* sizeof(int) * 2 + index_buf_ptr
index_offset = global_size2 * global_size1 * global_size0 * printf_num */
- Value* index_offset = builder->CreateMul(g1Xg2Xg3, ConstantInt::get(intTy, printf_num));
+ Value* index_offset = builder->CreateMul(g1Xg2Xg3, ConstantInt::get(ptrIntTy, printf_num));
// index_offset + offset
op0 = builder->CreateAdd(index_offset, wg_offset);
// (index_offset + offset)* sizeof(int) * 2
- op0 = builder->CreateMul(op0, ConstantInt::get(intTy, sizeof(int)*2));
+ op0 = builder->CreateMul(op0, ConstantInt::get(ptrIntTy, sizeof(int)*2));
// Final index address = index_buf_ptr + (index_offset + offset)* sizeof(int)
op0 = builder->CreateAdd(index_buf_ptr, op0);
Value* index_addr = builder->CreateIntToPtr(op0, Type::getInt32PtrTy(module->getContext(), 1));
@@ -369,10 +371,13 @@ error:
val = builder->CreateAdd(loop_num, ConstantInt::get(intTy, 1));
builder->CreateStore(val, index_addr);// The loop number.
- op0 = builder->CreateAdd(op0, ConstantInt::get(intTy, sizeof(int)));
+ op0 = builder->CreateAdd(op0, ConstantInt::get(ptrIntTy, sizeof(int)));
index_addr = builder->CreateIntToPtr(op0, Type::getInt32PtrTy(module->getContext(), 1));
builder->CreateStore(ConstantInt::get(intTy, printf_num), index_addr);// The printf number.
+ if(DL.getPointerSizeInBits() == 64)
+ loop_num = builder->CreateZExt(loop_num, ptrIntTy);
+
int i = 1;
Value* data_addr = NULL;
for (auto &s : (*pInfo.printf_fmt).first) {
@@ -406,14 +411,14 @@ error:
data_offset = global_size2 * global_size1 * global_size0 * out_buf_sizeof_offset
//global_size2 * global_size1 * global_size0 * out_buf_sizeof_offset */
- op0 = builder->CreateMul(g1Xg2Xg3, ConstantInt::get(intTy, out_buf_sizeof_offset));
+ op0 = builder->CreateMul(g1Xg2Xg3, ConstantInt::get(ptrIntTy, out_buf_sizeof_offset));
//offset * sizeof(specify)
- val = builder->CreateMul(wg_offset, ConstantInt::get(intTy, sizeof_size));
+ val = builder->CreateMul(wg_offset, ConstantInt::get(ptrIntTy, sizeof_size));
//data_offset + pbuf_ptr
op0 = builder->CreateAdd(pbuf_ptr, op0);
op0 = builder->CreateAdd(op0, val);
//totalSizeofSize * global_size2 * global_size1 * global_size0
- val = builder->CreateMul(g1Xg2Xg3, ConstantInt::get(intTy, totalSizeofSize));
+ val = builder->CreateMul(g1Xg2Xg3, ConstantInt::get(ptrIntTy, totalSizeofSize));
//totalSizeofSize * global_size2 * global_size1 * global_size0 * loop_num
val = builder->CreateMul(val, loop_num);
//final
@@ -543,6 +548,8 @@ error:
totalSizeofSize = 0;
module = F.getParent();
intTy = IntegerType::get(module->getContext(), 32);
+ const DataLayout &DL = module->getDataLayout();
+ Type *ptrIntTy = IntegerType::get(module->getContext(), DL.getPointerSizeInBits());
// As we inline all function calls, so skip non-kernel functions
bool bKernel = isKernelFunction(F);
@@ -608,7 +615,7 @@ error:
nullptr,
GlobalVariable::NotThreadLocal,
1);
- pbuf_ptr = builder->CreatePtrToInt(pBuf, Type::getInt32Ty(module->getContext()));
+ pbuf_ptr = builder->CreatePtrToInt(pBuf, ptrIntTy);
}
if (!index_buf_ptr) {
Type *ptrTy = Type::getInt32PtrTy(module->getContext(), 1);
@@ -619,7 +626,7 @@ error:
nullptr,
GlobalVariable::NotThreadLocal,
1);
- index_buf_ptr = builder->CreatePtrToInt(pBuf, Type::getInt32Ty(module->getContext()));
+ index_buf_ptr = builder->CreatePtrToInt(pBuf, ptrIntTy);
}
if (!wg_offset || !g1Xg2Xg3) {
@@ -683,6 +690,10 @@ error:
op0 = builder->CreateMul(global_size2, global_size1);
// global_size2 * global_size1 * global_size0
g1Xg2Xg3 = builder->CreateMul(op0, global_size0);
+ if(DL.getPointerSizeInBits() == 64) {
+ wg_offset = builder->CreateZExt(wg_offset, ptrIntTy);
+ g1Xg2Xg3 = builder->CreateZExt(g1Xg2Xg3, ptrIntTy);
+ }
}
--
1.9.1
More information about the Beignet
mailing list