[Beignet] [Patch V2 2/3] OCL20/GBE: Change the pointer relative op's type.

Yang Rong rong.r.yang at intel.com
Fri Dec 4 00:30:31 PST 2015


Pointer-related instructions can't use 32-bit ops when pointers are 64 bits
wide, so make their type follow the pointer size. This prepares for enabling SPIR64.

V2: Set the pointers' family to QWORD and remove useless code.

Signed-off-by: Ruiling Song <ruiling.song at intel.com>
Signed-off-by: Yang Rong <rong.r.yang at intel.com>
---
 backend/src/backend/gen_context.cpp     | 17 +++++++++++++++
 backend/src/ir/lowering.cpp             | 10 +++++++--
 backend/src/ir/profile.cpp              |  6 +++---
 backend/src/llvm/llvm_gen_backend.cpp   | 37 +++++++++++++++++++++------------
 backend/src/llvm/llvm_printf_parser.cpp | 27 +++++++++++++++++-------
 5 files changed, 71 insertions(+), 26 deletions(-)

diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index 02d0bfd..cef4e4c 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -242,6 +242,23 @@ namespace gbe
         p->MUL(tmpReg, tmpReg, GenRegister::immuw(perThreadSize));
       p->curr.execWidth = this->simdWidth;
       p->ADD(stackptr, stackptr, tmpReg);
+      if (fn.getPointerFamily() == ir::FAMILY_QWORD) {
+        const GenRegister selStatckPtr2 = this->simdWidth == 8 ?
+          GenRegister::ul8grf(ir::ocl::stackptr) :
+          GenRegister::ul16grf(ir::ocl::stackptr);
+        const GenRegister stackptr2 = ra->genReg(selStatckPtr2);
+        int simdWidth = p->curr.execWidth;
+        if (simdWidth == 16) {
+          // We need to do the second quarter first, because the dst type is QW
+          // while the src is DW. If we did the first quarter first, the 1st
+          // quarter's dst would contain the 2nd quarter's src.
+          p->curr.execWidth = 8;
+          p->curr.quarterControl = GEN_COMPRESSION_Q2;
+          p->MOV(GenRegister::Qn(stackptr2, 1), GenRegister::Qn(stackptr,1));
+        }
+        p->curr.quarterControl = GEN_COMPRESSION_Q1;
+        p->MOV(stackptr2, stackptr);
+      }
     p->pop();
   }
 
diff --git a/backend/src/ir/lowering.cpp b/backend/src/ir/lowering.cpp
index 66ced8c..9ae90ef 100644
--- a/backend/src/ir/lowering.cpp
+++ b/backend/src/ir/lowering.cpp
@@ -367,8 +367,14 @@ namespace ir {
         const uint32_t offset = valueID * size;
 
         const Register reg = load->getValue(valueID);
-
-        Instruction mov = ir::INDIRECT_MOV(type, reg, arg, load->getAddressRegister(), offset);
+        Register addressReg = load->getAddressRegister();
+        if (fn->getPointerFamily() == FAMILY_QWORD) {
+          Register tmp = fn->newRegister(FAMILY_DWORD);
+          Instruction cvt = ir::CVT(ir::TYPE_U32, ir::TYPE_U64, tmp, load->getAddressRegister());
+          cvt.insert(ins_after, &ins_after);
+          addressReg = tmp;
+        }
+        Instruction mov = ir::INDIRECT_MOV(type, reg, arg, addressReg, offset);
         mov.insert(ins_after, &ins_after);
         replaced = true;
       }
diff --git a/backend/src/ir/profile.cpp b/backend/src/ir/profile.cpp
index 4486863..3ead8a7 100644
--- a/backend/src/ir/profile.cpp
+++ b/backend/src/ir/profile.cpp
@@ -74,7 +74,7 @@ namespace ir {
       DECL_NEW_REG(FAMILY_DWORD, goffset0, 1, GBE_CURBE_GLOBAL_OFFSET_X);
       DECL_NEW_REG(FAMILY_DWORD, goffset1, 1, GBE_CURBE_GLOBAL_OFFSET_Y);
       DECL_NEW_REG(FAMILY_DWORD, goffset2, 1, GBE_CURBE_GLOBAL_OFFSET_Z);
-      DECL_NEW_REG(FAMILY_DWORD, stackptr, 0);
+      DECL_NEW_REG(FAMILY_QWORD, stackptr, 0);
       DECL_NEW_REG(FAMILY_QWORD, stackbuffer, 1, GBE_CURBE_EXTRA_ARGUMENT, GBE_STACK_BUFFER);
       DECL_NEW_REG(FAMILY_WORD,  blockip, 0, GBE_CURBE_BLOCK_IP);
       DECL_NEW_REG(FAMILY_DWORD, barrierid, 1);
@@ -83,8 +83,8 @@ namespace ir {
       DECL_NEW_REG(FAMILY_DWORD, zero, 1);
       DECL_NEW_REG(FAMILY_DWORD, one, 1);
       DECL_NEW_REG(FAMILY_WORD, retVal, 1);
-      DECL_NEW_REG(FAMILY_DWORD, printfbptr, 1, GBE_CURBE_PRINTF_BUF_POINTER);
-      DECL_NEW_REG(FAMILY_DWORD, printfiptr, 1, GBE_CURBE_PRINTF_INDEX_POINTER);
+      DECL_NEW_REG(FAMILY_QWORD, printfbptr, 1, GBE_CURBE_PRINTF_BUF_POINTER);
+      DECL_NEW_REG(FAMILY_QWORD, printfiptr, 1, GBE_CURBE_PRINTF_INDEX_POINTER);
       DECL_NEW_REG(FAMILY_DWORD, dwblockip, 0, GBE_CURBE_DW_BLOCK_IP);
     }
 #undef DECL_NEW_REG
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index 39665b8..2ea5400 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -1218,12 +1218,12 @@ namespace gbe
           }
           Builder.SetInsertPoint(cast<Instruction>(theUser));
 
-          Type *int32Ty = Type::getInt32Ty(ptr->getContext());
-          Value *v1 = Builder.CreatePtrToInt(pointerOp, int32Ty);
+          Type *ptyTy = IntegerType::get(ptr->getContext(), ptr->getType()->getIntegerBitWidth());
+          Value *v1 = Builder.CreatePtrToInt(pointerOp, ptyTy);
 
-          Value *v2 = Builder.CreatePtrToInt(getSinglePointerOrigin(pointerOp), int32Ty);
-          Value *v3 = Builder.CreatePtrToInt(base, int32Ty);
-          Value *v4 = Builder.CreatePtrToInt(bti, int32Ty);
+          Value *v2 = Builder.CreatePtrToInt(getSinglePointerOrigin(pointerOp), ptyTy);
+          Value *v3 = Builder.CreatePtrToInt(base, ptyTy);
+          Value *v4 = Builder.CreatePtrToInt(bti, ptyTy);
           // newLocBase = (pointer - origin) + base_start
           Value *diff = Builder.CreateSub(v1, v2);
           Value *newLocBase = Builder.CreateAdd(v3, diff);
@@ -1600,7 +1600,10 @@ namespace gbe
 
       // NULL pointers
       if(isa<ConstantPointerNull>(CPV)) {
-        return ctx.newImmediate(uint32_t(0));
+        if (ctx.getPointerFamily() == ir::FAMILY_QWORD)
+          return ctx.newImmediate(uint64_t(0));
+        else
+          return ctx.newImmediate(uint32_t(0));
       }
 
       const Type::TypeID typeID = CPV->getType()->getTypeID();
@@ -2553,13 +2556,13 @@ namespace gbe
 
         this->newRegister(const_cast<GlobalVariable*>(&v));
         ir::Register reg = regTranslator.getScalar(const_cast<GlobalVariable*>(&v), 0);
-        ctx.LOADI(ir::TYPE_S32, reg, ctx.newIntegerImmediate(oldSlm + padding/8, ir::TYPE_S32));
+        ctx.LOADI(getType(ctx, v.getType()), reg, ctx.newIntegerImmediate(oldSlm + padding/8, getType(ctx, v.getType())));
       } else if(addrSpace == ir::MEM_CONSTANT || v.isConstant()) {
         GBE_ASSERT(v.hasInitializer());
         this->newRegister(const_cast<GlobalVariable*>(&v));
         ir::Register reg = regTranslator.getScalar(const_cast<GlobalVariable*>(&v), 0);
         ir::Constant &con = unit.getConstantSet().getConstant(v.getName());
-        ctx.LOADI(ir::TYPE_S32, reg, ctx.newIntegerImmediate(con.getOffset(), ir::TYPE_S32));
+        ctx.LOADI(getType(ctx, v.getType()), reg, ctx.newIntegerImmediate(con.getOffset(), getType(ctx, v.getType())));
       } else {
         if(v.getName().equals(StringRef("__gen_ocl_printf_buf"))) {
           ctx.getFunction().getPrintfSet()->setBufBTI(BtiMap.find(const_cast<GlobalVariable*>(&v))->second);
@@ -4342,15 +4345,23 @@ namespace gbe
       uint32_t prevStackPtr = ctx.getFunction().getStackSize();
       uint32_t step = ((prevStackPtr + (align - 1)) & ~(align - 1)) - prevStackPtr;
       if (step != 0) {
-        ir::ImmediateIndex stepImm = ctx.newIntegerImmediate(step, ir::TYPE_U32);
+        ir::ImmediateIndex stepImm;
+        ir::Type pointerTy = getType(pointerFamily);
+        if (ctx.getPointerSize() == ir::POINTER_32_BITS)
+          stepImm = ctx.newImmediate(uint32_t(step));
+        else
+          stepImm = ctx.newImmediate(uint64_t(step));
         ir::Register stepReg = ctx.reg(ctx.getPointerFamily());
-        ctx.LOADI(ir::TYPE_U32, stepReg, stepImm);
-        ctx.ADD(ir::TYPE_U32, stack, stack, stepReg);
+        ctx.LOADI(pointerTy, stepReg, stepImm);
+        ctx.ADD(pointerTy, stack, stack, stepReg);
         ctx.getFunction().pushStackSize(step);
       }
     }
     // Set the destination register properly
-    ctx.MOV(imm.getType(), dst, stack);
+    if (legacyMode)
+      ctx.MOV(imm.getType(), dst, stack);
+    else
+      ctx.ADD(imm.getType(), dst, stack, ir::ocl::stackbuffer);
 
     ctx.LOADI(imm.getType(), reg, immIndex);
     ctx.ADD(imm.getType(), stack, stack, reg);
@@ -4518,7 +4529,7 @@ namespace gbe
       // but later ArgumentLower pass need to match exact load/addImm pattern
       // so, I avoid subtracting zero base to satisfy ArgumentLower pass.
       if (!zeroBase)
-        ctx.SUB(ir::TYPE_U32, mPtr, pointer, baseReg);
+        ctx.SUB(getType(ctx, llvmPtr->getType()), mPtr, pointer, baseReg);
       else
         mPtr = pointer;
     } else {
diff --git a/backend/src/llvm/llvm_printf_parser.cpp b/backend/src/llvm/llvm_printf_parser.cpp
index bdaed8a..7aa7b4e 100644
--- a/backend/src/llvm/llvm_printf_parser.cpp
+++ b/backend/src/llvm/llvm_printf_parser.cpp
@@ -350,17 +350,19 @@ error:
   {
     Value* op0 = NULL;
     Value* val = NULL;
+    const DataLayout &DL = module->getDataLayout();
+    Type *ptrIntTy = IntegerType::get(module->getContext(), DL.getPointerSizeInBits());
 
     /////////////////////////////////////////////////////
     /* calculate index address.
        index_addr = (index_offset + wg_offset )* sizeof(int) * 2 + index_buf_ptr
        index_offset = global_size2 * global_size1 * global_size0 * printf_num */
 
-    Value* index_offset = builder->CreateMul(g1Xg2Xg3, ConstantInt::get(intTy, printf_num));
+    Value* index_offset = builder->CreateMul(g1Xg2Xg3, ConstantInt::get(ptrIntTy, printf_num));
     // index_offset + offset
     op0 = builder->CreateAdd(index_offset, wg_offset);
     // (index_offset + offset)* sizeof(int) * 2
-    op0 = builder->CreateMul(op0, ConstantInt::get(intTy, sizeof(int)*2));
+    op0 = builder->CreateMul(op0, ConstantInt::get(ptrIntTy, sizeof(int)*2));
     // Final index address = index_buf_ptr + (index_offset + offset)* sizeof(int)
     op0 = builder->CreateAdd(index_buf_ptr, op0);
     Value* index_addr = builder->CreateIntToPtr(op0, Type::getInt32PtrTy(module->getContext(), 1));
@@ -369,10 +371,13 @@ error:
     val = builder->CreateAdd(loop_num, ConstantInt::get(intTy, 1));
     builder->CreateStore(val, index_addr);// The loop number.
 
-    op0 = builder->CreateAdd(op0, ConstantInt::get(intTy, sizeof(int)));
+    op0 = builder->CreateAdd(op0, ConstantInt::get(ptrIntTy, sizeof(int)));
     index_addr = builder->CreateIntToPtr(op0, Type::getInt32PtrTy(module->getContext(), 1));
     builder->CreateStore(ConstantInt::get(intTy, printf_num), index_addr);// The printf number.
 
+    if(DL.getPointerSizeInBits() == 64)
+      loop_num = builder->CreateZExt(loop_num, ptrIntTy);
+
     int i = 1;
     Value* data_addr = NULL;
     for (auto &s : (*pInfo.printf_fmt).first) {
@@ -406,14 +411,14 @@ error:
       data_offset = global_size2 * global_size1 * global_size0 * out_buf_sizeof_offset
 
       //global_size2 * global_size1 * global_size0 * out_buf_sizeof_offset */
-      op0 = builder->CreateMul(g1Xg2Xg3, ConstantInt::get(intTy, out_buf_sizeof_offset));
+      op0 = builder->CreateMul(g1Xg2Xg3, ConstantInt::get(ptrIntTy, out_buf_sizeof_offset));
       //offset * sizeof(specify)
-      val = builder->CreateMul(wg_offset, ConstantInt::get(intTy, sizeof_size));
+      val = builder->CreateMul(wg_offset, ConstantInt::get(ptrIntTy, sizeof_size));
       //data_offset + pbuf_ptr
       op0 = builder->CreateAdd(pbuf_ptr, op0);
       op0 = builder->CreateAdd(op0, val);
       //totalSizeofSize * global_size2 * global_size1 * global_size0
-      val = builder->CreateMul(g1Xg2Xg3, ConstantInt::get(intTy, totalSizeofSize));
+      val = builder->CreateMul(g1Xg2Xg3, ConstantInt::get(ptrIntTy, totalSizeofSize));
       //totalSizeofSize * global_size2 * global_size1 * global_size0 * loop_num
       val = builder->CreateMul(val, loop_num);
       //final
@@ -543,6 +548,8 @@ error:
     totalSizeofSize = 0;
     module = F.getParent();
     intTy = IntegerType::get(module->getContext(), 32);
+    const DataLayout &DL = module->getDataLayout();
+    Type *ptrIntTy = IntegerType::get(module->getContext(), DL.getPointerSizeInBits());
 
     // As we inline all function calls, so skip non-kernel functions
     bool bKernel = isKernelFunction(F);
@@ -608,7 +615,7 @@ error:
                                 nullptr,
                                 GlobalVariable::NotThreadLocal,
                                 1);
-      pbuf_ptr = builder->CreatePtrToInt(pBuf, Type::getInt32Ty(module->getContext()));
+      pbuf_ptr = builder->CreatePtrToInt(pBuf, ptrIntTy);
     }
     if (!index_buf_ptr) {
       Type *ptrTy = Type::getInt32PtrTy(module->getContext(), 1);
@@ -619,7 +626,7 @@ error:
                                 nullptr,
                                 GlobalVariable::NotThreadLocal,
                                 1);
-      index_buf_ptr = builder->CreatePtrToInt(pBuf, Type::getInt32Ty(module->getContext()));
+      index_buf_ptr = builder->CreatePtrToInt(pBuf, ptrIntTy);
     }
 
     if (!wg_offset || !g1Xg2Xg3) {
@@ -683,6 +690,10 @@ error:
       op0 = builder->CreateMul(global_size2, global_size1);
       // global_size2 * global_size1 * global_size0
       g1Xg2Xg3 = builder->CreateMul(op0, global_size0);
+      if(DL.getPointerSizeInBits() == 64) {
+        wg_offset = builder->CreateZExt(wg_offset, ptrIntTy);
+        g1Xg2Xg3 = builder->CreateZExt(g1Xg2Xg3, ptrIntTy);
+      }
     }
 
 
-- 
1.9.1



More information about the Beignet mailing list