[Beignet] [PATCH V2] Fit the printf bug in loop
Zhigang Gong
zhigang.gong at linux.intel.com
Tue Oct 14 17:04:05 PDT 2014
LGTM, pushed, thanks.
On Tue, Oct 14, 2014 at 03:52:04PM +0800, junyan.he at inbox.com wrote:
> From: Junyan He <junyan.he at linux.intel.com>
>
> The static analyse for printf can not totally work
> when the printf inst is within the loop and the loop
> can not be unrolled. This causes the printf just to
> print one info for a loop and to lose all the others.
> We now increment the exec number every time the printf
> inst is triggered. The number is stored for output all
> the message later.
> The problem is that we can not caculate the exact loops
> number for each printf inst. The wrong loop number will
> cause the data overwritten. We now assume all the printf
> inst are in loop and store the data like this:
> | PRINTF1_DATA PRINTF2_DATA ... | PRINTF1_DATA PRINTF2_DATA ... | ...
> | DATA_LOOP_ONE | DATA_LOOP_TWO | ...
> Although this may cause some space wasted.
>
> Another problem is that we need to decide the size of printf buffer
> because the loop upbound can not be caculated. We just set
> it yo 1M for small info slot request and 4M for big one.
>
> Signed-off-by: Junyan He <junyan.he at linux.intel.com>
> ---
> backend/src/ir/printf.cpp | 13 +-
> backend/src/llvm/llvm_printf_parser.cpp | 376 +++++++++++++++++++++----------
> kernels/test_printf.cl | 27 ++-
> src/cl_command_queue_gen7.c | 8 +
> 4 files changed, 283 insertions(+), 141 deletions(-)
>
> diff --git a/backend/src/ir/printf.cpp b/backend/src/ir/printf.cpp
> index e99aad5..093bfc6 100644
> --- a/backend/src/ir/printf.cpp
> +++ b/backend/src/ir/printf.cpp
> @@ -106,8 +106,9 @@ namespace gbe
> if (!vec_i) \
> pf_str = pf_str + std::string(#conv); \
> printf(pf_str.c_str(), \
> - ((target_ty *)((char *)buf_addr + slot.state->out_buf_sizeof_offset * \
> - global_wk_sz0 * global_wk_sz1 * global_wk_sz2)) \
> + ((target_ty *)((char *)buf_addr + sizeOfSize * global_wk_sz0 * global_wk_sz1 * global_wk_sz2 * n \
> + + slot.state->out_buf_sizeof_offset * \
> + global_wk_sz0 * global_wk_sz1 * global_wk_sz2)) \
> [(k*global_wk_sz0*global_wk_sz1 + j*global_wk_sz0 + i) * vec_num + vec_i]);\
> } while (0)
>
> @@ -124,10 +125,9 @@ namespace gbe
> for (i = 0; i < global_wk_sz0; i++) {
> for (j = 0; j < global_wk_sz1; j++) {
> for (k = 0; k < global_wk_sz2; k++) {
> -
> - int flag = ((int *)index_addr)[stmt*global_wk_sz0*global_wk_sz1*global_wk_sz2
> - + k*global_wk_sz0*global_wk_sz1 + j*global_wk_sz0 + i];
> - if (flag) {
> + int loop_num = ((int *)index_addr)[stmt*global_wk_sz0*global_wk_sz1*global_wk_sz2
> + + k*global_wk_sz0*global_wk_sz1 + j*global_wk_sz0 + i];
> + for (int n = 0; n < loop_num; n++) {
> for (auto &slot : pf) {
> pf_str = "";
> int vec_num;
> @@ -225,6 +225,7 @@ namespace gbe
>
> pf_str = "";
> }
> +
> }
> }
> }
> diff --git a/backend/src/llvm/llvm_printf_parser.cpp b/backend/src/llvm/llvm_printf_parser.cpp
> index 29684ba..bb6c26d 100644
> --- a/backend/src/llvm/llvm_printf_parser.cpp
> +++ b/backend/src/llvm/llvm_printf_parser.cpp
> @@ -330,9 +330,17 @@ error:
> Type* intTy;
> Value* pbuf_ptr;
> Value* index_buf_ptr;
> + Value* g1Xg2Xg3;
> + Value* wg_offset;
> int out_buf_sizeof_offset;
> static map<CallInst*, PrintfSet::PrintfFmt*> printfs;
> int printf_num;
> + int totalSizeofSize;
> +
> + struct PrintfParserInfo {
> + llvm::CallInst* call;
> + PrintfSet::PrintfFmt* printf_fmt;
> + };
>
> PrintfParser(void) : FunctionPass(ID)
> {
> @@ -343,7 +351,10 @@ error:
> printfs.clear();
> pbuf_ptr = NULL;
> index_buf_ptr = NULL;
> + g1Xg2Xg3 = NULL;
> + wg_offset = NULL;
> printf_num = 0;
> + totalSizeofSize = 0;
> }
>
> ~PrintfParser(void)
> @@ -355,9 +366,9 @@ error:
> printfs.clear();
> }
>
> -
> - bool parseOnePrintfInstruction(CallInst *& call);
> + bool parseOnePrintfInstruction(CallInst * call, PrintfParserInfo& info, int& sizeof_size);
> bool generateOneParameterInst(PrintfSlot& slot, Value*& arg, Type*& dst_type, int& sizeof_size);
> + bool generateOnePrintfInstruction(PrintfParserInfo& pInfo);
>
> virtual const char *getPassName() const
> {
> @@ -367,119 +378,38 @@ error:
> virtual bool runOnFunction(llvm::Function &F);
> };
>
> - bool PrintfParser::parseOnePrintfInstruction(CallInst *& call)
> + bool PrintfParser::generateOnePrintfInstruction(PrintfParserInfo& pInfo)
> {
> - CallSite CS(call);
> - CallSite::arg_iterator CI_FMT = CS.arg_begin();
> - int param_num = 0;
> -
> - llvm::Constant* arg0 = dyn_cast<llvm::ConstantExpr>(*CI_FMT);
> - llvm::Constant* arg0_ptr = dyn_cast<llvm::Constant>(arg0->getOperand(0));
> - if (!arg0_ptr) {
> - return false;
> - }
> -
> - ConstantDataSequential* fmt_arg = dyn_cast<ConstantDataSequential>(arg0_ptr->getOperand(0));
> - if (!fmt_arg || !fmt_arg->isCString()) {
> - return false;
> - }
> -
> - std::string fmt = fmt_arg->getAsCString();
> -
> - PrintfSet::PrintfFmt* printf_fmt = NULL;
> -
> - if (!(printf_fmt = parser_printf_fmt((char *)fmt.c_str(), param_num))) {//at lease print something
> - return false;
> - }
> -
> - /* iff parameter more than %, error. */
> - /* str_fmt arg0 arg1 ... NULL */
> - if (param_num + 2 < static_cast<int>(call->getNumOperands())) {
> - delete printf_fmt;
> - return false;
> - }
> -
> - /* FIXME: Because the OpenCL language do not support va macro, and we do not want
> - to introduce the va_list, va_start and va_end into our code, we just simulate
> - the function calls to caculate the offset caculation here. */
> -#define BUILD_CALL_INST(name) \
> - CallInst* name = builder->CreateCall(cast<llvm::Function>(module->getOrInsertFunction( \
> - "__gen_ocl_get_"#name, \
> - IntegerType::getInt32Ty(module->getContext()), \
> - NULL)))
> -
> - BUILD_CALL_INST(group_id2);
> - BUILD_CALL_INST(group_id1);
> - BUILD_CALL_INST(group_id0);
> - BUILD_CALL_INST(global_size2);
> - BUILD_CALL_INST(global_size1);
> - BUILD_CALL_INST(global_size0);
> - BUILD_CALL_INST(local_id2);
> - BUILD_CALL_INST(local_id1);
> - BUILD_CALL_INST(local_id0);
> - BUILD_CALL_INST(local_size2);
> - BUILD_CALL_INST(local_size1);
> - BUILD_CALL_INST(local_size0);
> -
> -#undef BUILD_CALL_INST
> -
> Value* op0 = NULL;
> Value* val = NULL;
> - /* calculate offset for later usage.
> - offset = ((local_id2 + local_size2 * group_id2) * (global_size1 * global_size0)
> - + (local_id1 + local_size1 * group_id1) * global_size0
> - + (local_id0 + local_size0 * group_id0)) * sizeof(type) */
> -
> - // local_size2 * group_id2
> - val = builder->CreateMul(local_size2, group_id2);
> - // local_id2 + local_size2 * group_id2
> - val = builder->CreateAdd(local_id2, val);
> - // global_size1 * global_size0
> - op0 = builder->CreateMul(global_size1, global_size0);
> - // (local_id2 + local_size2 * group_id2) * (global_size1 * global_size0)
> - Value* offset1 = builder->CreateMul(val, op0);
> - // local_size1 * group_id1
> - val = builder->CreateMul(local_size1, group_id1);
> - // local_id1 + local_size1 * group_id1
> - val = builder->CreateAdd(local_id1, val);
> - // (local_id1 + local_size1 * group_id1) * global_size_0
> - Value* offset2 = builder->CreateMul(val, global_size0);
> - // local_size0 * group_id0
> - val = builder->CreateMul(local_size0, group_id0);
> - // local_id0 + local_size0 * group_id0
> - val = builder->CreateAdd(local_id0, val);
> - // The total sum
> - val = builder->CreateAdd(val, offset1);
> - Value* offset = builder->CreateAdd(val, offset2);
>
> /////////////////////////////////////////////////////
> /* calculate index address.
> - index_addr = (index_offset + offset )* sizeof(int) + index_buf_ptr
> + index_addr = (index_offset + wg_offset )* sizeof(int) + index_buf_ptr
> index_offset = global_size2 * global_size1 * global_size0 * printf_num */
>
> - // global_size2 * global_size1
> - op0 = builder->CreateMul(global_size2, global_size1);
> - // global_size2 * global_size1 * global_size0
> - Value* glXg2Xg3 = builder->CreateMul(op0, global_size0);
> - Value* index_offset = builder->CreateMul(glXg2Xg3, ConstantInt::get(intTy, printf_num));
> + Value* index_offset = builder->CreateMul(g1Xg2Xg3, ConstantInt::get(intTy, printf_num));
> // index_offset + offset
> - op0 = builder->CreateAdd(index_offset, offset);
> + op0 = builder->CreateAdd(index_offset, wg_offset);
> // (index_offset + offset)* sizeof(int)
> op0 = builder->CreateMul(op0, ConstantInt::get(intTy, sizeof(int)));
> // Final index address = index_buf_ptr + (index_offset + offset)* sizeof(int)
> op0 = builder->CreateAdd(index_buf_ptr, op0);
> Value* index_addr = builder->CreateIntToPtr(op0, Type::getInt32PtrTy(module->getContext(), 1));
> - builder->CreateStore(ConstantInt::get(intTy, 1), index_addr);// The flag
> + // Load the printf num first, printf may be in loop.
> + Value* loop_num = builder->CreateLoad(index_addr);
> + val = builder->CreateAdd(loop_num, ConstantInt::get(intTy, 1));
> + builder->CreateStore(val, index_addr);// The loop number.
>
> int i = 1;
> Value* data_addr = NULL;
> - for (auto &s : *printf_fmt) {
> + for (auto &s : *pInfo.printf_fmt) {
> if (s.type == PRINTF_SLOT_TYPE_STRING)
> continue;
>
> - assert(i < static_cast<int>(call->getNumOperands()) - 1);
> + assert(i < static_cast<int>(pInfo.call->getNumOperands()) - 1);
>
> - Value *out_arg = call->getOperand(i);
> + Value *out_arg = pInfo.call->getOperand(i);
> Type *dst_type = NULL;
> int sizeof_size = 0;
> if (!generateOneParameterInst(s, out_arg, dst_type, sizeof_size)) {
> @@ -499,16 +429,23 @@ error:
>
> /////////////////////////////////////////////////////
> /* Calculate the data address.
> - data_addr = data_offset + pbuf_ptr + offset * sizeof(specify)
> + data_addr = (data_offset + pbuf_ptr + offset * sizeof(specify)) +
> + totalSizeofSize * global_size2 * global_size1 * global_size0 * loop_num
> data_offset = global_size2 * global_size1 * global_size0 * out_buf_sizeof_offset
>
> //global_size2 * global_size1 * global_size0 * out_buf_sizeof_offset */
> - op0 = builder->CreateMul(glXg2Xg3, ConstantInt::get(intTy, out_buf_sizeof_offset));
> + op0 = builder->CreateMul(g1Xg2Xg3, ConstantInt::get(intTy, out_buf_sizeof_offset));
> //offset * sizeof(specify)
> - val = builder->CreateMul(offset, ConstantInt::get(intTy, sizeof_size));
> + val = builder->CreateMul(wg_offset, ConstantInt::get(intTy, sizeof_size));
> //data_offset + pbuf_ptr
> op0 = builder->CreateAdd(pbuf_ptr, op0);
> op0 = builder->CreateAdd(op0, val);
> + //totalSizeofSize * global_size2 * global_size1 * global_size0
> + val = builder->CreateMul(g1Xg2Xg3, ConstantInt::get(intTy, totalSizeofSize));
> + //totalSizeofSize * global_size2 * global_size1 * global_size0 * loop_num
> + val = builder->CreateMul(val, loop_num);
> + //final
> + op0 = builder->CreateAdd(op0, val);
> data_addr = builder->CreateIntToPtr(op0, dst_type);
> builder->CreateStore(out_arg, data_addr);
>
> @@ -520,14 +457,101 @@ error:
> "__gen_ocl_printf", Type::getVoidTy(module->getContext()),
> NULL)));
> assert(printfs[printf_inst] == NULL);
> - printfs[printf_inst] = printf_fmt;
> + printfs[printf_inst] = pInfo.printf_fmt;
> printf_num++;
> return true;
> }
>
> + bool PrintfParser::parseOnePrintfInstruction(CallInst * call, PrintfParserInfo& info, int& sizeof_size)
> + {
> + CallSite CS(call);
> + CallSite::arg_iterator CI_FMT = CS.arg_begin();
> + int param_num = 0;
> +
> + llvm::Constant* arg0 = dyn_cast<llvm::ConstantExpr>(*CI_FMT);
> + llvm::Constant* arg0_ptr = dyn_cast<llvm::Constant>(arg0->getOperand(0));
> + if (!arg0_ptr) {
> + return false;
> + }
> +
> + ConstantDataSequential* fmt_arg = dyn_cast<ConstantDataSequential>(arg0_ptr->getOperand(0));
> + if (!fmt_arg || !fmt_arg->isCString()) {
> + return false;
> + }
> +
> + std::string fmt = fmt_arg->getAsCString();
> +
> + PrintfSet::PrintfFmt* printf_fmt = NULL;
> +
> + if (!(printf_fmt = parser_printf_fmt((char *)fmt.c_str(), param_num))) {//at lease print something
> + return false;
> + }
> +
> + /* iff parameter more than %, error. */
> + /* str_fmt arg0 arg1 ... NULL */
> + if (param_num + 2 < static_cast<int>(call->getNumOperands())) {
> + delete printf_fmt;
> + return false;
> + }
> +
> + info.call = call;
> + info.printf_fmt = printf_fmt;
> +
> + sizeof_size = 0;
> + int i = 1;
> + for (auto &s : *printf_fmt) {
> + int sz = 0;
> + if (s.type == PRINTF_SLOT_TYPE_STRING)
> + continue;
> +
> + assert(i < static_cast<int>(call->getNumOperands()) - 1);
> +
> + switch (s.state->conversion_specifier) {
> + case PRINTF_CONVERSION_I:
> + case PRINTF_CONVERSION_D:
> + case PRINTF_CONVERSION_O:
> + case PRINTF_CONVERSION_U:
> + case PRINTF_CONVERSION_x:
> + case PRINTF_CONVERSION_X:
> + case PRINTF_CONVERSION_P:
> + if (s.state->length_modifier == PRINTF_LM_L)
> + sz = sizeof(int64_t);
> + else
> + sz = sizeof(int);
> + break;
> + case PRINTF_CONVERSION_C:
> + sz = sizeof(char);
> + break;
> + case PRINTF_CONVERSION_F:
> + case PRINTF_CONVERSION_f:
> + case PRINTF_CONVERSION_E:
> + case PRINTF_CONVERSION_e:
> + case PRINTF_CONVERSION_G:
> + case PRINTF_CONVERSION_g:
> + case PRINTF_CONVERSION_A:
> + case PRINTF_CONVERSION_a:
> + sz = sizeof(float);
> + break;
> + default:
> + sz = 0;
> + break;
> + }
> +
> + if (s.state->vector_n) {
> + sz = sz * s.state->vector_n;
> + }
> +
> + sizeof_size += ((sz + 3) / 4) * 4;
> + }
> +
> + return true;
> + }
> +
> +
> bool PrintfParser::runOnFunction(llvm::Function &F)
> {
> bool changed = false;
> + bool hasPrintf = false;
> switch (F.getCallingConv()) {
> #if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR <= 2
> case CallingConv::PTX_Device:
> @@ -541,6 +565,8 @@ error:
> GBE_ASSERTM(false, "Unsupported calling convention");
> }
>
> + std::vector<PrintfParserInfo> infoVect;
> + totalSizeofSize = 0;
> module = F.getParent();
> intTy = IntegerType::get(module->getContext(), 32);
>
> @@ -550,11 +576,14 @@ error:
>
> builder = new IRBuilder<>(module->getContext());
>
> - /* Iter the function and find printf. */
> + /* First find printfs and caculate all slots size of one loop. */
> for (llvm::Function::iterator B = F.begin(), BE = F.end(); B != BE; B++) {
> for (BasicBlock::iterator instI = B->begin(),
> instE = B->end(); instI != instE; ++instI) {
>
> + PrintfParserInfo pInfo;
> + int sizeof_size = 0;
> +
> llvm::CallInst* call = dyn_cast<llvm::CallInst>(instI);
> if (!call) {
> continue;
> @@ -569,26 +598,107 @@ error:
> if (fnName != "__gen_ocl_printf_stub")
> continue;
>
> - changed = true;
> -
> - builder->SetInsertPoint(call);
> -
> - if (!pbuf_ptr) {
> - /* alloc a new buffer ptr to collect the print output. */
> - Type *ptrTy = Type::getInt32PtrTy(module->getContext());
> - llvm::Constant * pBuf = module->getOrInsertGlobal(StringRef("__gen_ocl_printf_buf"), ptrTy);
> - pbuf_ptr = builder->CreatePtrToInt(pBuf, Type::getInt32Ty(module->getContext()));
> - }
> - if (!index_buf_ptr) {
> - Type *ptrTy = Type::getInt32PtrTy(module->getContext());
> - llvm::Constant * pBuf = module->getOrInsertGlobal(StringRef("__gen_ocl_printf_index_buf"), ptrTy);
> - index_buf_ptr = builder->CreatePtrToInt(pBuf, Type::getInt32Ty(module->getContext()));
> + if (!parseOnePrintfInstruction(call, pInfo, sizeof_size)) {
> + printf("Parse One printf inst failed, may have some error\n");
> + // Just kill this printf instruction.
> + deadprintfs.push_back(PrintfInst(cast<Instruction>(call),0));
> + continue;
> }
>
> - deadprintfs.push_back(PrintfInst(cast<Instruction>(call),parseOnePrintfInstruction(call)));
> + hasPrintf = true;
> +
> + infoVect.push_back(pInfo);
> + totalSizeofSize += sizeof_size;
> }
> }
>
> + if (!hasPrintf)
> + return changed;
> +
> + if (!pbuf_ptr) {
> + /* alloc a new buffer ptr to collect the print output. */
> + Type *ptrTy = Type::getInt32PtrTy(module->getContext());
> + llvm::Constant * pBuf = module->getOrInsertGlobal(StringRef("__gen_ocl_printf_buf"), ptrTy);
> + pbuf_ptr = builder->CreatePtrToInt(pBuf, Type::getInt32Ty(module->getContext()));
> + }
> + if (!index_buf_ptr) {
> + Type *ptrTy = Type::getInt32PtrTy(module->getContext());
> + llvm::Constant * pBuf = module->getOrInsertGlobal(StringRef("__gen_ocl_printf_index_buf"), ptrTy);
> + index_buf_ptr = builder->CreatePtrToInt(pBuf, Type::getInt32Ty(module->getContext()));
> + }
> +
> + if (!wg_offset || !g1Xg2Xg3) {
> + Value* op0 = NULL;
> + Value* val = NULL;
> +
> + builder->SetInsertPoint(F.begin()->begin());// Insert the common var in the begin.
> +
> + /* FIXME: Because the OpenCL language do not support va macro, and we do not want
> + to introduce the va_list, va_start and va_end into our code, we just simulate
> + the function calls to caculate the offset caculation here. */
> +#define BUILD_CALL_INST(name) \
> + CallInst* name = builder->CreateCall(cast<llvm::Function>(module->getOrInsertFunction( \
> + "__gen_ocl_get_"#name, \
> + IntegerType::getInt32Ty(module->getContext()), \
> + NULL)))
> +
> + BUILD_CALL_INST(group_id2);
> + BUILD_CALL_INST(group_id1);
> + BUILD_CALL_INST(group_id0);
> + BUILD_CALL_INST(global_size2);
> + BUILD_CALL_INST(global_size1);
> + BUILD_CALL_INST(global_size0);
> + BUILD_CALL_INST(local_id2);
> + BUILD_CALL_INST(local_id1);
> + BUILD_CALL_INST(local_id0);
> + BUILD_CALL_INST(local_size2);
> + BUILD_CALL_INST(local_size1);
> + BUILD_CALL_INST(local_size0);
> +
> +#undef BUILD_CALL_INST
> +
> + /* calculate offset for later usage.
> + wg_offset = ((local_id2 + local_size2 * group_id2) * (global_size1 * global_size0)
> + + (local_id1 + local_size1 * group_id1) * global_size0
> + + (local_id0 + local_size0 * group_id0)) */
> +
> + // local_size2 * group_id2
> + val = builder->CreateMul(local_size2, group_id2);
> + // local_id2 + local_size2 * group_id2
> + val = builder->CreateAdd(local_id2, val);
> + // global_size1 * global_size0
> + op0 = builder->CreateMul(global_size1, global_size0);
> + // (local_id2 + local_size2 * group_id2) * (global_size1 * global_size0)
> + Value* offset1 = builder->CreateMul(val, op0);
> + // local_size1 * group_id1
> + val = builder->CreateMul(local_size1, group_id1);
> + // local_id1 + local_size1 * group_id1
> + val = builder->CreateAdd(local_id1, val);
> + // (local_id1 + local_size1 * group_id1) * global_size_0
> + Value* offset2 = builder->CreateMul(val, global_size0);
> + // local_size0 * group_id0
> + val = builder->CreateMul(local_size0, group_id0);
> + // local_id0 + local_size0 * group_id0
> + val = builder->CreateAdd(local_id0, val);
> + // The total sum
> + val = builder->CreateAdd(val, offset1);
> + wg_offset = builder->CreateAdd(val, offset2);
> +
> + // global_size2 * global_size1
> + op0 = builder->CreateMul(global_size2, global_size1);
> + // global_size2 * global_size1 * global_size0
> + g1Xg2Xg3 = builder->CreateMul(op0, global_size0);
> + }
> +
> +
> + /* Now generate the instructions. */
> + for (auto pInfo : infoVect) {
> + builder->SetInsertPoint(pInfo.call);
> + deadprintfs.push_back(PrintfInst(cast<Instruction>(pInfo.call), generateOnePrintfInstruction(pInfo)));
> + }
> +
> + assert(out_buf_sizeof_offset == totalSizeofSize);
> +
> /* Replace the instruction's operand if using printf's return value. */
> for (llvm::Function::iterator B = F.begin(), BE = F.end(); B != BE; B++) {
> for (BasicBlock::iterator instI = B->begin(),
> @@ -775,6 +885,7 @@ error:
> bool sign = false;
>
> if (vec_num != slot.state->vector_n) {
> + printf("Error The printf vector number is not match!\n");
> return false;
> }
>
> @@ -785,26 +896,37 @@ error:
> case PRINTF_CONVERSION_O:
> case PRINTF_CONVERSION_U:
> case PRINTF_CONVERSION_x:
> - case PRINTF_CONVERSION_X:
> - if (elt_type->getTypeID() != Type::IntegerTyID)
> + case PRINTF_CONVERSION_X: {
> + if (elt_type->getTypeID() != Type::IntegerTyID) {
> + printf("Do not support type conversion between float and int in vector printf!\n");
> return false;
> + }
> +
> + Type* elt_dst_type = NULL;
> + if (slot.state->length_modifier == PRINTF_LM_L) {
> + elt_dst_type = Type::getInt64Ty(elt_type->getContext());
> + } else {
> + elt_dst_type = Type::getInt32Ty(elt_type->getContext());
> + }
>
> /* If the bits change, we need to consider the signed. */
> - if (elt_type != Type::getInt32Ty(elt_type->getContext())) {
> + if (elt_type != elt_dst_type) {
> Value *II = NULL;
> for (int i = 0; i < vec_num; i++) {
> - Value *vec = II ? II : UndefValue::get(VectorType::get(Type::getInt32Ty(elt_type->getContext()), vec_num));
> + Value *vec = II ? II : UndefValue::get(VectorType::get(elt_dst_type, vec_num));
> Value *cv = ConstantInt::get(Type::getInt32Ty(elt_type->getContext()), i);
> Value *org = builder->CreateExtractElement(arg, cv);
> - Value *cvt = builder->CreateIntCast(org, Type::getInt32Ty(module->getContext()), sign);
> + Value *cvt = builder->CreateIntCast(org, elt_dst_type, sign);
> II = builder->CreateInsertElement(vec, cvt, cv);
> }
> arg = II;
> }
>
> dst_type = arg->getType()->getPointerTo(1);
> - sizeof_size = sizeof(int) * vec_num;
> + sizeof_size = (elt_dst_type == Type::getInt32Ty(elt_type->getContext()) ?
> + sizeof(int) * vec_num : sizeof(int64_t) * vec_num);
> return true;
> + }
>
> case PRINTF_CONVERSION_F:
> case PRINTF_CONVERSION_f:
> @@ -814,8 +936,10 @@ error:
> case PRINTF_CONVERSION_g:
> case PRINTF_CONVERSION_A:
> case PRINTF_CONVERSION_a:
> - if (elt_type->getTypeID() != Type::DoubleTyID && elt_type->getTypeID() != Type::FloatTyID)
> + if (elt_type->getTypeID() != Type::DoubleTyID && elt_type->getTypeID() != Type::FloatTyID) {
> + printf("Do not support type conversion between float and int in vector printf!\n");
> return false;
> + }
>
> if (elt_type->getTypeID() != Type::FloatTyID) {
> Value *II = NULL;
> @@ -828,10 +952,14 @@ error:
> }
> arg = II;
> }
> +
> + dst_type = arg->getType()->getPointerTo(1);
> + sizeof_size = sizeof(int) * vec_num;
> + return true;
> +
> + default:
> + return false;
> }
> - dst_type = arg->getType()->getPointerTo(1);
> - sizeof_size = sizeof(int) * vec_num;
> - return true;
> }
>
> default:
> diff --git a/kernels/test_printf.cl b/kernels/test_printf.cl
> index c2844f4..0a59e88 100644
> --- a/kernels/test_printf.cl
> +++ b/kernels/test_printf.cl
> @@ -4,6 +4,8 @@ test_printf(void)
> int x = (int)get_global_id(0);
> int y = (int)get_global_id(1);
> int z = (int)get_global_id(2);
> + int g0 = (int)get_global_size(0);
> + int g1 = (int)get_global_size(1);
> uint a = 'x';
> float f = 5.0f;
> int3 vec;
> @@ -14,28 +16,31 @@ test_printf(void)
>
> if (x == 0 && y == 0 && z == 0) {
> printf("--- Welcome to the printf test of %s ---\n", "Intel Beignet");
> -
> printf("### output a char is %c\n", a);
> -
> printf("@@@ A long value is %ld\n", cc);
> }
>
> - if (x % 15 == 0)
> - if (y % 3 == 0)
> - if (z % 7 == 0)
> - printf("######## global_id(x, y, z) = %v3d, global_size(d0, d1, d3) = (%d, %d, %d)\n",
> - vec, get_global_size(0), get_global_size(1), get_global_size(2));
> + for(int i = 0; i < g0/2; i++)
> + for(int j = 0; j < g1/2; j++)
> + if(x == 0 && y == 0 && z == 0)
> + printf("loops: i = %d, j = %d\n", i, j);
>
> - if (x == 1)
> + if (x == 0) {
> if (y == 0) {
> if (z % 2 == 0)
> - printf("#### output a float is %f\n", f);
> + printf("!!! output a float is %f\n", f);
> else
> - printf("#### output a float to int is %d\n", f);
> + printf("!!! output a float to int is %d\n", f);
> }
> + }
> +
> + if (x % 15 == 0)
> + if (y % 3 == 0)
> + if (z % 7 == 0)
> + printf("######## global_id(x, y, z) = %v3d, global_size(d0, d1, d3) = (%d, %d, %d)\n",
> + vec, get_global_size(0), get_global_size(1), get_global_size(2));
>
> if (x == 0 && y == 0 && z == 0) {
> printf("--- End to the printf test ---\n");
> }
> -
> }
> diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c
> index 330f0f9..b020540 100644
> --- a/src/cl_command_queue_gen7.c
> +++ b/src/cl_command_queue_gen7.c
> @@ -271,6 +271,14 @@ cl_bind_printf(cl_gpgpu gpgpu, cl_kernel ker, void* printf_info, int printf_num,
> value = GBE_CURBE_PRINTF_BUF_POINTER;
> offset = interp_kernel_get_curbe_offset(ker->opaque, value, 0);
> buf_size = interp_get_printf_sizeof_size(printf_info) * global_sz;
> + /* because of the printf may exist in a loop, which loop number can not be gotten by
> + static analysis. So we set the data buffer as big as we can. Out of bound printf
> + info will be discarded. */
> + if (buf_size < 1*1024)
> + buf_size = 1*1024*1024;
> + else
> + buf_size = 4*1024*1024; //at most.
> +
> if (offset > 0) {
> if (cl_gpgpu_set_printf_buffer(gpgpu, 1, buf_size, offset, interp_get_printf_buf_bti(printf_info)) != 0)
> return -1;
> --
> 1.7.9.5
>
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list