[Beignet] [PATCH 3/6] Add the PrintfParser llvm parser into the llvm backend.

Tue Jun 10 17:41:28 PDT 2014

Some comments inline below:

On Tue, Jun 10, 2014 at 12:52:54PM +0800, junyan.he at inbox.com wrote:
> From: Junyan He <junyan.he at linux.intel.com>
> 
> The PrintfParser will work before the llvm gen backend.
> It will filter out all the printf function call. When
> the printf call found, we will analyse the print format
> and % place holder here. Replace the print call with
> STORE or CONV+STORE instruction if needed.
> 
> Signed-off-by: Junyan He <junyan.he at linux.intel.com>
> ---
>  backend/src/CMakeLists.txt              |   1 +
>  backend/src/ir/function.cpp             |   1 +
>  backend/src/ir/function.hpp             |   4 +
>  backend/src/llvm/llvm_gen_backend.cpp   |   5 +
>  backend/src/llvm/llvm_gen_backend.hpp   |   4 +
>  backend/src/llvm/llvm_printf_parser.cpp | 677 ++++++++++++++++++++++++++++++++
>  backend/src/llvm/llvm_to_gen.cpp        |   1 +
>  7 files changed, 693 insertions(+)
>  create mode 100644 backend/src/llvm/llvm_printf_parser.cpp
> 
> diff --git a/backend/src/CMakeLists.txt b/backend/src/CMakeLists.txt
> index 0716d35..6090174 100644
> --- a/backend/src/CMakeLists.txt
> +++ b/backend/src/CMakeLists.txt
> @@ -147,6 +147,7 @@ else (GBE_USE_BLOB)
>      llvm/llvm_scalarize.cpp
>      llvm/llvm_intrinsic_lowering.cpp
>      llvm/llvm_barrier_nodup.cpp
> +    llvm/llvm_printf_parser.cpp
>      llvm/llvm_to_gen.cpp
>      llvm/llvm_loadstore_optimization.cpp
>      llvm/llvm_gen_backend.hpp
> diff --git a/backend/src/ir/function.cpp b/backend/src/ir/function.cpp
> index b0df412..a46108e 100644
> --- a/backend/src/ir/function.cpp
> +++ b/backend/src/ir/function.cpp
> @@ -48,6 +48,7 @@ namespace ir {
>      initProfile(*this);
>      samplerSet = GBE_NEW(SamplerSet);
>      imageSet = GBE_NEW(ImageSet);
> +    printfSet = GBE_NEW(PrintfSet);
>    }
>  
>    Function::~Function(void) {
> diff --git a/backend/src/ir/function.hpp b/backend/src/ir/function.hpp
> index 266e652..63bb6ea 100644
> --- a/backend/src/ir/function.hpp
> +++ b/backend/src/ir/function.hpp
> @@ -29,6 +29,7 @@
>  #include "ir/instruction.hpp"
>  #include "ir/profile.hpp"
>  #include "ir/sampler.hpp"
> +#include "ir/printf.hpp"
>  #include "ir/image.hpp"
>  #include "sys/vector.hpp"
>  #include "sys/set.hpp"
> @@ -329,6 +330,8 @@ namespace ir {
>      SamplerSet* getSamplerSet(void) const {return samplerSet; }
>      /*! Get image set in this function */
>      ImageSet* getImageSet(void) const {return imageSet; }
> +    /*! Get printf set in this function */
> +    PrintfSet* getPrintfSet(void) const {return printfSet; }
>      /*! Set required work group size. */
>      void setCompileWorkGroupSize(size_t x, size_t y, size_t z) { compileWgSize[0] = x; compileWgSize[1] = y; compileWgSize[2] = z; }
>      /*! Get required work group size. */
> @@ -360,6 +363,7 @@ namespace ir {
>      uint32_t stackSize;             //!< stack size for private memory.
>      SamplerSet *samplerSet;         //!< samplers used in this function.
>      ImageSet* imageSet;             //!< Image set in this function's arguments..
> +    PrintfSet *printfSet;           //!< printfSet store the printf info.
>      size_t compileWgSize[3];        //!< required work group size specified by
>                                      //   __attribute__((reqd_work_group_size(X, Y, Z))).
>      GBE_CLASS(Function);            //!< Use custom allocator
> diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
> index 7802818..4bb9039 100644
> --- a/backend/src/llvm/llvm_gen_backend.cpp
> +++ b/backend/src/llvm/llvm_gen_backend.cpp
> @@ -2970,7 +2970,12 @@ namespace gbe
>  #undef DEF
>  
>            case GEN_OCL_PRINTF:
> +          {
> +            ir::PrintfSet::PrintfFmt* fmt = (ir::PrintfSet::PrintfFmt*)getPrintfInfo(&I);
> +            ctx.getFunction().getPrintfSet()->append(fmt, unit);
> +            assert(fmt);
>              break;
> +          }
>            case GEN_OCL_PRINTF_BUF_ADDR:
>            case GEN_OCL_PRINTF_INDEX_BUF_ADDR:
>            default: break;
> diff --git a/backend/src/llvm/llvm_gen_backend.hpp b/backend/src/llvm/llvm_gen_backend.hpp
> index 26323a3..cc5cdad 100644
> --- a/backend/src/llvm/llvm_gen_backend.hpp
> +++ b/backend/src/llvm/llvm_gen_backend.hpp
> @@ -95,6 +95,10 @@ namespace gbe
>    /*! Convert the Intrinsic call to gen function */
>    llvm::BasicBlockPass *createIntrinsicLoweringPass();
>  
> +  /*! Passer the printf function call. */
> +  llvm::FunctionPass* createPrintfParserPass();
> +
> +  void* getPrintfInfo(llvm::CallInst* inst);
>  } /* namespace gbe */
>  
>  #endif /* __GBE_LLVM_GEN_BACKEND_HPP__ */
> diff --git a/backend/src/llvm/llvm_printf_parser.cpp b/backend/src/llvm/llvm_printf_parser.cpp
> new file mode 100644
> index 0000000..ec8e76d
> --- /dev/null
> +++ b/backend/src/llvm/llvm_printf_parser.cpp
> @@ -0,0 +1,677 @@
> +/*
> + * Copyright © 2012 Intel Corporation
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2 of the License, or (at your option) any later version.
> + *
> + * This library is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this library. If not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +/**
> + * \file llvm_printf.cpp
Wrong file name: the \file tag says llvm_printf.cpp, but this file is llvm_printf_parser.cpp.

> + *
> + * When there are printf functions existing, we have something to do here.
> + * Because the GPU's feature, it is relatively hard to parse and caculate the
> + * printf's format string. OpenCL 1.2 restrict the format string to be a
> + * constant string and can be decided at compiling time. So we add a pass here
> + * to parse the format string and check whether the parameters is valid.
> + * If all are valid, we will generate the according instruction to store the
> + * parameter content into the printf buffer. And if something is invalid, a
> + * warning is generated and the printf instruction is skipped in order to avoid
> + * GPU error. We also keep the relationship between the printf format and printf
> + * content in GPU's printf buffer here, and use the system's C standard printf to
> + * print the content after kernel executed.
> + */
> +#include <stdio.h>
> +#include <stdlib.h>
> +
> +#include "llvm/Config/config.h"
> +#if LLVM_VERSION_MINOR <= 2
> +#include "llvm/Function.h"
> +#include "llvm/InstrTypes.h"
> +#include "llvm/Instructions.h"
> +#include "llvm/IntrinsicInst.h"
> +#include "llvm/Module.h"
> +#else
> +#include "llvm/IR/Function.h"
> +#include "llvm/IR/InstrTypes.h"
> +#include "llvm/IR/Instructions.h"
> +#include "llvm/IR/IntrinsicInst.h"
> +#include "llvm/IR/Module.h"
> +#endif  /* LLVM_VERSION_MINOR <= 2 */
> +#include "llvm/Pass.h"
> +#if LLVM_VERSION_MINOR <= 1
> +#include "llvm/Support/IRBuilder.h"
> +#elif LLVM_VERSION_MINOR == 2
> +#include "llvm/IRBuilder.h"
> +#else
> +#include "llvm/IR/IRBuilder.h"
> +#endif /* LLVM_VERSION_MINOR <= 1 */
> +#include "llvm/Support/CallSite.h"
> +#include "llvm/Support/CFG.h"
> +#include "llvm/Support/raw_ostream.h"
> +#include "llvm/IR/Attributes.h"
> +
> +#include "llvm/llvm_gen_backend.hpp"
> +#include "sys/map.hpp"
> +#include "ir/printf.hpp"
> +
> +using namespace llvm;
> +
> +namespace gbe
> +{
> +  using namespace ir;
> +
> +  /* Return the conversion_specifier if succeed, -1 if failed. */
> +  static char __parse_printf_state(char *begin, char *end, char** rend, PrintfState * state)
> +  {
> +    const char *fmt;
> +    state->left_justified = 0;
> +    state->sign_symbol = 0; //0 for nothing, 1 for sign, 2 for space.
> +    state->alter_form = 0;
> +    state->zero_padding = 0;
> +    state->vector_n = 0;
> +    state->min_width = 0;
> +    state->precision = 0;
> +    state->length_modifier = 0;
> +    state->conversion_specifier = PRINTF_CONVERSION_INVALID;
> +    state->out_buf_sizeof_offset = -1;
> +
> +    fmt = begin;
> +
> +    if (*fmt != '%')
> +      return -1;
> +
> +#define FMT_PLUS_PLUS do {                                  \
> +      if (fmt + 1 < end) fmt++;                             \
> +      else {                                                \
> +        printf("Error, line: %d, fmt > end\n", __LINE__);   \
> +        return -1;                                          \
> +      }                                                     \
> +    }  while(0)
> +
> +    FMT_PLUS_PLUS;
> +
> +    // parse the flags.
> +    switch (*fmt) {
> +      case '-':
> +        /* The result of the conversion is left-justified within the field. */
> +        state->left_justified = 1;
> +        FMT_PLUS_PLUS;
> +        break;
> +      case '+':
> +        /* The result of a signed conversion always begins with a plus or minus sign. */
> +        state->sign_symbol = 1;
> +        FMT_PLUS_PLUS;
> +        break;
> +      case ' ':
> +        /* If the first character of a signed conversion is not a sign, or if a signed
> +           conversion results in no characters, a space is prefixed to the result.
> +           If the space and + flags both appear,the space flag is ignored. */
> +        if (state->sign_symbol == 0) state->sign_symbol = 2;
> +        FMT_PLUS_PLUS;
> +        break;
> +      case '#':
> +        /*The result is converted to an alternative form. */
> +        state->alter_form = 1;
> +        FMT_PLUS_PLUS;
> +        break;
> +      case '0':
> +        if (!state->left_justified) state->zero_padding = 1;
> +        FMT_PLUS_PLUS;
> +        break;
> +      default:
> +        break;
> +    }
> +
> +    // The minimum field width
> +    while ((*fmt >= '0') && (*fmt <= '9')) {
> +      state->min_width = state->min_width * 10 + (*fmt - '0');
> +      FMT_PLUS_PLUS;
> +    }
> +
> +    // The precision
> +    if (*fmt == '.') {
> +      FMT_PLUS_PLUS;
> +      while (*fmt >= '0' && *fmt <= '9') {
> +        state->precision = state->precision * 10 + (*fmt - '0');
> +        FMT_PLUS_PLUS;
> +      }
> +    }
> +
> +    // handle the vector specifier.
> +    if (*fmt == 'v') {
> +      FMT_PLUS_PLUS;
> +      switch (*fmt) {
> +        case '2':
> +          state->vector_n = 2;
> +          FMT_PLUS_PLUS;
> +          break;
> +        case '3':
> +          state->vector_n = 3;
> +          FMT_PLUS_PLUS;
> +          break;
> +        case '4':
> +          state->vector_n = 4;
> +          FMT_PLUS_PLUS;
> +          break;
> +        case '8':
> +          state->vector_n = 8;
> +          FMT_PLUS_PLUS;
> +          break;
> +        case '1':
> +          FMT_PLUS_PLUS;
> +          if (*fmt == '6') {
> +            state->vector_n = 16;
> +            FMT_PLUS_PLUS;
> +          } else
> +            return -1;
> +          break;
The vector-width cases above could be simplified as below:

           case '2':
           case '3':
           case '4':
           case '8':
             state->vector_n = *fmt - '0';
             FMT_PLUS_PLUS;
             break;
           case '1':
           .... 
> +        default:
> +          //Wrong vector, error.
> +          return -1;
> +      }
> +    }
> +
> +    // length modifiers
> +    if (*fmt == 'h') {
> +      FMT_PLUS_PLUS;
> +      if (*fmt == 'h') { //hh
> +        state->length_modifier = PRINTF_LM_HH;
> +        FMT_PLUS_PLUS;
> +      } else if (*fmt == 'l') { //hl
> +        state->length_modifier = PRINTF_LM_HL;
> +        FMT_PLUS_PLUS;
> +      } else { //h
> +        state->length_modifier = PRINTF_LM_H;
> +      }
> +    } else if (*fmt == 'l') {
> +      state->length_modifier = PRINTF_LM_L;
> +      FMT_PLUS_PLUS;
> +    }
> +
> +#define CONVERSION_SPEC_AND_RET(XXX, xxx)                           \
> +    case XXX:                                                       \
> +      state->conversion_specifier = PRINTF_CONVERSION_##xxx;        \
> +      FMT_PLUS_PLUS;                                                \
> +      *rend = (char *)fmt;                                          \
> +      return XXX;                                                   \
> +      break;
> +
> +    // conversion specifiers
> +    switch (*fmt) {
> +        CONVERSION_SPEC_AND_RET('d', D)
> +        CONVERSION_SPEC_AND_RET('i', I)
> +        CONVERSION_SPEC_AND_RET('o', O)
> +        CONVERSION_SPEC_AND_RET('u', U)
> +        CONVERSION_SPEC_AND_RET('x', x)
> +        CONVERSION_SPEC_AND_RET('X', X)
> +        CONVERSION_SPEC_AND_RET('f', f)
> +        CONVERSION_SPEC_AND_RET('F', F)
> +        CONVERSION_SPEC_AND_RET('e', e)
> +        CONVERSION_SPEC_AND_RET('E', E)
> +        CONVERSION_SPEC_AND_RET('g', g)
> +        CONVERSION_SPEC_AND_RET('G', G)
> +        CONVERSION_SPEC_AND_RET('a', a)
> +        CONVERSION_SPEC_AND_RET('A', A)
> +        CONVERSION_SPEC_AND_RET('c', C)
> +        CONVERSION_SPEC_AND_RET('s', A)
> +        CONVERSION_SPEC_AND_RET('p', P)
> +
> +        // %% has been handled
> +
> +      default:
> +        return -1;
> +    }
> +  }
> +
> +  static PrintfSet::PrintfFmt* parser_printf_fmt(char* format, int& num)
> +  {
> +    char* begin;
> +    char* end;
> +    char* p;
> +    char ret_char;
> +    char* rend;
> +    PrintfState state;
> +    PrintfSet::PrintfFmt* printf_fmt = new PrintfSet::PrintfFmt();
> +
> +    p = format;
> +    begin = format;
> +    end = format + strlen(format);
> +
> +    /* Now parse it. */
> +    while (*begin) {
> +      p = begin;
> +
> +again:
> +      while (p < end && *p != '%') {
> +        p++;
> +      }
> +      if (p < end && p + 1 == end) { // String with % at end.
> +        printf("string end with %%\n");
> +        goto error;
> +      }
> +      if (*(p + 1) == '%') { // %%
> +        p += 2;
> +        goto again;
> +      }
> +
> +      if (p != begin) {
> +        std::string s = std::string(begin, size_t(p - begin));
> +        printf_fmt->push_back(PrintfSlot(s.c_str()));
> +      }
> +
> +      if (p == end) // finish
> +        break;
> +
> +      /* Now parse the % start conversion_specifier. */
> +      ret_char = __parse_printf_state(p, end, &rend, &state);
> +      if (ret_char < 0)
> +        goto error;
> +
> +      printf_fmt->push_back(&state);
> +
> +      if (rend == end)
> +        break;
> +
> +      begin = rend;
> +    }
> +
> +    for (auto &s : *printf_fmt) {
> +      if (s.type == PRINTF_SLOT_TYPE_STATE) {
> +        num++;
> +#if 0
> +        printf("---- %d ---: state : \n", j);
> +        printf("		     left_justified : %d\n", s.state->left_justified);
> +        printf("		     sign_symbol: %d\n", s.state->sign_symbol);
> +        printf("		     alter_form : %d\n", s.state->alter_form);
> +        printf("		     zero_padding : %d\n", s.state->zero_padding);
> +        printf("		     vector_n : %d\n", s.state->vector_n);
> +        printf("		     min_width : %d\n", s.state->min_width);
> +        printf("		     precision : %d\n", s.state->precision);
> +        printf("		     length_modifier : %d\n", s.state->length_modifier);
> +        printf("		     conversion_specifier : %d\n", s.state->conversion_specifier);
> +#endif
> +      } else if (s.type == PRINTF_SLOT_TYPE_STRING) {
> +        //printf("---- %d ---: string :  %s\n", j, s.str);
> +      }
The loop above seems to be for debugging purposes only, since you can do the num++ when you push_back each state.
If so, you can move the num++ into the previous loop and comment this whole loop out.

> +    }
> +
> +    return printf_fmt;
> +
> +error:
> +    printf("error format string.\n");
> +    delete printf_fmt;
> +    return NULL;
> +  }
> +
> +  class PrintfParser : public FunctionPass
> +  {
> +  public:
> +    static char ID;
> +    typedef std::pair<Instruction*, bool> PrintfInst;
> +    std::vector<PrintfInst> deadprintfs;
> +    Module* module;
> +    IRBuilder<>* builder;
> +    Type* intTy;
> +    Value* pbuf_ptr;
> +    Value* index_buf_ptr;
> +    int out_buf_sizeof_offset;
> +    static map<CallInst*, PrintfSet::PrintfFmt*> printfs;
> +    int printf_num;
> +
> +    PrintfParser(void) : FunctionPass(ID) {
> +      module = NULL;
> +      builder = NULL;
> +      intTy = NULL;
> +      out_buf_sizeof_offset = 0;
> +      printfs.clear();
> +      pbuf_ptr = NULL;
> +      index_buf_ptr = NULL;
> +      printf_num = 0;
> +    }
> +
> +    ~PrintfParser(void) {
> +      for (auto &s : printfs) {
> +        delete s.second;
> +        s.second = NULL;
> +      }
> +      printfs.clear();
> +    }
> +
> +
> +    bool parseOnePrintfInstruction(CallInst *& call);
> +    int generateOneParameterInst(PrintfSlot& slot, Value& arg);
> +
> +    virtual const char *getPassName() const {
> +      return "Printf Parser";
> +    }
> +
> +    virtual bool runOnFunction(llvm::Function &F);
> +  };
> +
> +  bool PrintfParser::parseOnePrintfInstruction(CallInst *& call)
> +  {
> +    CallSite CS(call);
> +    CallSite::arg_iterator CI_FMT = CS.arg_begin();
> +    int param_num = 0;
> +
> +    llvm::Constant* arg0 = dyn_cast<llvm::ConstantExpr>(*CI_FMT);
> +    llvm::Constant* arg0_ptr = dyn_cast<llvm::Constant>(arg0->getOperand(0));
> +    if (!arg0_ptr) {
> +      return false;
> +    }
> +
> +    ConstantDataSequential* fmt_arg = dyn_cast<ConstantDataSequential>(arg0_ptr->getOperand(0));
> +    if (!fmt_arg || !fmt_arg->isCString()) {
> +      return false;
> +    }
> +
> +    std::string fmt = fmt_arg->getAsCString();
> +
> +    PrintfSet::PrintfFmt* printf_fmt = NULL;
> +
> +    if (!(printf_fmt = parser_printf_fmt((char *)fmt.c_str(), param_num))) {//at lease print something
> +      return false;
> +    }
> +
> +    /* iff parameter more than %, error. */
> +    /* str_fmt arg0 arg1 ... NULL */
> +    if (param_num + 2 < static_cast<int>(call->getNumOperands())) {
> +      delete printf_fmt;
> +      return false;
> +    }
> +
> +    /* FIXME: Because the OpenCL language do not support va macro, and we do not want
> +       to introduce the va_list, va_start and va_end into our code, we just simulate
> +       the function calls to caculate the offset caculation here. */

It would be better to use a macro to simplify the following duplicated code.

> +    CallInst* group_id_2 = builder->CreateCall(cast<llvm::Function>(module->getOrInsertFunction(
> +                             "__gen_ocl_get_group_id2",
> +                             IntegerType::getInt32Ty(module->getContext()),
> +                             NULL)));
> +    CallInst* group_id_1 = builder->CreateCall(cast<llvm::Function>(module->getOrInsertFunction(
> +                             "__gen_ocl_get_group_id1",
> +                             IntegerType::getInt32Ty(module->getContext()),
> +                             NULL)));
> +    CallInst* group_id_0 = builder->CreateCall(cast<llvm::Function>(module->getOrInsertFunction(
> +                             "__gen_ocl_get_group_id0",
> +                             IntegerType::getInt32Ty(module->getContext()),
> +                             NULL)));
> +
> +    CallInst* global_size_2 = builder->CreateCall(cast<llvm::Function>(module->getOrInsertFunction(
> +                                "__gen_ocl_get_global_size2",
> +                                IntegerType::getInt32Ty(module->getContext()),
> +                                NULL)));
> +    CallInst* global_size_1 = builder->CreateCall(cast<llvm::Function>(module->getOrInsertFunction(
> +                                "__gen_ocl_get_global_size1",
> +                                IntegerType::getInt32Ty(module->getContext()),
> +                                NULL)));
> +    CallInst* global_size_0 = builder->CreateCall(cast<llvm::Function>(module->getOrInsertFunction(
> +                                "__gen_ocl_get_global_size0",
> +                                IntegerType::getInt32Ty(module->getContext()),
> +                                NULL)));
> +
> +    CallInst* local_id_2 = builder->CreateCall(cast<llvm::Function>(module->getOrInsertFunction(
> +                             "__gen_ocl_get_local_id2",
> +                             IntegerType::getInt32Ty(module->getContext()),
> +                             NULL)));
> +    CallInst* local_id_1 = builder->CreateCall(cast<llvm::Function>(module->getOrInsertFunction(
> +                             "__gen_ocl_get_local_id1",
> +                             IntegerType::getInt32Ty(module->getContext()),
> +                             NULL)));
> +    CallInst* local_id_0 = builder->CreateCall(cast<llvm::Function>(module->getOrInsertFunction(
> +                             "__gen_ocl_get_local_id0",
> +                             IntegerType::getInt32Ty(module->getContext()),
> +                             NULL)));
> +
> +    CallInst* local_size_2 = builder->CreateCall(cast<llvm::Function>(module->getOrInsertFunction(
> +                               "__gen_ocl_get_local_size2",
> +                               IntegerType::getInt32Ty(module->getContext()),
> +                               NULL)));
> +    CallInst* local_size_1 = builder->CreateCall(cast<llvm::Function>(module->getOrInsertFunction(
> +                               "__gen_ocl_get_local_size1",
> +                               IntegerType::getInt32Ty(module->getContext()),
> +                               NULL)));
> +    CallInst* local_size_0 = builder->CreateCall(cast<llvm::Function>(module->getOrInsertFunction(
> +                               "__gen_ocl_get_local_size0",
> +                               IntegerType::getInt32Ty(module->getContext()),
> +                               NULL)));
> +    Value* op0 = NULL;
> +    Value* val = NULL;
> +    /* offset = ((local_id_2 + local_size_2 * group_id_2) * (global_size_1 * global_size_0)
> +       + (local_id_1 + local_size_1 * group_id_1) * global_size_0
> +       + (local_id_0 + local_size_0 * group_id_0)) * sizeof(type)  */
       Just add a few words to the comment above to indicate that the following code generates
       identical instructions.

Other parts LGTM.