[Beignet] [PATCH 1/3] GBE: switch to CLANG native sampler_t.

Wed Dec 17 23:01:47 PST 2014

Thanks for the comment, will fix those and push later.

On Thu, Dec 18, 2014 at 07:43:20AM +0000, Yang, Rong R wrote:
> Need to change llvm_sampler_fix.cpp's author and comment.
> The rest of this patchset LGTM.
> 
> > -----Original Message-----
> > From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of
> > Zhigang Gong
> > Sent: Monday, December 15, 2014 09:02
> > To: beignet at lists.freedesktop.org
> > Cc: Gong, Zhigang
> > Subject: [Beignet] [PATCH 1/3] GBE: switch to CLANG native sampler_t.
> > 
> > CLANG has had sampler_t support since LLVM 3.3, so let's switch to that type
> > rather than the old hacky way. One major problem is the sampler static
> > checking. As the Gen platform has some hardware restrictions, if the sampler
> > value is a const defined at kernel side, we need to use the value to optimize
> > the code path. Now that sampler_t has become an opaque type, CLANG doesn't
> > support any arithmetic operations on it.
> > So we have to introduce a new pass to do this optimization.
> > 
> > Signed-off-by: Zhigang Gong <zhigang.gong at intel.com>
> > ---
> >  backend/src/CMakeLists.txt             |   1 +
> >  backend/src/ir/function.hpp            |   5 ++
> >  backend/src/ir/sampler.cpp             |   6 +-
> >  backend/src/libocl/include/ocl_types.h |   4 +-
> >  backend/src/libocl/src/ocl_image.cl    |  27 ++++---
> >  backend/src/llvm/llvm_gen_backend.cpp  |   8 +-
> >  backend/src/llvm/llvm_gen_backend.hpp  |   1 +
> >  backend/src/llvm/llvm_sampler_fix.cpp  | 144
> > +++++++++++++++++++++++++++++++++
> >  backend/src/llvm/llvm_to_gen.cpp       |   1 +
> >  src/cl_kernel.c                        |  12 ++-
> >  10 files changed, 184 insertions(+), 25 deletions(-)  create mode 100644
> > backend/src/llvm/llvm_sampler_fix.cpp
> > 
> > diff --git a/backend/src/CMakeLists.txt b/backend/src/CMakeLists.txt index
> > b4555f1..deba230 100644
> > --- a/backend/src/CMakeLists.txt
> > +++ b/backend/src/CMakeLists.txt
> > @@ -73,6 +73,7 @@ set (GBE_SRC
> >      backend/program.cpp
> >      backend/program.hpp
> >      backend/program.h
> > +    llvm/llvm_sampler_fix.cpp
> >      llvm/llvm_bitcode_link.cpp
> >      llvm/llvm_gen_backend.cpp
> >      llvm/llvm_passes.cpp
> > diff --git a/backend/src/ir/function.hpp b/backend/src/ir/function.hpp index
> > 0f86fef..4aea087 100644
> > --- a/backend/src/ir/function.hpp
> > +++ b/backend/src/ir/function.hpp
> > @@ -204,6 +204,11 @@ namespace ir {
> >          return isImage1dT() || isImage1dArrayT() || isImage1dBufferT() ||
> >                 isImage2dT() || isImage2dArrayT() || isImage3dT();
> >        }
> > +
> > +      bool isSamplerType() const {
> > +        return typeName.compare("sampler_t") == 0;
> > +      }
> > +
> >      };
> > 
> >      /*! Create a function input argument */ diff --git
> > a/backend/src/ir/sampler.cpp b/backend/src/ir/sampler.cpp index
> > ba42acb..e4accca 100644
> > --- a/backend/src/ir/sampler.cpp
> > +++ b/backend/src/ir/sampler.cpp
> > @@ -49,11 +49,7 @@ namespace ir {
> >      ir::FunctionArgument *arg =  ctx->getFunction().getArg(samplerReg);
> >      GBE_ASSERT(arg != NULL);
> > 
> > -    // XXX As LLVM 3.2/3.1 doesn't have a new data type for the sampler_t,
> > we have to fix up the argument
> > -    // type here. Once we switch to the LLVM and use the new data type
> > sampler_t, we can remove this
> > -    // work around.
> > -    arg->type = ir::FunctionArgument::SAMPLER;
> > -    arg->info.typeName = "sampler_t";
> > +    GBE_ASSERT(arg->type == ir::FunctionArgument::SAMPLER);
> >      int32_t id = ctx->getFunction().getArgID(arg);
> >      GBE_ASSERT(id < (1 << __CLK_SAMPLER_ARG_BITS));
> > 
> > diff --git a/backend/src/libocl/include/ocl_types.h
> > b/backend/src/libocl/include/ocl_types.h
> > index 49ac907..7798ee1 100644
> > --- a/backend/src/libocl/include/ocl_types.h
> > +++ b/backend/src/libocl/include/ocl_types.h
> > @@ -87,8 +87,8 @@ DEF(double);
> >  // FIXME:
> >  // This is a transitional hack to bypass the LLVM 3.3 built-in types.
> >  // See the Khronos SPIR specification for handling of these types.
> > -#define sampler_t __sampler_t
> > -typedef const ushort __sampler_t;
> > +//#define sampler_t __sampler_t
> > +//typedef const ushort __sampler_t;
> > 
> >  /////////////////////////////////////////////////////////////////////////////
> >  // OpenCL built-in event types
> > diff --git a/backend/src/libocl/src/ocl_image.cl
> > b/backend/src/libocl/src/ocl_image.cl
> > index c4ca2f8..6da8e90 100644
> > --- a/backend/src/libocl/src/ocl_image.cl
> > +++ b/backend/src/libocl/src/ocl_image.cl
> > @@ -136,18 +136,24 @@ GEN_VALIDATE_ARRAY_INDEX(int,
> > image1d_buffer_t)  // integer type surfaces correctly with
> > CLK_ADDRESS_CLAMP and CLK_FILTER_NEAREST.
> >  // The work around is to use a LD message instead of normal sample
> > message.
> > 
> > //////////////////////////////////////////////////////////////////////////////
> > /
> > +
> > +bool __gen_ocl_sampler_need_fix(sampler_t);
> > +bool __gen_ocl_sampler_need_rounding_fix(sampler_t);
> > +
> >  bool __gen_sampler_need_fix(const sampler_t sampler)  {
> > -  return (((sampler & __CLK_ADDRESS_MASK) == CLK_ADDRESS_CLAMP)
> > &&
> > -          ((sampler & __CLK_FILTER_MASK) == CLK_FILTER_NEAREST));
> > +  return __gen_ocl_sampler_need_fix(sampler);
> > +
> > +//  return (((sampler & __CLK_ADDRESS_MASK) == CLK_ADDRESS_CLAMP)
> > &&
> > +//          ((sampler & __CLK_FILTER_MASK) == CLK_FILTER_NEAREST));
> >  }
> > 
> >  bool __gen_sampler_need_rounding_fix(const sampler_t sampler)  {
> > -  return ((sampler & CLK_NORMALIZED_COORDS_TRUE) == 0);
> > +  return __gen_ocl_sampler_need_rounding_fix(sampler);
> > +//  return ((sampler & CLK_NORMALIZED_COORDS_TRUE) == 0);
> >  }
> > 
> > -
> >  INLINE_OVERLOADABLE float __gen_fixup_float_coord(float tmpCoord)  {
> >    if (tmpCoord < 0 && tmpCoord > -0x1p-20f) @@ -311,7 +317,7 @@
> > INLINE_OVERLOADABLE float3 __gen_fixup_neg_boundary(float3 coord)
> >              __gen_sampler_need_rounding_fix(sampler))                         \
> >            tmpCoord = __gen_fixup_float_coord(tmpCoord);                       \
> >          if (int_clamping_fix) {                                               \
> > -            if (sampler & CLK_NORMALIZED_COORDS_TRUE)                         \
> > +            if (!__gen_sampler_need_rounding_fix(sampler))                    \
> >                tmpCoord = __gen_denormalize_coord(cl_image, tmpCoord);         \
> >              tmpCoord = __gen_fixup_neg_boundary(tmpCoord);                    \
> >              return __gen_ocl_read_image ##suffix(                             \
> > @@ -328,9 +334,10 @@ INLINE_OVERLOADABLE float3
> > __gen_fixup_neg_boundary(float3 coord)
> >                                                 coord_type coord)              \
> >    {                                                                           \
> >      coord = __gen_validate_array_index(coord, cl_image);                      \
> > +    sampler_t defaultSampler = CLK_NORMALIZED_COORDS_FALSE |
> > CLK_ADDRESS_NONE \
> > +                               | CLK_FILTER_NEAREST;                          \
> >      return __gen_ocl_read_image ##suffix(                                     \
> > -             cl_image, CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE
> > \
> > -             | CLK_FILTER_NEAREST, coord, 0);                                 \
> > +             cl_image, defaultSampler, coord, 0);                             \
> >    }
> > 
> >  #define DECL_WRITE_IMAGE(image_type, image_data_type, suffix,
> > coord_type)     \
> > @@ -419,15 +426,15 @@ INLINE_OVERLOADABLE int4
> > __gen_fixup_1darray_coord(int2 coord, image1d_array_t i
> >              __gen_sampler_need_rounding_fix(sampler))                         \
> >            tmpCoord = __gen_fixup_float_coord(tmpCoord);                       \
> >          if (int_clamping_fix) {                                               \
> > -            if (sampler & CLK_NORMALIZED_COORDS_TRUE)                         \
> > +            if (!__gen_sampler_need_rounding_fix(sampler))                    \
> >                tmpCoord = __gen_denormalize_coord(cl_image, tmpCoord);         \
> >              float4 newCoord = __gen_fixup_1darray_coord(tmpCoord, cl_image);
> > \
> >              return __gen_ocl_read_image ##suffix(                             \
> > -                     cl_image, sampler, newCoord, 2);                       \
> > +                     cl_image, sampler, newCoord, 2);                         \
> >          }                                                                     \
> >        }                                                                       \
> >      }                                                                         \
> > -    return  __gen_ocl_read_image ##suffix(cl_image, sampler, tmpCoord, 0);
> > \
> > +    return  __gen_ocl_read_image ##suffix(cl_image, sampler, tmpCoord, 0);
> > \
> >    }
> > 
> >  #define DECL_IMAGE_1DArray(int_clamping_fix, image_data_type, suffix)
> > \
> > diff --git a/backend/src/llvm/llvm_gen_backend.cpp
> > b/backend/src/llvm/llvm_gen_backend.cpp
> > index 512f437..36a6cb3 100644
> > --- a/backend/src/llvm/llvm_gen_backend.cpp
> > +++ b/backend/src/llvm/llvm_gen_backend.cpp
> > @@ -1591,6 +1591,12 @@ namespace gbe
> >            continue;
> >          }
> > 
> > +        if (llvmInfo.isSamplerType()) {
> > +          ctx.input(argName, ir::FunctionArgument::SAMPLER, reg, llvmInfo,
> > getTypeByteSize(unit, type), getAlignmentByte(unit, type), 0);
> > +          (void)ctx.getFunction().getSamplerSet()->append(reg, &ctx);
> > +          continue;
> > +        }
> > +
> >          if (type->isPointerTy() == false)
> >            ctx.input(argName, ir::FunctionArgument::VALUE, reg, llvmInfo,
> > getTypeByteSize(unit, type), getAlignmentByte(unit, type), 0);
> >          else {
> > @@ -3013,7 +3019,7 @@ namespace gbe
> >        // This is not a kernel argument sampler, we need to append it to sampler
> > set,
> >        // and allocate a sampler slot for it.
> >        const ir::Immediate &x = processConstantImm(CPV);
> > -      GBE_ASSERTM(x.getType() == ir::TYPE_U16 || x.getType() ==
> > ir::TYPE_S16, "Invalid sampler type");
> > +      GBE_ASSERTM(x.getType() == ir::TYPE_U32 || x.getType() ==
> > + ir::TYPE_S32, "Invalid sampler type");
> > 
> >        index = ctx.getFunction().getSamplerSet()->append(x.getIntegerValue(),
> > &ctx);
> >      } else {
> > diff --git a/backend/src/llvm/llvm_gen_backend.hpp
> > b/backend/src/llvm/llvm_gen_backend.hpp
> > index ed7a57e..a496c16 100644
> > --- a/backend/src/llvm/llvm_gen_backend.hpp
> > +++ b/backend/src/llvm/llvm_gen_backend.hpp
> > @@ -129,6 +129,7 @@ namespace gbe
> >    /* customized loop unrolling pass. */
> >    llvm::LoopPass *createCustomLoopUnrollPass();  #endif
> > +  llvm::FunctionPass* createSamplerFixPass();
> > 
> >    /*! Add all the function call of ocl to our bitcode. */
> >    llvm::Module* runBitCodeLinker(llvm::Module *mod, bool strictMath); diff
> > --git a/backend/src/llvm/llvm_sampler_fix.cpp
> > b/backend/src/llvm/llvm_sampler_fix.cpp
> > new file mode 100644
> > index 0000000..ec498d0
> > --- /dev/null
> > +++ b/backend/src/llvm/llvm_sampler_fix.cpp
> > @@ -0,0 +1,144 @@
> > +/*
> > + * Copyright © 2012 Intel Corporation
> > + *
> > + * This library is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU Lesser General Public
> > + * License as published by the Free Software Foundation; either
> > + * version 2.1 of the License, or (at your option) any later version.
> > + *
> > + * This library is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> > GNU
> > + * Lesser General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU Lesser General Public
> > + * License along with this library. If not, see
> > <http://www.gnu.org/licenses/>.
> > + *
> > + * Author: Ruiling, Song <ruiling.song at intel.com>
> > + *
> > + * Legalize unsupported integer data type i128/i256/...
> > + * right now, the implementation only consider little-endian system.
> > + *
> > + */
> > +#include "llvm/IR/Instructions.h"
> > +#include "llvm/Pass.h"
> > +#include "llvm/PassManager.h"
> > +
> > +#include "llvm/Config/llvm-config.h"
> > +#include "llvm/ADT/DenseMap.h"
> > +#include "llvm/ADT/PostOrderIterator.h"
> > +#include "llvm/IR/Function.h"
> > +#include "llvm/IR/InstrTypes.h"
> > +#include "llvm/IR/Instructions.h"
> > +#include "llvm/IR/IntrinsicInst.h"
> > +#include "llvm/IR/Module.h"
> > +#include "llvm/Pass.h"
> > +#include "llvm/IR/IRBuilder.h"
> > +#if LLVM_VERSION_MINOR >= 5
> > +#include "llvm/IR/CFG.h"
> > +#else
> > +#include "llvm/Support/CFG.h"
> > +#endif
> > +
> > +#include "llvm/Analysis/ConstantsScanner.h"
> > +
> > +#include "llvm_gen_backend.hpp"
> > +#include "ocl_common_defines.h"
> > +
> > +using namespace llvm;
> > +
> > +namespace gbe {
> > +
> > +  class SamplerFix : public FunctionPass {
> > +  public:
> > +    SamplerFix() : FunctionPass(ID) {
> > +#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 5
> > +
> > +initializeDominatorTreeWrapperPassPass(*PassRegistry::getPassRegistry()
> > +);
> > +#else
> > +      initializeDominatorTreePass(*PassRegistry::getPassRegistry());
> > +#endif
> > +    }
> > +
> > +    bool visitCallInst(CallInst *I) {
> > +      Value *Callee = I->getCalledValue();
> > +      const std::string fnName = Callee->getName();
> > +      bool changed = false;
> > +      Type *boolTy = IntegerType::get(I->getContext(), 1);
> > +      Type *i32Ty = IntegerType::get(I->getContext(), 32);
> > +
> > +      if (fnName.compare("__gen_ocl_sampler_need_fix") == 0) {
> > +
> > +        //  return (((sampler & __CLK_ADDRESS_MASK) ==
> > CLK_ADDRESS_CLAMP) &&
> > +        //          ((sampler & __CLK_FILTER_MASK) == CLK_FILTER_NEAREST));
> > +        bool needFix = true;
> > +        Value *needFixVal;
> > +        if (dyn_cast<ConstantInt>(I->getOperand(0))) {
> > +          const ConstantInt *ci = dyn_cast<ConstantInt>(I->getOperand(0));
> > +          uint32_t samplerInt = ci->getZExtValue();
> > +          needFix = ((samplerInt & __CLK_ADDRESS_MASK) ==
> > CLK_ADDRESS_CLAMP &&
> > +                     (samplerInt & __CLK_FILTER_MASK) == CLK_FILTER_NEAREST);
> > +          needFixVal = ConstantInt::get(boolTy, needFix);
> > +        } else {
> > +          IRBuilder<> Builder(I->getParent());
> > +
> > +          Builder.SetInsertPoint(I);
> > +          Value *addressMask = ConstantInt::get(i32Ty,
> > __CLK_ADDRESS_MASK);
> > +          Value *addressMode = Builder.CreateAnd(I->getOperand(0),
> > addressMask);
> > +          Value *clampInt =  ConstantInt::get(i32Ty, CLK_ADDRESS_CLAMP);
> > +          Value *isClampMode = Builder.CreateICmpEQ(addressMode,
> > clampInt);
> > +          Value *filterMask = ConstantInt::get(i32Ty, __CLK_FILTER_MASK);
> > +          Value *filterMode = Builder.CreateAnd(I->getOperand(0), filterMask);
> > +          Value *nearestInt = ConstantInt::get(i32Ty, CLK_FILTER_NEAREST);
> > +          Value *isNearestMode = Builder.CreateICmpEQ(filterMode,
> > nearestInt);
> > +          needFixVal = Builder.CreateAnd(isClampMode, isNearestMode);
> > +        }
> > +
> > +        I->replaceAllUsesWith(needFixVal);
> > +        changed = true;
> > +      } else if (fnName.compare("__gen_ocl_sampler_need_rounding_fix")
> > + == 0) {
> > +
> > +        //  return ((sampler & CLK_NORMALIZED_COORDS_TRUE) == 0);
> > +        bool needFix = true;
> > +        Value *needFixVal;
> > +        if (dyn_cast<ConstantInt>(I->getOperand(0))) {
> > +          const ConstantInt *ci = dyn_cast<ConstantInt>(I->getOperand(0));
> > +          uint32_t samplerInt = ci->getZExtValue();
> > +          needFix = samplerInt & CLK_NORMALIZED_COORDS_TRUE;
> > +          needFixVal = ConstantInt::get(boolTy, needFix);
> > +        } else {
> > +          IRBuilder<> Builder(I->getParent());
> > +          Builder.SetInsertPoint(I);
> > +          Value *normalizeMask = ConstantInt::get(i32Ty,
> > CLK_NORMALIZED_COORDS_TRUE);
> > +          Value *normalizeMode = Builder.CreateAnd(I->getOperand(0),
> > normalizeMask);
> > +          needFixVal = Builder.CreateICmpEQ(normalizeMode,
> > ConstantInt::get(i32Ty, 0));
> > +        }
> > +        I->replaceAllUsesWith(needFixVal);
> > +        changed = true;
> > +      }
> > +      return changed;
> > +    }
> > +
> > +    bool runOnFunction(Function& F) {
> > +      bool changed = false;
> > +      std::set<Instruction*> deadInsnSet;
> > +      for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ++I) {
> > +        if (dyn_cast<CallInst>(&*I)) {
> > +          if (visitCallInst(dyn_cast<CallInst>(&*I))) {
> > +            changed = true;
> > +            deadInsnSet.insert(&*I);
> > +          }
> > +        }
> > +      }
> > +      for (auto it: deadInsnSet)
> > +        it->eraseFromParent();
> > +      return changed;
> > +    }
> > +
> > +    static char ID;
> > +  };
> > +
> > +  FunctionPass* createSamplerFixPass() {
> > +    return new SamplerFix();
> > +  }
> > +  char SamplerFix::ID = 0;
> > +};
> > diff --git a/backend/src/llvm/llvm_to_gen.cpp
> > b/backend/src/llvm/llvm_to_gen.cpp
> > index e1bf12f..1c247b8 100644
> > --- a/backend/src/llvm/llvm_to_gen.cpp
> > +++ b/backend/src/llvm/llvm_to_gen.cpp
> > @@ -121,6 +121,7 @@ namespace gbe
> >      MPM.add(createTypeBasedAliasAnalysisPass());
> >      MPM.add(createBasicAliasAnalysisPass());
> >      MPM.add(createIntrinsicLoweringPass());
> > +    MPM.add(createSamplerFixPass());
> >      MPM.add(createGlobalOptimizerPass());     // Optimize out global vars
> > 
> >      MPM.add(createIPSCCPPass());              // IP SCCP
> > diff --git a/src/cl_kernel.c b/src/cl_kernel.c index a869515..177cb00 100644
> > --- a/src/cl_kernel.c
> > +++ b/src/cl_kernel.c
> > @@ -114,11 +114,8 @@ cl_kernel_set_arg(cl_kernel k, cl_uint index, size_t
> > sz, const void *value)
> >    arg_sz = interp_kernel_get_arg_size(k->opaque, index);
> > 
> >    if (UNLIKELY(arg_type != GBE_ARG_LOCAL_PTR && arg_sz != sz)) {
> > -    if (arg_sz == 2 && arg_type == GBE_ARG_VALUE && sz ==
> > sizeof(cl_sampler)) {
> > -      /* FIXME, this is a workaround for the case when a kernel arg
> > -         defined a sampler_t but doesn't use it.*/
> > -      arg_type = GBE_ARG_SAMPLER;
> > -    } else
> > +    if (arg_type != GBE_ARG_SAMPLER ||
> > +        (arg_type == GBE_ARG_SAMPLER && sz != sizeof(cl_sampler)))
> >        return CL_INVALID_ARG_SIZE;
> >    }
> > 
> > @@ -182,8 +179,9 @@ cl_kernel_set_arg(cl_kernel k, cl_uint index, size_t sz,
> > const void *value)
> >      k->args[index].sampler = sampler;
> >      cl_set_sampler_arg_slot(k, index, sampler);
> >      offset = interp_kernel_get_curbe_offset(k->opaque,
> > GBE_CURBE_KERNEL_ARGUMENT, index);
> > -    assert(offset + 2 <= k->curbe_sz);
> > -    memcpy(k->curbe + offset, &sampler->clkSamplerValue, 2);
> > +    //assert(arg_sz == 4);
> > +    assert(offset + 4 <= k->curbe_sz);
> > +    memcpy(k->curbe + offset, &sampler->clkSamplerValue, 4);
> >      return CL_SUCCESS;
> >    }
> > 
> > --
> > 1.8.3.2
> > 
> > _______________________________________________
> > Beignet mailing list
> > Beignet at lists.freedesktop.org
> > http://lists.freedesktop.org/mailman/listinfo/beignet
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet