[Beignet] [PATCH 1/3] GBE: switch to CLANG native sampler_t.

Wed Dec 17 23:43:20 PST 2014

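The author and header comment in llvm_sampler_fix.cpp need to be changed:
they name a different author and describe integer legalization, not the
sampler fix pass.
The rest of this patchset LGTM.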

> -----Original Message-----
> From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of
> Zhigang Gong
> Sent: Monday, December 15, 2014 09:02
> To: beignet at lists.freedesktop.org
> Cc: Gong, Zhigang
> Subject: [Beignet] [PATCH 1/3] GBE: switch to CLANG native sampler_t.
> 
> CLANG has had sampler_t support since LLVM 3.3, so let's switch to that type
> rather than the old hacky way. One major problem is the static checking of
> samplers. The Gen platform has some hardware restrictions, and if the sampler
> value is a constant defined on the kernel side, we need to use that value to
> optimize the code path. Since sampler_t is now an opaque type, CLANG doesn't
> support any arithmetic operations on it.
> So we have to introduce a new pass to do this optimization.
> 
> Signed-off-by: Zhigang Gong <zhigang.gong at intel.com>
> ---
>  backend/src/CMakeLists.txt             |   1 +
>  backend/src/ir/function.hpp            |   5 ++
>  backend/src/ir/sampler.cpp             |   6 +-
>  backend/src/libocl/include/ocl_types.h |   4 +-
>  backend/src/libocl/src/ocl_image.cl    |  27 ++++---
>  backend/src/llvm/llvm_gen_backend.cpp  |   8 +-
>  backend/src/llvm/llvm_gen_backend.hpp  |   1 +
>  backend/src/llvm/llvm_sampler_fix.cpp  | 144
> +++++++++++++++++++++++++++++++++
>  backend/src/llvm/llvm_to_gen.cpp       |   1 +
>  src/cl_kernel.c                        |  12 ++-
>  10 files changed, 184 insertions(+), 25 deletions(-)  create mode 100644
> backend/src/llvm/llvm_sampler_fix.cpp
> 
> diff --git a/backend/src/CMakeLists.txt b/backend/src/CMakeLists.txt index
> b4555f1..deba230 100644
> --- a/backend/src/CMakeLists.txt
> +++ b/backend/src/CMakeLists.txt
> @@ -73,6 +73,7 @@ set (GBE_SRC
>      backend/program.cpp
>      backend/program.hpp
>      backend/program.h
> +    llvm/llvm_sampler_fix.cpp
>      llvm/llvm_bitcode_link.cpp
>      llvm/llvm_gen_backend.cpp
>      llvm/llvm_passes.cpp
> diff --git a/backend/src/ir/function.hpp b/backend/src/ir/function.hpp index
> 0f86fef..4aea087 100644
> --- a/backend/src/ir/function.hpp
> +++ b/backend/src/ir/function.hpp
> @@ -204,6 +204,11 @@ namespace ir {
>          return isImage1dT() || isImage1dArrayT() || isImage1dBufferT() ||
>                 isImage2dT() || isImage2dArrayT() || isImage3dT();
>        }
> +
> +      bool isSamplerType() const {
> +        return typeName.compare("sampler_t") == 0;
> +      }
> +
>      };
> 
>      /*! Create a function input argument */ diff --git
> a/backend/src/ir/sampler.cpp b/backend/src/ir/sampler.cpp index
> ba42acb..e4accca 100644
> --- a/backend/src/ir/sampler.cpp
> +++ b/backend/src/ir/sampler.cpp
> @@ -49,11 +49,7 @@ namespace ir {
>      ir::FunctionArgument *arg =  ctx->getFunction().getArg(samplerReg);
>      GBE_ASSERT(arg != NULL);
> 
> -    // XXX As LLVM 3.2/3.1 doesn't have a new data type for the sampler_t,
> we have to fix up the argument
> -    // type here. Once we switch to the LLVM and use the new data type
> sampler_t, we can remove this
> -    // work around.
> -    arg->type = ir::FunctionArgument::SAMPLER;
> -    arg->info.typeName = "sampler_t";
> +    GBE_ASSERT(arg->type == ir::FunctionArgument::SAMPLER);
>      int32_t id = ctx->getFunction().getArgID(arg);
>      GBE_ASSERT(id < (1 << __CLK_SAMPLER_ARG_BITS));
> 
> diff --git a/backend/src/libocl/include/ocl_types.h
> b/backend/src/libocl/include/ocl_types.h
> index 49ac907..7798ee1 100644
> --- a/backend/src/libocl/include/ocl_types.h
> +++ b/backend/src/libocl/include/ocl_types.h
> @@ -87,8 +87,8 @@ DEF(double);
>  // FIXME:
>  // This is a transitional hack to bypass the LLVM 3.3 built-in types.
>  // See the Khronos SPIR specification for handling of these types.
> -#define sampler_t __sampler_t
> -typedef const ushort __sampler_t;
> +//#define sampler_t __sampler_t
> +//typedef const ushort __sampler_t;
> 
>  /////////////////////////////////////////////////////////////////////////////
>  // OpenCL built-in event types
> diff --git a/backend/src/libocl/src/ocl_image.cl
> b/backend/src/libocl/src/ocl_image.cl
> index c4ca2f8..6da8e90 100644
> --- a/backend/src/libocl/src/ocl_image.cl
> +++ b/backend/src/libocl/src/ocl_image.cl
> @@ -136,18 +136,24 @@ GEN_VALIDATE_ARRAY_INDEX(int,
> image1d_buffer_t)  // integer type surfaces correctly with
> CLK_ADDRESS_CLAMP and CLK_FILTER_NEAREST.
>  // The work around is to use a LD message instead of normal sample
> message.
> 
> //////////////////////////////////////////////////////////////////////////////
> /
> +
> +bool __gen_ocl_sampler_need_fix(sampler_t);
> +bool __gen_ocl_sampler_need_rounding_fix(sampler_t);
> +
>  bool __gen_sampler_need_fix(const sampler_t sampler)  {
> -  return (((sampler & __CLK_ADDRESS_MASK) == CLK_ADDRESS_CLAMP)
> &&
> -          ((sampler & __CLK_FILTER_MASK) == CLK_FILTER_NEAREST));
> +  return __gen_ocl_sampler_need_fix(sampler);
> +
> +//  return (((sampler & __CLK_ADDRESS_MASK) == CLK_ADDRESS_CLAMP)
> &&
> +//          ((sampler & __CLK_FILTER_MASK) == CLK_FILTER_NEAREST));
>  }
> 
>  bool __gen_sampler_need_rounding_fix(const sampler_t sampler)  {
> -  return ((sampler & CLK_NORMALIZED_COORDS_TRUE) == 0);
> +  return __gen_ocl_sampler_need_rounding_fix(sampler);
> +//  return ((sampler & CLK_NORMALIZED_COORDS_TRUE) == 0);
>  }
> 
> -
>  INLINE_OVERLOADABLE float __gen_fixup_float_coord(float tmpCoord)  {
>    if (tmpCoord < 0 && tmpCoord > -0x1p-20f) @@ -311,7 +317,7 @@
> INLINE_OVERLOADABLE float3 __gen_fixup_neg_boundary(float3 coord)
>              __gen_sampler_need_rounding_fix(sampler))                         \
>            tmpCoord = __gen_fixup_float_coord(tmpCoord);                       \
>          if (int_clamping_fix) {                                               \
> -            if (sampler & CLK_NORMALIZED_COORDS_TRUE)                         \
> +            if (!__gen_sampler_need_rounding_fix(sampler))                    \
>                tmpCoord = __gen_denormalize_coord(cl_image, tmpCoord);         \
>              tmpCoord = __gen_fixup_neg_boundary(tmpCoord);                    \
>              return __gen_ocl_read_image ##suffix(                             \
> @@ -328,9 +334,10 @@ INLINE_OVERLOADABLE float3
> __gen_fixup_neg_boundary(float3 coord)
>                                                 coord_type coord)              \
>    {                                                                           \
>      coord = __gen_validate_array_index(coord, cl_image);                      \
> +    sampler_t defaultSampler = CLK_NORMALIZED_COORDS_FALSE |
> CLK_ADDRESS_NONE \
> +                               | CLK_FILTER_NEAREST;                          \
>      return __gen_ocl_read_image ##suffix(                                     \
> -             cl_image, CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE
> \
> -             | CLK_FILTER_NEAREST, coord, 0);                                 \
> +             cl_image, defaultSampler, coord, 0);                             \
>    }
> 
>  #define DECL_WRITE_IMAGE(image_type, image_data_type, suffix,
> coord_type)     \
> @@ -419,15 +426,15 @@ INLINE_OVERLOADABLE int4
> __gen_fixup_1darray_coord(int2 coord, image1d_array_t i
>              __gen_sampler_need_rounding_fix(sampler))                         \
>            tmpCoord = __gen_fixup_float_coord(tmpCoord);                       \
>          if (int_clamping_fix) {                                               \
> -            if (sampler & CLK_NORMALIZED_COORDS_TRUE)                         \
> +            if (!__gen_sampler_need_rounding_fix(sampler))                    \
>                tmpCoord = __gen_denormalize_coord(cl_image, tmpCoord);         \
>              float4 newCoord = __gen_fixup_1darray_coord(tmpCoord, cl_image);
> \
>              return __gen_ocl_read_image ##suffix(                             \
> -                     cl_image, sampler, newCoord, 2);                       \
> +                     cl_image, sampler, newCoord, 2);                         \
>          }                                                                     \
>        }                                                                       \
>      }                                                                         \
> -    return  __gen_ocl_read_image ##suffix(cl_image, sampler, tmpCoord, 0);
> \
> +    return  __gen_ocl_read_image ##suffix(cl_image, sampler, tmpCoord, 0);
> \
>    }
> 
>  #define DECL_IMAGE_1DArray(int_clamping_fix, image_data_type, suffix)
> \
> diff --git a/backend/src/llvm/llvm_gen_backend.cpp
> b/backend/src/llvm/llvm_gen_backend.cpp
> index 512f437..36a6cb3 100644
> --- a/backend/src/llvm/llvm_gen_backend.cpp
> +++ b/backend/src/llvm/llvm_gen_backend.cpp
> @@ -1591,6 +1591,12 @@ namespace gbe
>            continue;
>          }
> 
> +        if (llvmInfo.isSamplerType()) {
> +          ctx.input(argName, ir::FunctionArgument::SAMPLER, reg, llvmInfo,
> getTypeByteSize(unit, type), getAlignmentByte(unit, type), 0);
> +          (void)ctx.getFunction().getSamplerSet()->append(reg, &ctx);
> +          continue;
> +        }
> +
>          if (type->isPointerTy() == false)
>            ctx.input(argName, ir::FunctionArgument::VALUE, reg, llvmInfo,
> getTypeByteSize(unit, type), getAlignmentByte(unit, type), 0);
>          else {
> @@ -3013,7 +3019,7 @@ namespace gbe
>        // This is not a kernel argument sampler, we need to append it to sampler
> set,
>        // and allocate a sampler slot for it.
>        const ir::Immediate &x = processConstantImm(CPV);
> -      GBE_ASSERTM(x.getType() == ir::TYPE_U16 || x.getType() ==
> ir::TYPE_S16, "Invalid sampler type");
> +      GBE_ASSERTM(x.getType() == ir::TYPE_U32 || x.getType() ==
> + ir::TYPE_S32, "Invalid sampler type");
> 
>        index = ctx.getFunction().getSamplerSet()->append(x.getIntegerValue(),
> &ctx);
>      } else {
> diff --git a/backend/src/llvm/llvm_gen_backend.hpp
> b/backend/src/llvm/llvm_gen_backend.hpp
> index ed7a57e..a496c16 100644
> --- a/backend/src/llvm/llvm_gen_backend.hpp
> +++ b/backend/src/llvm/llvm_gen_backend.hpp
> @@ -129,6 +129,7 @@ namespace gbe
>    /* customized loop unrolling pass. */
>    llvm::LoopPass *createCustomLoopUnrollPass();  #endif
> +  llvm::FunctionPass* createSamplerFixPass();
> 
>    /*! Add all the function call of ocl to our bitcode. */
>    llvm::Module* runBitCodeLinker(llvm::Module *mod, bool strictMath); diff
> --git a/backend/src/llvm/llvm_sampler_fix.cpp
> b/backend/src/llvm/llvm_sampler_fix.cpp
> new file mode 100644
> index 0000000..ec498d0
> --- /dev/null
> +++ b/backend/src/llvm/llvm_sampler_fix.cpp
> @@ -0,0 +1,144 @@
> +/*
> + * Copyright © 2012 Intel Corporation
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * This library is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this library. If not, see
> <http://www.gnu.org/licenses/>.
> + *
> + * Author: Ruiling, Song <ruiling.song at intel.com>
> + *
> + * Legalize unsupported integer data type i128/i256/...
> + * right now, the implementation only consider little-endian system.
> + *
> + */
> +#include "llvm/IR/Instructions.h"
> +#include "llvm/Pass.h"
> +#include "llvm/PassManager.h"
> +
> +#include "llvm/Config/llvm-config.h"
> +#include "llvm/ADT/DenseMap.h"
> +#include "llvm/ADT/PostOrderIterator.h"
> +#include "llvm/IR/Function.h"
> +#include "llvm/IR/InstrTypes.h"
> +#include "llvm/IR/Instructions.h"
> +#include "llvm/IR/IntrinsicInst.h"
> +#include "llvm/IR/Module.h"
> +#include "llvm/Pass.h"
> +#include "llvm/IR/IRBuilder.h"
> +#if LLVM_VERSION_MINOR >= 5
> +#include "llvm/IR/CFG.h"
> +#else
> +#include "llvm/Support/CFG.h"
> +#endif
> +
> +#include "llvm/Analysis/ConstantsScanner.h"
> +
> +#include "llvm_gen_backend.hpp"
> +#include "ocl_common_defines.h"
> +
> +using namespace llvm;
> +
> +namespace gbe {
> +
> +  class SamplerFix : public FunctionPass {
> +  public:
> +    SamplerFix() : FunctionPass(ID) {
> +#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 5
> +
> +initializeDominatorTreeWrapperPassPass(*PassRegistry::getPassRegistry()
> +);
> +#else
> +      initializeDominatorTreePass(*PassRegistry::getPassRegistry());
> +#endif
> +    }
> +
> +    bool visitCallInst(CallInst *I) {
> +      Value *Callee = I->getCalledValue();
> +      const std::string fnName = Callee->getName();
> +      bool changed = false;
> +      Type *boolTy = IntegerType::get(I->getContext(), 1);
> +      Type *i32Ty = IntegerType::get(I->getContext(), 32);
> +
> +      if (fnName.compare("__gen_ocl_sampler_need_fix") == 0) {
> +
> +        //  return (((sampler & __CLK_ADDRESS_MASK) ==
> CLK_ADDRESS_CLAMP) &&
> +        //          ((sampler & __CLK_FILTER_MASK) == CLK_FILTER_NEAREST));
> +        bool needFix = true;
> +        Value *needFixVal;
> +        if (dyn_cast<ConstantInt>(I->getOperand(0))) {
> +          const ConstantInt *ci = dyn_cast<ConstantInt>(I->getOperand(0));
> +          uint32_t samplerInt = ci->getZExtValue();
> +          needFix = ((samplerInt & __CLK_ADDRESS_MASK) ==
> CLK_ADDRESS_CLAMP &&
> +                     (samplerInt & __CLK_FILTER_MASK) == CLK_FILTER_NEAREST);
> +          needFixVal = ConstantInt::get(boolTy, needFix);
> +        } else {
> +          IRBuilder<> Builder(I->getParent());
> +
> +          Builder.SetInsertPoint(I);
> +          Value *addressMask = ConstantInt::get(i32Ty,
> __CLK_ADDRESS_MASK);
> +          Value *addressMode = Builder.CreateAnd(I->getOperand(0),
> addressMask);
> +          Value *clampInt =  ConstantInt::get(i32Ty, CLK_ADDRESS_CLAMP);
> +          Value *isClampMode = Builder.CreateICmpEQ(addressMode,
> clampInt);
> +          Value *filterMask = ConstantInt::get(i32Ty, __CLK_FILTER_MASK);
> +          Value *filterMode = Builder.CreateAnd(I->getOperand(0), filterMask);
> +          Value *nearestInt = ConstantInt::get(i32Ty, CLK_FILTER_NEAREST);
> +          Value *isNearestMode = Builder.CreateICmpEQ(filterMode,
> nearestInt);
> +          needFixVal = Builder.CreateAnd(isClampMode, isNearestMode);
> +        }
> +
> +        I->replaceAllUsesWith(needFixVal);
> +        changed = true;
> +      } else if (fnName.compare("__gen_ocl_sampler_need_rounding_fix")
> + == 0) {
> +
> +        //  return ((sampler & CLK_NORMALIZED_COORDS_TRUE) == 0);
> +        bool needFix = true;
> +        Value *needFixVal;
> +        if (dyn_cast<ConstantInt>(I->getOperand(0))) {
> +          const ConstantInt *ci = dyn_cast<ConstantInt>(I->getOperand(0));
> +          uint32_t samplerInt = ci->getZExtValue();
> +          needFix = samplerInt & CLK_NORMALIZED_COORDS_TRUE;
> +          needFixVal = ConstantInt::get(boolTy, needFix);
> +        } else {
> +          IRBuilder<> Builder(I->getParent());
> +          Builder.SetInsertPoint(I);
> +          Value *normalizeMask = ConstantInt::get(i32Ty,
> CLK_NORMALIZED_COORDS_TRUE);
> +          Value *normalizeMode = Builder.CreateAnd(I->getOperand(0),
> normalizeMask);
> +          needFixVal = Builder.CreateICmpEQ(normalizeMode,
> ConstantInt::get(i32Ty, 0));
> +        }
> +        I->replaceAllUsesWith(needFixVal);
> +        changed = true;
> +      }
> +      return changed;
> +    }
> +
> +    bool runOnFunction(Function& F) {
> +      bool changed = false;
> +      std::set<Instruction*> deadInsnSet;
> +      for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ++I) {
> +        if (dyn_cast<CallInst>(&*I)) {
> +          if (visitCallInst(dyn_cast<CallInst>(&*I))) {
> +            changed = true;
> +            deadInsnSet.insert(&*I);
> +          }
> +        }
> +      }
> +      for (auto it: deadInsnSet)
> +        it->eraseFromParent();
> +      return changed;
> +    }
> +
> +    static char ID;
> +  };
> +
> +  FunctionPass* createSamplerFixPass() {
> +    return new SamplerFix();
> +  }
> +  char SamplerFix::ID = 0;
> +};
> diff --git a/backend/src/llvm/llvm_to_gen.cpp
> b/backend/src/llvm/llvm_to_gen.cpp
> index e1bf12f..1c247b8 100644
> --- a/backend/src/llvm/llvm_to_gen.cpp
> +++ b/backend/src/llvm/llvm_to_gen.cpp
> @@ -121,6 +121,7 @@ namespace gbe
>      MPM.add(createTypeBasedAliasAnalysisPass());
>      MPM.add(createBasicAliasAnalysisPass());
>      MPM.add(createIntrinsicLoweringPass());
> +    MPM.add(createSamplerFixPass());
>      MPM.add(createGlobalOptimizerPass());     // Optimize out global vars
> 
>      MPM.add(createIPSCCPPass());              // IP SCCP
> diff --git a/src/cl_kernel.c b/src/cl_kernel.c index a869515..177cb00 100644
> --- a/src/cl_kernel.c
> +++ b/src/cl_kernel.c
> @@ -114,11 +114,8 @@ cl_kernel_set_arg(cl_kernel k, cl_uint index, size_t
> sz, const void *value)
>    arg_sz = interp_kernel_get_arg_size(k->opaque, index);
> 
>    if (UNLIKELY(arg_type != GBE_ARG_LOCAL_PTR && arg_sz != sz)) {
> -    if (arg_sz == 2 && arg_type == GBE_ARG_VALUE && sz ==
> sizeof(cl_sampler)) {
> -      /* FIXME, this is a workaround for the case when a kernel arg
> -         defined a sampler_t but doesn't use it.*/
> -      arg_type = GBE_ARG_SAMPLER;
> -    } else
> +    if (arg_type != GBE_ARG_SAMPLER ||
> +        (arg_type == GBE_ARG_SAMPLER && sz != sizeof(cl_sampler)))
>        return CL_INVALID_ARG_SIZE;
>    }
> 
> @@ -182,8 +179,9 @@ cl_kernel_set_arg(cl_kernel k, cl_uint index, size_t sz,
> const void *value)
>      k->args[index].sampler = sampler;
>      cl_set_sampler_arg_slot(k, index, sampler);
>      offset = interp_kernel_get_curbe_offset(k->opaque,
> GBE_CURBE_KERNEL_ARGUMENT, index);
> -    assert(offset + 2 <= k->curbe_sz);
> -    memcpy(k->curbe + offset, &sampler->clkSamplerValue, 2);
> +    //assert(arg_sz == 4);
> +    assert(offset + 4 <= k->curbe_sz);
> +    memcpy(k->curbe + offset, &sampler->clkSamplerValue, 4);
>      return CL_SUCCESS;
>    }
> 
> --
> 1.8.3.2
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet