[Beignet] [PATCH 1/3] GBE: switch to CLANG native sampler_t.
Zhigang Gong
zhigang.gong at linux.intel.com
Wed Dec 17 23:01:47 PST 2014
Thanks for the comment, will fix those and push latter.
On Thu, Dec 18, 2014 at 07:43:20AM +0000, Yang, Rong R wrote:
> Need change llvm_sampler_fix.cpp's author and comment.
> The others of this patchset LGTM.
>
> > -----Original Message-----
> > From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of
> > Zhigang Gong
> > Sent: Monday, December 15, 2014 09:02
> > To: beignet at lists.freedesktop.org
> > Cc: Gong, Zhigang
> > Subject: [Beignet] [PATCH 1/3] GBE: switch to CLANG native sampler_t.
> >
> > CLANG has sampler_t support since LLVM 3.3, let's switch to that type rather
> > than the old hacky way. One major problem is the sampler static checking. As
> > Gen platform has some hardware restrication and if the sampler value is a
> > const defined at kernel side, we need to use the value to optimize the code
> > path. Now the sampler_t becomes an obaque type now, the CLANG doesn't
> > support any arithmatic operations on it.
> > So we have to introduce a new pass to do this optimization.
> >
> > Signed-off-by: Zhigang Gong <zhigang.gong at intel.com>
> > ---
> > backend/src/CMakeLists.txt | 1 +
> > backend/src/ir/function.hpp | 5 ++
> > backend/src/ir/sampler.cpp | 6 +-
> > backend/src/libocl/include/ocl_types.h | 4 +-
> > backend/src/libocl/src/ocl_image.cl | 27 ++++---
> > backend/src/llvm/llvm_gen_backend.cpp | 8 +-
> > backend/src/llvm/llvm_gen_backend.hpp | 1 +
> > backend/src/llvm/llvm_sampler_fix.cpp | 144
> > +++++++++++++++++++++++++++++++++
> > backend/src/llvm/llvm_to_gen.cpp | 1 +
> > src/cl_kernel.c | 12 ++-
> > 10 files changed, 184 insertions(+), 25 deletions(-) create mode 100644
> > backend/src/llvm/llvm_sampler_fix.cpp
> >
> > diff --git a/backend/src/CMakeLists.txt b/backend/src/CMakeLists.txt index
> > b4555f1..deba230 100644
> > --- a/backend/src/CMakeLists.txt
> > +++ b/backend/src/CMakeLists.txt
> > @@ -73,6 +73,7 @@ set (GBE_SRC
> > backend/program.cpp
> > backend/program.hpp
> > backend/program.h
> > + llvm/llvm_sampler_fix.cpp
> > llvm/llvm_bitcode_link.cpp
> > llvm/llvm_gen_backend.cpp
> > llvm/llvm_passes.cpp
> > diff --git a/backend/src/ir/function.hpp b/backend/src/ir/function.hpp index
> > 0f86fef..4aea087 100644
> > --- a/backend/src/ir/function.hpp
> > +++ b/backend/src/ir/function.hpp
> > @@ -204,6 +204,11 @@ namespace ir {
> > return isImage1dT() || isImage1dArrayT() || isImage1dBufferT() ||
> > isImage2dT() || isImage2dArrayT() || isImage3dT();
> > }
> > +
> > + bool isSamplerType() const {
> > + return typeName.compare("sampler_t") == 0;
> > + }
> > +
> > };
> >
> > /*! Create a function input argument */ diff --git
> > a/backend/src/ir/sampler.cpp b/backend/src/ir/sampler.cpp index
> > ba42acb..e4accca 100644
> > --- a/backend/src/ir/sampler.cpp
> > +++ b/backend/src/ir/sampler.cpp
> > @@ -49,11 +49,7 @@ namespace ir {
> > ir::FunctionArgument *arg = ctx->getFunction().getArg(samplerReg);
> > GBE_ASSERT(arg != NULL);
> >
> > - // XXX As LLVM 3.2/3.1 doesn't have a new data type for the sampler_t,
> > we have to fix up the argument
> > - // type here. Once we switch to the LLVM and use the new data type
> > sampler_t, we can remove this
> > - // work around.
> > - arg->type = ir::FunctionArgument::SAMPLER;
> > - arg->info.typeName = "sampler_t";
> > + GBE_ASSERT(arg->type == ir::FunctionArgument::SAMPLER);
> > int32_t id = ctx->getFunction().getArgID(arg);
> > GBE_ASSERT(id < (1 << __CLK_SAMPLER_ARG_BITS));
> >
> > diff --git a/backend/src/libocl/include/ocl_types.h
> > b/backend/src/libocl/include/ocl_types.h
> > index 49ac907..7798ee1 100644
> > --- a/backend/src/libocl/include/ocl_types.h
> > +++ b/backend/src/libocl/include/ocl_types.h
> > @@ -87,8 +87,8 @@ DEF(double);
> > // FIXME:
> > // This is a transitional hack to bypass the LLVM 3.3 built-in types.
> > // See the Khronos SPIR specification for handling of these types.
> > -#define sampler_t __sampler_t
> > -typedef const ushort __sampler_t;
> > +//#define sampler_t __sampler_t
> > +//typedef const ushort __sampler_t;
> >
> > /////////////////////////////////////////////////////////////////////////////
> > // OpenCL built-in event types
> > diff --git a/backend/src/libocl/src/ocl_image.cl
> > b/backend/src/libocl/src/ocl_image.cl
> > index c4ca2f8..6da8e90 100644
> > --- a/backend/src/libocl/src/ocl_image.cl
> > +++ b/backend/src/libocl/src/ocl_image.cl
> > @@ -136,18 +136,24 @@ GEN_VALIDATE_ARRAY_INDEX(int,
> > image1d_buffer_t) // integer type surfaces correctly with
> > CLK_ADDRESS_CLAMP and CLK_FILTER_NEAREST.
> > // The work around is to use a LD message instead of normal sample
> > message.
> >
> > //////////////////////////////////////////////////////////////////////////////
> > /
> > +
> > +bool __gen_ocl_sampler_need_fix(sampler_t);
> > +bool __gen_ocl_sampler_need_rounding_fix(sampler_t);
> > +
> > bool __gen_sampler_need_fix(const sampler_t sampler) {
> > - return (((sampler & __CLK_ADDRESS_MASK) == CLK_ADDRESS_CLAMP)
> > &&
> > - ((sampler & __CLK_FILTER_MASK) == CLK_FILTER_NEAREST));
> > + return __gen_ocl_sampler_need_fix(sampler);
> > +
> > +// return (((sampler & __CLK_ADDRESS_MASK) == CLK_ADDRESS_CLAMP)
> > &&
> > +// ((sampler & __CLK_FILTER_MASK) == CLK_FILTER_NEAREST));
> > }
> >
> > bool __gen_sampler_need_rounding_fix(const sampler_t sampler) {
> > - return ((sampler & CLK_NORMALIZED_COORDS_TRUE) == 0);
> > + return __gen_ocl_sampler_need_rounding_fix(sampler);
> > +// return ((sampler & CLK_NORMALIZED_COORDS_TRUE) == 0);
> > }
> >
> > -
> > INLINE_OVERLOADABLE float __gen_fixup_float_coord(float tmpCoord) {
> > if (tmpCoord < 0 && tmpCoord > -0x1p-20f) @@ -311,7 +317,7 @@
> > INLINE_OVERLOADABLE float3 __gen_fixup_neg_boundary(float3 coord)
> > __gen_sampler_need_rounding_fix(sampler)) \
> > tmpCoord = __gen_fixup_float_coord(tmpCoord); \
> > if (int_clamping_fix) { \
> > - if (sampler & CLK_NORMALIZED_COORDS_TRUE) \
> > + if (!__gen_sampler_need_rounding_fix(sampler)) \
> > tmpCoord = __gen_denormalize_coord(cl_image, tmpCoord); \
> > tmpCoord = __gen_fixup_neg_boundary(tmpCoord); \
> > return __gen_ocl_read_image ##suffix( \
> > @@ -328,9 +334,10 @@ INLINE_OVERLOADABLE float3
> > __gen_fixup_neg_boundary(float3 coord)
> > coord_type coord) \
> > { \
> > coord = __gen_validate_array_index(coord, cl_image); \
> > + sampler_t defaultSampler = CLK_NORMALIZED_COORDS_FALSE |
> > CLK_ADDRESS_NONE \
> > + | CLK_FILTER_NEAREST; \
> > return __gen_ocl_read_image ##suffix( \
> > - cl_image, CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE
> > \
> > - | CLK_FILTER_NEAREST, coord, 0); \
> > + cl_image, defaultSampler, coord, 0); \
> > }
> >
> > #define DECL_WRITE_IMAGE(image_type, image_data_type, suffix,
> > coord_type) \
> > @@ -419,15 +426,15 @@ INLINE_OVERLOADABLE int4
> > __gen_fixup_1darray_coord(int2 coord, image1d_array_t i
> > __gen_sampler_need_rounding_fix(sampler)) \
> > tmpCoord = __gen_fixup_float_coord(tmpCoord); \
> > if (int_clamping_fix) { \
> > - if (sampler & CLK_NORMALIZED_COORDS_TRUE) \
> > + if (!__gen_sampler_need_rounding_fix(sampler)) \
> > tmpCoord = __gen_denormalize_coord(cl_image, tmpCoord); \
> > float4 newCoord = __gen_fixup_1darray_coord(tmpCoord, cl_image);
> > \
> > return __gen_ocl_read_image ##suffix( \
> > - cl_image, sampler, newCoord, 2); \
> > + cl_image, sampler, newCoord, 2); \
> > } \
> > } \
> > } \
> > - return __gen_ocl_read_image ##suffix(cl_image, sampler, tmpCoord, 0);
> > \
> > + return __gen_ocl_read_image ##suffix(cl_image, sampler, tmpCoord, 0);
> > \
> > }
> >
> > #define DECL_IMAGE_1DArray(int_clamping_fix, image_data_type, suffix)
> > \
> > diff --git a/backend/src/llvm/llvm_gen_backend.cpp
> > b/backend/src/llvm/llvm_gen_backend.cpp
> > index 512f437..36a6cb3 100644
> > --- a/backend/src/llvm/llvm_gen_backend.cpp
> > +++ b/backend/src/llvm/llvm_gen_backend.cpp
> > @@ -1591,6 +1591,12 @@ namespace gbe
> > continue;
> > }
> >
> > + if (llvmInfo.isSamplerType()) {
> > + ctx.input(argName, ir::FunctionArgument::SAMPLER, reg, llvmInfo,
> > getTypeByteSize(unit, type), getAlignmentByte(unit, type), 0);
> > + (void)ctx.getFunction().getSamplerSet()->append(reg, &ctx);
> > + continue;
> > + }
> > +
> > if (type->isPointerTy() == false)
> > ctx.input(argName, ir::FunctionArgument::VALUE, reg, llvmInfo,
> > getTypeByteSize(unit, type), getAlignmentByte(unit, type), 0);
> > else {
> > @@ -3013,7 +3019,7 @@ namespace gbe
> > // This is not a kernel argument sampler, we need to append it to sampler
> > set,
> > // and allocate a sampler slot for it.
> > const ir::Immediate &x = processConstantImm(CPV);
> > - GBE_ASSERTM(x.getType() == ir::TYPE_U16 || x.getType() ==
> > ir::TYPE_S16, "Invalid sampler type");
> > + GBE_ASSERTM(x.getType() == ir::TYPE_U32 || x.getType() ==
> > + ir::TYPE_S32, "Invalid sampler type");
> >
> > index = ctx.getFunction().getSamplerSet()->append(x.getIntegerValue(),
> > &ctx);
> > } else {
> > diff --git a/backend/src/llvm/llvm_gen_backend.hpp
> > b/backend/src/llvm/llvm_gen_backend.hpp
> > index ed7a57e..a496c16 100644
> > --- a/backend/src/llvm/llvm_gen_backend.hpp
> > +++ b/backend/src/llvm/llvm_gen_backend.hpp
> > @@ -129,6 +129,7 @@ namespace gbe
> > /* customized loop unrolling pass. */
> > llvm::LoopPass *createCustomLoopUnrollPass(); #endif
> > + llvm::FunctionPass* createSamplerFixPass();
> >
> > /*! Add all the function call of ocl to our bitcode. */
> > llvm::Module* runBitCodeLinker(llvm::Module *mod, bool strictMath); diff
> > --git a/backend/src/llvm/llvm_sampler_fix.cpp
> > b/backend/src/llvm/llvm_sampler_fix.cpp
> > new file mode 100644
> > index 0000000..ec498d0
> > --- /dev/null
> > +++ b/backend/src/llvm/llvm_sampler_fix.cpp
> > @@ -0,0 +1,144 @@
> > +/*
> > + * Copyright © 2012 Intel Corporation
> > + *
> > + * This library is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU Lesser General Public
> > + * License as published by the Free Software Foundation; either
> > + * version 2.1 of the License, or (at your option) any later version.
> > + *
> > + * This library is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> > GNU
> > + * Lesser General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU Lesser General Public
> > + * License along with this library. If not, see
> > <http://www.gnu.org/licenses/>.
> > + *
> > + * Author: Ruiling, Song <ruiling.song at intel.com>
> > + *
> > + * Legalize unsupported integer data type i128/i256/...
> > + * right now, the implementation only consider little-endian system.
> > + *
> > + */
> > +#include "llvm/IR/Instructions.h"
> > +#include "llvm/Pass.h"
> > +#include "llvm/PassManager.h"
> > +
> > +#include "llvm/Config/llvm-config.h"
> > +#include "llvm/ADT/DenseMap.h"
> > +#include "llvm/ADT/PostOrderIterator.h"
> > +#include "llvm/IR/Function.h"
> > +#include "llvm/IR/InstrTypes.h"
> > +#include "llvm/IR/Instructions.h"
> > +#include "llvm/IR/IntrinsicInst.h"
> > +#include "llvm/IR/Module.h"
> > +#include "llvm/Pass.h"
> > +#include "llvm/IR/IRBuilder.h"
> > +#if LLVM_VERSION_MINOR >= 5
> > +#include "llvm/IR/CFG.h"
> > +#else
> > +#include "llvm/Support/CFG.h"
> > +#endif
> > +
> > +#include "llvm/Analysis/ConstantsScanner.h"
> > +
> > +#include "llvm_gen_backend.hpp"
> > +#include "ocl_common_defines.h"
> > +
> > +using namespace llvm;
> > +
> > +namespace gbe {
> > +
> > + class SamplerFix : public FunctionPass {
> > + public:
> > + SamplerFix() : FunctionPass(ID) {
> > +#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 5
> > +
> > +initializeDominatorTreeWrapperPassPass(*PassRegistry::getPassRegistry()
> > +);
> > +#else
> > + initializeDominatorTreePass(*PassRegistry::getPassRegistry());
> > +#endif
> > + }
> > +
> > + bool visitCallInst(CallInst *I) {
> > + Value *Callee = I->getCalledValue();
> > + const std::string fnName = Callee->getName();
> > + bool changed = false;
> > + Type *boolTy = IntegerType::get(I->getContext(), 1);
> > + Type *i32Ty = IntegerType::get(I->getContext(), 32);
> > +
> > + if (fnName.compare("__gen_ocl_sampler_need_fix") == 0) {
> > +
> > + // return (((sampler & __CLK_ADDRESS_MASK) ==
> > CLK_ADDRESS_CLAMP) &&
> > + // ((sampler & __CLK_FILTER_MASK) == CLK_FILTER_NEAREST));
> > + bool needFix = true;
> > + Value *needFixVal;
> > + if (dyn_cast<ConstantInt>(I->getOperand(0))) {
> > + const ConstantInt *ci = dyn_cast<ConstantInt>(I->getOperand(0));
> > + uint32_t samplerInt = ci->getZExtValue();
> > + needFix = ((samplerInt & __CLK_ADDRESS_MASK) ==
> > CLK_ADDRESS_CLAMP &&
> > + (samplerInt & __CLK_FILTER_MASK) == CLK_FILTER_NEAREST);
> > + needFixVal = ConstantInt::get(boolTy, needFix);
> > + } else {
> > + IRBuilder<> Builder(I->getParent());
> > +
> > + Builder.SetInsertPoint(I);
> > + Value *addressMask = ConstantInt::get(i32Ty,
> > __CLK_ADDRESS_MASK);
> > + Value *addressMode = Builder.CreateAnd(I->getOperand(0),
> > addressMask);
> > + Value *clampInt = ConstantInt::get(i32Ty, CLK_ADDRESS_CLAMP);
> > + Value *isClampMode = Builder.CreateICmpEQ(addressMode,
> > clampInt);
> > + Value *filterMask = ConstantInt::get(i32Ty, __CLK_FILTER_MASK);
> > + Value *filterMode = Builder.CreateAnd(I->getOperand(0), filterMask);
> > + Value *nearestInt = ConstantInt::get(i32Ty, CLK_FILTER_NEAREST);
> > + Value *isNearestMode = Builder.CreateICmpEQ(filterMode,
> > nearestInt);
> > + needFixVal = Builder.CreateAnd(isClampMode, isNearestMode);
> > + }
> > +
> > + I->replaceAllUsesWith(needFixVal);
> > + changed = true;
> > + } else if (fnName.compare("__gen_ocl_sampler_need_rounding_fix")
> > + == 0) {
> > +
> > + // return ((sampler & CLK_NORMALIZED_COORDS_TRUE) == 0);
> > + bool needFix = true;
> > + Value *needFixVal;
> > + if (dyn_cast<ConstantInt>(I->getOperand(0))) {
> > + const ConstantInt *ci = dyn_cast<ConstantInt>(I->getOperand(0));
> > + uint32_t samplerInt = ci->getZExtValue();
> > + needFix = samplerInt & CLK_NORMALIZED_COORDS_TRUE;
> > + needFixVal = ConstantInt::get(boolTy, needFix);
> > + } else {
> > + IRBuilder<> Builder(I->getParent());
> > + Builder.SetInsertPoint(I);
> > + Value *normalizeMask = ConstantInt::get(i32Ty,
> > CLK_NORMALIZED_COORDS_TRUE);
> > + Value *normalizeMode = Builder.CreateAnd(I->getOperand(0),
> > normalizeMask);
> > + needFixVal = Builder.CreateICmpEQ(normalizeMode,
> > ConstantInt::get(i32Ty, 0));
> > + }
> > + I->replaceAllUsesWith(needFixVal);
> > + changed = true;
> > + }
> > + return changed;
> > + }
> > +
> > + bool runOnFunction(Function& F) {
> > + bool changed = false;
> > + std::set<Instruction*> deadInsnSet;
> > + for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ++I) {
> > + if (dyn_cast<CallInst>(&*I)) {
> > + if (visitCallInst(dyn_cast<CallInst>(&*I))) {
> > + changed = true;
> > + deadInsnSet.insert(&*I);
> > + }
> > + }
> > + }
> > + for (auto it: deadInsnSet)
> > + it->eraseFromParent();
> > + return changed;
> > + }
> > +
> > + static char ID;
> > + };
> > +
> > + FunctionPass* createSamplerFixPass() {
> > + return new SamplerFix();
> > + }
> > + char SamplerFix::ID = 0;
> > +};
> > diff --git a/backend/src/llvm/llvm_to_gen.cpp
> > b/backend/src/llvm/llvm_to_gen.cpp
> > index e1bf12f..1c247b8 100644
> > --- a/backend/src/llvm/llvm_to_gen.cpp
> > +++ b/backend/src/llvm/llvm_to_gen.cpp
> > @@ -121,6 +121,7 @@ namespace gbe
> > MPM.add(createTypeBasedAliasAnalysisPass());
> > MPM.add(createBasicAliasAnalysisPass());
> > MPM.add(createIntrinsicLoweringPass());
> > + MPM.add(createSamplerFixPass());
> > MPM.add(createGlobalOptimizerPass()); // Optimize out global vars
> >
> > MPM.add(createIPSCCPPass()); // IP SCCP
> > diff --git a/src/cl_kernel.c b/src/cl_kernel.c index a869515..177cb00 100644
> > --- a/src/cl_kernel.c
> > +++ b/src/cl_kernel.c
> > @@ -114,11 +114,8 @@ cl_kernel_set_arg(cl_kernel k, cl_uint index, size_t
> > sz, const void *value)
> > arg_sz = interp_kernel_get_arg_size(k->opaque, index);
> >
> > if (UNLIKELY(arg_type != GBE_ARG_LOCAL_PTR && arg_sz != sz)) {
> > - if (arg_sz == 2 && arg_type == GBE_ARG_VALUE && sz ==
> > sizeof(cl_sampler)) {
> > - /* FIXME, this is a workaround for the case when a kernel arg
> > - defined a sampler_t but doesn't use it.*/
> > - arg_type = GBE_ARG_SAMPLER;
> > - } else
> > + if (arg_type != GBE_ARG_SAMPLER ||
> > + (arg_type == GBE_ARG_SAMPLER && sz != sizeof(cl_sampler)))
> > return CL_INVALID_ARG_SIZE;
> > }
> >
> > @@ -182,8 +179,9 @@ cl_kernel_set_arg(cl_kernel k, cl_uint index, size_t sz,
> > const void *value)
> > k->args[index].sampler = sampler;
> > cl_set_sampler_arg_slot(k, index, sampler);
> > offset = interp_kernel_get_curbe_offset(k->opaque,
> > GBE_CURBE_KERNEL_ARGUMENT, index);
> > - assert(offset + 2 <= k->curbe_sz);
> > - memcpy(k->curbe + offset, &sampler->clkSamplerValue, 2);
> > + //assert(arg_sz == 4);
> > + assert(offset + 4 <= k->curbe_sz);
> > + memcpy(k->curbe + offset, &sampler->clkSamplerValue, 4);
> > return CL_SUCCESS;
> > }
> >
> > --
> > 1.8.3.2
> >
> > _______________________________________________
> > Beignet mailing list
> > Beignet at lists.freedesktop.org
> > http://lists.freedesktop.org/mailman/listinfo/beignet
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list