[Beignet] [PATCH 1/3] GBE: switch to CLANG native sampler_t.
Yang, Rong R
rong.r.yang at intel.com
Wed Dec 17 23:43:20 PST 2014
Need change llvm_sampler_fix.cpp's author and comment.
The others of this patchset LGTM.
> -----Original Message-----
> From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of
> Zhigang Gong
> Sent: Monday, December 15, 2014 09:02
> To: beignet at lists.freedesktop.org
> Cc: Gong, Zhigang
> Subject: [Beignet] [PATCH 1/3] GBE: switch to CLANG native sampler_t.
>
> CLANG has sampler_t support since LLVM 3.3, let's switch to that type rather
> than the old hacky way. One major problem is the sampler static checking. As
> Gen platform has some hardware restrication and if the sampler value is a
> const defined at kernel side, we need to use the value to optimize the code
> path. Now the sampler_t becomes an obaque type now, the CLANG doesn't
> support any arithmatic operations on it.
> So we have to introduce a new pass to do this optimization.
>
> Signed-off-by: Zhigang Gong <zhigang.gong at intel.com>
> ---
> backend/src/CMakeLists.txt | 1 +
> backend/src/ir/function.hpp | 5 ++
> backend/src/ir/sampler.cpp | 6 +-
> backend/src/libocl/include/ocl_types.h | 4 +-
> backend/src/libocl/src/ocl_image.cl | 27 ++++---
> backend/src/llvm/llvm_gen_backend.cpp | 8 +-
> backend/src/llvm/llvm_gen_backend.hpp | 1 +
> backend/src/llvm/llvm_sampler_fix.cpp | 144
> +++++++++++++++++++++++++++++++++
> backend/src/llvm/llvm_to_gen.cpp | 1 +
> src/cl_kernel.c | 12 ++-
> 10 files changed, 184 insertions(+), 25 deletions(-) create mode 100644
> backend/src/llvm/llvm_sampler_fix.cpp
>
> diff --git a/backend/src/CMakeLists.txt b/backend/src/CMakeLists.txt index
> b4555f1..deba230 100644
> --- a/backend/src/CMakeLists.txt
> +++ b/backend/src/CMakeLists.txt
> @@ -73,6 +73,7 @@ set (GBE_SRC
> backend/program.cpp
> backend/program.hpp
> backend/program.h
> + llvm/llvm_sampler_fix.cpp
> llvm/llvm_bitcode_link.cpp
> llvm/llvm_gen_backend.cpp
> llvm/llvm_passes.cpp
> diff --git a/backend/src/ir/function.hpp b/backend/src/ir/function.hpp index
> 0f86fef..4aea087 100644
> --- a/backend/src/ir/function.hpp
> +++ b/backend/src/ir/function.hpp
> @@ -204,6 +204,11 @@ namespace ir {
> return isImage1dT() || isImage1dArrayT() || isImage1dBufferT() ||
> isImage2dT() || isImage2dArrayT() || isImage3dT();
> }
> +
> + bool isSamplerType() const {
> + return typeName.compare("sampler_t") == 0;
> + }
> +
> };
>
> /*! Create a function input argument */ diff --git
> a/backend/src/ir/sampler.cpp b/backend/src/ir/sampler.cpp index
> ba42acb..e4accca 100644
> --- a/backend/src/ir/sampler.cpp
> +++ b/backend/src/ir/sampler.cpp
> @@ -49,11 +49,7 @@ namespace ir {
> ir::FunctionArgument *arg = ctx->getFunction().getArg(samplerReg);
> GBE_ASSERT(arg != NULL);
>
> - // XXX As LLVM 3.2/3.1 doesn't have a new data type for the sampler_t,
> we have to fix up the argument
> - // type here. Once we switch to the LLVM and use the new data type
> sampler_t, we can remove this
> - // work around.
> - arg->type = ir::FunctionArgument::SAMPLER;
> - arg->info.typeName = "sampler_t";
> + GBE_ASSERT(arg->type == ir::FunctionArgument::SAMPLER);
> int32_t id = ctx->getFunction().getArgID(arg);
> GBE_ASSERT(id < (1 << __CLK_SAMPLER_ARG_BITS));
>
> diff --git a/backend/src/libocl/include/ocl_types.h
> b/backend/src/libocl/include/ocl_types.h
> index 49ac907..7798ee1 100644
> --- a/backend/src/libocl/include/ocl_types.h
> +++ b/backend/src/libocl/include/ocl_types.h
> @@ -87,8 +87,8 @@ DEF(double);
> // FIXME:
> // This is a transitional hack to bypass the LLVM 3.3 built-in types.
> // See the Khronos SPIR specification for handling of these types.
> -#define sampler_t __sampler_t
> -typedef const ushort __sampler_t;
> +//#define sampler_t __sampler_t
> +//typedef const ushort __sampler_t;
>
> /////////////////////////////////////////////////////////////////////////////
> // OpenCL built-in event types
> diff --git a/backend/src/libocl/src/ocl_image.cl
> b/backend/src/libocl/src/ocl_image.cl
> index c4ca2f8..6da8e90 100644
> --- a/backend/src/libocl/src/ocl_image.cl
> +++ b/backend/src/libocl/src/ocl_image.cl
> @@ -136,18 +136,24 @@ GEN_VALIDATE_ARRAY_INDEX(int,
> image1d_buffer_t) // integer type surfaces correctly with
> CLK_ADDRESS_CLAMP and CLK_FILTER_NEAREST.
> // The work around is to use a LD message instead of normal sample
> message.
>
> //////////////////////////////////////////////////////////////////////////////
> /
> +
> +bool __gen_ocl_sampler_need_fix(sampler_t);
> +bool __gen_ocl_sampler_need_rounding_fix(sampler_t);
> +
> bool __gen_sampler_need_fix(const sampler_t sampler) {
> - return (((sampler & __CLK_ADDRESS_MASK) == CLK_ADDRESS_CLAMP)
> &&
> - ((sampler & __CLK_FILTER_MASK) == CLK_FILTER_NEAREST));
> + return __gen_ocl_sampler_need_fix(sampler);
> +
> +// return (((sampler & __CLK_ADDRESS_MASK) == CLK_ADDRESS_CLAMP)
> &&
> +// ((sampler & __CLK_FILTER_MASK) == CLK_FILTER_NEAREST));
> }
>
> bool __gen_sampler_need_rounding_fix(const sampler_t sampler) {
> - return ((sampler & CLK_NORMALIZED_COORDS_TRUE) == 0);
> + return __gen_ocl_sampler_need_rounding_fix(sampler);
> +// return ((sampler & CLK_NORMALIZED_COORDS_TRUE) == 0);
> }
>
> -
> INLINE_OVERLOADABLE float __gen_fixup_float_coord(float tmpCoord) {
> if (tmpCoord < 0 && tmpCoord > -0x1p-20f) @@ -311,7 +317,7 @@
> INLINE_OVERLOADABLE float3 __gen_fixup_neg_boundary(float3 coord)
> __gen_sampler_need_rounding_fix(sampler)) \
> tmpCoord = __gen_fixup_float_coord(tmpCoord); \
> if (int_clamping_fix) { \
> - if (sampler & CLK_NORMALIZED_COORDS_TRUE) \
> + if (!__gen_sampler_need_rounding_fix(sampler)) \
> tmpCoord = __gen_denormalize_coord(cl_image, tmpCoord); \
> tmpCoord = __gen_fixup_neg_boundary(tmpCoord); \
> return __gen_ocl_read_image ##suffix( \
> @@ -328,9 +334,10 @@ INLINE_OVERLOADABLE float3
> __gen_fixup_neg_boundary(float3 coord)
> coord_type coord) \
> { \
> coord = __gen_validate_array_index(coord, cl_image); \
> + sampler_t defaultSampler = CLK_NORMALIZED_COORDS_FALSE |
> CLK_ADDRESS_NONE \
> + | CLK_FILTER_NEAREST; \
> return __gen_ocl_read_image ##suffix( \
> - cl_image, CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE
> \
> - | CLK_FILTER_NEAREST, coord, 0); \
> + cl_image, defaultSampler, coord, 0); \
> }
>
> #define DECL_WRITE_IMAGE(image_type, image_data_type, suffix,
> coord_type) \
> @@ -419,15 +426,15 @@ INLINE_OVERLOADABLE int4
> __gen_fixup_1darray_coord(int2 coord, image1d_array_t i
> __gen_sampler_need_rounding_fix(sampler)) \
> tmpCoord = __gen_fixup_float_coord(tmpCoord); \
> if (int_clamping_fix) { \
> - if (sampler & CLK_NORMALIZED_COORDS_TRUE) \
> + if (!__gen_sampler_need_rounding_fix(sampler)) \
> tmpCoord = __gen_denormalize_coord(cl_image, tmpCoord); \
> float4 newCoord = __gen_fixup_1darray_coord(tmpCoord, cl_image);
> \
> return __gen_ocl_read_image ##suffix( \
> - cl_image, sampler, newCoord, 2); \
> + cl_image, sampler, newCoord, 2); \
> } \
> } \
> } \
> - return __gen_ocl_read_image ##suffix(cl_image, sampler, tmpCoord, 0);
> \
> + return __gen_ocl_read_image ##suffix(cl_image, sampler, tmpCoord, 0);
> \
> }
>
> #define DECL_IMAGE_1DArray(int_clamping_fix, image_data_type, suffix)
> \
> diff --git a/backend/src/llvm/llvm_gen_backend.cpp
> b/backend/src/llvm/llvm_gen_backend.cpp
> index 512f437..36a6cb3 100644
> --- a/backend/src/llvm/llvm_gen_backend.cpp
> +++ b/backend/src/llvm/llvm_gen_backend.cpp
> @@ -1591,6 +1591,12 @@ namespace gbe
> continue;
> }
>
> + if (llvmInfo.isSamplerType()) {
> + ctx.input(argName, ir::FunctionArgument::SAMPLER, reg, llvmInfo,
> getTypeByteSize(unit, type), getAlignmentByte(unit, type), 0);
> + (void)ctx.getFunction().getSamplerSet()->append(reg, &ctx);
> + continue;
> + }
> +
> if (type->isPointerTy() == false)
> ctx.input(argName, ir::FunctionArgument::VALUE, reg, llvmInfo,
> getTypeByteSize(unit, type), getAlignmentByte(unit, type), 0);
> else {
> @@ -3013,7 +3019,7 @@ namespace gbe
> // This is not a kernel argument sampler, we need to append it to sampler
> set,
> // and allocate a sampler slot for it.
> const ir::Immediate &x = processConstantImm(CPV);
> - GBE_ASSERTM(x.getType() == ir::TYPE_U16 || x.getType() ==
> ir::TYPE_S16, "Invalid sampler type");
> + GBE_ASSERTM(x.getType() == ir::TYPE_U32 || x.getType() ==
> + ir::TYPE_S32, "Invalid sampler type");
>
> index = ctx.getFunction().getSamplerSet()->append(x.getIntegerValue(),
> &ctx);
> } else {
> diff --git a/backend/src/llvm/llvm_gen_backend.hpp
> b/backend/src/llvm/llvm_gen_backend.hpp
> index ed7a57e..a496c16 100644
> --- a/backend/src/llvm/llvm_gen_backend.hpp
> +++ b/backend/src/llvm/llvm_gen_backend.hpp
> @@ -129,6 +129,7 @@ namespace gbe
> /* customized loop unrolling pass. */
> llvm::LoopPass *createCustomLoopUnrollPass(); #endif
> + llvm::FunctionPass* createSamplerFixPass();
>
> /*! Add all the function call of ocl to our bitcode. */
> llvm::Module* runBitCodeLinker(llvm::Module *mod, bool strictMath); diff
> --git a/backend/src/llvm/llvm_sampler_fix.cpp
> b/backend/src/llvm/llvm_sampler_fix.cpp
> new file mode 100644
> index 0000000..ec498d0
> --- /dev/null
> +++ b/backend/src/llvm/llvm_sampler_fix.cpp
> @@ -0,0 +1,144 @@
> +/*
> + * Copyright © 2012 Intel Corporation
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * This library is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this library. If not, see
> <http://www.gnu.org/licenses/>.
> + *
> + * Author: Ruiling, Song <ruiling.song at intel.com>
> + *
> + * Legalize unsupported integer data type i128/i256/...
> + * right now, the implementation only consider little-endian system.
> + *
> + */
> +#include "llvm/IR/Instructions.h"
> +#include "llvm/Pass.h"
> +#include "llvm/PassManager.h"
> +
> +#include "llvm/Config/llvm-config.h"
> +#include "llvm/ADT/DenseMap.h"
> +#include "llvm/ADT/PostOrderIterator.h"
> +#include "llvm/IR/Function.h"
> +#include "llvm/IR/InstrTypes.h"
> +#include "llvm/IR/Instructions.h"
> +#include "llvm/IR/IntrinsicInst.h"
> +#include "llvm/IR/Module.h"
> +#include "llvm/Pass.h"
> +#include "llvm/IR/IRBuilder.h"
> +#if LLVM_VERSION_MINOR >= 5
> +#include "llvm/IR/CFG.h"
> +#else
> +#include "llvm/Support/CFG.h"
> +#endif
> +
> +#include "llvm/Analysis/ConstantsScanner.h"
> +
> +#include "llvm_gen_backend.hpp"
> +#include "ocl_common_defines.h"
> +
> +using namespace llvm;
> +
> +namespace gbe {
> +
> + class SamplerFix : public FunctionPass {
> + public:
> + SamplerFix() : FunctionPass(ID) {
> +#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 5
> +
> +initializeDominatorTreeWrapperPassPass(*PassRegistry::getPassRegistry()
> +);
> +#else
> + initializeDominatorTreePass(*PassRegistry::getPassRegistry());
> +#endif
> + }
> +
> + bool visitCallInst(CallInst *I) {
> + Value *Callee = I->getCalledValue();
> + const std::string fnName = Callee->getName();
> + bool changed = false;
> + Type *boolTy = IntegerType::get(I->getContext(), 1);
> + Type *i32Ty = IntegerType::get(I->getContext(), 32);
> +
> + if (fnName.compare("__gen_ocl_sampler_need_fix") == 0) {
> +
> + // return (((sampler & __CLK_ADDRESS_MASK) ==
> CLK_ADDRESS_CLAMP) &&
> + // ((sampler & __CLK_FILTER_MASK) == CLK_FILTER_NEAREST));
> + bool needFix = true;
> + Value *needFixVal;
> + if (dyn_cast<ConstantInt>(I->getOperand(0))) {
> + const ConstantInt *ci = dyn_cast<ConstantInt>(I->getOperand(0));
> + uint32_t samplerInt = ci->getZExtValue();
> + needFix = ((samplerInt & __CLK_ADDRESS_MASK) ==
> CLK_ADDRESS_CLAMP &&
> + (samplerInt & __CLK_FILTER_MASK) == CLK_FILTER_NEAREST);
> + needFixVal = ConstantInt::get(boolTy, needFix);
> + } else {
> + IRBuilder<> Builder(I->getParent());
> +
> + Builder.SetInsertPoint(I);
> + Value *addressMask = ConstantInt::get(i32Ty,
> __CLK_ADDRESS_MASK);
> + Value *addressMode = Builder.CreateAnd(I->getOperand(0),
> addressMask);
> + Value *clampInt = ConstantInt::get(i32Ty, CLK_ADDRESS_CLAMP);
> + Value *isClampMode = Builder.CreateICmpEQ(addressMode,
> clampInt);
> + Value *filterMask = ConstantInt::get(i32Ty, __CLK_FILTER_MASK);
> + Value *filterMode = Builder.CreateAnd(I->getOperand(0), filterMask);
> + Value *nearestInt = ConstantInt::get(i32Ty, CLK_FILTER_NEAREST);
> + Value *isNearestMode = Builder.CreateICmpEQ(filterMode,
> nearestInt);
> + needFixVal = Builder.CreateAnd(isClampMode, isNearestMode);
> + }
> +
> + I->replaceAllUsesWith(needFixVal);
> + changed = true;
> + } else if (fnName.compare("__gen_ocl_sampler_need_rounding_fix")
> + == 0) {
> +
> + // return ((sampler & CLK_NORMALIZED_COORDS_TRUE) == 0);
> + bool needFix = true;
> + Value *needFixVal;
> + if (dyn_cast<ConstantInt>(I->getOperand(0))) {
> + const ConstantInt *ci = dyn_cast<ConstantInt>(I->getOperand(0));
> + uint32_t samplerInt = ci->getZExtValue();
> + needFix = samplerInt & CLK_NORMALIZED_COORDS_TRUE;
> + needFixVal = ConstantInt::get(boolTy, needFix);
> + } else {
> + IRBuilder<> Builder(I->getParent());
> + Builder.SetInsertPoint(I);
> + Value *normalizeMask = ConstantInt::get(i32Ty,
> CLK_NORMALIZED_COORDS_TRUE);
> + Value *normalizeMode = Builder.CreateAnd(I->getOperand(0),
> normalizeMask);
> + needFixVal = Builder.CreateICmpEQ(normalizeMode,
> ConstantInt::get(i32Ty, 0));
> + }
> + I->replaceAllUsesWith(needFixVal);
> + changed = true;
> + }
> + return changed;
> + }
> +
> + bool runOnFunction(Function& F) {
> + bool changed = false;
> + std::set<Instruction*> deadInsnSet;
> + for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ++I) {
> + if (dyn_cast<CallInst>(&*I)) {
> + if (visitCallInst(dyn_cast<CallInst>(&*I))) {
> + changed = true;
> + deadInsnSet.insert(&*I);
> + }
> + }
> + }
> + for (auto it: deadInsnSet)
> + it->eraseFromParent();
> + return changed;
> + }
> +
> + static char ID;
> + };
> +
> + FunctionPass* createSamplerFixPass() {
> + return new SamplerFix();
> + }
> + char SamplerFix::ID = 0;
> +};
> diff --git a/backend/src/llvm/llvm_to_gen.cpp
> b/backend/src/llvm/llvm_to_gen.cpp
> index e1bf12f..1c247b8 100644
> --- a/backend/src/llvm/llvm_to_gen.cpp
> +++ b/backend/src/llvm/llvm_to_gen.cpp
> @@ -121,6 +121,7 @@ namespace gbe
> MPM.add(createTypeBasedAliasAnalysisPass());
> MPM.add(createBasicAliasAnalysisPass());
> MPM.add(createIntrinsicLoweringPass());
> + MPM.add(createSamplerFixPass());
> MPM.add(createGlobalOptimizerPass()); // Optimize out global vars
>
> MPM.add(createIPSCCPPass()); // IP SCCP
> diff --git a/src/cl_kernel.c b/src/cl_kernel.c index a869515..177cb00 100644
> --- a/src/cl_kernel.c
> +++ b/src/cl_kernel.c
> @@ -114,11 +114,8 @@ cl_kernel_set_arg(cl_kernel k, cl_uint index, size_t
> sz, const void *value)
> arg_sz = interp_kernel_get_arg_size(k->opaque, index);
>
> if (UNLIKELY(arg_type != GBE_ARG_LOCAL_PTR && arg_sz != sz)) {
> - if (arg_sz == 2 && arg_type == GBE_ARG_VALUE && sz ==
> sizeof(cl_sampler)) {
> - /* FIXME, this is a workaround for the case when a kernel arg
> - defined a sampler_t but doesn't use it.*/
> - arg_type = GBE_ARG_SAMPLER;
> - } else
> + if (arg_type != GBE_ARG_SAMPLER ||
> + (arg_type == GBE_ARG_SAMPLER && sz != sizeof(cl_sampler)))
> return CL_INVALID_ARG_SIZE;
> }
>
> @@ -182,8 +179,9 @@ cl_kernel_set_arg(cl_kernel k, cl_uint index, size_t sz,
> const void *value)
> k->args[index].sampler = sampler;
> cl_set_sampler_arg_slot(k, index, sampler);
> offset = interp_kernel_get_curbe_offset(k->opaque,
> GBE_CURBE_KERNEL_ARGUMENT, index);
> - assert(offset + 2 <= k->curbe_sz);
> - memcpy(k->curbe + offset, &sampler->clkSamplerValue, 2);
> + //assert(arg_sz == 4);
> + assert(offset + 4 <= k->curbe_sz);
> + memcpy(k->curbe + offset, &sampler->clkSamplerValue, 4);
> return CL_SUCCESS;
> }
>
> --
> 1.8.3.2
>
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list