[Beignet] [PATCH 1/3] GBE: switch to CLANG native sampler_t.

Zhigang Gong zhigang.gong at intel.com
Sun Dec 14 17:02:22 PST 2014


CLANG has had sampler_t support since LLVM 3.3, so let's switch to that
type rather than the old hacky way. One major problem is the static
checking of the sampler. The Gen platform has some hardware restrictions,
and if the sampler value is a constant defined on the kernel side, we need
to use the value to optimize the code path. Now that sampler_t has become
an opaque type, CLANG doesn't support any arithmetic operations on it,
so we have to introduce a new pass to do this optimization.

Signed-off-by: Zhigang Gong <zhigang.gong at intel.com>
---
 backend/src/CMakeLists.txt             |   1 +
 backend/src/ir/function.hpp            |   5 ++
 backend/src/ir/sampler.cpp             |   6 +-
 backend/src/libocl/include/ocl_types.h |   4 +-
 backend/src/libocl/src/ocl_image.cl    |  27 ++++---
 backend/src/llvm/llvm_gen_backend.cpp  |   8 +-
 backend/src/llvm/llvm_gen_backend.hpp  |   1 +
 backend/src/llvm/llvm_sampler_fix.cpp  | 144 +++++++++++++++++++++++++++++++++
 backend/src/llvm/llvm_to_gen.cpp       |   1 +
 src/cl_kernel.c                        |  12 ++-
 10 files changed, 184 insertions(+), 25 deletions(-)
 create mode 100644 backend/src/llvm/llvm_sampler_fix.cpp

diff --git a/backend/src/CMakeLists.txt b/backend/src/CMakeLists.txt
index b4555f1..deba230 100644
--- a/backend/src/CMakeLists.txt
+++ b/backend/src/CMakeLists.txt
@@ -73,6 +73,7 @@ set (GBE_SRC
     backend/program.cpp
     backend/program.hpp
     backend/program.h
+    llvm/llvm_sampler_fix.cpp
     llvm/llvm_bitcode_link.cpp
     llvm/llvm_gen_backend.cpp
     llvm/llvm_passes.cpp
diff --git a/backend/src/ir/function.hpp b/backend/src/ir/function.hpp
index 0f86fef..4aea087 100644
--- a/backend/src/ir/function.hpp
+++ b/backend/src/ir/function.hpp
@@ -204,6 +204,11 @@ namespace ir {
         return isImage1dT() || isImage1dArrayT() || isImage1dBufferT() ||
                isImage2dT() || isImage2dArrayT() || isImage3dT();
       }
+
+      bool isSamplerType() const {
+        return typeName.compare("sampler_t") == 0;
+      }
+
     };
 
     /*! Create a function input argument */
diff --git a/backend/src/ir/sampler.cpp b/backend/src/ir/sampler.cpp
index ba42acb..e4accca 100644
--- a/backend/src/ir/sampler.cpp
+++ b/backend/src/ir/sampler.cpp
@@ -49,11 +49,7 @@ namespace ir {
     ir::FunctionArgument *arg =  ctx->getFunction().getArg(samplerReg);
     GBE_ASSERT(arg != NULL);
 
-    // XXX As LLVM 3.2/3.1 doesn't have a new data type for the sampler_t, we have to fix up the argument
-    // type here. Once we switch to the LLVM and use the new data type sampler_t, we can remove this
-    // work around.
-    arg->type = ir::FunctionArgument::SAMPLER;
-    arg->info.typeName = "sampler_t";
+    GBE_ASSERT(arg->type == ir::FunctionArgument::SAMPLER);
     int32_t id = ctx->getFunction().getArgID(arg);
     GBE_ASSERT(id < (1 << __CLK_SAMPLER_ARG_BITS));
 
diff --git a/backend/src/libocl/include/ocl_types.h b/backend/src/libocl/include/ocl_types.h
index 49ac907..7798ee1 100644
--- a/backend/src/libocl/include/ocl_types.h
+++ b/backend/src/libocl/include/ocl_types.h
@@ -87,8 +87,8 @@ DEF(double);
 // FIXME:
 // This is a transitional hack to bypass the LLVM 3.3 built-in types.
 // See the Khronos SPIR specification for handling of these types.
-#define sampler_t __sampler_t
-typedef const ushort __sampler_t;
+//#define sampler_t __sampler_t
+//typedef const ushort __sampler_t;
 
 /////////////////////////////////////////////////////////////////////////////
 // OpenCL built-in event types
diff --git a/backend/src/libocl/src/ocl_image.cl b/backend/src/libocl/src/ocl_image.cl
index c4ca2f8..6da8e90 100644
--- a/backend/src/libocl/src/ocl_image.cl
+++ b/backend/src/libocl/src/ocl_image.cl
@@ -136,18 +136,24 @@ GEN_VALIDATE_ARRAY_INDEX(int, image1d_buffer_t)
 // integer type surfaces correctly with CLK_ADDRESS_CLAMP and CLK_FILTER_NEAREST.
 // The work around is to use a LD message instead of normal sample message.
 ///////////////////////////////////////////////////////////////////////////////
+
+bool __gen_ocl_sampler_need_fix(sampler_t);
+bool __gen_ocl_sampler_need_rounding_fix(sampler_t);
+
 bool __gen_sampler_need_fix(const sampler_t sampler)
 {
-  return (((sampler & __CLK_ADDRESS_MASK) == CLK_ADDRESS_CLAMP) &&
-          ((sampler & __CLK_FILTER_MASK) == CLK_FILTER_NEAREST));
+  return __gen_ocl_sampler_need_fix(sampler);
+
+//  return (((sampler & __CLK_ADDRESS_MASK) == CLK_ADDRESS_CLAMP) &&
+//          ((sampler & __CLK_FILTER_MASK) == CLK_FILTER_NEAREST));
 }
 
 bool __gen_sampler_need_rounding_fix(const sampler_t sampler)
 {
-  return ((sampler & CLK_NORMALIZED_COORDS_TRUE) == 0);
+  return __gen_ocl_sampler_need_rounding_fix(sampler);
+//  return ((sampler & CLK_NORMALIZED_COORDS_TRUE) == 0);
 }
 
-
 INLINE_OVERLOADABLE float __gen_fixup_float_coord(float tmpCoord)
 {
   if (tmpCoord < 0 && tmpCoord > -0x1p-20f)
@@ -311,7 +317,7 @@ INLINE_OVERLOADABLE float3 __gen_fixup_neg_boundary(float3 coord)
             __gen_sampler_need_rounding_fix(sampler))                         \
           tmpCoord = __gen_fixup_float_coord(tmpCoord);                       \
         if (int_clamping_fix) {                                               \
-            if (sampler & CLK_NORMALIZED_COORDS_TRUE)                         \
+            if (!__gen_sampler_need_rounding_fix(sampler))                    \
               tmpCoord = __gen_denormalize_coord(cl_image, tmpCoord);         \
             tmpCoord = __gen_fixup_neg_boundary(tmpCoord);                    \
             return __gen_ocl_read_image ##suffix(                             \
@@ -328,9 +334,10 @@ INLINE_OVERLOADABLE float3 __gen_fixup_neg_boundary(float3 coord)
                                                coord_type coord)              \
   {                                                                           \
     coord = __gen_validate_array_index(coord, cl_image);                      \
+    sampler_t defaultSampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE \
+                               | CLK_FILTER_NEAREST;                          \
     return __gen_ocl_read_image ##suffix(                                     \
-             cl_image, CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE         \
-             | CLK_FILTER_NEAREST, coord, 0);                                 \
+             cl_image, defaultSampler, coord, 0);                             \
   }
 
 #define DECL_WRITE_IMAGE(image_type, image_data_type, suffix, coord_type)     \
@@ -419,15 +426,15 @@ INLINE_OVERLOADABLE int4 __gen_fixup_1darray_coord(int2 coord, image1d_array_t i
             __gen_sampler_need_rounding_fix(sampler))                         \
           tmpCoord = __gen_fixup_float_coord(tmpCoord);                       \
         if (int_clamping_fix) {                                               \
-            if (sampler & CLK_NORMALIZED_COORDS_TRUE)                         \
+            if (!__gen_sampler_need_rounding_fix(sampler))                    \
               tmpCoord = __gen_denormalize_coord(cl_image, tmpCoord);         \
             float4 newCoord = __gen_fixup_1darray_coord(tmpCoord, cl_image);  \
             return __gen_ocl_read_image ##suffix(                             \
-                     cl_image, sampler, newCoord, 2);                       \
+                     cl_image, sampler, newCoord, 2);                         \
         }                                                                     \
       }                                                                       \
     }                                                                         \
-    return  __gen_ocl_read_image ##suffix(cl_image, sampler, tmpCoord, 0);  \
+    return  __gen_ocl_read_image ##suffix(cl_image, sampler, tmpCoord, 0);    \
   }
 
 #define DECL_IMAGE_1DArray(int_clamping_fix, image_data_type, suffix)         \
diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
index 512f437..36a6cb3 100644
--- a/backend/src/llvm/llvm_gen_backend.cpp
+++ b/backend/src/llvm/llvm_gen_backend.cpp
@@ -1591,6 +1591,12 @@ namespace gbe
           continue;
         }
 
+        if (llvmInfo.isSamplerType()) {
+          ctx.input(argName, ir::FunctionArgument::SAMPLER, reg, llvmInfo, getTypeByteSize(unit, type), getAlignmentByte(unit, type), 0);
+          (void)ctx.getFunction().getSamplerSet()->append(reg, &ctx);
+          continue;
+        }
+
         if (type->isPointerTy() == false)
           ctx.input(argName, ir::FunctionArgument::VALUE, reg, llvmInfo, getTypeByteSize(unit, type), getAlignmentByte(unit, type), 0);
         else {
@@ -3013,7 +3019,7 @@ namespace gbe
       // This is not a kernel argument sampler, we need to append it to sampler set,
       // and allocate a sampler slot for it.
       const ir::Immediate &x = processConstantImm(CPV);
-      GBE_ASSERTM(x.getType() == ir::TYPE_U16 || x.getType() == ir::TYPE_S16, "Invalid sampler type");
+      GBE_ASSERTM(x.getType() == ir::TYPE_U32 || x.getType() == ir::TYPE_S32, "Invalid sampler type");
 
       index = ctx.getFunction().getSamplerSet()->append(x.getIntegerValue(), &ctx);
     } else {
diff --git a/backend/src/llvm/llvm_gen_backend.hpp b/backend/src/llvm/llvm_gen_backend.hpp
index ed7a57e..a496c16 100644
--- a/backend/src/llvm/llvm_gen_backend.hpp
+++ b/backend/src/llvm/llvm_gen_backend.hpp
@@ -129,6 +129,7 @@ namespace gbe
   /* customized loop unrolling pass. */
   llvm::LoopPass *createCustomLoopUnrollPass();
 #endif
+  llvm::FunctionPass* createSamplerFixPass();
 
   /*! Add all the function call of ocl to our bitcode. */
   llvm::Module* runBitCodeLinker(llvm::Module *mod, bool strictMath);
diff --git a/backend/src/llvm/llvm_sampler_fix.cpp b/backend/src/llvm/llvm_sampler_fix.cpp
new file mode 100644
index 0000000..ec498d0
--- /dev/null
+++ b/backend/src/llvm/llvm_sampler_fix.cpp
@@ -0,0 +1,144 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Ruiling, Song <ruiling.song at intel.com>
+ *
+ * Replace calls to __gen_ocl_sampler_need_fix and
+ * __gen_ocl_sampler_need_rounding_fix with the value computed from the sampler.
+ *
+ */
+#include "llvm/IR/Instructions.h"
+#include "llvm/Pass.h"
+#include "llvm/PassManager.h"
+
+#include "llvm/Config/llvm-config.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/IR/IRBuilder.h"
+#if LLVM_VERSION_MINOR >= 5
+#include "llvm/IR/CFG.h"
+#else
+#include "llvm/Support/CFG.h"
+#endif
+
+#include "llvm/Analysis/ConstantsScanner.h"
+
+#include "llvm_gen_backend.hpp"
+#include "ocl_common_defines.h"
+
+using namespace llvm;
+
+namespace gbe {
+
+  class SamplerFix : public FunctionPass {
+  public:
+    SamplerFix() : FunctionPass(ID) {
+#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 5
+      initializeDominatorTreeWrapperPassPass(*PassRegistry::getPassRegistry());
+#else
+      initializeDominatorTreePass(*PassRegistry::getPassRegistry());
+#endif
+    }
+
+    // Replace a call to a sampler query builtin with the value it returns:
+    // an i1 constant when the sampler operand is a ConstantInt, otherwise
+    // and/icmp instructions inserted before I. Returns true when the uses of
+    // I were replaced; I itself is not erased here.
+    bool visitCallInst(CallInst *I) {
+      Value *Callee = I->getCalledValue();
+      const std::string fnName = Callee->getName();
+      bool changed = false;
+      Type *boolTy = IntegerType::get(I->getContext(), 1);
+      Type *i32Ty = IntegerType::get(I->getContext(), 32);
+
+      if (fnName.compare("__gen_ocl_sampler_need_fix") == 0) {
+        //  return (((sampler & __CLK_ADDRESS_MASK) == CLK_ADDRESS_CLAMP) &&
+        //          ((sampler & __CLK_FILTER_MASK) == CLK_FILTER_NEAREST));
+        bool needFix = true;
+        Value *needFixVal;
+        if (const ConstantInt *ci = dyn_cast<ConstantInt>(I->getOperand(0))) {
+          uint32_t samplerInt = ci->getZExtValue();
+          needFix = ((samplerInt & __CLK_ADDRESS_MASK) == CLK_ADDRESS_CLAMP &&
+                     (samplerInt & __CLK_FILTER_MASK) == CLK_FILTER_NEAREST);
+          needFixVal = ConstantInt::get(boolTy, needFix);
+        } else {
+          IRBuilder<> Builder(I->getParent());
+          Builder.SetInsertPoint(I);
+          Value *addressMask = ConstantInt::get(i32Ty, __CLK_ADDRESS_MASK);
+          Value *addressMode = Builder.CreateAnd(I->getOperand(0), addressMask);
+          Value *clampInt =  ConstantInt::get(i32Ty, CLK_ADDRESS_CLAMP);
+          Value *isClampMode = Builder.CreateICmpEQ(addressMode, clampInt);
+          Value *filterMask = ConstantInt::get(i32Ty, __CLK_FILTER_MASK);
+          Value *filterMode = Builder.CreateAnd(I->getOperand(0), filterMask);
+          Value *nearestInt = ConstantInt::get(i32Ty, CLK_FILTER_NEAREST);
+          Value *isNearestMode = Builder.CreateICmpEQ(filterMode, nearestInt);
+          needFixVal = Builder.CreateAnd(isClampMode, isNearestMode);
+        }
+
+        I->replaceAllUsesWith(needFixVal);
+        changed = true;
+      } else if (fnName.compare("__gen_ocl_sampler_need_rounding_fix") == 0) {
+        //  return ((sampler & CLK_NORMALIZED_COORDS_TRUE) == 0);
+        bool needFix = true;
+        Value *needFixVal;
+        if (const ConstantInt *ci = dyn_cast<ConstantInt>(I->getOperand(0))) {
+          uint32_t samplerInt = ci->getZExtValue();
+          // Must agree with the icmp eq 0 emitted in the non-constant branch.
+          needFix = (samplerInt & CLK_NORMALIZED_COORDS_TRUE) == 0;
+          needFixVal = ConstantInt::get(boolTy, needFix);
+        } else {
+          IRBuilder<> Builder(I->getParent());
+          Builder.SetInsertPoint(I);
+          Value *normalizeMask = ConstantInt::get(i32Ty, CLK_NORMALIZED_COORDS_TRUE);
+          Value *normalizeMode = Builder.CreateAnd(I->getOperand(0), normalizeMask);
+          needFixVal = Builder.CreateICmpEQ(normalizeMode, ConstantInt::get(i32Ty, 0));
+        }
+        I->replaceAllUsesWith(needFixVal);
+        changed = true;
+      }
+      return changed;
+    }
+
+    bool runOnFunction(Function& F) {
+      bool changed = false;
+      std::set<Instruction*> deadInsnSet;
+      for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ++I) {
+        if (dyn_cast<CallInst>(&*I)) {
+          if (visitCallInst(dyn_cast<CallInst>(&*I))) {
+            changed = true;
+            deadInsnSet.insert(&*I);
+          }
+        }
+      }
+      for (auto it: deadInsnSet)
+        it->eraseFromParent();
+      return changed;
+    }
+
+    static char ID;
+  };
+
+  FunctionPass* createSamplerFixPass() {
+    return new SamplerFix();
+  }
+  char SamplerFix::ID = 0;
+};
diff --git a/backend/src/llvm/llvm_to_gen.cpp b/backend/src/llvm/llvm_to_gen.cpp
index e1bf12f..1c247b8 100644
--- a/backend/src/llvm/llvm_to_gen.cpp
+++ b/backend/src/llvm/llvm_to_gen.cpp
@@ -121,6 +121,7 @@ namespace gbe
     MPM.add(createTypeBasedAliasAnalysisPass());
     MPM.add(createBasicAliasAnalysisPass());
     MPM.add(createIntrinsicLoweringPass());
+    MPM.add(createSamplerFixPass());
     MPM.add(createGlobalOptimizerPass());     // Optimize out global vars
 
     MPM.add(createIPSCCPPass());              // IP SCCP
diff --git a/src/cl_kernel.c b/src/cl_kernel.c
index a869515..177cb00 100644
--- a/src/cl_kernel.c
+++ b/src/cl_kernel.c
@@ -114,11 +114,8 @@ cl_kernel_set_arg(cl_kernel k, cl_uint index, size_t sz, const void *value)
   arg_sz = interp_kernel_get_arg_size(k->opaque, index);
 
   if (UNLIKELY(arg_type != GBE_ARG_LOCAL_PTR && arg_sz != sz)) {
-    if (arg_sz == 2 && arg_type == GBE_ARG_VALUE && sz == sizeof(cl_sampler)) {
-      /* FIXME, this is a workaround for the case when a kernel arg
-         defined a sampler_t but doesn't use it.*/
-      arg_type = GBE_ARG_SAMPLER;
-    } else
+    if (arg_type != GBE_ARG_SAMPLER ||
+        (arg_type == GBE_ARG_SAMPLER && sz != sizeof(cl_sampler)))
       return CL_INVALID_ARG_SIZE;
   }
 
@@ -182,8 +179,9 @@ cl_kernel_set_arg(cl_kernel k, cl_uint index, size_t sz, const void *value)
     k->args[index].sampler = sampler;
     cl_set_sampler_arg_slot(k, index, sampler);
     offset = interp_kernel_get_curbe_offset(k->opaque, GBE_CURBE_KERNEL_ARGUMENT, index);
-    assert(offset + 2 <= k->curbe_sz);
-    memcpy(k->curbe + offset, &sampler->clkSamplerValue, 2);
+    //assert(arg_sz == 4);
+    assert(offset + 4 <= k->curbe_sz);
+    memcpy(k->curbe + offset, &sampler->clkSamplerValue, 4);
     return CL_SUCCESS;
   }
 
-- 
1.8.3.2



More information about the Beignet mailing list