[Beignet] [PATCH V2] Backend: Fix performance regression with sampler refine for LLVM40

Yang, Rong R rong.r.yang at intel.com
Thu May 18 07:01:24 UTC 2017


LGTM, pushed, thanks.

> -----Original Message-----
> From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of
> Xiuli Pan
> Sent: Wednesday, May 17, 2017 17:02
> To: beignet at lists.freedesktop.org
> Cc: Pan, Xiuli <xiuli.pan at intel.com>
> Subject: [Beignet] [PATCH V2] Backend: Fix performance regression with
> sampler refine for LLVM40
> 
> From: Pan Xiuli <xiuli.pan at intel.com>
> 
> After the sampler refine we can no longer tell whether a sampler is
> initialized from a constant. This breaks the compiler optimization for
> constant samplers, so the choice of SAMPLE instruction is deferred to runtime.
> Now fix the sampler refine for LLVM40 so that the constant check works again.
> V2: Fix a typo in the type of function __gen_ocl_sampler_to_int.
> 
> Signed-off-by: Pan Xiuli <xiuli.pan at intel.com>
> ---
>  backend/src/libocl/src/ocl_image.cl   |  9 ++++----
>  backend/src/llvm/llvm_sampler_fix.cpp | 41
> +++++++++++++++++++++++++++++++----
>  2 files changed, 41 insertions(+), 9 deletions(-)
> 
> diff --git a/backend/src/libocl/src/ocl_image.cl
> b/backend/src/libocl/src/ocl_image.cl
> index e66aa15..2febfda 100644
> --- a/backend/src/libocl/src/ocl_image.cl
> +++ b/backend/src/libocl/src/ocl_image.cl
> @@ -295,18 +295,17 @@ GEN_VALIDATE_ARRAY_INDEX(int, read_write
> image1d_buffer_t)  // The work around is to use a LD message instead of
> normal sample message.
> 
> //////////////////////////////////////////////////////////////////////////////
> /
> 
> -bool __gen_ocl_sampler_need_fix(int);
> -bool __gen_ocl_sampler_need_rounding_fix(int);
> -int __gen_ocl_sampler_to_int(sampler_t);
> +bool __gen_ocl_sampler_need_fix(sampler_t);
> +bool __gen_ocl_sampler_need_rounding_fix(sampler_t);
> 
>  bool __gen_sampler_need_fix(const sampler_t sampler)  {
> -  return
> __gen_ocl_sampler_need_fix(__gen_ocl_sampler_to_int(sampler));
> +  return __gen_ocl_sampler_need_fix(sampler);
>  }
> 
>  bool __gen_sampler_need_rounding_fix(const sampler_t sampler)  {
> -  return
> __gen_ocl_sampler_need_rounding_fix(__gen_ocl_sampler_to_int(sample
> r));
> +  return __gen_ocl_sampler_need_rounding_fix(sampler);
>  }
> 
>  INLINE_OVERLOADABLE float __gen_fixup_float_coord(float tmpCoord) diff
> --git a/backend/src/llvm/llvm_sampler_fix.cpp
> b/backend/src/llvm/llvm_sampler_fix.cpp
> index 2e8bcf9..c249755 100644
> --- a/backend/src/llvm/llvm_sampler_fix.cpp
> +++ b/backend/src/llvm/llvm_sampler_fix.cpp
> @@ -55,9 +55,17 @@ namespace gbe {
>          //          ((sampler & __CLK_FILTER_MASK) == CLK_FILTER_NEAREST));
>          bool needFix = true;
>          Value *needFixVal;
> +#if LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR >= 40
> +        CallInst *init = dyn_cast<CallInst>(I->getOperand(0));
> +        if (init && init->getCalledValue()-
> >getName().compare("__translate_sampler_initializer"))
> +        {
> +          const ConstantInt *ci = dyn_cast<ConstantInt>(init->getOperand(0));
> +          uint32_t samplerInt = ci->getZExtValue(); #else
>          if (dyn_cast<ConstantInt>(I->getOperand(0))) {
>            const ConstantInt *ci = dyn_cast<ConstantInt>(I->getOperand(0));
>            uint32_t samplerInt = ci->getZExtValue();
> +#endif
>            needFix = ((samplerInt & __CLK_ADDRESS_MASK) ==
> CLK_ADDRESS_CLAMP &&
>                       (samplerInt & __CLK_FILTER_MASK) == CLK_FILTER_NEAREST);
>            needFixVal = ConstantInt::get(boolTy, needFix); @@ -65,14 +73,24 @@
> namespace gbe {
>            IRBuilder<> Builder(I->getParent());
> 
>            Builder.SetInsertPoint(I);
> +
>            Value *addressMask = ConstantInt::get(i32Ty, __CLK_ADDRESS_MASK);
> -          Value *addressMode = Builder.CreateAnd(I->getOperand(0),
> addressMask);
>            Value *clampInt =  ConstantInt::get(i32Ty, CLK_ADDRESS_CLAMP);
> -          Value *isClampMode = Builder.CreateICmpEQ(addressMode,
> clampInt);
>            Value *filterMask = ConstantInt::get(i32Ty, __CLK_FILTER_MASK);
> -          Value *filterMode = Builder.CreateAnd(I->getOperand(0), filterMask);
>            Value *nearestInt = ConstantInt::get(i32Ty, CLK_FILTER_NEAREST);
> +
> +#if LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR >= 40
> +          Module *M = I->getParent()->getParent()->getParent();
> +          Value* samplerCvt = M-
> >getOrInsertFunction("__gen_ocl_sampler_to_int", i32Ty, I-
> >getOperand(0)->getType(), nullptr);
> +          Value *samplerVal = Builder.CreateCall(samplerCvt,
> +{I->getOperand(0)}); #else
> +          Value *samplerVal = I->getOperand(0); #endif
> +          Value *addressMode = Builder.CreateAnd(samplerVal, addressMask);
> +          Value *isClampMode = Builder.CreateICmpEQ(addressMode,
> clampInt);
> +          Value *filterMode = Builder.CreateAnd(samplerVal,
> +filterMask);
>            Value *isNearestMode = Builder.CreateICmpEQ(filterMode,
> nearestInt);
> +
>            needFixVal = Builder.CreateAnd(isClampMode, isNearestMode);
>          }
> 
> @@ -83,16 +101,31 @@ namespace gbe {
>          //  return ((sampler & CLK_NORMALIZED_COORDS_TRUE) == 0);
>          bool needFix = true;
>          Value *needFixVal;
> + #if LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR >= 40
> +        CallInst *init = dyn_cast<CallInst>(I->getOperand(0));
> +        if (init && init->getCalledValue()-
> >getName().compare("__translate_sampler_initializer"))
> +        {
> +          const ConstantInt *ci = dyn_cast<ConstantInt>(init->getOperand(0));
> +          uint32_t samplerInt = ci->getZExtValue(); #else
>          if (dyn_cast<ConstantInt>(I->getOperand(0))) {
>            const ConstantInt *ci = dyn_cast<ConstantInt>(I->getOperand(0));
>            uint32_t samplerInt = ci->getZExtValue();
> +#endif
>            needFix = samplerInt & CLK_NORMALIZED_COORDS_TRUE;
>            needFixVal = ConstantInt::get(boolTy, needFix);
>          } else {
>            IRBuilder<> Builder(I->getParent());
>            Builder.SetInsertPoint(I);
> +#if LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR >= 40
> +          Module *M = I->getParent()->getParent()->getParent();
> +          Value* samplerCvt = M-
> >getOrInsertFunction("__gen_ocl_sampler_to_int", i32Ty, I-
> >getOperand(0)->getType(), nullptr);
> +          Value *samplerVal = Builder.CreateCall(samplerCvt,
> +{I->getOperand(0)}); #else
> +          Value *samplerVal = I->getOperand(0); #endif
>            Value *normalizeMask = ConstantInt::get(i32Ty,
> CLK_NORMALIZED_COORDS_TRUE);
> -          Value *normalizeMode = Builder.CreateAnd(I->getOperand(0),
> normalizeMask);
> +          Value *normalizeMode = Builder.CreateAnd(samplerVal,
> + normalizeMask);
>            needFixVal = Builder.CreateICmpEQ(normalizeMode,
> ConstantInt::get(i32Ty, 0));
>          }
>          I->replaceAllUsesWith(needFixVal);
> --
> 2.7.4
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list