[Beignet] [PATCH V2] Backend: Fix performance regression with sampler refine for LLVM40
Yang, Rong R
rong.r.yang at intel.com
Thu May 18 07:01:24 UTC 2017
LGTM, pushed, thanks.
> -----Original Message-----
> From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of
> Xiuli Pan
> Sent: Wednesday, May 17, 2017 17:02
> To: beignet at lists.freedesktop.org
> Cc: Pan, Xiuli <xiuli.pan at intel.com>
> Subject: [Beignet] [PATCH V2] Backend: Fix performance regression with
> sampler refine for LLVM40
>
> From: Pan Xiuli <xiuli.pan at intel.com>
>
> After the refine we cannot tell whether a sampler is initialized from a
> constant or not. As a result, the compiler optimization for constant samplers
> breaks and we have to decide at runtime which SAMPLE instruction to use.
> Now fix the sampler refine for LLVM40 to enable the constant check.
> V2: Fix a typo in the type of function __gen_ocl_sampler_to_int.
>
> Signed-off-by: Pan Xiuli <xiuli.pan at intel.com>
> ---
> backend/src/libocl/src/ocl_image.cl | 9 ++++----
> backend/src/llvm/llvm_sampler_fix.cpp | 41
> +++++++++++++++++++++++++++++++----
> 2 files changed, 41 insertions(+), 9 deletions(-)
>
> diff --git a/backend/src/libocl/src/ocl_image.cl
> b/backend/src/libocl/src/ocl_image.cl
> index e66aa15..2febfda 100644
> --- a/backend/src/libocl/src/ocl_image.cl
> +++ b/backend/src/libocl/src/ocl_image.cl
> @@ -295,18 +295,17 @@ GEN_VALIDATE_ARRAY_INDEX(int, read_write
> image1d_buffer_t) // The work around is to use a LD message instead of
> normal sample message.
>
> //////////////////////////////////////////////////////////////////////////////
> /
>
> -bool __gen_ocl_sampler_need_fix(int);
> -bool __gen_ocl_sampler_need_rounding_fix(int);
> -int __gen_ocl_sampler_to_int(sampler_t);
> +bool __gen_ocl_sampler_need_fix(sampler_t);
> +bool __gen_ocl_sampler_need_rounding_fix(sampler_t);
>
> bool __gen_sampler_need_fix(const sampler_t sampler) {
> - return
> __gen_ocl_sampler_need_fix(__gen_ocl_sampler_to_int(sampler));
> + return __gen_ocl_sampler_need_fix(sampler);
> }
>
> bool __gen_sampler_need_rounding_fix(const sampler_t sampler) {
> - return
> __gen_ocl_sampler_need_rounding_fix(__gen_ocl_sampler_to_int(sample
> r));
> + return __gen_ocl_sampler_need_rounding_fix(sampler);
> }
>
> INLINE_OVERLOADABLE float __gen_fixup_float_coord(float tmpCoord) diff
> --git a/backend/src/llvm/llvm_sampler_fix.cpp
> b/backend/src/llvm/llvm_sampler_fix.cpp
> index 2e8bcf9..c249755 100644
> --- a/backend/src/llvm/llvm_sampler_fix.cpp
> +++ b/backend/src/llvm/llvm_sampler_fix.cpp
> @@ -55,9 +55,17 @@ namespace gbe {
> // ((sampler & __CLK_FILTER_MASK) == CLK_FILTER_NEAREST));
> bool needFix = true;
> Value *needFixVal;
> +#if LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR >= 40
> + CallInst *init = dyn_cast<CallInst>(I->getOperand(0));
> + if (init && init->getCalledValue()-
> >getName().compare("__translate_sampler_initializer"))
> + {
> + const ConstantInt *ci = dyn_cast<ConstantInt>(init->getOperand(0));
> + uint32_t samplerInt = ci->getZExtValue(); #else
> if (dyn_cast<ConstantInt>(I->getOperand(0))) {
> const ConstantInt *ci = dyn_cast<ConstantInt>(I->getOperand(0));
> uint32_t samplerInt = ci->getZExtValue();
> +#endif
> needFix = ((samplerInt & __CLK_ADDRESS_MASK) ==
> CLK_ADDRESS_CLAMP &&
> (samplerInt & __CLK_FILTER_MASK) == CLK_FILTER_NEAREST);
> needFixVal = ConstantInt::get(boolTy, needFix); @@ -65,14 +73,24 @@
> namespace gbe {
> IRBuilder<> Builder(I->getParent());
>
> Builder.SetInsertPoint(I);
> +
> Value *addressMask = ConstantInt::get(i32Ty, __CLK_ADDRESS_MASK);
> - Value *addressMode = Builder.CreateAnd(I->getOperand(0),
> addressMask);
> Value *clampInt = ConstantInt::get(i32Ty, CLK_ADDRESS_CLAMP);
> - Value *isClampMode = Builder.CreateICmpEQ(addressMode,
> clampInt);
> Value *filterMask = ConstantInt::get(i32Ty, __CLK_FILTER_MASK);
> - Value *filterMode = Builder.CreateAnd(I->getOperand(0), filterMask);
> Value *nearestInt = ConstantInt::get(i32Ty, CLK_FILTER_NEAREST);
> +
> +#if LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR >= 40
> + Module *M = I->getParent()->getParent()->getParent();
> + Value* samplerCvt = M-
> >getOrInsertFunction("__gen_ocl_sampler_to_int", i32Ty, I-
> >getOperand(0)->getType(), nullptr);
> + Value *samplerVal = Builder.CreateCall(samplerCvt,
> +{I->getOperand(0)}); #else
> + Value *samplerVal = I->getOperand(0); #endif
> + Value *addressMode = Builder.CreateAnd(samplerVal, addressMask);
> + Value *isClampMode = Builder.CreateICmpEQ(addressMode,
> clampInt);
> + Value *filterMode = Builder.CreateAnd(samplerVal,
> +filterMask);
> Value *isNearestMode = Builder.CreateICmpEQ(filterMode,
> nearestInt);
> +
> needFixVal = Builder.CreateAnd(isClampMode, isNearestMode);
> }
>
> @@ -83,16 +101,31 @@ namespace gbe {
> // return ((sampler & CLK_NORMALIZED_COORDS_TRUE) == 0);
> bool needFix = true;
> Value *needFixVal;
> + #if LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR >= 40
> + CallInst *init = dyn_cast<CallInst>(I->getOperand(0));
> + if (init && init->getCalledValue()-
> >getName().compare("__translate_sampler_initializer"))
> + {
> + const ConstantInt *ci = dyn_cast<ConstantInt>(init->getOperand(0));
> + uint32_t samplerInt = ci->getZExtValue(); #else
> if (dyn_cast<ConstantInt>(I->getOperand(0))) {
> const ConstantInt *ci = dyn_cast<ConstantInt>(I->getOperand(0));
> uint32_t samplerInt = ci->getZExtValue();
> +#endif
> needFix = samplerInt & CLK_NORMALIZED_COORDS_TRUE;
> needFixVal = ConstantInt::get(boolTy, needFix);
> } else {
> IRBuilder<> Builder(I->getParent());
> Builder.SetInsertPoint(I);
> +#if LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR >= 40
> + Module *M = I->getParent()->getParent()->getParent();
> + Value* samplerCvt = M-
> >getOrInsertFunction("__gen_ocl_sampler_to_int", i32Ty, I-
> >getOperand(0)->getType(), nullptr);
> + Value *samplerVal = Builder.CreateCall(samplerCvt,
> +{I->getOperand(0)}); #else
> + Value *samplerVal = I->getOperand(0); #endif
> Value *normalizeMask = ConstantInt::get(i32Ty,
> CLK_NORMALIZED_COORDS_TRUE);
> - Value *normalizeMode = Builder.CreateAnd(I->getOperand(0),
> normalizeMask);
> + Value *normalizeMode = Builder.CreateAnd(samplerVal,
> + normalizeMask);
> needFixVal = Builder.CreateICmpEQ(normalizeMode,
> ConstantInt::get(i32Ty, 0));
> }
> I->replaceAllUsesWith(needFixVal);
> --
> 2.7.4
>
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list