[Beignet] [PATCH V2] GBE: add param to switch the behavior of math func

Guo, Yejun yejun.guo at intel.com
Fri Feb 14 09:20:47 CET 2014


With this design, we can decrease build time by not generating the second pch file, the only shortcoming is that the logic is separated in different places.

And for the implementation, I plan to add a const static std::string variable in program.cpp to hold the define relative content, and also use the function name like __gen_ocl_internal_fastpath_sin, do you think is it ok? Thanks.

Thanks
Yejun


-----Original Message-----
From: Zhigang Gong [mailto:zhigang.gong at linux.intel.com] 
Sent: Friday, February 14, 2014 3:04 PM
To: Guo, Yejun
Cc: beignet at lists.freedesktop.org
Subject: Re: [Beignet] [PATCH V2] GBE: add param to switch the behavior of math func

I still think that introducing a new pch files is too heavy for this case.
And I just think of another way to achieve the same goal here.

1. You can put the following function to the ocl_stdlib.tmpl.h by default:
"
INLINE_OVERLOADABLE float __gen_ocl_internal_intelnative_sin(float x) {
    return native_sin(x);
}
"

2. And you can add __gen_ocl_internal_intelnative_sin to the vectory proto definition file builtin_vector_proto.def.

3. Then if the STRICT_CONFORMANCE is disabled, then at the program.cpp when you compile the user kernel, you can just simply insert the following content to the head of user kernel and then compile this modified kernel with the single pch file.

"
#ifdef sin
#undef sin
#endif
#define sin __gen_ocl_internal_intelnative_sin
"

What do you think?

On Thu, Feb 13, 2014 at 10:42:19AM +0800, Guo Yejun wrote:
> Add OCL_STRICT_CONFORMANCE to switch the behavior of math func, The 
> funcs will be high precision with perf drops if it is 1, Fast path 
> with good enough precision will be selected if it is 0.
> 
> This change is to add the code basis, with 'sin' implmented as an 
> example, other math functions support will be added later.
> 
> Signed-off-by: Guo Yejun <yejun.guo at intel.com>
> ---
>  backend/CMakeLists.txt          |    3 ++-
>  backend/src/CMakeLists.txt      |   13 ++++++++++---
>  backend/src/GBEConfig.h.in      |    1 +
>  backend/src/backend/program.cpp |   12 +++++++++++-
>  backend/src/ocl_stdlib.tmpl.h   |   27 +++++++++++++++++++++++++++
>  utests/setenv.sh.in             |    1 +
>  6 files changed, 52 insertions(+), 5 deletions(-)
> 
> diff --git a/backend/CMakeLists.txt b/backend/CMakeLists.txt index 
> dd55a4a..08122da 100644
> --- a/backend/CMakeLists.txt
> +++ b/backend/CMakeLists.txt
> @@ -98,8 +98,9 @@ include_directories (${CMAKE_CURRENT_BINARY_DIR})  
> ##############################################################
>  add_subdirectory (src)
>  set(LOCAL_PCH_OBJECT_DIR ${LOCAL_PCH_OBJECT_DIR} PARENT_SCOPE)
> +set(LOCAL_PCH_STRICT_OBJECT_DIR ${LOCAL_PCH_STRICT_OBJECT_DIR} 
> +PARENT_SCOPE)
>  set(LOCAL_PCM_OBJECT_DIR ${LOCAL_PCM_OBJECT_DIR} PARENT_SCOPE)  set 
> (GBE_BIN_GENERATER
> -     OCL_PCM_PATH=${LOCAL_PCM_OBJECT_DIR} OCL_PCH_PATH=${LOCAL_PCH_OBJECT_DIR} ${CMAKE_CURRENT_BINARY_DIR}/src/gbe_bin_generater
> +     OCL_PCM_PATH=${LOCAL_PCM_OBJECT_DIR} 
> + OCL_PCH_PATH=${LOCAL_PCH_OBJECT_DIR} 
> + OCL_PCH_STRICT_PATH=${LOCAL_PCH_STRICT_OBJECT_DIR} 
> + ${CMAKE_CURRENT_BINARY_DIR}/src/gbe_bin_generater
>       PARENT_SCOPE)
>  
> diff --git a/backend/src/CMakeLists.txt b/backend/src/CMakeLists.txt 
> index 33494a0..df36069 100644
> --- a/backend/src/CMakeLists.txt
> +++ b/backend/src/CMakeLists.txt
> @@ -43,6 +43,8 @@ add_custom_command(
>  
>  set (pch_object ${ocl_blob_file}.pch)  set (local_pch_object 
> ${ocl_blob_file}.local.pch)
> +set (pch_strict_object ${ocl_blob_file}.strict.pch) set 
> +(local_pch_strict_object ${ocl_blob_file}.strict.local.pch)
>  # generate pch object
>  if (LLVM_VERSION_NODOT VERSION_GREATER 32)
>      set (clang_cmd -cc1 -x cl -triple spir -ffp-contract=off) @@ 
> -56,15 +58,17 @@ endif (LLVM_VERSION_NODOT VERSION_GREATER 32)  set 
> (clang_cmd ${clang_cmd} -fno-builtin 
> -DGEN7_SAMPLER_CLAMP_BORDER_WORKAROUND)
>  
>  add_custom_command(
> -     OUTPUT ${pch_object}
> -     COMMAND rm -f ${pch_object}
> +     OUTPUT ${pch_object} ${pch_strict_object}
> +     COMMAND rm -f ${pch_object} ${pch_strict_object}
>       COMMAND clang ${clang_cmd} --relocatable-pch -emit-pch -isysroot ${CMAKE_CURRENT_BINARY_DIR} ${ocl_blob_file} -o ${pch_object}
>       COMMAND clang ${clang_cmd} -emit-pch ${ocl_blob_file} -o 
> ${local_pch_object}
> +	 COMMAND clang ${clang_cmd} -DOCL_STRICT_CONFORMANCE=1 --relocatable-pch -emit-pch -isysroot ${CMAKE_CURRENT_BINARY_DIR} ${ocl_blob_file} -o ${pch_strict_object}
> +     COMMAND clang ${clang_cmd} -DOCL_STRICT_CONFORMANCE=1 -emit-pch 
> +${ocl_blob_file} -o ${local_pch_strict_object}
>       DEPENDS ${ocl_blob_file}
>       )
>  
>  add_custom_target(pch_object
> -                  DEPENDS ${pch_object})
> +                  DEPENDS ${pch_object} ${pch_strict_object})
>  
>  macro(ll_add_library ll_lib ll_sources)
>    foreach (ll ${${ll_sources}})
> @@ -196,13 +200,16 @@ TARGET_LINK_LIBRARIES(gbe_bin_generater gbe)  
> #install (FILES backend/program.h DESTINATION include/gen)  install 
> (FILES ${ocl_blob_file} DESTINATION ${LIB_INSTALL_DIR}/beignet)  
> install (FILES ${pch_object} DESTINATION ${LIB_INSTALL_DIR}/beignet)
> +install (FILES ${pch_strict_object} DESTINATION 
> +${LIB_INSTALL_DIR}/beignet)
>  install (FILES ${CMAKE_CURRENT_BINARY_DIR}/${pcm_lib} DESTINATION 
> ${LIB_INSTALL_DIR}/beignet)  # When build beignet itself, we need to 
> export the local precompiled header file and precompiled module  # file to libcl and utests.
>  set (LOCAL_PCH_OBJECT_DIR 
> "${local_pch_object}:${beignet_install_path}/ocl_stdlib.h.pch" 
> PARENT_SCOPE)
> +set (LOCAL_PCH_STRICT_OBJECT_DIR 
> +"${local_pch_strict_object}:${beignet_install_path}/ocl_stdlib.h.stri
> +ct.pch" PARENT_SCOPE)
>  set (LOCAL_PCM_OBJECT_DIR 
> "${CMAKE_CURRENT_BINARY_DIR}/${pcm_lib}:${beignet_install_path}/${pcm_
> lib}" PARENT_SCOPE)
>  
>  set (PCH_OBJECT_DIR "${beignet_install_path}/ocl_stdlib.h.pch")
> +set (PCH_STRICT_OBJECT_DIR 
> +"${beignet_install_path}/ocl_stdlib.h.strict.pch")
>  set (PCM_OBJECT_DIR "${beignet_install_path}/${pcm_lib}")
>  configure_file (
>    "GBEConfig.h.in"
> diff --git a/backend/src/GBEConfig.h.in b/backend/src/GBEConfig.h.in 
> index 5bc09b8..c446754 100644
> --- a/backend/src/GBEConfig.h.in
> +++ b/backend/src/GBEConfig.h.in
> @@ -2,4 +2,5 @@
>  #define LIBGBE_VERSION_MAJOR @LIBGBE_VERSION_MAJOR@  #define 
> LIBGBE_VERSION_MINOR @LIBGBE_VERSION_MINOR@  #define PCH_OBJECT_DIR 
> "@PCH_OBJECT_DIR@"
> +#define PCH_STRICT_OBJECT_DIR "@PCH_STRICT_OBJECT_DIR@"
>  #define PCM_OBJECT_DIR "@PCM_OBJECT_DIR@"
> diff --git a/backend/src/backend/program.cpp 
> b/backend/src/backend/program.cpp index 2492a8b..496a9a0 100644
> --- a/backend/src/backend/program.cpp
> +++ b/backend/src/backend/program.cpp
> @@ -466,6 +466,7 @@ namespace gbe {
>  
>    BVAR(OCL_OUTPUT_BUILD_LOG, false);
>    SVAR(OCL_PCH_PATH, PCH_OBJECT_DIR);
> +  SVAR(OCL_PCH_STRICT_PATH, PCH_STRICT_OBJECT_DIR);
>    SVAR(OCL_PCM_PATH, PCM_OBJECT_DIR);
>  
>    static bool buildModuleFromSource(const char* input, const char* 
> output, std::string options, @@ -646,6 +647,7 @@ namespace gbe {
>    extern std::string ocl_stdlib_str;
>  
>    BVAR(OCL_USE_PCH, true);
> +  BVAR(OCL_STRICT_CONFORMANCE, true);
>    static gbe_program programNewFromSource(const char *source,
>                                            size_t stringSize,
>                                            const char *options, @@ 
> -743,6 +745,10 @@ namespace gbe {
>      }
>  
>      std::string dirs = OCL_PCH_PATH;
> +    if (OCL_STRICT_CONFORMANCE){
> +      dirs = OCL_PCH_STRICT_PATH;
> +    }
> +
>      std::istringstream idirs(dirs);
>      std::string pchFileName;
>  
> @@ -757,8 +763,12 @@ namespace gbe {
>        clOpt += " -include-pch ";
>        clOpt += pchFileName;
>        clOpt += " ";
> -    } else
> +    } else {
> +      if (OCL_STRICT_CONFORMANCE){
> +        clOpt += " -DOCL_STRICT_CONFORMANCE=1 ";
> +      }
>        fwrite(ocl_stdlib_str.c_str(), strlen(ocl_stdlib_str.c_str()), 
> 1, clFile);
> +    }
>  
>      // Write the source to the cl file
>      fwrite(source, strlen(source), 1, clFile); diff --git 
> a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h index 
> d191b8e..8401f0f 100755
> --- a/backend/src/ocl_stdlib.tmpl.h
> +++ b/backend/src/ocl_stdlib.tmpl.h
> @@ -4462,6 +4462,33 @@ INLINE_OVERLOADABLE  size_t get_image_array_size(image1d_array_t image)
>    { return __gen_ocl_get_image_array_size(image); }  #endif
>  
> +
> +
> +/// It is required by OpenCL that built-in math functions (without 
> +native_/half_) /// have high precision, but GPU hardware is designed 
> +to be good enough precision, /// so most functions will be emulated with higph and make performance drops.
> +/// This is not an issue if the applications could choose the proper 
> +functions, for /// example, use native_* functions for cases without highp requirement.
> +/// Due to the fact that applications always use math functions 
> +without native_/half_, /// environment variable 
> +OCL_STRICT_CONFORMANCE is introduced to switch the behavior /// of the math functions.
> +/// The math functions will be emulated with highp if 
> +OCL_STRICT_CONFORMANCE is 1 (the following code block is disable), /// and choose fast path with good enough precision if OCL_STRICT_CONFORMANCE is 0 (the following code block is enabled).
> +#ifndef OCL_STRICT_CONFORMANCE
> +
> +#ifdef sin
> +#undef sin
> +#endif
> +#define sin __gen_ocl_internal_intelnative_sin
> +INLINE_OVERLOADABLE float __gen_ocl_internal_intelnative_sin(float x) 
> +{
> +    return native_sin(x);
> +}
> +
> +#endif //OCL_STRICT_CONFORMANCE
> +
> +
> +
>  #pragma OPENCL EXTENSION cl_khr_fp64 : disable
>  
>  #undef DECL_IMAGE
> diff --git a/utests/setenv.sh.in b/utests/setenv.sh.in index 
> ad77369..17a2e28 100644
> --- a/utests/setenv.sh.in
> +++ b/utests/setenv.sh.in
> @@ -2,4 +2,5 @@
>  #
>  export OCL_PCM_PATH=@LOCAL_PCM_OBJECT_DIR@
>  export OCL_PCH_PATH=@LOCAL_PCH_OBJECT_DIR@
> +export OCL_PCH_STRICT_PATH=@LOCAL_PCH_STRICT_OBJECT_DIR@
>  export OCL_KERNEL_PATH=@CMAKE_CURRENT_SOURCE_DIR@/../kernels
> --
> 1.7.9.5
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list