[Beignet] [PATCH V2] GBE: add param to switch the behavior of math func

Zhigang Gong zhigang.gong at linux.intel.com
Fri Feb 14 08:03:50 CET 2014


I still think that introducing new pch files is too heavy for this case.
And I just thought of another way to achieve the same goal here.

1. You can put the following function into ocl_stdlib.tmpl.h by default:
"
INLINE_OVERLOADABLE float __gen_ocl_internal_intelnative_sin(float x) {
    return native_sin(x);
}
"

2. And you can add __gen_ocl_internal_intelnative_sin to the vector proto
definition file builtin_vector_proto.def.

3. Then, if STRICT_CONFORMANCE is disabled, when you compile the user kernel in
program.cpp you can simply insert the following content at the head of the
user kernel and then compile this modified kernel with the single pch file.

"
#ifdef sin
#undef sin
#endif
#define sin __gen_ocl_internal_intelnative_sin
"

What do you think?

On Thu, Feb 13, 2014 at 10:42:19AM +0800, Guo Yejun wrote:
> Add OCL_STRICT_CONFORMANCE to switch the behavior of math func,
> The funcs will be high precision with perf drops if it is 1,
> Fast path with good enough precision will be selected if it is 0.
> 
> This change is to add the code basis, with 'sin' implemented as
> an example; support for other math functions will be added later.
> 
> Signed-off-by: Guo Yejun <yejun.guo at intel.com>
> ---
>  backend/CMakeLists.txt          |    3 ++-
>  backend/src/CMakeLists.txt      |   13 ++++++++++---
>  backend/src/GBEConfig.h.in      |    1 +
>  backend/src/backend/program.cpp |   12 +++++++++++-
>  backend/src/ocl_stdlib.tmpl.h   |   27 +++++++++++++++++++++++++++
>  utests/setenv.sh.in             |    1 +
>  6 files changed, 52 insertions(+), 5 deletions(-)
> 
> diff --git a/backend/CMakeLists.txt b/backend/CMakeLists.txt
> index dd55a4a..08122da 100644
> --- a/backend/CMakeLists.txt
> +++ b/backend/CMakeLists.txt
> @@ -98,8 +98,9 @@ include_directories (${CMAKE_CURRENT_BINARY_DIR})
>  ##############################################################
>  add_subdirectory (src)
>  set(LOCAL_PCH_OBJECT_DIR ${LOCAL_PCH_OBJECT_DIR} PARENT_SCOPE)
> +set(LOCAL_PCH_STRICT_OBJECT_DIR ${LOCAL_PCH_STRICT_OBJECT_DIR} PARENT_SCOPE)
>  set(LOCAL_PCM_OBJECT_DIR ${LOCAL_PCM_OBJECT_DIR} PARENT_SCOPE)
>  set (GBE_BIN_GENERATER
> -     OCL_PCM_PATH=${LOCAL_PCM_OBJECT_DIR} OCL_PCH_PATH=${LOCAL_PCH_OBJECT_DIR} ${CMAKE_CURRENT_BINARY_DIR}/src/gbe_bin_generater
> +     OCL_PCM_PATH=${LOCAL_PCM_OBJECT_DIR} OCL_PCH_PATH=${LOCAL_PCH_OBJECT_DIR} OCL_PCH_STRICT_PATH=${LOCAL_PCH_STRICT_OBJECT_DIR} ${CMAKE_CURRENT_BINARY_DIR}/src/gbe_bin_generater
>       PARENT_SCOPE)
>  
> diff --git a/backend/src/CMakeLists.txt b/backend/src/CMakeLists.txt
> index 33494a0..df36069 100644
> --- a/backend/src/CMakeLists.txt
> +++ b/backend/src/CMakeLists.txt
> @@ -43,6 +43,8 @@ add_custom_command(
>  
>  set (pch_object ${ocl_blob_file}.pch)
>  set (local_pch_object ${ocl_blob_file}.local.pch)
> +set (pch_strict_object ${ocl_blob_file}.strict.pch)
> +set (local_pch_strict_object ${ocl_blob_file}.strict.local.pch)
>  # generate pch object
>  if (LLVM_VERSION_NODOT VERSION_GREATER 32)
>      set (clang_cmd -cc1 -x cl -triple spir -ffp-contract=off)
> @@ -56,15 +58,17 @@ endif (LLVM_VERSION_NODOT VERSION_GREATER 32)
>  set (clang_cmd ${clang_cmd} -fno-builtin -DGEN7_SAMPLER_CLAMP_BORDER_WORKAROUND)
>  
>  add_custom_command(
> -     OUTPUT ${pch_object}
> -     COMMAND rm -f ${pch_object}
> +     OUTPUT ${pch_object} ${pch_strict_object}
> +     COMMAND rm -f ${pch_object} ${pch_strict_object}
>       COMMAND clang ${clang_cmd} --relocatable-pch -emit-pch -isysroot ${CMAKE_CURRENT_BINARY_DIR} ${ocl_blob_file} -o ${pch_object}
>       COMMAND clang ${clang_cmd} -emit-pch ${ocl_blob_file} -o ${local_pch_object}
> +	 COMMAND clang ${clang_cmd} -DOCL_STRICT_CONFORMANCE=1 --relocatable-pch -emit-pch -isysroot ${CMAKE_CURRENT_BINARY_DIR} ${ocl_blob_file} -o ${pch_strict_object}
> +     COMMAND clang ${clang_cmd} -DOCL_STRICT_CONFORMANCE=1 -emit-pch ${ocl_blob_file} -o ${local_pch_strict_object}
>       DEPENDS ${ocl_blob_file}
>       )
>  
>  add_custom_target(pch_object
> -                  DEPENDS ${pch_object})
> +                  DEPENDS ${pch_object} ${pch_strict_object})
>  
>  macro(ll_add_library ll_lib ll_sources)
>    foreach (ll ${${ll_sources}})
> @@ -196,13 +200,16 @@ TARGET_LINK_LIBRARIES(gbe_bin_generater gbe)
>  #install (FILES backend/program.h DESTINATION include/gen)
>  install (FILES ${ocl_blob_file} DESTINATION ${LIB_INSTALL_DIR}/beignet)
>  install (FILES ${pch_object} DESTINATION ${LIB_INSTALL_DIR}/beignet)
> +install (FILES ${pch_strict_object} DESTINATION ${LIB_INSTALL_DIR}/beignet)
>  install (FILES ${CMAKE_CURRENT_BINARY_DIR}/${pcm_lib} DESTINATION ${LIB_INSTALL_DIR}/beignet)
>  # When build beignet itself, we need to export the local precompiled header file and precompiled module
>  # file to libcl and utests.
>  set (LOCAL_PCH_OBJECT_DIR "${local_pch_object}:${beignet_install_path}/ocl_stdlib.h.pch" PARENT_SCOPE)
> +set (LOCAL_PCH_STRICT_OBJECT_DIR "${local_pch_strict_object}:${beignet_install_path}/ocl_stdlib.h.strict.pch" PARENT_SCOPE)
>  set (LOCAL_PCM_OBJECT_DIR "${CMAKE_CURRENT_BINARY_DIR}/${pcm_lib}:${beignet_install_path}/${pcm_lib}" PARENT_SCOPE)
>  
>  set (PCH_OBJECT_DIR "${beignet_install_path}/ocl_stdlib.h.pch")
> +set (PCH_STRICT_OBJECT_DIR "${beignet_install_path}/ocl_stdlib.h.strict.pch")
>  set (PCM_OBJECT_DIR "${beignet_install_path}/${pcm_lib}")
>  configure_file (
>    "GBEConfig.h.in"
> diff --git a/backend/src/GBEConfig.h.in b/backend/src/GBEConfig.h.in
> index 5bc09b8..c446754 100644
> --- a/backend/src/GBEConfig.h.in
> +++ b/backend/src/GBEConfig.h.in
> @@ -2,4 +2,5 @@
>  #define LIBGBE_VERSION_MAJOR @LIBGBE_VERSION_MAJOR@
>  #define LIBGBE_VERSION_MINOR @LIBGBE_VERSION_MINOR@
>  #define PCH_OBJECT_DIR "@PCH_OBJECT_DIR@"
> +#define PCH_STRICT_OBJECT_DIR "@PCH_STRICT_OBJECT_DIR@"
>  #define PCM_OBJECT_DIR "@PCM_OBJECT_DIR@"
> diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp
> index 2492a8b..496a9a0 100644
> --- a/backend/src/backend/program.cpp
> +++ b/backend/src/backend/program.cpp
> @@ -466,6 +466,7 @@ namespace gbe {
>  
>    BVAR(OCL_OUTPUT_BUILD_LOG, false);
>    SVAR(OCL_PCH_PATH, PCH_OBJECT_DIR);
> +  SVAR(OCL_PCH_STRICT_PATH, PCH_STRICT_OBJECT_DIR);
>    SVAR(OCL_PCM_PATH, PCM_OBJECT_DIR);
>  
>    static bool buildModuleFromSource(const char* input, const char* output, std::string options,
> @@ -646,6 +647,7 @@ namespace gbe {
>    extern std::string ocl_stdlib_str;
>  
>    BVAR(OCL_USE_PCH, true);
> +  BVAR(OCL_STRICT_CONFORMANCE, true);
>    static gbe_program programNewFromSource(const char *source,
>                                            size_t stringSize,
>                                            const char *options,
> @@ -743,6 +745,10 @@ namespace gbe {
>      }
>  
>      std::string dirs = OCL_PCH_PATH;
> +    if (OCL_STRICT_CONFORMANCE){
> +      dirs = OCL_PCH_STRICT_PATH;
> +    }
> +
>      std::istringstream idirs(dirs);
>      std::string pchFileName;
>  
> @@ -757,8 +763,12 @@ namespace gbe {
>        clOpt += " -include-pch ";
>        clOpt += pchFileName;
>        clOpt += " ";
> -    } else
> +    } else {
> +      if (OCL_STRICT_CONFORMANCE){
> +        clOpt += " -DOCL_STRICT_CONFORMANCE=1 ";
> +      }
>        fwrite(ocl_stdlib_str.c_str(), strlen(ocl_stdlib_str.c_str()), 1, clFile);
> +    }
>  
>      // Write the source to the cl file
>      fwrite(source, strlen(source), 1, clFile);
> diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h
> index d191b8e..8401f0f 100755
> --- a/backend/src/ocl_stdlib.tmpl.h
> +++ b/backend/src/ocl_stdlib.tmpl.h
> @@ -4462,6 +4462,33 @@ INLINE_OVERLOADABLE  size_t get_image_array_size(image1d_array_t image)
>    { return __gen_ocl_get_image_array_size(image); }
>  #endif
>  
> +
> +
> +/// It is required by OpenCL that built-in math functions (without native_/half_)
> +/// have high precision, but GPU hardware is designed to be good enough precision,
> +/// so most functions will be emulated with highp and make performance drops.
> +/// This is not an issue if the applications could choose the proper functions, for
> +/// example, use native_* functions for cases without highp requirement.
> +/// Due to the fact that applications always use math functions without native_/half_,
> +/// environment variable OCL_STRICT_CONFORMANCE is introduced to switch the behavior
> +/// of the math functions.
> +/// The math functions will be emulated with highp if OCL_STRICT_CONFORMANCE is 1 (the following code block is disabled),
> +/// and choose fast path with good enough precision if OCL_STRICT_CONFORMANCE is 0 (the following code block is enabled).
> +#ifndef OCL_STRICT_CONFORMANCE
> +
> +#ifdef sin
> +#undef sin
> +#endif
> +#define sin __gen_ocl_internal_intelnative_sin
> +INLINE_OVERLOADABLE float __gen_ocl_internal_intelnative_sin(float x)
> +{
> +    return native_sin(x);
> +}
> +
> +#endif //OCL_STRICT_CONFORMANCE
> +
> +
> +
>  #pragma OPENCL EXTENSION cl_khr_fp64 : disable
>  
>  #undef DECL_IMAGE
> diff --git a/utests/setenv.sh.in b/utests/setenv.sh.in
> index ad77369..17a2e28 100644
> --- a/utests/setenv.sh.in
> +++ b/utests/setenv.sh.in
> @@ -2,4 +2,5 @@
>  #
>  export OCL_PCM_PATH=@LOCAL_PCM_OBJECT_DIR@
>  export OCL_PCH_PATH=@LOCAL_PCH_OBJECT_DIR@
> +export OCL_PCH_STRICT_PATH=@LOCAL_PCH_STRICT_OBJECT_DIR@
>  export OCL_KERNEL_PATH=@CMAKE_CURRENT_SOURCE_DIR@/../kernels
> -- 
> 1.7.9.5
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list