[Beignet] [PATCH V2] GBE: add param to switch the behavior of math func

Sun, Yi yi.sun at intel.com
Fri Feb 14 02:27:51 CET 2014


BTW, what kind of precision can be accepted when OCL_STRICT_CONFORMANCE is 0?
And is it necessary for QA to implement the conformance test case when OCL_STRICT_CONFORMANCE is 0?

Thanks
  --Sun, Yi

> -----Original Message-----
> From: beignet-bounces at lists.freedesktop.org
> [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of Guo Yejun
> Sent: Thursday, February 13, 2014 10:42 AM
> To: beignet at lists.freedesktop.org
> Cc: Guo, Yejun
> Subject: [Beignet] [PATCH V2] GBE: add param to switch the behavior of math
> func
> 
> Add OCL_STRICT_CONFORMANCE to switch the behavior of the math functions. If it
> is 1, the functions are high precision at the cost of performance; if it is 0,
> a fast path with good enough precision is selected.
> 
> This change adds the code basis, with 'sin' implemented as an example;
> support for other math functions will be added later.
> 
> Signed-off-by: Guo Yejun <yejun.guo at intel.com>
> ---
>  backend/CMakeLists.txt          |    3 ++-
>  backend/src/CMakeLists.txt      |   13 ++++++++++---
>  backend/src/GBEConfig.h.in      |    1 +
>  backend/src/backend/program.cpp |   12 +++++++++++-
>  backend/src/ocl_stdlib.tmpl.h   |   27 +++++++++++++++++++++++++++
>  utests/setenv.sh.in             |    1 +
>  6 files changed, 52 insertions(+), 5 deletions(-)
> 
> diff --git a/backend/CMakeLists.txt b/backend/CMakeLists.txt index
> dd55a4a..08122da 100644
> --- a/backend/CMakeLists.txt
> +++ b/backend/CMakeLists.txt
> @@ -98,8 +98,9 @@ include_directories (${CMAKE_CURRENT_BINARY_DIR})
> ##############################################################
>  add_subdirectory (src)
>  set(LOCAL_PCH_OBJECT_DIR ${LOCAL_PCH_OBJECT_DIR} PARENT_SCOPE)
> +set(LOCAL_PCH_STRICT_OBJECT_DIR ${LOCAL_PCH_STRICT_OBJECT_DIR}
> +PARENT_SCOPE)
>  set(LOCAL_PCM_OBJECT_DIR ${LOCAL_PCM_OBJECT_DIR} PARENT_SCOPE)
> set (GBE_BIN_GENERATER
> -     OCL_PCM_PATH=${LOCAL_PCM_OBJECT_DIR}
> OCL_PCH_PATH=${LOCAL_PCH_OBJECT_DIR}
> ${CMAKE_CURRENT_BINARY_DIR}/src/gbe_bin_generater
> +     OCL_PCM_PATH=${LOCAL_PCM_OBJECT_DIR}
> + OCL_PCH_PATH=${LOCAL_PCH_OBJECT_DIR}
> + OCL_PCH_STRICT_PATH=${LOCAL_PCH_STRICT_OBJECT_DIR}
> + ${CMAKE_CURRENT_BINARY_DIR}/src/gbe_bin_generater
>       PARENT_SCOPE)
> 
> diff --git a/backend/src/CMakeLists.txt b/backend/src/CMakeLists.txt index
> 33494a0..df36069 100644
> --- a/backend/src/CMakeLists.txt
> +++ b/backend/src/CMakeLists.txt
> @@ -43,6 +43,8 @@ add_custom_command(
> 
>  set (pch_object ${ocl_blob_file}.pch)
>  set (local_pch_object ${ocl_blob_file}.local.pch)
> +set (pch_strict_object ${ocl_blob_file}.strict.pch) set
> +(local_pch_strict_object ${ocl_blob_file}.strict.local.pch)
>  # generate pch object
>  if (LLVM_VERSION_NODOT VERSION_GREATER 32)
>      set (clang_cmd -cc1 -x cl -triple spir -ffp-contract=off) @@ -56,15 +58,17
> @@ endif (LLVM_VERSION_NODOT VERSION_GREATER 32)  set (clang_cmd
> ${clang_cmd} -fno-builtin
> -DGEN7_SAMPLER_CLAMP_BORDER_WORKAROUND)
> 
>  add_custom_command(
> -     OUTPUT ${pch_object}
> -     COMMAND rm -f ${pch_object}
> +     OUTPUT ${pch_object} ${pch_strict_object}
> +     COMMAND rm -f ${pch_object} ${pch_strict_object}
>       COMMAND clang ${clang_cmd} --relocatable-pch -emit-pch -isysroot
> ${CMAKE_CURRENT_BINARY_DIR} ${ocl_blob_file} -o ${pch_object}
>       COMMAND clang ${clang_cmd} -emit-pch ${ocl_blob_file} -o
> ${local_pch_object}
> +	 COMMAND clang ${clang_cmd} -DOCL_STRICT_CONFORMANCE=1
> --relocatable-pch -emit-pch -isysroot ${CMAKE_CURRENT_BINARY_DIR}
> ${ocl_blob_file} -o ${pch_strict_object}
> +     COMMAND clang ${clang_cmd} -DOCL_STRICT_CONFORMANCE=1
> -emit-pch
> +${ocl_blob_file} -o ${local_pch_strict_object}
>       DEPENDS ${ocl_blob_file}
>       )
> 
>  add_custom_target(pch_object
> -                  DEPENDS ${pch_object})
> +                  DEPENDS ${pch_object} ${pch_strict_object})
> 
>  macro(ll_add_library ll_lib ll_sources)
>    foreach (ll ${${ll_sources}})
> @@ -196,13 +200,16 @@ TARGET_LINK_LIBRARIES(gbe_bin_generater gbe)
> #install (FILES backend/program.h DESTINATION include/gen)  install (FILES
> ${ocl_blob_file} DESTINATION ${LIB_INSTALL_DIR}/beignet)  install (FILES
> ${pch_object} DESTINATION ${LIB_INSTALL_DIR}/beignet)
> +install (FILES ${pch_strict_object} DESTINATION
> +${LIB_INSTALL_DIR}/beignet)
>  install (FILES ${CMAKE_CURRENT_BINARY_DIR}/${pcm_lib} DESTINATION
> ${LIB_INSTALL_DIR}/beignet)  # When build beignet itself, we need to export
> the local precompiled header file and precompiled module  # file to libcl and
> utests.
>  set (LOCAL_PCH_OBJECT_DIR
> "${local_pch_object}:${beignet_install_path}/ocl_stdlib.h.pch" PARENT_SCOPE)
> +set (LOCAL_PCH_STRICT_OBJECT_DIR
> +"${local_pch_strict_object}:${beignet_install_path}/ocl_stdlib.h.strict
> +.pch" PARENT_SCOPE)
>  set (LOCAL_PCM_OBJECT_DIR
> "${CMAKE_CURRENT_BINARY_DIR}/${pcm_lib}:${beignet_install_path}/${pcm
> _lib}" PARENT_SCOPE)
> 
>  set (PCH_OBJECT_DIR "${beignet_install_path}/ocl_stdlib.h.pch")
> +set (PCH_STRICT_OBJECT_DIR
> +"${beignet_install_path}/ocl_stdlib.h.strict.pch")
>  set (PCM_OBJECT_DIR "${beignet_install_path}/${pcm_lib}")
>  configure_file (
>    "GBEConfig.h.in"
> diff --git a/backend/src/GBEConfig.h.in b/backend/src/GBEConfig.h.in index
> 5bc09b8..c446754 100644
> --- a/backend/src/GBEConfig.h.in
> +++ b/backend/src/GBEConfig.h.in
> @@ -2,4 +2,5 @@
>  #define LIBGBE_VERSION_MAJOR @LIBGBE_VERSION_MAJOR@  #define
> LIBGBE_VERSION_MINOR @LIBGBE_VERSION_MINOR@  #define
> PCH_OBJECT_DIR "@PCH_OBJECT_DIR@"
> +#define PCH_STRICT_OBJECT_DIR "@PCH_STRICT_OBJECT_DIR@"
>  #define PCM_OBJECT_DIR "@PCM_OBJECT_DIR@"
> diff --git a/backend/src/backend/program.cpp
> b/backend/src/backend/program.cpp index 2492a8b..496a9a0 100644
> --- a/backend/src/backend/program.cpp
> +++ b/backend/src/backend/program.cpp
> @@ -466,6 +466,7 @@ namespace gbe {
> 
>    BVAR(OCL_OUTPUT_BUILD_LOG, false);
>    SVAR(OCL_PCH_PATH, PCH_OBJECT_DIR);
> +  SVAR(OCL_PCH_STRICT_PATH, PCH_STRICT_OBJECT_DIR);
>    SVAR(OCL_PCM_PATH, PCM_OBJECT_DIR);
> 
>    static bool buildModuleFromSource(const char* input, const char* output,
> std::string options, @@ -646,6 +647,7 @@ namespace gbe {
>    extern std::string ocl_stdlib_str;
> 
>    BVAR(OCL_USE_PCH, true);
> +  BVAR(OCL_STRICT_CONFORMANCE, true);
>    static gbe_program programNewFromSource(const char *source,
>                                            size_t stringSize,
>                                            const char *options, @@
> -743,6 +745,10 @@ namespace gbe {
>      }
> 
>      std::string dirs = OCL_PCH_PATH;
> +    if (OCL_STRICT_CONFORMANCE){
> +      dirs = OCL_PCH_STRICT_PATH;
> +    }
> +
>      std::istringstream idirs(dirs);
>      std::string pchFileName;
> 
> @@ -757,8 +763,12 @@ namespace gbe {
>        clOpt += " -include-pch ";
>        clOpt += pchFileName;
>        clOpt += " ";
> -    } else
> +    } else {
> +      if (OCL_STRICT_CONFORMANCE){
> +        clOpt += " -DOCL_STRICT_CONFORMANCE=1 ";
> +      }
>        fwrite(ocl_stdlib_str.c_str(), strlen(ocl_stdlib_str.c_str()), 1, clFile);
> +    }
> 
>      // Write the source to the cl file
>      fwrite(source, strlen(source), 1, clFile); diff --git
> a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h index
> d191b8e..8401f0f 100755
> --- a/backend/src/ocl_stdlib.tmpl.h
> +++ b/backend/src/ocl_stdlib.tmpl.h
> @@ -4462,6 +4462,33 @@ INLINE_OVERLOADABLE  size_t
> get_image_array_size(image1d_array_t image)
>    { return __gen_ocl_get_image_array_size(image); }  #endif
> 
> +
> +
> +/// It is required by OpenCL that built-in math functions (without
> +native_/half_) /// have high precision, but GPU hardware is designed to
> +be good enough precision, /// so most functions will be emulated with highp
> and make performance drops.
> +/// This is not an issue if the applications could choose the proper
> +functions, for /// example, use native_* functions for cases without highp
> requirement.
> +/// Due to the fact that applications always use math functions without
> +native_/half_, /// environment variable OCL_STRICT_CONFORMANCE is
> +introduced to switch the behavior /// of the math functions.
> +/// The math functions will be emulated with highp if
> +OCL_STRICT_CONFORMANCE is 1 (the following code block is disabled), /// and
> choose fast path with good enough precision if OCL_STRICT_CONFORMANCE is
> 0 (the following code block is enabled).
> +#ifndef OCL_STRICT_CONFORMANCE
> +
> +#ifdef sin
> +#undef sin
> +#endif
> +#define sin __gen_ocl_internal_intelnative_sin
> +INLINE_OVERLOADABLE float __gen_ocl_internal_intelnative_sin(float x) {
> +    return native_sin(x);
> +}
> +
> +#endif //OCL_STRICT_CONFORMANCE
> +
> +
> +
>  #pragma OPENCL EXTENSION cl_khr_fp64 : disable
> 
>  #undef DECL_IMAGE
> diff --git a/utests/setenv.sh.in b/utests/setenv.sh.in index ad77369..17a2e28
> 100644
> --- a/utests/setenv.sh.in
> +++ b/utests/setenv.sh.in
> @@ -2,4 +2,5 @@
>  #
>  export OCL_PCM_PATH=@LOCAL_PCM_OBJECT_DIR@
>  export OCL_PCH_PATH=@LOCAL_PCH_OBJECT_DIR@
> +export OCL_PCH_STRICT_PATH=@LOCAL_PCH_STRICT_OBJECT_DIR@
>  export OCL_KERNEL_PATH=@CMAKE_CURRENT_SOURCE_DIR@/../kernels
> --
> 1.7.9.5
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list