[Beignet] [PATCH V2] GBE: add param to switch the behavior of math func

Guo, Yejun yejun.guo at intel.com
Fri Feb 14 02:55:54 CET 2014


Hi Yi,

The fast-path precision (when OCL_STRICT_CONFORMANCE is 0) depends on the GPU hardware instructions, so we have to determine the precision of each function one by one.

For the conformance test suite, it would be nice to add fast-path tests, but I guess some tests will be meaningless if the maximum precision error is very large.

Thanks
Yejun

-----Original Message-----
From: Sun, Yi 
Sent: Friday, February 14, 2014 9:28 AM
To: Guo, Yejun; beignet at lists.freedesktop.org
Cc: Guo, Yejun
Subject: RE: [Beignet] [PATCH V2] GBE: add param to switch the behavior of math func

BTW, what kind of precision can be accepted when OCL_STRICT_CONFORMANCE is 0?
And is it necessary for QA to implement the conformance test case when OCL_STRICT_CONFORMANCE is 0?

Thanks
  --Sun, Yi

> -----Original Message-----
> From: beignet-bounces at lists.freedesktop.org
> [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of Guo Yejun
> Sent: Thursday, February 13, 2014 10:42 AM
> To: beignet at lists.freedesktop.org
> Cc: Guo, Yejun
> Subject: [Beignet] [PATCH V2] GBE: add param to switch the behavior of 
> math func
> 
> Add OCL_STRICT_CONFORMANCE to switch the behavior of the math functions. The
> functions will be high precision, at a performance cost, if it is 1; the fast
> path with good-enough precision will be selected if it is 0.
> 
> This change adds the code basis, with 'sin' implemented as an
> example; support for other math functions will be added later.
> 
> Signed-off-by: Guo Yejun <yejun.guo at intel.com>
> ---
>  backend/CMakeLists.txt          |    3 ++-
>  backend/src/CMakeLists.txt      |   13 ++++++++++---
>  backend/src/GBEConfig.h.in      |    1 +
>  backend/src/backend/program.cpp |   12 +++++++++++-
>  backend/src/ocl_stdlib.tmpl.h   |   27 +++++++++++++++++++++++++++
>  utests/setenv.sh.in             |    1 +
>  6 files changed, 52 insertions(+), 5 deletions(-)
> 
> diff --git a/backend/CMakeLists.txt b/backend/CMakeLists.txt index 
> dd55a4a..08122da 100644
> --- a/backend/CMakeLists.txt
> +++ b/backend/CMakeLists.txt
> @@ -98,8 +98,9 @@ include_directories (${CMAKE_CURRENT_BINARY_DIR}) 
> ##############################################################
>  add_subdirectory (src)
>  set(LOCAL_PCH_OBJECT_DIR ${LOCAL_PCH_OBJECT_DIR} PARENT_SCOPE)
> +set(LOCAL_PCH_STRICT_OBJECT_DIR ${LOCAL_PCH_STRICT_OBJECT_DIR}
> +PARENT_SCOPE)
>  set(LOCAL_PCM_OBJECT_DIR ${LOCAL_PCM_OBJECT_DIR} PARENT_SCOPE) set 
> (GBE_BIN_GENERATER
> -     OCL_PCM_PATH=${LOCAL_PCM_OBJECT_DIR}
> OCL_PCH_PATH=${LOCAL_PCH_OBJECT_DIR}
> ${CMAKE_CURRENT_BINARY_DIR}/src/gbe_bin_generater
> +     OCL_PCM_PATH=${LOCAL_PCM_OBJECT_DIR}
> + OCL_PCH_PATH=${LOCAL_PCH_OBJECT_DIR}
> + OCL_PCH_STRICT_PATH=${LOCAL_PCH_STRICT_OBJECT_DIR}
> + ${CMAKE_CURRENT_BINARY_DIR}/src/gbe_bin_generater
>       PARENT_SCOPE)
> 
> diff --git a/backend/src/CMakeLists.txt b/backend/src/CMakeLists.txt 
> index
> 33494a0..df36069 100644
> --- a/backend/src/CMakeLists.txt
> +++ b/backend/src/CMakeLists.txt
> @@ -43,6 +43,8 @@ add_custom_command(
> 
>  set (pch_object ${ocl_blob_file}.pch)  set (local_pch_object 
> ${ocl_blob_file}.local.pch)
> +set (pch_strict_object ${ocl_blob_file}.strict.pch) set 
> +(local_pch_strict_object ${ocl_blob_file}.strict.local.pch)
>  # generate pch object
>  if (LLVM_VERSION_NODOT VERSION_GREATER 32)
>      set (clang_cmd -cc1 -x cl -triple spir -ffp-contract=off) @@ 
> -56,15 +58,17 @@ endif (LLVM_VERSION_NODOT VERSION_GREATER 32)  set 
> (clang_cmd ${clang_cmd} -fno-builtin
> -DGEN7_SAMPLER_CLAMP_BORDER_WORKAROUND)
> 
>  add_custom_command(
> -     OUTPUT ${pch_object}
> -     COMMAND rm -f ${pch_object}
> +     OUTPUT ${pch_object} ${pch_strict_object}
> +     COMMAND rm -f ${pch_object} ${pch_strict_object}
>       COMMAND clang ${clang_cmd} --relocatable-pch -emit-pch -isysroot 
> ${CMAKE_CURRENT_BINARY_DIR} ${ocl_blob_file} -o ${pch_object}
>       COMMAND clang ${clang_cmd} -emit-pch ${ocl_blob_file} -o 
> ${local_pch_object}
> +	 COMMAND clang ${clang_cmd} -DOCL_STRICT_CONFORMANCE=1
> --relocatable-pch -emit-pch -isysroot ${CMAKE_CURRENT_BINARY_DIR} 
> ${ocl_blob_file} -o ${pch_strict_object}
> +     COMMAND clang ${clang_cmd} -DOCL_STRICT_CONFORMANCE=1
> -emit-pch
> +${ocl_blob_file} -o ${local_pch_strict_object}
>       DEPENDS ${ocl_blob_file}
>       )
> 
>  add_custom_target(pch_object
> -                  DEPENDS ${pch_object})
> +                  DEPENDS ${pch_object} ${pch_strict_object})
> 
>  macro(ll_add_library ll_lib ll_sources)
>    foreach (ll ${${ll_sources}})
> @@ -196,13 +200,16 @@ TARGET_LINK_LIBRARIES(gbe_bin_generater gbe) 
> #install (FILES backend/program.h DESTINATION include/gen)  install 
> (FILES ${ocl_blob_file} DESTINATION ${LIB_INSTALL_DIR}/beignet)  
> install (FILES ${pch_object} DESTINATION ${LIB_INSTALL_DIR}/beignet)
> +install (FILES ${pch_strict_object} DESTINATION
> +${LIB_INSTALL_DIR}/beignet)
>  install (FILES ${CMAKE_CURRENT_BINARY_DIR}/${pcm_lib} DESTINATION
> ${LIB_INSTALL_DIR}/beignet)  # When build beignet itself, we need to 
> export the local precompiled header file and precompiled module  # 
> file to libcl and utests.
>  set (LOCAL_PCH_OBJECT_DIR
> "${local_pch_object}:${beignet_install_path}/ocl_stdlib.h.pch" 
> PARENT_SCOPE)
> +set (LOCAL_PCH_STRICT_OBJECT_DIR
> +"${local_pch_strict_object}:${beignet_install_path}/ocl_stdlib.h.stri
> +ct
> +.pch" PARENT_SCOPE)
>  set (LOCAL_PCM_OBJECT_DIR
> "${CMAKE_CURRENT_BINARY_DIR}/${pcm_lib}:${beignet_install_path}/${pcm
> _lib}" PARENT_SCOPE)
> 
>  set (PCH_OBJECT_DIR "${beignet_install_path}/ocl_stdlib.h.pch")
> +set (PCH_STRICT_OBJECT_DIR
> +"${beignet_install_path}/ocl_stdlib.h.strict.pch")
>  set (PCM_OBJECT_DIR "${beignet_install_path}/${pcm_lib}")
>  configure_file (
>    "GBEConfig.h.in"
> diff --git a/backend/src/GBEConfig.h.in b/backend/src/GBEConfig.h.in 
> index
> 5bc09b8..c446754 100644
> --- a/backend/src/GBEConfig.h.in
> +++ b/backend/src/GBEConfig.h.in
> @@ -2,4 +2,5 @@
>  #define LIBGBE_VERSION_MAJOR @LIBGBE_VERSION_MAJOR@  #define 
> LIBGBE_VERSION_MINOR @LIBGBE_VERSION_MINOR@  #define PCH_OBJECT_DIR 
> "@PCH_OBJECT_DIR@"
> +#define PCH_STRICT_OBJECT_DIR "@PCH_STRICT_OBJECT_DIR@"
>  #define PCM_OBJECT_DIR "@PCM_OBJECT_DIR@"
> diff --git a/backend/src/backend/program.cpp 
> b/backend/src/backend/program.cpp index 2492a8b..496a9a0 100644
> --- a/backend/src/backend/program.cpp
> +++ b/backend/src/backend/program.cpp
> @@ -466,6 +466,7 @@ namespace gbe {
> 
>    BVAR(OCL_OUTPUT_BUILD_LOG, false);
>    SVAR(OCL_PCH_PATH, PCH_OBJECT_DIR);
> +  SVAR(OCL_PCH_STRICT_PATH, PCH_STRICT_OBJECT_DIR);
>    SVAR(OCL_PCM_PATH, PCM_OBJECT_DIR);
> 
>    static bool buildModuleFromSource(const char* input, const char* 
> output, std::string options, @@ -646,6 +647,7 @@ namespace gbe {
>    extern std::string ocl_stdlib_str;
> 
>    BVAR(OCL_USE_PCH, true);
> +  BVAR(OCL_STRICT_CONFORMANCE, true);
>    static gbe_program programNewFromSource(const char *source,
>                                            size_t stringSize,
>                                            const char *options, @@
> -743,6 +745,10 @@ namespace gbe {
>      }
> 
>      std::string dirs = OCL_PCH_PATH;
> +    if (OCL_STRICT_CONFORMANCE){
> +      dirs = OCL_PCH_STRICT_PATH;
> +    }
> +
>      std::istringstream idirs(dirs);
>      std::string pchFileName;
> 
> @@ -757,8 +763,12 @@ namespace gbe {
>        clOpt += " -include-pch ";
>        clOpt += pchFileName;
>        clOpt += " ";
> -    } else
> +    } else {
> +      if (OCL_STRICT_CONFORMANCE){
> +        clOpt += " -DOCL_STRICT_CONFORMANCE=1 ";
> +      }
>        fwrite(ocl_stdlib_str.c_str(), strlen(ocl_stdlib_str.c_str()), 
> 1, clFile);
> +    }
> 
>      // Write the source to the cl file
>      fwrite(source, strlen(source), 1, clFile); diff --git 
> a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h index 
> d191b8e..8401f0f 100755
> --- a/backend/src/ocl_stdlib.tmpl.h
> +++ b/backend/src/ocl_stdlib.tmpl.h
> @@ -4462,6 +4462,33 @@ INLINE_OVERLOADABLE  size_t 
> get_image_array_size(image1d_array_t image)
>    { return __gen_ocl_get_image_array_size(image); }  #endif
> 
> +
> +
> +/// It is required by OpenCL that built-in math functions (without
> +native_/half_) /// have high precision, but GPU hardware is designed 
> +to be good enough precision, /// so most functions will be emulated 
> +with highp
> and make performance drops.
> +/// This is not an issue if the applications could choose the proper 
> +functions, for /// example, use native_* functions for cases without 
> +highp
> requirement.
> +/// Due to the fact that applications always use math functions 
> +without native_/half_, /// environment variable 
> +OCL_STRICT_CONFORMANCE is introduced to switch the behavior /// of the math functions.
> +/// The math functions will be emulated with highp if 
> +OCL_STRICT_CONFORMANCE is 1 (the following code block is disabled), 
> +/// and
> choose fast path with good enough precision if OCL_STRICT_CONFORMANCE 
> is
> 0 (the following code block is enabled).
> +#ifndef OCL_STRICT_CONFORMANCE
> +
> +#ifdef sin
> +#undef sin
> +#endif
> +#define sin __gen_ocl_internal_intelnative_sin
> +INLINE_OVERLOADABLE float __gen_ocl_internal_intelnative_sin(float x) {
> +    return native_sin(x);
> +}
> +
> +#endif //OCL_STRICT_CONFORMANCE
> +
> +
> +
>  #pragma OPENCL EXTENSION cl_khr_fp64 : disable
> 
>  #undef DECL_IMAGE
> diff --git a/utests/setenv.sh.in b/utests/setenv.sh.in index 
> ad77369..17a2e28
> 100644
> --- a/utests/setenv.sh.in
> +++ b/utests/setenv.sh.in
> @@ -2,4 +2,5 @@
>  #
>  export OCL_PCM_PATH=@LOCAL_PCM_OBJECT_DIR@
>  export OCL_PCH_PATH=@LOCAL_PCH_OBJECT_DIR@
> +export OCL_PCH_STRICT_PATH=@LOCAL_PCH_STRICT_OBJECT_DIR@
>  export OCL_KERNEL_PATH=@CMAKE_CURRENT_SOURCE_DIR@/../kernels
> --
> 1.7.9.5
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list