[Beignet] [PATCH V2] GBE: add param to switch the behavior of math func
Guo Yejun
yejun.guo at intel.com
Thu Feb 13 03:42:19 CET 2014
Add OCL_STRICT_CONFORMANCE to switch the behavior of the math functions.
The functions will be high precision, with a performance drop, if it is 1;
the fast path with good-enough precision will be selected if it is 0.
This change adds the code basis, with 'sin' implemented as
an example; support for other math functions will be added later.
Signed-off-by: Guo Yejun <yejun.guo at intel.com>
---
backend/CMakeLists.txt | 3 ++-
backend/src/CMakeLists.txt | 13 ++++++++++---
backend/src/GBEConfig.h.in | 1 +
backend/src/backend/program.cpp | 12 +++++++++++-
backend/src/ocl_stdlib.tmpl.h | 27 +++++++++++++++++++++++++++
utests/setenv.sh.in | 1 +
6 files changed, 52 insertions(+), 5 deletions(-)
diff --git a/backend/CMakeLists.txt b/backend/CMakeLists.txt
index dd55a4a..08122da 100644
--- a/backend/CMakeLists.txt
+++ b/backend/CMakeLists.txt
@@ -98,8 +98,9 @@ include_directories (${CMAKE_CURRENT_BINARY_DIR})
##############################################################
add_subdirectory (src)
set(LOCAL_PCH_OBJECT_DIR ${LOCAL_PCH_OBJECT_DIR} PARENT_SCOPE)
+set(LOCAL_PCH_STRICT_OBJECT_DIR ${LOCAL_PCH_STRICT_OBJECT_DIR} PARENT_SCOPE)
set(LOCAL_PCM_OBJECT_DIR ${LOCAL_PCM_OBJECT_DIR} PARENT_SCOPE)
set (GBE_BIN_GENERATER
- OCL_PCM_PATH=${LOCAL_PCM_OBJECT_DIR} OCL_PCH_PATH=${LOCAL_PCH_OBJECT_DIR} ${CMAKE_CURRENT_BINARY_DIR}/src/gbe_bin_generater
+ OCL_PCM_PATH=${LOCAL_PCM_OBJECT_DIR} OCL_PCH_PATH=${LOCAL_PCH_OBJECT_DIR} OCL_PCH_STRICT_PATH=${LOCAL_PCH_STRICT_OBJECT_DIR} ${CMAKE_CURRENT_BINARY_DIR}/src/gbe_bin_generater
PARENT_SCOPE)
diff --git a/backend/src/CMakeLists.txt b/backend/src/CMakeLists.txt
index 33494a0..df36069 100644
--- a/backend/src/CMakeLists.txt
+++ b/backend/src/CMakeLists.txt
@@ -43,6 +43,8 @@ add_custom_command(
set (pch_object ${ocl_blob_file}.pch)
set (local_pch_object ${ocl_blob_file}.local.pch)
+set (pch_strict_object ${ocl_blob_file}.strict.pch)
+set (local_pch_strict_object ${ocl_blob_file}.strict.local.pch)
# generate pch object
if (LLVM_VERSION_NODOT VERSION_GREATER 32)
set (clang_cmd -cc1 -x cl -triple spir -ffp-contract=off)
@@ -56,15 +58,17 @@ endif (LLVM_VERSION_NODOT VERSION_GREATER 32)
set (clang_cmd ${clang_cmd} -fno-builtin -DGEN7_SAMPLER_CLAMP_BORDER_WORKAROUND)
add_custom_command(
- OUTPUT ${pch_object}
- COMMAND rm -f ${pch_object}
+ OUTPUT ${pch_object} ${pch_strict_object}
+ COMMAND rm -f ${pch_object} ${pch_strict_object}
COMMAND clang ${clang_cmd} --relocatable-pch -emit-pch -isysroot ${CMAKE_CURRENT_BINARY_DIR} ${ocl_blob_file} -o ${pch_object}
COMMAND clang ${clang_cmd} -emit-pch ${ocl_blob_file} -o ${local_pch_object}
+ COMMAND clang ${clang_cmd} -DOCL_STRICT_CONFORMANCE=1 --relocatable-pch -emit-pch -isysroot ${CMAKE_CURRENT_BINARY_DIR} ${ocl_blob_file} -o ${pch_strict_object}
+ COMMAND clang ${clang_cmd} -DOCL_STRICT_CONFORMANCE=1 -emit-pch ${ocl_blob_file} -o ${local_pch_strict_object}
DEPENDS ${ocl_blob_file}
)
add_custom_target(pch_object
- DEPENDS ${pch_object})
+ DEPENDS ${pch_object} ${pch_strict_object})
macro(ll_add_library ll_lib ll_sources)
foreach (ll ${${ll_sources}})
@@ -196,13 +200,16 @@ TARGET_LINK_LIBRARIES(gbe_bin_generater gbe)
#install (FILES backend/program.h DESTINATION include/gen)
install (FILES ${ocl_blob_file} DESTINATION ${LIB_INSTALL_DIR}/beignet)
install (FILES ${pch_object} DESTINATION ${LIB_INSTALL_DIR}/beignet)
+install (FILES ${pch_strict_object} DESTINATION ${LIB_INSTALL_DIR}/beignet)
install (FILES ${CMAKE_CURRENT_BINARY_DIR}/${pcm_lib} DESTINATION ${LIB_INSTALL_DIR}/beignet)
# When build beignet itself, we need to export the local precompiled header file and precompiled module
# file to libcl and utests.
set (LOCAL_PCH_OBJECT_DIR "${local_pch_object}:${beignet_install_path}/ocl_stdlib.h.pch" PARENT_SCOPE)
+set (LOCAL_PCH_STRICT_OBJECT_DIR "${local_pch_strict_object}:${beignet_install_path}/ocl_stdlib.h.strict.pch" PARENT_SCOPE)
set (LOCAL_PCM_OBJECT_DIR "${CMAKE_CURRENT_BINARY_DIR}/${pcm_lib}:${beignet_install_path}/${pcm_lib}" PARENT_SCOPE)
set (PCH_OBJECT_DIR "${beignet_install_path}/ocl_stdlib.h.pch")
+set (PCH_STRICT_OBJECT_DIR "${beignet_install_path}/ocl_stdlib.h.strict.pch")
set (PCM_OBJECT_DIR "${beignet_install_path}/${pcm_lib}")
configure_file (
"GBEConfig.h.in"
diff --git a/backend/src/GBEConfig.h.in b/backend/src/GBEConfig.h.in
index 5bc09b8..c446754 100644
--- a/backend/src/GBEConfig.h.in
+++ b/backend/src/GBEConfig.h.in
@@ -2,4 +2,5 @@
#define LIBGBE_VERSION_MAJOR @LIBGBE_VERSION_MAJOR@
#define LIBGBE_VERSION_MINOR @LIBGBE_VERSION_MINOR@
#define PCH_OBJECT_DIR "@PCH_OBJECT_DIR@"
+#define PCH_STRICT_OBJECT_DIR "@PCH_STRICT_OBJECT_DIR@"
#define PCM_OBJECT_DIR "@PCM_OBJECT_DIR@"
diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp
index 2492a8b..496a9a0 100644
--- a/backend/src/backend/program.cpp
+++ b/backend/src/backend/program.cpp
@@ -466,6 +466,7 @@ namespace gbe {
BVAR(OCL_OUTPUT_BUILD_LOG, false);
SVAR(OCL_PCH_PATH, PCH_OBJECT_DIR);
+ SVAR(OCL_PCH_STRICT_PATH, PCH_STRICT_OBJECT_DIR);
SVAR(OCL_PCM_PATH, PCM_OBJECT_DIR);
static bool buildModuleFromSource(const char* input, const char* output, std::string options,
@@ -646,6 +647,7 @@ namespace gbe {
extern std::string ocl_stdlib_str;
BVAR(OCL_USE_PCH, true);
+ BVAR(OCL_STRICT_CONFORMANCE, true);
static gbe_program programNewFromSource(const char *source,
size_t stringSize,
const char *options,
@@ -743,6 +745,10 @@ namespace gbe {
}
std::string dirs = OCL_PCH_PATH;
+ if (OCL_STRICT_CONFORMANCE){
+ dirs = OCL_PCH_STRICT_PATH;
+ }
+
std::istringstream idirs(dirs);
std::string pchFileName;
@@ -757,8 +763,12 @@ namespace gbe {
clOpt += " -include-pch ";
clOpt += pchFileName;
clOpt += " ";
- } else
+ } else {
+ if (OCL_STRICT_CONFORMANCE){
+ clOpt += " -DOCL_STRICT_CONFORMANCE=1 ";
+ }
fwrite(ocl_stdlib_str.c_str(), strlen(ocl_stdlib_str.c_str()), 1, clFile);
+ }
// Write the source to the cl file
fwrite(source, strlen(source), 1, clFile);
diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h
index d191b8e..8401f0f 100755
--- a/backend/src/ocl_stdlib.tmpl.h
+++ b/backend/src/ocl_stdlib.tmpl.h
@@ -4462,6 +4462,33 @@ INLINE_OVERLOADABLE size_t get_image_array_size(image1d_array_t image)
{ return __gen_ocl_get_image_array_size(image); }
#endif
+
+
+/// OpenCL requires that the built-in math functions (those without a native_/half_ prefix)
+/// have high precision, but GPU hardware is designed for good-enough precision,
+/// so most functions have to be emulated with highp, which makes performance drop.
+/// This is not an issue if applications choose the proper functions, for
+/// example, by using native_* functions in cases without a highp requirement.
+/// Because applications always use the math functions without native_/half_,
+/// the environment variable OCL_STRICT_CONFORMANCE is introduced to switch the behavior
+/// of the math functions.
+/// The math functions are emulated with highp if OCL_STRICT_CONFORMANCE is 1 (the following code block is disabled),
+/// and take the fast path with good-enough precision if OCL_STRICT_CONFORMANCE is 0 (the following code block is enabled).
+#ifndef OCL_STRICT_CONFORMANCE
+
+#ifdef sin
+#undef sin
+#endif
+#define sin __gen_ocl_internal_intelnative_sin
+INLINE_OVERLOADABLE float __gen_ocl_internal_intelnative_sin(float x)
+{
+ return native_sin(x);
+}
+
+#endif //OCL_STRICT_CONFORMANCE
+
+
+
#pragma OPENCL EXTENSION cl_khr_fp64 : disable
#undef DECL_IMAGE
diff --git a/utests/setenv.sh.in b/utests/setenv.sh.in
index ad77369..17a2e28 100644
--- a/utests/setenv.sh.in
+++ b/utests/setenv.sh.in
@@ -2,4 +2,5 @@
#
export OCL_PCM_PATH=@LOCAL_PCM_OBJECT_DIR@
export OCL_PCH_PATH=@LOCAL_PCH_OBJECT_DIR@
+export OCL_PCH_STRICT_PATH=@LOCAL_PCH_STRICT_OBJECT_DIR@
export OCL_KERNEL_PATH=@CMAKE_CURRENT_SOURCE_DIR@/../kernels
--
1.7.9.5
More information about the Beignet
mailing list