[Beignet] [PATCH] GBE: fix clang's "incorrect" optimization for barrier call.

Zhigang Gong zhigang.gong at intel.com
Thu Dec 12 22:46:37 PST 2013


Clang may duplicate a single barrier call into multiple branches, which
violates the OpenCL spec and may cause a GPU hang. To fix this issue,
we implement barrier in an LLVM module file and mark the function with
the noduplicate attribute. This pre-compiled module must be linked
before the user kernel is compiled, so we set the pcm library file in
the LinkBitcodeFile field of the clang instance's code generation
options.

Signed-off-by: Zhigang Gong <zhigang.gong at intel.com>
---
 backend/src/CMakeLists.txt      |   29 ++++++++++++++++++++++++++++-
 backend/src/GBEConfig.h.in      |    1 +
 backend/src/backend/program.cpp |   17 +++++++++++++++++
 backend/src/ocl_barrier.ll      |   39 +++++++++++++++++++++++++++++++++++++++
 backend/src/ocl_stdlib.tmpl.h   |    9 +--------
 5 files changed, 86 insertions(+), 9 deletions(-)
 create mode 100644 backend/src/ocl_barrier.ll

diff --git a/backend/src/CMakeLists.txt b/backend/src/CMakeLists.txt
index 36bf688..fa69321 100644
--- a/backend/src/CMakeLists.txt
+++ b/backend/src/CMakeLists.txt
@@ -59,6 +59,26 @@ add_custom_command(
 add_custom_target(pch_object
                   DEPENDS ${pch_object})
 
+macro(ll_add_library ll_lib ll_sources)  # ll_sources is the NAME of a list variable holding .ll files
+  set (ll_objects "")  # macros run in the caller's scope: start each call from an empty object list
+  foreach (ll ${${ll_sources}})
+  add_custom_command(
+       OUTPUT  ${ll}.bc
+       COMMAND rm -f ${ll}.bc
+       COMMAND llvm-as -o ${ll}.bc ${GBE_SOURCE_DIR}/src/${ll}  # assemble textual IR into bitcode
+       DEPENDS ${ll}
+       )
+  set (ll_objects ${ll_objects} ${ll}.bc)
+  endforeach (ll ${ll_sources})
+  add_custom_command(
+       OUTPUT ${ll_lib}
+       COMMAND llvm-link -o ${ll_lib} ${ll_objects}  # merge all bitcode objects into one library
+       DEPENDS ${ll_objects}
+       )
+  add_custom_target(${ll_lib}
+                    DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${ll_lib})
+endmacro(ll_add_library)
+
 if (GBE_USE_BLOB)
   set (GBE_SRC
        blob.cpp
@@ -144,7 +164,12 @@ link_directories (${LLVM_LIBRARY_DIRS})
 include_directories(${LLVM_INCLUDE_DIRS})
 add_library (gbe SHARED ${GBE_SRC})
 
-ADD_DEPENDENCIES (gbe pch_object)
+# for pre compiled module library.
+set (pcm_lib "beignet.bc")
+set (pcm_sources ocl_barrier.ll)
+ll_add_library (${pcm_lib} pcm_sources)
+
+ADD_DEPENDENCIES (gbe pch_object ${pcm_lib})
 target_link_libraries(
                       gbe
                       ${DRM_INTEL_LIBRARY}
@@ -161,9 +186,11 @@ TARGET_LINK_LIBRARIES(gbe_bin_generater gbe)
 
 install (TARGETS gbe LIBRARY DESTINATION lib)
 install (FILES ${pch_object} DESTINATION lib)
+install (FILES ${CMAKE_CURRENT_BINARY_DIR}/${pcm_lib} DESTINATION lib)
 install (FILES backend/program.h DESTINATION include/gen)
 
 set (PCH_OBJECT_DIR "${pch_object};${CMAKE_INSTALL_PREFIX}/lib/ocl_stdlib.h.pch")
+set (PCM_LIB_DIR "${CMAKE_CURRENT_BINARY_DIR}/${pcm_lib};${CMAKE_INSTALL_PREFIX}/lib/${pcm_lib}")
 configure_file (
   "GBEConfig.h.in"
   "GBEConfig.h"
diff --git a/backend/src/GBEConfig.h.in b/backend/src/GBEConfig.h.in
index 74bef3f..9920d25 100644
--- a/backend/src/GBEConfig.h.in
+++ b/backend/src/GBEConfig.h.in
@@ -2,3 +2,4 @@
 #define LIBGBE_VERSION_MAJOR @LIBGBE_VERSION_MAJOR@
 #define LIBGBE_VERSION_MINOR @LIBGBE_VERSION_MINOR@
 #define PCH_OBJECT_DIR "@PCH_OBJECT_DIR@"
+#define PCM_LIB_DIR "@PCM_LIB_DIR@"
diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp
index fc9b03c..46ec04f 100644
--- a/backend/src/backend/program.cpp
+++ b/backend/src/backend/program.cpp
@@ -587,6 +587,21 @@ namespace gbe {
 
     // Create an action and make the compiler instance carry it out
     llvm::OwningPtr<clang::CodeGenAction> Act(new clang::EmitLLVMOnlyAction());
+
+    std::string dirs = PCM_LIB_DIR, pcmLib;
+    std::istringstream idirs(dirs);
+    bool findPcm = false;
+
+    while (getline(idirs, pcmLib, ';')) {
+      if(access(pcmLib.c_str(), R_OK) == 0) {
+        findPcm = true;
+        break;
+      }
+    }
+
+    GBE_ASSERT(findPcm && "Could not find pre compiled module library.\n");
+
+    Clang.getCodeGenOpts().LinkBitcodeFile = pcmLib;
     auto retVal = Clang.ExecuteAction(*Act);
 
     if (err != NULL) {
@@ -755,6 +770,8 @@ namespace gbe {
       if (err != NULL)
         *errSize += clangErrSize;
       gbe_mutex.unlock();
+      if (OCL_OUTPUT_BUILD_LOG && options)
+        llvm::errs() << options;
       remove(llName.c_str());
     } else
       p = NULL;
diff --git a/backend/src/ocl_barrier.ll b/backend/src/ocl_barrier.ll
new file mode 100644
index 0000000..0f5f104
--- /dev/null
+++ b/backend/src/ocl_barrier.ll
@@ -0,0 +1,39 @@
+;XXX FIXME as LLVM IR can't use macros, we hardcode 3, 1 and 2
+;here; we may need a more graceful way to handle this type
+;of value later.
+;#define CLK_LOCAL_MEM_FENCE  (1 << 0)
+;#define CLK_GLOBAL_MEM_FENCE (1 << 1)
+
+declare i32 @_get_local_mem_fence() nounwind alwaysinline
+declare i32 @_get_global_mem_fence() nounwind alwaysinline
+declare void @__gen_ocl_barrier_local() nounwind noduplicate alwaysinline
+declare void @__gen_ocl_barrier_global() nounwind noduplicate alwaysinline
+declare void @__gen_ocl_barrier_local_and_global() nounwind noduplicate alwaysinline
+
+define void @barrier(i32 %flags) nounwind noduplicate alwaysinline { ; dispatch on the fence flags
+  %1 = icmp eq i32 %flags, 3 ; CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE
+  br i1 %1, label %barrier_local_global, label %barrier_local_check

+barrier_local_global:
+  call void @__gen_ocl_barrier_local_and_global() noduplicate
+  br label %done

+barrier_local_check:
+  %2 = icmp eq i32 %flags, 1 ; CLK_LOCAL_MEM_FENCE
+  br i1 %2, label %barrier_local, label %barrier_global_check

+barrier_local:
+  call void @__gen_ocl_barrier_local() noduplicate
+  br label %done

+barrier_global_check:
+  %3 = icmp eq i32 %flags, 2 ; CLK_GLOBAL_MEM_FENCE
+  br i1 %3, label %barrier_global, label %done ; any other flag value emits no barrier

+barrier_global:
+  call void @__gen_ocl_barrier_global() noduplicate ; global-only barrier for a global-only fence
+  br label %done

+done:
+  ret void
+}
diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h
index e5f356e..a4989ed 100644
--- a/backend/src/ocl_stdlib.tmpl.h
+++ b/backend/src/ocl_stdlib.tmpl.h
@@ -2685,14 +2685,7 @@ void __gen_ocl_barrier_global(void);
 void __gen_ocl_barrier_local_and_global(void);
 
 typedef uint cl_mem_fence_flags;
-INLINE void barrier(cl_mem_fence_flags flags) {
-  if (flags == (CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE))
-    __gen_ocl_barrier_local_and_global();
-  else if (flags == CLK_LOCAL_MEM_FENCE)
-    __gen_ocl_barrier_local();
-  else if (flags == CLK_GLOBAL_MEM_FENCE)
-    __gen_ocl_barrier_global();
-}
+void barrier(cl_mem_fence_flags flags);
 
 INLINE void mem_fence(cl_mem_fence_flags flags) {
 }
-- 
1.7.9.5



More information about the Beignet mailing list