[Beignet] [PATCH] GBE: fix clang's "incorrect" optimization for barrier call.

Yang, Rong R rong.r.yang at intel.com
Tue Dec 17 00:21:02 PST 2013


One comment.

-----Original Message-----
From: beignet-bounces at lists.freedesktop.org [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of Zhigang Gong
Sent: Friday, December 13, 2013 2:47 PM
To: beignet at lists.freedesktop.org
Cc: Gong, Zhigang
Subject: [Beignet] [PATCH] GBE: fix clang's "incorrect" optimization for barrier call.

Clang may duplicate a single barrier call into multiple branches, which violates the OpenCL spec and may cause a GPU hang. To fix this issue, we implement the barrier in an LLVM module file and mark the function with the noduplicate attribute. This pre-compiled module has to be linked before the user kernel is compiled, so we assign the pcm lib file to the LinkBitcodeFile field of the clang instance's code generation options.

Signed-off-by: Zhigang Gong <zhigang.gong at intel.com>
---
 backend/src/CMakeLists.txt      |   29 ++++++++++++++++++++++++++++-
 backend/src/GBEConfig.h.in      |    1 +
 backend/src/backend/program.cpp |   17 +++++++++++++++++
 backend/src/ocl_barrier.ll      |   39 +++++++++++++++++++++++++++++++++++++++
 backend/src/ocl_stdlib.tmpl.h   |    9 +--------
 5 files changed, 86 insertions(+), 9 deletions(-)  create mode 100644 backend/src/ocl_barrier.ll

diff --git a/backend/src/CMakeLists.txt b/backend/src/CMakeLists.txt index 36bf688..fa69321 100644
--- a/backend/src/CMakeLists.txt
+++ b/backend/src/CMakeLists.txt
@@ -59,6 +59,26 @@ add_custom_command(
 add_custom_target(pch_object
                   DEPENDS ${pch_object})
 
+macro(ll_add_library ll_lib ll_sources)
+  foreach (ll ${${ll_sources}})
+  add_custom_command(
+       OUTPUT  ${ll}.bc
+       COMMAND rm -f ${ll}.bc
+       COMMAND llvm-as -o ${ll}.bc ${GBE_SOURCE_DIR}/src/${ll}
+       DEPENDS ${ll}
+       )
+  set (ll_objects ${ll_objects} ${ll}.bc)
+  endforeach (ll ${ll_sources})
+  add_custom_command(
+       OUTPUT ${ll_lib}
+       COMMAND llvm-link -o ${ll_lib} ${ll_objects}
+       DEPENDS ${ll_objects}
+       )
+  add_custom_target(${ll_lib}
+                    DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${ll_lib})
+  message(${ll_lib}, ${ll_objects})
+endmacro(ll_add_library)
+
 if (GBE_USE_BLOB)
   set (GBE_SRC
        blob.cpp
@@ -144,7 +164,12 @@ link_directories (${LLVM_LIBRARY_DIRS})
 include_directories(${LLVM_INCLUDE_DIRS})
 add_library (gbe SHARED ${GBE_SRC})
 
-ADD_DEPENDENCIES (gbe pch_object)
+# for pre compiled module library.
+set (pcm_lib "beignet.bc")
+set (pcm_sources ocl_barrier.ll)
+ll_add_library (${pcm_lib} pcm_sources)
+
+ADD_DEPENDENCIES (gbe pch_object ${pcm_lib})
 target_link_libraries(
                       gbe
                       ${DRM_INTEL_LIBRARY} @@ -161,9 +186,11 @@ TARGET_LINK_LIBRARIES(gbe_bin_generater gbe)
 
 install (TARGETS gbe LIBRARY DESTINATION lib)  install (FILES ${pch_object} DESTINATION lib)
+install (FILES ${CMAKE_CURRENT_BINARY_DIR}/${pcm_lib} DESTINATION lib)
 install (FILES backend/program.h DESTINATION include/gen)
 
 set (PCH_OBJECT_DIR "${pch_object};${CMAKE_INSTALL_PREFIX}/lib/ocl_stdlib.h.pch")
+set (PCM_LIB_DIR "${CMAKE_CURRENT_BINARY_DIR}/${pcm_lib};${CMAKE_INSTALL_PREFIX}/lib/${pcm_lib}")
 configure_file (
   "GBEConfig.h.in"
   "GBEConfig.h"
diff --git a/backend/src/GBEConfig.h.in b/backend/src/GBEConfig.h.in index 74bef3f..9920d25 100644
--- a/backend/src/GBEConfig.h.in
+++ b/backend/src/GBEConfig.h.in
@@ -2,3 +2,4 @@
 #define LIBGBE_VERSION_MAJOR @LIBGBE_VERSION_MAJOR@  #define LIBGBE_VERSION_MINOR @LIBGBE_VERSION_MINOR@  #define PCH_OBJECT_DIR "@PCH_OBJECT_DIR@"
+#define PCM_LIB_DIR "@PCM_LIB_DIR@"
diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp index fc9b03c..46ec04f 100644
--- a/backend/src/backend/program.cpp
+++ b/backend/src/backend/program.cpp
@@ -587,6 +587,21 @@ namespace gbe {
 
     // Create an action and make the compiler instance carry it out
     llvm::OwningPtr<clang::CodeGenAction> Act(new clang::EmitLLVMOnlyAction());
+
+    std::string dirs = PCM_LIB_DIR, pcmLib;
+    std::istringstream idirs(dirs);
+    bool findPcm = false;
+
+    while (getline(idirs, pcmLib, ';')) {
+      if(access(pcmLib.c_str(), R_OK) == 0) {
+        findPcm = true;
+        break;
+      }
+    }
+
+    GBE_ASSERT(findPcm && "Could not find pre compiled module library.\n");
+
+    Clang.getCodeGenOpts().LinkBitcodeFile = pcmLib;
     auto retVal = Clang.ExecuteAction(*Act);
 
     if (err != NULL) {
@@ -755,6 +770,8 @@ namespace gbe {
       if (err != NULL)
         *errSize += clangErrSize;
       gbe_mutex.unlock();
+      if (OCL_OUTPUT_BUILD_LOG && options)
+        llvm::errs() << options;
       remove(llName.c_str());
     } else
       p = NULL;
diff --git a/backend/src/ocl_barrier.ll b/backend/src/ocl_barrier.ll new file mode 100644 index 0000000..0f5f104
--- /dev/null
+++ b/backend/src/ocl_barrier.ll
@@ -0,0 +1,39 @@
+;XXX FIXME as llvm can't use macros, we hardcoded 3, 1, 2
+;here, we may need to use a more grace way to handle this type
+;of values latter.
+;#define CLK_LOCAL_MEM_FENCE  (1 << 0)
+;#define CLK_GLOBAL_MEM_FENCE (1 << 1)
+
+declare i32 @_get_local_mem_fence() nounwind alwaysinline
+declare i32 @_get_global_mem_fence() nounwind alwaysinline
+declare void @__gen_ocl_barrier_local() nounwind noduplicate alwaysinline
+declare void @__gen_ocl_barrier_global() nounwind noduplicate alwaysinline
+declare void @__gen_ocl_barrier_local_and_global() nounwind noduplicate alwaysinline
+
+define void @barrier(i32 %flags) nounwind noduplicate alwaysinline {
+  %1 = icmp eq i32 %flags, 3
+  br i1 %1, label %barrier_local_global, label %barrier_local_check
+
+barrier_local_global:
+  call void @__gen_ocl_barrier_local_and_global() noduplicate
+  br label %done
+
+barrier_local_check:
+  %2 = icmp eq i32 %flags, 1
+  br i1 %2, label %barrier_local, label %barrier_global_check
+
+barrier_local:
+  call void @__gen_ocl_barrier_local() noduplicate
+  br label %done
+
+barrier_global_check:
+  %3 = icmp eq i32 %flags, 2
+  br i1 %3, label %barrier_global, label %done
+
+barrier_global:
+  call void @__gen_ocl_barrier_local_and_global() noduplicate

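>>>> Should this call be @__gen_ocl_barrier_global() rather than @__gen_ocl_barrier_local_and_global()? This is the global-only (flags == 2) branch.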

+  br label %done
+
+done:
+  ret void
+}
diff --git a/backend/src/ocl_stdlib.tmpl.h b/backend/src/ocl_stdlib.tmpl.h index e5f356e..a4989ed 100644
--- a/backend/src/ocl_stdlib.tmpl.h
+++ b/backend/src/ocl_stdlib.tmpl.h
@@ -2685,14 +2685,7 @@ void __gen_ocl_barrier_global(void);  void __gen_ocl_barrier_local_and_global(void);
 
 typedef uint cl_mem_fence_flags;
-INLINE void barrier(cl_mem_fence_flags flags) {
-  if (flags == (CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE))
-    __gen_ocl_barrier_local_and_global();
-  else if (flags == CLK_LOCAL_MEM_FENCE)
-    __gen_ocl_barrier_local();
-  else if (flags == CLK_GLOBAL_MEM_FENCE)
-    __gen_ocl_barrier_global();
-}
+void barrier(cl_mem_fence_flags flags);
 
 INLINE void mem_fence(cl_mem_fence_flags flags) {  }
--
1.7.9.5

_______________________________________________
Beignet mailing list
Beignet at lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list