[Beignet] [PATCH V2] output warning message if the global/local_work_size is not good in Debug mode

Guo Yejun yejun.guo at intel.com
Tue Dec 15 17:00:15 PST 2015


Known issue: utest will output lots of warning messages,
since its total number of work-items is less than 64.

V2: use DEBUGP to wrap the code, and also refine the macro
Signed-off-by: Guo Yejun <yejun.guo at intel.com>
---
 src/cl_api.c                | 11 ++++++++++-
 src/cl_command_queue_gen7.c |  6 +++---
 src/cl_utils.h              | 12 ++++++++++--
 3 files changed, 23 insertions(+), 6 deletions(-)

diff --git a/src/cl_api.c b/src/cl_api.c
index 510941e..05ca889 100644
--- a/src/cl_api.c
+++ b/src/cl_api.c
@@ -2999,6 +2999,7 @@ clEnqueueNDRangeKernel(cl_command_queue  command_queue,
         fixed_local_sz[1] = 1;
     } else {
       uint j, maxDimSize = 64 /* from 64? */, maxGroupSize = 256; //MAX_WORK_GROUP_SIZE may too large
+      size_t realGroupSize = 1;
       for (i = 0; i< work_dim; i++) {
         for (j = maxDimSize; j > 1; j--) {
           if (global_work_size[i] % j == 0 && j <= maxGroupSize) {
@@ -3008,7 +3009,10 @@ clEnqueueNDRangeKernel(cl_command_queue  command_queue,
             break;  //choose next work_dim
           }
         }
+        realGroupSize *= fixed_local_sz[i];
       }
+      if (realGroupSize < 8 || realGroupSize % 8 != 0)
+        DEBUGP(DL_WARNING, "unable to find good values for local_work_size[i], please provide local_work_size[] explicitly, you can find good values with trial-and-error method.");
     }
   }
 
@@ -3016,8 +3020,13 @@ clEnqueueNDRangeKernel(cl_command_queue  command_queue,
     fixed_global_sz[0] = (global_work_size[0]+15) / 16 * 16;
     fixed_global_sz[1] = (global_work_size[1]+15) / 16;
   } else {
-    for (i = 0; i < work_dim; ++i)
+    size_t total_work_items = 1;
+    for (i = 0; i < work_dim; ++i) {
       fixed_global_sz[i] = global_work_size[i];
+      total_work_items *= fixed_global_sz[i];
+    }
+    if (total_work_items < 64)
+      DEBUGP(DL_WARNING, "too small work-items (see values in global_work_size[]) might result in bad performance.");
   }
   if (global_work_offset != NULL)
     for (i = 0; i < work_dim; ++i)
diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c
index 44db7ed..791a7ca 100644
--- a/src/cl_command_queue_gen7.c
+++ b/src/cl_command_queue_gen7.c
@@ -369,21 +369,21 @@ cl_command_queue_ND_range_gen7(cl_command_queue queue,
 
   /* Compute the number of HW threads we need */
   if(UNLIKELY(err = cl_kernel_work_group_sz(ker, local_wk_sz, 3, &local_sz) != CL_SUCCESS)) {
-    DEBUGP("Beignet: Work group size exceed Kernel's work group size.\n");
+    DEBUGP(DL_ERROR, "Work group size exceed Kernel's work group size.");
     return err;
   }
   kernel.thread_n = thread_n = (local_sz + simd_sz - 1) / simd_sz;
   kernel.curbe_sz = cst_sz;
 
   if (scratch_sz > ker->program->ctx->device->scratch_mem_size) {
-    DEBUGP("Beignet: Out of scratch memory %d.\n", scratch_sz);
+    DEBUGP(DL_ERROR, "Out of scratch memory %d.", scratch_sz);
     return CL_OUT_OF_RESOURCES;
   }
   /* Curbe step 1: fill the constant urb buffer data shared by all threads */
   if (ker->curbe) {
     kernel.slm_sz = cl_curbe_fill(ker, work_dim, global_wk_off, global_wk_sz, local_wk_sz, thread_n);
     if (kernel.slm_sz > ker->program->ctx->device->local_mem_size) {
-      DEBUGP("Beignet: Out of shared local memory %d.\n", kernel.slm_sz);
+      DEBUGP(DL_ERROR, "Out of shared local memory %d.", kernel.slm_sz);
       return CL_OUT_OF_RESOURCES;
     }
   }
diff --git a/src/cl_utils.h b/src/cl_utils.h
index 7595158..a0e37b2 100644
--- a/src/cl_utils.h
+++ b/src/cl_utils.h
@@ -31,11 +31,19 @@
 #define JOIN(X, Y) _DO_JOIN(X, Y)
 #define _DO_JOIN(X, Y) _DO_JOIN2(X, Y)
 #define _DO_JOIN2(X, Y) X##Y
+enum DEBUGP_LEVEL
+{
+    DL_INFO,
+    DL_WARNING,
+    DL_ERROR
+};
 #ifdef NDEBUG
   #define DEBUGP(...)
 #else
-  #define DEBUGP(fmt, ...)        \
-  fprintf(stderr, fmt, ##__VA_ARGS__)
+  //TODO: print or not with the value of level (DEBUGP_LEVEL)
+  #define DEBUGP(level, fmt, ...)        \
+  fprintf(stderr, "Beignet: "#fmt, ##__VA_ARGS__);  \
+  fprintf(stderr, "\n");
 #endif
 
 /* Check compile time errors */
-- 
1.9.1



More information about the Beignet mailing list