[Beignet] [PATCH] output warning message if do not find a good local_work_size
Guo Yejun
yejun.guo at intel.com
Mon Dec 21 15:49:24 PST 2015
If the user passes local_work_size as NULL to clEnqueueNDRangeKernel
and the driver cannot find a good value on its own, output a warning
message through the DEBUGP macro. Also refine the macro: it now takes a
severity level and adds the "Beignet: " prefix and the trailing newline
itself.
Signed-off-by: Guo Yejun <yejun.guo at intel.com>
---
src/cl_api.c | 4 ++++
src/cl_command_queue_gen7.c | 6 +++---
src/cl_utils.h | 12 ++++++++++--
3 files changed, 17 insertions(+), 5 deletions(-)
diff --git a/src/cl_api.c b/src/cl_api.c
index 510941e..3902592 100644
--- a/src/cl_api.c
+++ b/src/cl_api.c
@@ -2999,6 +2999,7 @@ clEnqueueNDRangeKernel(cl_command_queue command_queue,
fixed_local_sz[1] = 1;
} else {
uint j, maxDimSize = 64 /* from 64? */, maxGroupSize = 256; //MAX_WORK_GROUP_SIZE may too large
+ size_t realGroupSize = 1;
for (i = 0; i< work_dim; i++) {
for (j = maxDimSize; j > 1; j--) {
if (global_work_size[i] % j == 0 && j <= maxGroupSize) {
@@ -3008,7 +3009,10 @@ clEnqueueNDRangeKernel(cl_command_queue command_queue,
break; //choose next work_dim
}
}
+ realGroupSize *= fixed_local_sz[i];
}
+ if (realGroupSize % 8 != 0)
+ DEBUGP(DL_WARNING, "unable to find good values for local_work_size[i], please provide local_work_size[] explicitly, you can find good values with trial-and-error method.");
}
}
diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c
index 44db7ed..791a7ca 100644
--- a/src/cl_command_queue_gen7.c
+++ b/src/cl_command_queue_gen7.c
@@ -369,21 +369,21 @@ cl_command_queue_ND_range_gen7(cl_command_queue queue,
/* Compute the number of HW threads we need */
if(UNLIKELY(err = cl_kernel_work_group_sz(ker, local_wk_sz, 3, &local_sz) != CL_SUCCESS)) {
- DEBUGP("Beignet: Work group size exceed Kernel's work group size.\n");
+ DEBUGP(DL_ERROR, "Work group size exceed Kernel's work group size.");
return err;
}
kernel.thread_n = thread_n = (local_sz + simd_sz - 1) / simd_sz;
kernel.curbe_sz = cst_sz;
if (scratch_sz > ker->program->ctx->device->scratch_mem_size) {
- DEBUGP("Beignet: Out of scratch memory %d.\n", scratch_sz);
+ DEBUGP(DL_ERROR, "Out of scratch memory %d.", scratch_sz);
return CL_OUT_OF_RESOURCES;
}
/* Curbe step 1: fill the constant urb buffer data shared by all threads */
if (ker->curbe) {
kernel.slm_sz = cl_curbe_fill(ker, work_dim, global_wk_off, global_wk_sz, local_wk_sz, thread_n);
if (kernel.slm_sz > ker->program->ctx->device->local_mem_size) {
- DEBUGP("Beignet: Out of shared local memory %d.\n", kernel.slm_sz);
+ DEBUGP(DL_ERROR, "Out of shared local memory %d.", kernel.slm_sz);
return CL_OUT_OF_RESOURCES;
}
}
diff --git a/src/cl_utils.h b/src/cl_utils.h
index 7595158..83f880e 100644
--- a/src/cl_utils.h
+++ b/src/cl_utils.h
@@ -31,11 +31,19 @@
#define JOIN(X, Y) _DO_JOIN(X, Y)
#define _DO_JOIN(X, Y) _DO_JOIN2(X, Y)
#define _DO_JOIN2(X, Y) X##Y
+enum DEBUGP_LEVEL
+{
+ DL_INFO,
+ DL_WARNING,
+ DL_ERROR
+};
#ifdef NDEBUG
#define DEBUGP(...)
#else
- #define DEBUGP(fmt, ...) \
- fprintf(stderr, fmt, ##__VA_ARGS__)
+ //Single statement via do/while(0), safe in unbraced if/else. TODO: decide print or not with the value of level from environment
+ #define DEBUGP(level, fmt, ...) \
+ do { fprintf(stderr, "Beignet: "#fmt, ##__VA_ARGS__); \
+ fprintf(stderr, "\n"); } while (0)
#endif
/* Check compile time errors */
--
1.9.1
More information about the Beignet
mailing list