[Beignet] [PATCH V2] When local_work_size is null, try to choose a local_work_size.
Yang Rong
rong.r.yang at intel.com
Sun Jan 26 00:36:58 PST 2014
After fixing all the failures found when local_work_size is not 1, re-enable
automatic local_work_size selection to improve performance.
V2: refine the search to skip useless loop iterations.
Signed-off-by: Yang Rong <rong.r.yang at intel.com>
---
src/cl_api.c | 17 ++++++++++++-----
1 file changed, 12 insertions(+), 5 deletions(-)
diff --git a/src/cl_api.c b/src/cl_api.c
index 405a41a..2a6f8ce 100644
--- a/src/cl_api.c
+++ b/src/cl_api.c
@@ -2472,13 +2472,20 @@ clEnqueueNDRangeKernel(cl_command_queue command_queue,
if (local_work_size != NULL) {
for (i = 0; i < work_dim; ++i)
fixed_local_sz[i] = local_work_size[i];
- } /*else {
- for (i = 0; i< work_dim; i++)
- for (j = 64; j > 1; j--) { //check from 64?
- if (global_work_size[i] % j == 0) //global_work_size always non null
+ } else {
+ uint j, maxDimSize = 64 /* from 64? */, maxGroupSize = 256; //MAX_WORK_GROUP_SIZE may too large
+ for (i = 0; i< work_dim; i++) {
+ for (j = maxDimSize; j > 1; j--) {
+ if (global_work_size[i] % j == 0 && j <= maxGroupSize) {
fixed_local_sz[i] = j;
+ maxGroupSize = maxGroupSize /j;
+ maxDimSize = maxGroupSize > maxDimSize ? maxDimSize : maxGroupSize;
+ break; //choose next work_dim
+ }
}
- } */
+ }
+ }
+
if (global_work_size != NULL)
for (i = 0; i < work_dim; ++i)
fixed_global_sz[i] = global_work_size[i];
--
1.8.3.2
More information about the Beignet
mailing list