[Beignet] [PATCH] BDW: Change BDW's max work group size to 512.

Yang Rong rong.r.yang at intel.com
Wed Oct 15 01:26:14 PDT 2014


OpenCV only queries and uses the device's max work group size. When SLM/barrier
is enabled, BDW can't fit a work group of 1024 work items in one subslice, even in
SIMD16. Temporarily change the device's max work group size to 512.

Signed-off-by: Yang Rong <rong.r.yang at intel.com>
---
 src/cl_device_id.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/cl_device_id.c b/src/cl_device_id.c
index 7944ca4..c911c23 100644
--- a/src/cl_device_id.c
+++ b/src/cl_device_id.c
@@ -110,7 +110,7 @@ static struct _cl_device_id intel_brw_gt1_device = {
   .max_thread_per_unit = 7,
   .sub_slice_count = 2,
   .max_work_item_sizes = {1024, 1024, 1024},
-  .max_work_group_size = 1024,
+  .max_work_group_size = 512,
   .max_clock_frequency = 1000,
 #include "cl_gen75_device.h"
 };
@@ -121,7 +121,7 @@ static struct _cl_device_id intel_brw_gt2_device = {
   .max_thread_per_unit = 7,
   .sub_slice_count = 3,
   .max_work_item_sizes = {1024, 1024, 1024},
-  .max_work_group_size = 1024,
+  .max_work_group_size = 512,
   .max_clock_frequency = 1000,
 #include "cl_gen75_device.h"
 };
@@ -132,7 +132,7 @@ static struct _cl_device_id intel_brw_gt3_device = {
   .max_thread_per_unit = 7,
   .sub_slice_count = 6,
   .max_work_item_sizes = {1024, 1024, 1024},
-  .max_work_group_size = 1024,
+  .max_work_group_size = 512,
   .max_clock_frequency = 1000,
 #include "cl_gen75_device.h"
 };
@@ -648,6 +648,8 @@ cl_get_kernel_max_wg_sz(cl_kernel kernel)
     if(thread_cnt > 64)
       thread_cnt = 64;
     work_group_size = thread_cnt * simd_width;
+    if(work_group_size > kernel->program->ctx->device->max_work_group_size)
+      work_group_size = kernel->program->ctx->device->max_work_group_size;
   }
   return work_group_size;
 }
-- 
1.9.1



More information about the Beignet mailing list