[Beignet] [PATCH] BDW: set the BDW GT3's max_compute_unit to 47 per spec.
Yang Rong
rong.r.yang at intel.com
Tue Nov 11 21:08:01 PST 2014
Need to double-check BDW GT3's max thread count when SLM is enabled.
Signed-off-by: Yang Rong <rong.r.yang at intel.com>
---
src/cl_device_id.c | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/src/cl_device_id.c b/src/cl_device_id.c
index e3b0c64..545b944 100644
--- a/src/cl_device_id.c
+++ b/src/cl_device_id.c
@@ -129,7 +129,7 @@ static struct _cl_device_id intel_brw_gt2_device = {
static struct _cl_device_id intel_brw_gt3_device = {
INIT_ICD(dispatch)
- .max_compute_unit = 48,
+ .max_compute_unit = 47,
.max_thread_per_unit = 7,
.sub_slice_count = 6,
.max_work_item_sizes = {1024, 1024, 1024},
@@ -660,8 +660,9 @@ cl_get_kernel_max_wg_sz(cl_kernel kernel)
work_group_size = kernel->program->ctx->device->max_compute_unit *
kernel->program->ctx->device->max_thread_per_unit * simd_width;
} else {
- thread_cnt = kernel->program->ctx->device->max_compute_unit *
- kernel->program->ctx->device->max_thread_per_unit / kernel->program->ctx->device->sub_slice_count;
+ cl_device_id device = kernel->program->ctx->device;
+ thread_cnt = (device->max_compute_unit + device->sub_slice_count - 1) / device->sub_slice_count;
+ thread_cnt = thread_cnt * device->max_thread_per_unit;
if(thread_cnt > 64)
thread_cnt = 64;
work_group_size = thread_cnt * simd_width;
--
1.9.1
More information about the Beignet
mailing list