[Beignet] [PATCH 7/8] BDW: Add device's sub slice field, for cl_get_kernel_max_wg_sz.
Yang Rong
rong.r.yang at intel.com
Sun Sep 28 22:38:36 PDT 2014
When SLM is enabled, querying the kernel's max workgroup size should return a single sub slice's max thread count * SIMD width.
So the device's sub slice information is needed.
Signed-off-by: Yang Rong <rong.r.yang at intel.com>
---
src/cl_device_id.c | 13 +++++++++++--
src/cl_device_id.h | 1 +
2 files changed, 12 insertions(+), 2 deletions(-)
diff --git a/src/cl_device_id.c b/src/cl_device_id.c
index 9e63e81..a1e3e82 100644
--- a/src/cl_device_id.c
+++ b/src/cl_device_id.c
@@ -40,6 +40,7 @@ static struct _cl_device_id intel_ivb_gt2_device = {
INIT_ICD(dispatch)
.max_compute_unit = 16,
.max_thread_per_unit = 8,
+ .sub_slice_count = 2,
.max_work_item_sizes = {1024, 1024, 1024},
.max_work_group_size = 1024,
.max_clock_frequency = 1000,
@@ -50,6 +51,7 @@ static struct _cl_device_id intel_ivb_gt1_device = {
INIT_ICD(dispatch)
.max_compute_unit = 6,
.max_thread_per_unit = 6,
+ .sub_slice_count = 1,
.max_work_item_sizes = {512, 512, 512},
.max_work_group_size = 512,
.max_clock_frequency = 1000,
@@ -60,6 +62,7 @@ static struct _cl_device_id intel_baytrail_t_device = {
INIT_ICD(dispatch)
.max_compute_unit = 4,
.max_thread_per_unit = 8,
+ .sub_slice_count = 1,
.max_work_item_sizes = {512, 512, 512},
.max_work_group_size = 512,
.max_clock_frequency = 1000,
@@ -71,6 +74,7 @@ static struct _cl_device_id intel_hsw_gt1_device = {
INIT_ICD(dispatch)
.max_compute_unit = 10,
.max_thread_per_unit = 7,
+ .sub_slice_count = 1,
.max_work_item_sizes = {1024, 1024, 1024},
.max_work_group_size = 1024,
.max_clock_frequency = 1000,
@@ -81,6 +85,7 @@ static struct _cl_device_id intel_hsw_gt2_device = {
INIT_ICD(dispatch)
.max_compute_unit = 20,
.max_thread_per_unit = 7,
+ .sub_slice_count = 2,
.max_work_item_sizes = {1024, 1024, 1024},
.max_work_group_size = 1024,
.max_clock_frequency = 1000,
@@ -91,6 +96,7 @@ static struct _cl_device_id intel_hsw_gt3_device = {
INIT_ICD(dispatch)
.max_compute_unit = 40,
.max_thread_per_unit = 7,
+ .sub_slice_count = 4,
.max_work_item_sizes = {1024, 1024, 1024},
.max_work_group_size = 1024,
.max_clock_frequency = 1000,
@@ -102,6 +108,7 @@ static struct _cl_device_id intel_brw_gt1_device = {
INIT_ICD(dispatch)
.max_compute_unit = 12,
.max_thread_per_unit = 7,
+ .sub_slice_count = 2,
.max_work_item_sizes = {1024, 1024, 1024},
.max_work_group_size = 1024,
.max_clock_frequency = 1000,
@@ -112,6 +119,7 @@ static struct _cl_device_id intel_brw_gt2_device = {
INIT_ICD(dispatch)
.max_compute_unit = 24,
.max_thread_per_unit = 7,
+ .sub_slice_count = 3,
.max_work_item_sizes = {1024, 1024, 1024},
.max_work_group_size = 1024,
.max_clock_frequency = 1000,
@@ -122,6 +130,7 @@ static struct _cl_device_id intel_brw_gt3_device = {
INIT_ICD(dispatch)
.max_compute_unit = 48,
.max_thread_per_unit = 7,
+ .sub_slice_count = 6,
.max_work_item_sizes = {1024, 1024, 1024},
.max_work_group_size = 1024,
.max_clock_frequency = 1000,
@@ -634,8 +643,8 @@ cl_get_kernel_max_wg_sz(cl_kernel kernel)
work_group_size = kernel->program->ctx->device->max_compute_unit *
kernel->program->ctx->device->max_thread_per_unit * simd_width;
} else
- work_group_size = kernel->program->ctx->device->max_work_group_size /
- (16 / simd_width);
+ work_group_size = kernel->program->ctx->device->max_compute_unit * simd_width *
+ kernel->program->ctx->device->max_thread_per_unit / kernel->program->ctx->device->sub_slice_count;
return work_group_size;
}
diff --git a/src/cl_device_id.h b/src/cl_device_id.h
index 31bce47..afc32e2 100644
--- a/src/cl_device_id.h
+++ b/src/cl_device_id.h
@@ -27,6 +27,7 @@ struct _cl_device_id {
cl_uint vendor_id;
cl_uint max_compute_unit; // maximum EU number
cl_uint max_thread_per_unit; // maximum EU threads per EU.
+ cl_uint sub_slice_count; // Device's sub slice count
cl_uint max_work_item_dimensions; // should be 3.
size_t max_work_item_sizes[3]; // equal to maximum work group size.
size_t max_work_group_size; // maximum work group size under simd16 mode.
--
1.8.3.2
More information about the Beignet
mailing list