[Beignet] [PATCH 2/4] Runtime: Add API clGetKernelSubGroupInfoKHR for subgroup extension
Xiuli Pan
xiuli.pan at intel.com
Mon May 16 01:55:21 UTC 2016
From: Pan Xiuli <xiuli.pan at intel.com>
Signed-off-by: Pan Xiuli <xiuli.pan at intel.com>
---
include/CL/cl_intel.h | 27 +++++++++++++++++
src/cl_api.c | 20 +++++++++++++
src/cl_device_id.c | 83 +++++++++++++++++++++++++++++++++++++++++++++++++++
src/cl_device_id.h | 9 ++++++
4 files changed, 139 insertions(+)
diff --git a/include/CL/cl_intel.h b/include/CL/cl_intel.h
index 0ea4af4..47bae46 100644
--- a/include/CL/cl_intel.h
+++ b/include/CL/cl_intel.h
@@ -170,6 +170,33 @@ typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateImageFromFdINTEL_fn)(
const cl_import_image_info_intel * /* info */,
cl_int * /* errcode_ret */);
+#ifndef CL_VERSION_2_0 /* everything below is declared only when CL_VERSION_2_0 is not defined */
+typedef cl_uint cl_kernel_sub_group_info; /* type of the param_name selector used below */
+
+/* cl_khr_sub_group_info */
+#define CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR 0x2033
+#define CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR 0x2034
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetKernelSubGroupInfoKHR(cl_kernel /* in_kernel */,
+ cl_device_id /*in_device*/,
+ cl_kernel_sub_group_info /* param_name */,
+ size_t /*input_value_size*/,
+ const void * /*input_value*/,
+ size_t /*param_value_size*/,
+ void* /*param_value*/,
+ size_t* /*param_value_size_ret*/ );
+
+typedef CL_API_ENTRY cl_int /* pointer type with the same parameter list as clGetKernelSubGroupInfoKHR */
+ ( CL_API_CALL * clGetKernelSubGroupInfoKHR_fn)(cl_kernel /* in_kernel */,
+ cl_device_id /*in_device*/,
+ cl_kernel_sub_group_info /* param_name */,
+ size_t /*input_value_size*/,
+ const void * /*input_value*/,
+ size_t /*param_value_size*/,
+ void* /*param_value*/,
+ size_t* /*param_value_size_ret*/ );
+#endif /* !CL_VERSION_2_0 */
#ifdef __cplusplus
}
#endif
diff --git a/src/cl_api.c b/src/cl_api.c
index 881ea6d..85ed4cf 100644
--- a/src/cl_api.c
+++ b/src/cl_api.c
@@ -1348,6 +1348,26 @@ clGetKernelWorkGroupInfo(cl_kernel kernel,
}
cl_int
+clGetKernelSubGroupInfoKHR(cl_kernel kernel,
+ cl_device_id device,
+ cl_kernel_work_group_info param_name, /* NOTE(review): the cl_intel.h declaration types this as cl_kernel_sub_group_info */
+ size_t input_value_size,
+ const void * input_value,
+ size_t param_value_size,
+ void * param_value,
+ size_t * param_value_size_ret)
+{ /* Thin API entry point: forwards every argument unchanged and returns the callee's status. */
+ return cl_get_kernel_subgroup_info(kernel,
+ device,
+ param_name,
+ input_value_size,
+ input_value,
+ param_value_size,
+ param_value,
+ param_value_size_ret);
+}
+
+cl_int
clWaitForEvents(cl_uint num_events,
const cl_event * event_list)
{
diff --git a/src/cl_device_id.c b/src/cl_device_id.c
index f8e06e2..4e6a575 100644
--- a/src/cl_device_id.c
+++ b/src/cl_device_id.c
@@ -27,6 +27,7 @@
#include "cl_thread.h"
#include "CL/cl.h"
#include "CL/cl_ext.h"
+#include "CL/cl_intel.h"
#include "cl_gbe_loader.h"
#include "cl_alloc.h"
@@ -1086,3 +1087,85 @@ error:
return err;
}
+/*
+ * Answer a clGetKernelSubGroupInfoKHR query for a kernel.
+ *
+ * input_value holds 1 to 3 size_t local work-group dimensions, so
+ * input_value_size must be 1, 2 or 3 times sizeof(size_t).  Their product
+ * (not checked for size_t overflow) is the local size the query is
+ * evaluated for.  The size_t answer is stored in param_value when that is
+ * non-NULL, and its byte size in param_value_size_ret when that is non-NULL:
+ *  - CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR: the smaller of the
+ *    local size and cl_kernel_get_simd_width(kernel);
+ *  - CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR: the local size divided by
+ *    that width, rounded up.
+ *
+ * Returns CL_SUCCESS, CL_INVALID_DEVICE when a non-NULL device is not the
+ * device of the kernel's context, CL_INVALID_VALUE for an unknown
+ * param_name, a too-small param_value buffer or a malformed input_value,
+ * or the error CHECK_KERNEL reports for a bad kernel handle.
+ *
+ * param_name keeps the cl_kernel_work_group_info type used by the
+ * prototype in cl_device_id.h so the two declarations stay identical.
+ */
+LOCAL cl_int
+cl_get_kernel_subgroup_info(cl_kernel kernel,
+                            cl_device_id device,
+                            cl_kernel_work_group_info param_name,
+                            size_t input_value_size,
+                            const void* input_value,
+                            size_t param_value_size,
+                            void* param_value,
+                            size_t* param_value_size_ret)
+{
+  int err = CL_SUCCESS;
+  const size_t *local_dims = (const size_t *)input_value;
+  size_t local_sz = 1;
+  size_t simd_sz, dim, i;
+
+  /* Validate the handle first: the device check below dereferences it.
+   * NOTE(review): CHECK_KERNEL presumably sets err and jumps to the error
+   * label on failure -- confirm against its definition. */
+  CHECK_KERNEL(kernel);
+  if (device != NULL && kernel->program->ctx->device != device)
+    return CL_INVALID_DEVICE;
+
+  /* Both supported queries share the same argument contract, so reject
+   * anything else before looking at the other arguments. */
+  if (param_name != CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR &&
+      param_name != CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR)
+    return CL_INVALID_VALUE;
+
+  if (param_value != NULL && param_value_size < sizeof(size_t))
+    return CL_INVALID_VALUE;
+  if (param_value_size_ret != NULL)
+    *param_value_size_ret = sizeof(size_t);
+
+  /* Only whole 1-D, 2-D or 3-D size_t arrays are accepted. */
+  if (input_value_size == 0 ||
+      input_value_size % sizeof(size_t) != 0 ||
+      input_value_size > 3 * sizeof(size_t))
+    return CL_INVALID_VALUE;
+  if (input_value == NULL)
+    return CL_INVALID_VALUE;
+
+  /* The local size is the product of the supplied dimensions. */
+  dim = input_value_size / sizeof(size_t);
+  for (i = 0; i < dim; i++)
+    local_sz *= local_dims[i];
+
+  /* A NULL param_value is a pure size query: nothing more to compute. */
+  if (param_value == NULL)
+    return CL_SUCCESS;
+
+  /* A non-empty work-group smaller than the SIMD width yields one partial
+   * sub-group: size == local_sz and count == 1. */
+  simd_sz = cl_kernel_get_simd_width(kernel);
+  if (param_name == CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR)
+    *(size_t*)param_value = local_sz >= simd_sz ? simd_sz : local_sz;
+  else
+    *(size_t*)param_value = (local_sz + simd_sz - 1) / simd_sz;
+
+error:
+  return err;
+}
diff --git a/src/cl_device_id.h b/src/cl_device_id.h
index b01a6fb..7db125b 100644
--- a/src/cl_device_id.h
+++ b/src/cl_device_id.h
@@ -149,6 +149,15 @@ extern cl_int cl_get_kernel_workgroup_info(cl_kernel kernel,
size_t param_value_size,
void * param_value,
size_t * param_value_size_ret);
+
+extern cl_int cl_get_kernel_subgroup_info(cl_kernel kernel, /* called by clGetKernelSubGroupInfoKHR in cl_api.c */
+ cl_device_id device,
+ cl_kernel_work_group_info param_name, /* NOTE(review): the public API in cl_intel.h uses cl_kernel_sub_group_info here */
+ size_t input_value_size,
+ const void * input_value,
+ size_t param_value_size,
+ void * param_value,
+ size_t * param_value_size_ret);
/* Returns the Gen device ID */
extern cl_int cl_device_get_version(cl_device_id device, cl_int *ver);
extern size_t cl_get_kernel_max_wg_sz(cl_kernel);
--
2.7.4
More information about the Beignet mailing list