[Beignet] [PATCH 3/3] KBL: add kabylake runtime support.
Yang Rong
rong.r.yang at intel.com
Tue Aug 2 07:36:23 UTC 2016
Kabylake is almost the same as Skylake, so use the Skylake functions directly.
Signed-off-by: Yang Rong <rong.r.yang at intel.com>
---
src/cl_command_queue.c | 2 +-
src/cl_device_id.c | 224 +++++++++++++++++++++++++++++++++++++----------
src/cl_gen10_device.h | 31 +++++++
src/intel/intel_driver.c | 4 +-
src/intel/intel_gpgpu.c | 2 +-
5 files changed, 213 insertions(+), 50 deletions(-)
create mode 100644 src/cl_gen10_device.h
diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c
index b66928f..07c5d89 100644
--- a/src/cl_command_queue.c
+++ b/src/cl_command_queue.c
@@ -222,7 +222,7 @@ cl_command_queue_ND_range(cl_command_queue queue,
/* Check that the user did not forget any argument */
TRY (cl_kernel_check_args, k);
- if (ver == 7 || ver == 75 || ver == 8 || ver == 9)
+ if (ver == 7 || ver == 75 || ver == 8 || ver == 9 || ver == 10)
TRY (cl_command_queue_ND_range_gen7, queue, k, work_dim, global_wk_off, global_wk_sz, local_wk_sz);
else
FATAL ("Unknown Gen Device");
diff --git a/src/cl_device_id.c b/src/cl_device_id.c
index 66666ea..35a19a6 100644
--- a/src/cl_device_id.c
+++ b/src/cl_device_id.c
@@ -210,6 +210,61 @@ static struct _cl_device_id intel_bxt_device = {
#include "cl_gen9_device.h"
};
+/* Kabylake GT1 device description: 12 compute units, 7 threads per unit,
+ * 2 sub-slices. Fields not listed here come from cl_gen10_device.h. */
+static struct _cl_device_id intel_kbl_gt1_device = {
+ INIT_ICD(dispatch)
+ .max_compute_unit = 12,
+ .max_thread_per_unit = 7,
+ .sub_slice_count = 2,
+ .max_work_item_sizes = {512, 512, 512},
+ .max_work_group_size = 512,
+ .max_clock_frequency = 1000,
+#include "cl_gen10_device.h"
+};
+
+/* Kabylake GT1.5 device description: 18 compute units, 7 threads per unit,
+ * 3 sub-slices. Fields not listed here come from cl_gen10_device.h. */
+static struct _cl_device_id intel_kbl_gt15_device = {
+ INIT_ICD(dispatch)
+ .max_compute_unit = 18,
+ .max_thread_per_unit = 7,
+ .sub_slice_count = 3,
+ .max_work_item_sizes = {512, 512, 512},
+ .max_work_group_size = 512,
+ .max_clock_frequency = 1000,
+#include "cl_gen10_device.h"
+};
+
+/* Kabylake GT2 device description: 24 compute units, 7 threads per unit,
+ * 3 sub-slices. Fields not listed here come from cl_gen10_device.h. */
+static struct _cl_device_id intel_kbl_gt2_device = {
+ INIT_ICD(dispatch)
+ .max_compute_unit = 24,
+ .max_thread_per_unit = 7,
+ .sub_slice_count = 3,
+ .max_work_item_sizes = {512, 512, 512},
+ .max_work_group_size = 512,
+ .max_clock_frequency = 1000,
+#include "cl_gen10_device.h"
+};
+
+/* Kabylake GT3 device description: 48 compute units, 7 threads per unit,
+ * 6 sub-slices. Fields not listed here come from cl_gen10_device.h. */
+static struct _cl_device_id intel_kbl_gt3_device = {
+ INIT_ICD(dispatch)
+ .max_compute_unit = 48,
+ .max_thread_per_unit = 7,
+ .sub_slice_count = 6,
+ .max_work_item_sizes = {512, 512, 512},
+ .max_work_group_size = 512,
+ .max_clock_frequency = 1000,
+#include "cl_gen10_device.h"
+};
+
+/* Kabylake GT4 device description: 72 compute units, 7 threads per unit,
+ * 9 sub-slices. Fields not listed here come from cl_gen10_device.h. */
+static struct _cl_device_id intel_kbl_gt4_device = {
+ INIT_ICD(dispatch)
+ .max_compute_unit = 72,
+ .max_thread_per_unit = 7,
+ .sub_slice_count = 9,
+ .max_work_item_sizes = {512, 512, 512},
+ .max_work_group_size = 512,
+ .max_clock_frequency = 1000,
+#include "cl_gen10_device.h"
+};
+
LOCAL cl_device_id
cl_get_gt_device(void)
{
@@ -580,6 +635,98 @@ bxt_break:
cl_intel_platform_enable_extension(ret, cl_khr_fp16_ext_id);
break;
+ case PCI_CHIP_KABYLAKE_ULT_GT1:
+ DECL_INFO_STRING(kbl_gt1_break, intel_kbl_gt1_device, name, "Intel(R) HD Graphics Kabylake ULT GT1");
+ case PCI_CHIP_KABYLAKE_DT_GT1:
+ DECL_INFO_STRING(kbl_gt1_break, intel_kbl_gt1_device, name, "Intel(R) HD Graphics Kabylake Desktop GT1");
+ case PCI_CHIP_KABYLAKE_HALO_GT1:
+ DECL_INFO_STRING(kbl_gt1_break, intel_kbl_gt1_device, name, "Intel(R) HD Graphics Kabylake Halo GT1");
+ case PCI_CHIP_KABYLAKE_ULX_GT1:
+ DECL_INFO_STRING(kbl_gt1_break, intel_kbl_gt1_device, name, "Intel(R) HD Graphics Kabylake ULX GT1");
+ case PCI_CHIP_KABYLAKE_SRV_GT1:
+ DECL_INFO_STRING(kbl_gt1_break, intel_kbl_gt1_device, name, "Intel(R) HD Graphics Kabylake Server GT1");
+kbl_gt1_break:
+ intel_kbl_gt1_device.device_id = device_id;
+ intel_kbl_gt1_device.platform = cl_get_platform_default();
+ ret = &intel_kbl_gt1_device;
+#ifdef ENABLE_FP64
+ cl_intel_platform_enable_extension(ret, cl_khr_fp64_ext_id);
+#endif
+ cl_intel_platform_get_default_extension(ret);
+ cl_intel_platform_enable_extension(ret, cl_khr_fp16_ext_id);
+ break;
+
+ case PCI_CHIP_KABYLAKE_ULT_GT15:
+ DECL_INFO_STRING(kbl_gt15_break, intel_kbl_gt15_device, name, "Intel(R) HD Graphics Kabylake ULT GT1.5");
+ case PCI_CHIP_KABYLAKE_DT_GT15:
+ DECL_INFO_STRING(kbl_gt15_break, intel_kbl_gt15_device, name, "Intel(R) HD Graphics Kabylake Desktop GT1.5");
+ case PCI_CHIP_KABYLAKE_HALO_GT15:
+ DECL_INFO_STRING(kbl_gt15_break, intel_kbl_gt15_device, name, "Intel(R) HD Graphics Kabylake Halo GT1.5");
+ case PCI_CHIP_KABYLAKE_ULX_GT15:
+ DECL_INFO_STRING(kbl_gt15_break, intel_kbl_gt15_device, name, "Intel(R) HD Graphics Kabylake ULX GT1.5");
+kbl_gt15_break:
+ intel_kbl_gt15_device.device_id = device_id;
+ intel_kbl_gt15_device.platform = cl_get_platform_default();
+ ret = &intel_kbl_gt15_device;
+#ifdef ENABLE_FP64
+ cl_intel_platform_enable_extension(ret, cl_khr_fp64_ext_id);
+#endif
+ cl_intel_platform_get_default_extension(ret);
+ cl_intel_platform_enable_extension(ret, cl_khr_fp16_ext_id);
+ break;
+
+ case PCI_CHIP_KABYLAKE_ULT_GT2:
+ case PCI_CHIP_KABYLAKE_ULT_GT2_1:
+ DECL_INFO_STRING(kbl_gt2_break, intel_kbl_gt2_device, name, "Intel(R) HD Graphics Kabylake ULT GT2");
+ case PCI_CHIP_KABYLAKE_DT_GT2:
+ DECL_INFO_STRING(kbl_gt2_break, intel_kbl_gt2_device, name, "Intel(R) HD Graphics Kabylake Desktop GT2");
+ case PCI_CHIP_KABYLAKE_HALO_GT2:
+ DECL_INFO_STRING(kbl_gt2_break, intel_kbl_gt2_device, name, "Intel(R) HD Graphics Kabylake Halo GT2");
+ case PCI_CHIP_KABYLAKE_ULX_GT2:
+ DECL_INFO_STRING(kbl_gt2_break, intel_kbl_gt2_device, name, "Intel(R) HD Graphics Kabylake ULX GT2");
+ case PCI_CHIP_KABYLAKE_SRV_GT2:
+ DECL_INFO_STRING(kbl_gt2_break, intel_kbl_gt2_device, name, "Intel(R) HD Graphics Kabylake Server GT2");
+ case PCI_CHIP_KABYLAKE_WKS_GT2:
+ DECL_INFO_STRING(kbl_gt2_break, intel_kbl_gt2_device, name, "Intel(R) HD Graphics Kabylake Workstation GT2");
+kbl_gt2_break:
+ intel_kbl_gt2_device.device_id = device_id;
+ intel_kbl_gt2_device.platform = cl_get_platform_default();
+ ret = &intel_kbl_gt2_device;
+#ifdef ENABLE_FP64
+ cl_intel_platform_enable_extension(ret, cl_khr_fp64_ext_id);
+#endif
+ cl_intel_platform_get_default_extension(ret);
+ cl_intel_platform_enable_extension(ret, cl_khr_fp16_ext_id);
+ break;
+
+ case PCI_CHIP_KABYLAKE_ULT_GT3:
+ case PCI_CHIP_KABYLAKE_ULT_GT3_1:
+ case PCI_CHIP_KABYLAKE_ULT_GT3_2:
+ DECL_INFO_STRING(kbl_gt3_break, intel_kbl_gt3_device, name, "Intel(R) HD Graphics Kabylake ULT GT3");
+kbl_gt3_break:
+ intel_kbl_gt3_device.device_id = device_id;
+ intel_kbl_gt3_device.platform = cl_get_platform_default();
+ ret = &intel_kbl_gt3_device;
+#ifdef ENABLE_FP64
+ cl_intel_platform_enable_extension(ret, cl_khr_fp64_ext_id);
+#endif
+ cl_intel_platform_get_default_extension(ret);
+ cl_intel_platform_enable_extension(ret, cl_khr_fp16_ext_id);
+ break;
+
+ case PCI_CHIP_KABYLAKE_HALO_GT4:
+ /* This id is the Halo variant, so the reported name says "Halo" like the
+  * other Kabylake Halo entries (it used to be mislabelled as "ULT"). */
+ DECL_INFO_STRING(kbl_gt4_break, intel_kbl_gt4_device, name, "Intel(R) HD Graphics Kabylake Halo GT4");
+kbl_gt4_break:
+ /* Record the probed PCI id and platform, then hand back the GT4 descriptor. */
+ intel_kbl_gt4_device.device_id = device_id;
+ intel_kbl_gt4_device.platform = cl_get_platform_default();
+ ret = &intel_kbl_gt4_device;
+#ifdef ENABLE_FP64
+ cl_intel_platform_enable_extension(ret, cl_khr_fp64_ext_id);
+#endif
+ cl_intel_platform_get_default_extension(ret);
+ cl_intel_platform_enable_extension(ret, cl_khr_fp16_ext_id);
+ break;
+
case PCI_CHIP_SANDYBRIDGE_BRIDGE:
case PCI_CHIP_SANDYBRIDGE_GT1:
case PCI_CHIP_SANDYBRIDGE_GT2:
@@ -787,6 +934,29 @@ cl_get_device_ids(cl_platform_id platform,
memcpy(param_value, device->FIELD, device->JOIN(FIELD,_sz)); \
return CL_SUCCESS;
+/* Report whether `device` is one of the statically defined Intel Gen device
+ * descriptors in this file. Returns CL_TRUE on a match, CL_FALSE otherwise. */
+LOCAL cl_bool is_gen_device(cl_device_id device) {
+ /* Every descriptor this driver can hand out, oldest generation first. */
+ static const struct _cl_device_id *const known_devices[] = {
+ &intel_ivb_gt1_device,
+ &intel_ivb_gt2_device,
+ &intel_baytrail_t_device,
+ &intel_hsw_gt1_device,
+ &intel_hsw_gt2_device,
+ &intel_hsw_gt3_device,
+ &intel_brw_gt1_device,
+ &intel_brw_gt2_device,
+ &intel_brw_gt3_device,
+ &intel_chv_device,
+ &intel_skl_gt1_device,
+ &intel_skl_gt2_device,
+ &intel_skl_gt3_device,
+ &intel_skl_gt4_device,
+ &intel_bxt_device,
+ &intel_kbl_gt1_device,
+ &intel_kbl_gt15_device,
+ &intel_kbl_gt2_device,
+ &intel_kbl_gt3_device,
+ &intel_kbl_gt4_device,
+ };
+ size_t idx;
+
+ for (idx = 0; idx < sizeof(known_devices) / sizeof(known_devices[0]); ++idx) {
+ if (device == known_devices[idx])
+ return CL_TRUE;
+ }
+ return CL_FALSE;
+}
+
LOCAL cl_int
cl_get_device_info(cl_device_id device,
cl_device_info param_name,
@@ -794,22 +964,7 @@ cl_get_device_info(cl_device_id device,
void * param_value,
size_t * param_value_size_ret)
{
- if (UNLIKELY(device != &intel_ivb_gt1_device &&
- device != &intel_ivb_gt2_device &&
- device != &intel_baytrail_t_device &&
- device != &intel_hsw_gt1_device &&
- device != &intel_hsw_gt2_device &&
- device != &intel_hsw_gt3_device &&
- device != &intel_brw_gt1_device &&
- device != &intel_brw_gt2_device &&
- device != &intel_brw_gt3_device &&
- device != &intel_chv_device &&
- device != &intel_skl_gt1_device &&
- device != &intel_skl_gt2_device &&
- device != &intel_skl_gt3_device &&
- device != &intel_skl_gt4_device &&
- device != &intel_bxt_device
- ))
+ if (UNLIKELY(is_gen_device(device) == CL_FALSE))
return CL_INVALID_DEVICE;
/* Find the correct parameter */
@@ -909,22 +1064,7 @@ cl_get_device_info(cl_device_id device,
LOCAL cl_int
cl_device_get_version(cl_device_id device, cl_int *ver)
{
- if (UNLIKELY(device != &intel_ivb_gt1_device &&
- device != &intel_ivb_gt2_device &&
- device != &intel_baytrail_t_device &&
- device != &intel_hsw_gt1_device &&
- device != &intel_hsw_gt2_device &&
- device != &intel_hsw_gt3_device &&
- device != &intel_brw_gt1_device &&
- device != &intel_brw_gt2_device &&
- device != &intel_brw_gt3_device &&
- device != &intel_chv_device &&
- device != &intel_skl_gt1_device &&
- device != &intel_skl_gt2_device &&
- device != &intel_skl_gt3_device &&
- device != &intel_skl_gt4_device &&
- device != &intel_bxt_device
- ))
+ if (UNLIKELY(is_gen_device(device) == CL_FALSE))
return CL_INVALID_DEVICE;
if (ver == NULL)
return CL_SUCCESS;
@@ -942,6 +1082,10 @@ cl_device_get_version(cl_device_id device, cl_int *ver)
|| device == &intel_skl_gt3_device || device == &intel_skl_gt4_device
|| device == &intel_bxt_device) {
*ver = 9;
+ } else if (device == &intel_kbl_gt1_device || device == &intel_kbl_gt2_device
+ || device == &intel_kbl_gt3_device || device == &intel_kbl_gt4_device
+ || device == &intel_kbl_gt15_device) {
+ *ver = 10;
} else
return CL_INVALID_VALUE;
@@ -1015,21 +1159,7 @@ cl_get_kernel_workgroup_info(cl_kernel kernel,
{
int err = CL_SUCCESS;
int dimension = 0;
- if (UNLIKELY(device != &intel_ivb_gt1_device &&
- device != &intel_ivb_gt2_device &&
- device != &intel_baytrail_t_device &&
- device != &intel_hsw_gt1_device &&
- device != &intel_hsw_gt2_device &&
- device != &intel_hsw_gt3_device &&
- device != &intel_brw_gt1_device &&
- device != &intel_brw_gt2_device &&
- device != &intel_brw_gt3_device &&
- device != &intel_chv_device &&
- device != &intel_skl_gt1_device &&
- device != &intel_skl_gt2_device &&
- device != &intel_skl_gt3_device &&
- device != &intel_skl_gt4_device &&
- device != &intel_bxt_device))
+ if (UNLIKELY(is_gen_device(device) == CL_FALSE))
return CL_INVALID_DEVICE;
CHECK_KERNEL(kernel);
diff --git a/src/cl_gen10_device.h b/src/cl_gen10_device.h
new file mode 100644
index 0000000..4d961eb
--- /dev/null
+++ b/src/cl_gen10_device.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ */
+
+/* Common fields for all KBL devices. This fragment is included inside each
+ * intel_kbl_*_device initializer, so it must stay a bare list of designated
+ * initializers. */
+.max_parameter_size = 1024,
+.global_mem_cache_line_size = 64, /* XXX */
+.global_mem_cache_size = 8 << 10, /* XXX */
+.local_mem_type = CL_GLOBAL,
+.local_mem_size = 64 << 10,
+.scratch_mem_size = 2 << 20,
+/* 4 GiB. The multiplication is done in unsigned long long so the product
+ * cannot wrap to 0 on targets where unsigned long is only 32 bits wide. */
+.max_mem_alloc_size = 4ull * 1024 * 1024 * 1024,
+.global_mem_size = 4ull * 1024 * 1024 * 1024,
+
+#include "cl_gt_device.h"
+
diff --git a/src/intel/intel_driver.c b/src/intel/intel_driver.c
index e561725..a74d936 100644
--- a/src/intel/intel_driver.c
+++ b/src/intel/intel_driver.c
@@ -169,7 +169,9 @@ intel_driver_init(intel_driver_t *driver, int dev_fd)
else
FATAL ("Unsupported Gen for emulation");
#else
- if (IS_GEN9(driver->device_id))
+ if (IS_GEN10(driver->device_id))
+ driver->gen_ver = 10;
+ else if (IS_GEN9(driver->device_id))
driver->gen_ver = 9;
else if (IS_GEN8(driver->device_id))
driver->gen_ver = 8;
diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c
index db967e8..b80feda 100644
--- a/src/intel/intel_gpgpu.c
+++ b/src/intel/intel_gpgpu.c
@@ -2467,7 +2467,7 @@ intel_set_gpgpu_callbacks(int device_id)
intel_gpgpu_select_pipeline = intel_gpgpu_select_pipeline_gen7;
return;
}
- if (IS_SKYLAKE(device_id) || IS_BROXTON(device_id)) {
+ if (IS_SKYLAKE(device_id) || IS_BROXTON(device_id) || IS_KABYLAKE(device_id)) {
cl_gpgpu_bind_image = (cl_gpgpu_bind_image_cb *) intel_gpgpu_bind_image_gen9;
intel_gpgpu_set_L3 = intel_gpgpu_set_L3_gen8;
cl_gpgpu_get_cache_ctrl = (cl_gpgpu_get_cache_ctrl_cb *)intel_gpgpu_get_cache_ctrl_gen9;
--
2.1.4
More information about the Beignet
mailing list