[Beignet] [PATCH 8/8] Enable cl_khr_fp16 extension on BDW later platform.
junyan.he at inbox.com
junyan.he at inbox.com
Thu May 21 01:26:12 PDT 2015
From: Junyan He <junyan.he at linux.intel.com>
To support the half-float type, we need to add this extension.
IVB and HSW do not support half float, so we just enable
the extension on the device level, and need to get the
device ID before enabling it.
Signed-off-by: Junyan He <junyan.he at linux.intel.com>
---
src/cl_device_id.c | 112 ++++++++++++++++++++++++++++-----------------------
src/cl_device_id.h | 1 +
src/cl_extensions.c | 29 +++++++++++--
src/cl_extensions.h | 2 +
src/cl_gt_device.h | 1 +
src/cl_platform_id.c | 2 +-
6 files changed, 91 insertions(+), 56 deletions(-)
diff --git a/src/cl_device_id.c b/src/cl_device_id.c
index 215f7f2..5871244 100644
--- a/src/cl_device_id.c
+++ b/src/cl_device_id.c
@@ -26,6 +26,7 @@
#include "cl_khr_icd.h"
#include "cl_thread.h"
#include "CL/cl.h"
+#include "CL/cl_ext.h"
#include "cl_gbe_loader.h"
#include "cl_alloc.h"
@@ -398,6 +399,8 @@ baytrail_t_device_break:
case PCI_CHIP_BROADWLL_U_GT1:
DECL_INFO_STRING(brw_gt1_break, intel_brw_gt1_device, name, "Intel(R) HD Graphics BroadWell ULX GT1");
brw_gt1_break:
+ /* For Gen8 and later, half float is supported and we will enable cl_khr_fp16. */
+ cl_intel_platform_enable_fp16_extension(intel_platform);
intel_brw_gt1_device.vendor_id = device_id;
intel_brw_gt1_device.platform = intel_platform;
ret = &intel_brw_gt1_device;
@@ -414,6 +417,7 @@ brw_gt1_break:
case PCI_CHIP_BROADWLL_U_GT2:
DECL_INFO_STRING(brw_gt2_break, intel_brw_gt2_device, name, "Intel(R) HD Graphics BroadWell ULX GT2");
brw_gt2_break:
+ cl_intel_platform_enable_fp16_extension(intel_platform);
intel_brw_gt2_device.vendor_id = device_id;
intel_brw_gt2_device.platform = intel_platform;
ret = &intel_brw_gt2_device;
@@ -430,6 +434,7 @@ brw_gt2_break:
case PCI_CHIP_BROADWLL_U_GT3:
DECL_INFO_STRING(brw_gt3_break, intel_brw_gt3_device, name, "Intel(R) HD Graphics BroadWell ULX GT2");
brw_gt3_break:
+ cl_intel_platform_enable_fp16_extension(intel_platform);
intel_brw_gt3_device.vendor_id = device_id;
intel_brw_gt3_device.platform = intel_platform;
ret = &intel_brw_gt3_device;
@@ -447,61 +452,65 @@ chv_break:
break;
- case PCI_CHIP_SKYLAKE_ULT_GT1:
- DECL_INFO_STRING(skl_gt1_break, intel_skl_gt1_device, name, "Intel(R) HD Graphics Skylake ULT GT1");
- case PCI_CHIP_SKYLAKE_ULX_GT1:
- DECL_INFO_STRING(skl_gt1_break, intel_skl_gt1_device, name, "Intel(R) HD Graphics Skylake ULX GT1");
- case PCI_CHIP_SKYLAKE_DT_GT1:
- DECL_INFO_STRING(skl_gt1_break, intel_skl_gt1_device, name, "Intel(R) HD Graphics Skylake Desktop GT1");
- case PCI_CHIP_SKYLAKE_HALO_GT1:
- DECL_INFO_STRING(skl_gt1_break, intel_skl_gt1_device, name, "Intel(R) HD Graphics Skylake Halo GT1");
- case PCI_CHIP_SKYLAKE_SRV_GT1:
- DECL_INFO_STRING(skl_gt1_break, intel_skl_gt1_device, name, "Intel(R) HD Graphics Skylake Server GT1");
+ case PCI_CHIP_SKYLAKE_ULT_GT1:
+ DECL_INFO_STRING(skl_gt1_break, intel_skl_gt1_device, name, "Intel(R) HD Graphics Skylake ULT GT1");
+ case PCI_CHIP_SKYLAKE_ULX_GT1:
+ DECL_INFO_STRING(skl_gt1_break, intel_skl_gt1_device, name, "Intel(R) HD Graphics Skylake ULX GT1");
+ case PCI_CHIP_SKYLAKE_DT_GT1:
+ DECL_INFO_STRING(skl_gt1_break, intel_skl_gt1_device, name, "Intel(R) HD Graphics Skylake Desktop GT1");
+ case PCI_CHIP_SKYLAKE_HALO_GT1:
+ DECL_INFO_STRING(skl_gt1_break, intel_skl_gt1_device, name, "Intel(R) HD Graphics Skylake Halo GT1");
+ case PCI_CHIP_SKYLAKE_SRV_GT1:
+ DECL_INFO_STRING(skl_gt1_break, intel_skl_gt1_device, name, "Intel(R) HD Graphics Skylake Server GT1");
skl_gt1_break:
- intel_skl_gt1_device.vendor_id = device_id;
- intel_skl_gt1_device.platform = intel_platform;
- ret = &intel_skl_gt1_device;
- break;
-
- case PCI_CHIP_SKYLAKE_ULT_GT2:
- DECL_INFO_STRING(skl_gt2_break, intel_skl_gt2_device, name, "Intel(R) HD Graphics Skylake ULT GT2");
- case PCI_CHIP_SKYLAKE_ULT_GT2F:
- DECL_INFO_STRING(skl_gt2_break, intel_skl_gt2_device, name, "Intel(R) HD Graphics Skylake ULT GT2F");
- case PCI_CHIP_SKYLAKE_ULX_GT2:
- DECL_INFO_STRING(skl_gt2_break, intel_skl_gt2_device, name, "Intel(R) HD Graphics Skylake ULX GT2");
- case PCI_CHIP_SKYLAKE_DT_GT2:
- DECL_INFO_STRING(skl_gt2_break, intel_skl_gt2_device, name, "Intel(R) HD Graphics Skylake Desktop GT2");
- case PCI_CHIP_SKYLAKE_HALO_GT2:
- DECL_INFO_STRING(skl_gt2_break, intel_skl_gt2_device, name, "Intel(R) HD Graphics Skylake Halo GT2");
- case PCI_CHIP_SKYLAKE_SRV_GT2:
- DECL_INFO_STRING(skl_gt2_break, intel_skl_gt2_device, name, "Intel(R) HD Graphics Skylake Server GT2");
+ cl_intel_platform_enable_fp16_extension(intel_platform);
+ intel_skl_gt1_device.vendor_id = device_id;
+ intel_skl_gt1_device.platform = intel_platform;
+ ret = &intel_skl_gt1_device;
+ break;
+
+ case PCI_CHIP_SKYLAKE_ULT_GT2:
+ DECL_INFO_STRING(skl_gt2_break, intel_skl_gt2_device, name, "Intel(R) HD Graphics Skylake ULT GT2");
+ case PCI_CHIP_SKYLAKE_ULT_GT2F:
+ DECL_INFO_STRING(skl_gt2_break, intel_skl_gt2_device, name, "Intel(R) HD Graphics Skylake ULT GT2F");
+ case PCI_CHIP_SKYLAKE_ULX_GT2:
+ DECL_INFO_STRING(skl_gt2_break, intel_skl_gt2_device, name, "Intel(R) HD Graphics Skylake ULX GT2");
+ case PCI_CHIP_SKYLAKE_DT_GT2:
+ DECL_INFO_STRING(skl_gt2_break, intel_skl_gt2_device, name, "Intel(R) HD Graphics Skylake Desktop GT2");
+ case PCI_CHIP_SKYLAKE_HALO_GT2:
+ DECL_INFO_STRING(skl_gt2_break, intel_skl_gt2_device, name, "Intel(R) HD Graphics Skylake Halo GT2");
+ case PCI_CHIP_SKYLAKE_SRV_GT2:
+ DECL_INFO_STRING(skl_gt2_break, intel_skl_gt2_device, name, "Intel(R) HD Graphics Skylake Server GT2");
skl_gt2_break:
- intel_skl_gt2_device.vendor_id = device_id;
- intel_skl_gt2_device.platform = intel_platform;
- ret = &intel_skl_gt2_device;
- break;
-
- case PCI_CHIP_SKYLAKE_ULT_GT3:
- DECL_INFO_STRING(skl_gt3_break, intel_skl_gt3_device, name, "Intel(R) HD Graphics Skylake ULT GT3");
- case PCI_CHIP_SKYLAKE_HALO_GT3:
- DECL_INFO_STRING(skl_gt3_break, intel_skl_gt3_device, name, "Intel(R) HD Graphics Skylake Halo GT3");
- case PCI_CHIP_SKYLAKE_SRV_GT3:
- DECL_INFO_STRING(skl_gt3_break, intel_skl_gt3_device, name, "Intel(R) HD Graphics Skylake Server GT3");
+ cl_intel_platform_enable_fp16_extension(intel_platform);
+ intel_skl_gt2_device.vendor_id = device_id;
+ intel_skl_gt2_device.platform = intel_platform;
+ ret = &intel_skl_gt2_device;
+ break;
+
+ case PCI_CHIP_SKYLAKE_ULT_GT3:
+ DECL_INFO_STRING(skl_gt3_break, intel_skl_gt3_device, name, "Intel(R) HD Graphics Skylake ULT GT3");
+ case PCI_CHIP_SKYLAKE_HALO_GT3:
+ DECL_INFO_STRING(skl_gt3_break, intel_skl_gt3_device, name, "Intel(R) HD Graphics Skylake Halo GT3");
+ case PCI_CHIP_SKYLAKE_SRV_GT3:
+ DECL_INFO_STRING(skl_gt3_break, intel_skl_gt3_device, name, "Intel(R) HD Graphics Skylake Server GT3");
skl_gt3_break:
- intel_skl_gt3_device.vendor_id = device_id;
- intel_skl_gt3_device.platform = intel_platform;
- ret = &intel_skl_gt3_device;
- break;
-
- case PCI_CHIP_SKYLAKE_HALO_GT4:
- DECL_INFO_STRING(skl_gt4_break, intel_skl_gt4_device, name, "Intel(R) HD Graphics Skylake Halo GT4");
- case PCI_CHIP_SKYLAKE_SRV_GT4:
- DECL_INFO_STRING(skl_gt4_break, intel_skl_gt4_device, name, "Intel(R) HD Graphics Skylake Server GT4");
+ cl_intel_platform_enable_fp16_extension(intel_platform);
+ intel_skl_gt3_device.vendor_id = device_id;
+ intel_skl_gt3_device.platform = intel_platform;
+ ret = &intel_skl_gt3_device;
+ break;
+
+ case PCI_CHIP_SKYLAKE_HALO_GT4:
+ DECL_INFO_STRING(skl_gt4_break, intel_skl_gt4_device, name, "Intel(R) HD Graphics Skylake Halo GT4");
+ case PCI_CHIP_SKYLAKE_SRV_GT4:
+ DECL_INFO_STRING(skl_gt4_break, intel_skl_gt4_device, name, "Intel(R) HD Graphics Skylake Server GT4");
skl_gt4_break:
- intel_skl_gt4_device.vendor_id = device_id;
- intel_skl_gt4_device.platform = intel_platform;
- ret = &intel_skl_gt4_device;
- break;
+ cl_intel_platform_enable_fp16_extension(intel_platform);
+ intel_skl_gt4_device.vendor_id = device_id;
+ intel_skl_gt4_device.platform = intel_platform;
+ ret = &intel_skl_gt4_device;
+ break;
case PCI_CHIP_SANDYBRIDGE_BRIDGE:
case PCI_CHIP_SANDYBRIDGE_GT1:
@@ -747,6 +756,7 @@ cl_get_device_info(cl_device_id device,
DECL_FIELD(MEM_BASE_ADDR_ALIGN, mem_base_addr_align)
DECL_FIELD(MIN_DATA_TYPE_ALIGN_SIZE, min_data_type_align_size)
DECL_FIELD(SINGLE_FP_CONFIG, single_fp_config)
+ DECL_FIELD(HALF_FP_CONFIG, half_fp_config)
DECL_FIELD(DOUBLE_FP_CONFIG, double_fp_config)
DECL_FIELD(GLOBAL_MEM_CACHE_TYPE, global_mem_cache_type)
DECL_FIELD(GLOBAL_MEM_CACHELINE_SIZE, global_mem_cache_line_size)
diff --git a/src/cl_device_id.h b/src/cl_device_id.h
index ee6a8e6..cde0160 100644
--- a/src/cl_device_id.h
+++ b/src/cl_device_id.h
@@ -66,6 +66,7 @@ struct _cl_device_id {
cl_uint mem_base_addr_align;
cl_uint min_data_type_align_size;
cl_device_fp_config single_fp_config;
+ cl_device_fp_config half_fp_config;
cl_device_fp_config double_fp_config;
cl_device_mem_cache_type global_mem_cache_type;
cl_uint global_mem_cache_line_size;
diff --git a/src/cl_extensions.c b/src/cl_extensions.c
index adcf82e..14ac726 100644
--- a/src/cl_extensions.c
+++ b/src/cl_extensions.c
@@ -11,6 +11,7 @@
#include <stdlib.h>
#include <string.h>
+#include <assert.h>
static struct cl_extensions intel_extensions =
{
@@ -90,12 +91,31 @@ process_extension_str(cl_extensions_t *extensions)
}
}
+static int ext_initialized = 0;
+
LOCAL void
-cl_intel_platform_extension_init(cl_platform_id intel_platform)
+cl_intel_platform_enable_fp16_extension(cl_platform_id intel_platform)
{
- static int initialized = 0;
+ cl_extensions_t *extensions = &intel_extensions;
+ int id;
+ assert(ext_initialized);
+
+ for(id = OPT1_EXT_START_ID; id <= OPT1_EXT_END_ID; id++)
+ {
+ if (id == EXT_ID(khr_fp16))
+ extensions->extensions[id].base.ext_enabled = 1;
+ }
- if (initialized) {
+ process_extension_str(extensions);
+ intel_platform->internal_extensions = &intel_extensions;
+ intel_platform->extensions = intel_extensions.ext_str;
+ intel_platform->extensions_sz = strlen(intel_platform->extensions) + 1;
+}
+
+LOCAL void
+cl_intel_platform_extension_init(cl_platform_id intel_platform)
+{
+ if (ext_initialized) {
intel_platform->internal_extensions = &intel_extensions;
intel_platform->extensions = intel_extensions.ext_str;
return;
@@ -108,7 +128,8 @@ cl_intel_platform_extension_init(cl_platform_id intel_platform)
intel_platform->internal_extensions = &intel_extensions;
intel_platform->extensions = intel_extensions.ext_str;
+ intel_platform->extensions_sz = strlen(intel_platform->extensions) + 1;
- initialized = 1;
+ ext_initialized = 1;
return;
}
diff --git a/src/cl_extensions.h b/src/cl_extensions.h
index e6cdce8..b1154a2 100644
--- a/src/cl_extensions.h
+++ b/src/cl_extensions.h
@@ -94,3 +94,5 @@ typedef struct cl_extensions {
extern void
cl_intel_platform_extension_init(cl_platform_id intel_platform);
+extern void
+cl_intel_platform_enable_fp16_extension(cl_platform_id intel_platform);
diff --git a/src/cl_gt_device.h b/src/cl_gt_device.h
index 0950327..4b43c20 100644
--- a/src/cl_gt_device.h
+++ b/src/cl_gt_device.h
@@ -75,6 +75,7 @@
.platform = NULL, /* == intel_platform (set when requested) */
/* IEEE 754, XXX does IVB support CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT? */
.single_fp_config = CL_FP_INF_NAN | CL_FP_ROUND_TO_NEAREST , /* IEEE 754. */
+.half_fp_config = CL_FP_INF_NAN | CL_FP_ROUND_TO_NEAREST ,
.printf_buffer_size = 1 * 1024 * 1024,
.interop_user_sync = CL_TRUE,
diff --git a/src/cl_platform_id.c b/src/cl_platform_id.c
index a97c00f..bc2d799 100644
--- a/src/cl_platform_id.c
+++ b/src/cl_platform_id.c
@@ -56,7 +56,7 @@ cl_get_platform_ids(cl_uint num_entries,
/* Easy right now, only one platform is supported */
if(platforms)
*platforms = intel_platform;
- intel_platform->extensions_sz = strlen(intel_platform->extensions) + 1;
+
return CL_SUCCESS;
}
--
1.9.1
More information about the Beignet
mailing list