[Beignet] [PATCH 19/19] runtime: Add fp16 extension to BDW later platform.

junyan.he at inbox.com junyan.he at inbox.com
Thu Jun 11 04:25:50 PDT 2015


From: Junyan He <junyan.he at linux.intel.com>

Signed-off-by: Junyan He <junyan.he at linux.intel.com>
---
 src/cl_device_id.c   | 123 ++++++++++++++++++++++++++++++---------------------
 src/cl_device_id.h   |   1 +
 src/cl_extensions.c  |  29 ++++++++++--
 src/cl_extensions.h  |   2 +
 src/cl_gt_device.h   |   1 +
 src/cl_platform_id.c |   2 +-
 6 files changed, 102 insertions(+), 56 deletions(-)

diff --git a/src/cl_device_id.c b/src/cl_device_id.c
index 215f7f2..09171f8 100644
--- a/src/cl_device_id.c
+++ b/src/cl_device_id.c
@@ -26,6 +26,7 @@
 #include "cl_khr_icd.h"
 #include "cl_thread.h"
 #include "CL/cl.h"
+#include "CL/cl_ext.h"
 #include "cl_gbe_loader.h"
 #include "cl_alloc.h"
 
@@ -398,6 +399,8 @@ baytrail_t_device_break:
     case PCI_CHIP_BROADWLL_U_GT1:
       DECL_INFO_STRING(brw_gt1_break, intel_brw_gt1_device, name, "Intel(R) HD Graphics BroadWell ULX GT1");
 brw_gt1_break:
+      /* For Gen8 and later, half float is supported and we will enable cl_khr_fp16. */
+      cl_intel_platform_enable_fp16_extension(intel_platform);
       intel_brw_gt1_device.vendor_id = device_id;
       intel_brw_gt1_device.platform = intel_platform;
       ret = &intel_brw_gt1_device;
@@ -414,6 +417,7 @@ brw_gt1_break:
     case PCI_CHIP_BROADWLL_U_GT2:
       DECL_INFO_STRING(brw_gt2_break, intel_brw_gt2_device, name, "Intel(R) HD Graphics BroadWell ULX GT2");
 brw_gt2_break:
+      cl_intel_platform_enable_fp16_extension(intel_platform);
       intel_brw_gt2_device.vendor_id = device_id;
       intel_brw_gt2_device.platform = intel_platform;
       ret = &intel_brw_gt2_device;
@@ -430,6 +434,7 @@ brw_gt2_break:
     case PCI_CHIP_BROADWLL_U_GT3:
       DECL_INFO_STRING(brw_gt3_break, intel_brw_gt3_device, name, "Intel(R) HD Graphics BroadWell ULX GT2");
 brw_gt3_break:
+      cl_intel_platform_enable_fp16_extension(intel_platform);
       intel_brw_gt3_device.vendor_id = device_id;
       intel_brw_gt3_device.platform = intel_platform;
       ret = &intel_brw_gt3_device;
@@ -447,61 +452,65 @@ chv_break:
       break;
 
 
-	  case PCI_CHIP_SKYLAKE_ULT_GT1:
-		DECL_INFO_STRING(skl_gt1_break, intel_skl_gt1_device, name, "Intel(R) HD Graphics Skylake ULT GT1");
-	  case PCI_CHIP_SKYLAKE_ULX_GT1:
-		DECL_INFO_STRING(skl_gt1_break, intel_skl_gt1_device, name, "Intel(R) HD Graphics Skylake ULX GT1");
-	  case PCI_CHIP_SKYLAKE_DT_GT1:
-		DECL_INFO_STRING(skl_gt1_break, intel_skl_gt1_device, name, "Intel(R) HD Graphics Skylake Desktop GT1");
-	  case PCI_CHIP_SKYLAKE_HALO_GT1:
-		DECL_INFO_STRING(skl_gt1_break, intel_skl_gt1_device, name, "Intel(R) HD Graphics Skylake Halo GT1");
-	  case PCI_CHIP_SKYLAKE_SRV_GT1:
-		DECL_INFO_STRING(skl_gt1_break, intel_skl_gt1_device, name, "Intel(R) HD Graphics Skylake Server GT1");
+    case PCI_CHIP_SKYLAKE_ULT_GT1:
+      DECL_INFO_STRING(skl_gt1_break, intel_skl_gt1_device, name, "Intel(R) HD Graphics Skylake ULT GT1");
+    case PCI_CHIP_SKYLAKE_ULX_GT1:
+      DECL_INFO_STRING(skl_gt1_break, intel_skl_gt1_device, name, "Intel(R) HD Graphics Skylake ULX GT1");
+    case PCI_CHIP_SKYLAKE_DT_GT1:
+      DECL_INFO_STRING(skl_gt1_break, intel_skl_gt1_device, name, "Intel(R) HD Graphics Skylake Desktop GT1");
+    case PCI_CHIP_SKYLAKE_HALO_GT1:
+      DECL_INFO_STRING(skl_gt1_break, intel_skl_gt1_device, name, "Intel(R) HD Graphics Skylake Halo GT1");
+    case PCI_CHIP_SKYLAKE_SRV_GT1:
+      DECL_INFO_STRING(skl_gt1_break, intel_skl_gt1_device, name, "Intel(R) HD Graphics Skylake Server GT1");
 skl_gt1_break:
-		intel_skl_gt1_device.vendor_id = device_id;
-		intel_skl_gt1_device.platform = intel_platform;
-		ret = &intel_skl_gt1_device;
-		break;
-
-	  case PCI_CHIP_SKYLAKE_ULT_GT2:
-		DECL_INFO_STRING(skl_gt2_break, intel_skl_gt2_device, name, "Intel(R) HD Graphics Skylake ULT GT2");
-	  case PCI_CHIP_SKYLAKE_ULT_GT2F:
-		DECL_INFO_STRING(skl_gt2_break, intel_skl_gt2_device, name, "Intel(R) HD Graphics Skylake ULT GT2F");
-	  case PCI_CHIP_SKYLAKE_ULX_GT2:
-		DECL_INFO_STRING(skl_gt2_break, intel_skl_gt2_device, name, "Intel(R) HD Graphics Skylake ULX GT2");
-	  case PCI_CHIP_SKYLAKE_DT_GT2:
-		DECL_INFO_STRING(skl_gt2_break, intel_skl_gt2_device, name, "Intel(R) HD Graphics Skylake Desktop GT2");
-	  case PCI_CHIP_SKYLAKE_HALO_GT2:
-		DECL_INFO_STRING(skl_gt2_break, intel_skl_gt2_device, name, "Intel(R) HD Graphics Skylake Halo GT2");
-	  case PCI_CHIP_SKYLAKE_SRV_GT2:
-		DECL_INFO_STRING(skl_gt2_break, intel_skl_gt2_device, name, "Intel(R) HD Graphics Skylake Server GT2");
+      cl_intel_platform_enable_fp16_extension(intel_platform);
+      intel_skl_gt1_device.vendor_id = device_id;
+      intel_skl_gt1_device.platform = intel_platform;
+      ret = &intel_skl_gt1_device;
+      break;
+
+    case PCI_CHIP_SKYLAKE_ULT_GT2:
+      DECL_INFO_STRING(skl_gt2_break, intel_skl_gt2_device, name, "Intel(R) HD Graphics Skylake ULT GT2");
+    case PCI_CHIP_SKYLAKE_ULT_GT2F:
+      DECL_INFO_STRING(skl_gt2_break, intel_skl_gt2_device, name, "Intel(R) HD Graphics Skylake ULT GT2F");
+    case PCI_CHIP_SKYLAKE_ULX_GT2:
+      DECL_INFO_STRING(skl_gt2_break, intel_skl_gt2_device, name, "Intel(R) HD Graphics Skylake ULX GT2");
+    case PCI_CHIP_SKYLAKE_DT_GT2:
+      DECL_INFO_STRING(skl_gt2_break, intel_skl_gt2_device, name, "Intel(R) HD Graphics Skylake Desktop GT2");
+    case PCI_CHIP_SKYLAKE_HALO_GT2:
+      DECL_INFO_STRING(skl_gt2_break, intel_skl_gt2_device, name, "Intel(R) HD Graphics Skylake Halo GT2");
+    case PCI_CHIP_SKYLAKE_SRV_GT2:
+      DECL_INFO_STRING(skl_gt2_break, intel_skl_gt2_device, name, "Intel(R) HD Graphics Skylake Server GT2");
 skl_gt2_break:
-		intel_skl_gt2_device.vendor_id = device_id;
-		intel_skl_gt2_device.platform = intel_platform;
-		ret = &intel_skl_gt2_device;
-		break;
-
-	  case PCI_CHIP_SKYLAKE_ULT_GT3:
-		DECL_INFO_STRING(skl_gt3_break, intel_skl_gt3_device, name, "Intel(R) HD Graphics Skylake ULT GT3");
-      case PCI_CHIP_SKYLAKE_HALO_GT3:
-		DECL_INFO_STRING(skl_gt3_break, intel_skl_gt3_device, name, "Intel(R) HD Graphics Skylake Halo GT3");
-	  case PCI_CHIP_SKYLAKE_SRV_GT3:
-		DECL_INFO_STRING(skl_gt3_break, intel_skl_gt3_device, name, "Intel(R) HD Graphics Skylake Server GT3");
+      cl_intel_platform_enable_fp16_extension(intel_platform);
+      intel_skl_gt2_device.vendor_id = device_id;
+      intel_skl_gt2_device.platform = intel_platform;
+      ret = &intel_skl_gt2_device;
+      break;
+
+    case PCI_CHIP_SKYLAKE_ULT_GT3:
+      DECL_INFO_STRING(skl_gt3_break, intel_skl_gt3_device, name, "Intel(R) HD Graphics Skylake ULT GT3");
+    case PCI_CHIP_SKYLAKE_HALO_GT3:
+      DECL_INFO_STRING(skl_gt3_break, intel_skl_gt3_device, name, "Intel(R) HD Graphics Skylake Halo GT3");
+    case PCI_CHIP_SKYLAKE_SRV_GT3:
+      DECL_INFO_STRING(skl_gt3_break, intel_skl_gt3_device, name, "Intel(R) HD Graphics Skylake Server GT3");
 skl_gt3_break:
-		intel_skl_gt3_device.vendor_id = device_id;
-		intel_skl_gt3_device.platform = intel_platform;
-		ret = &intel_skl_gt3_device;
-		break;
-
-      case PCI_CHIP_SKYLAKE_HALO_GT4:
-		DECL_INFO_STRING(skl_gt4_break, intel_skl_gt4_device, name, "Intel(R) HD Graphics Skylake Halo GT4");
-	  case PCI_CHIP_SKYLAKE_SRV_GT4:
-		DECL_INFO_STRING(skl_gt4_break, intel_skl_gt4_device, name, "Intel(R) HD Graphics Skylake Server GT4");
+      cl_intel_platform_enable_fp16_extension(intel_platform);
+      intel_skl_gt3_device.vendor_id = device_id;
+      intel_skl_gt3_device.platform = intel_platform;
+      ret = &intel_skl_gt3_device;
+      break;
+
+    case PCI_CHIP_SKYLAKE_HALO_GT4:
+      DECL_INFO_STRING(skl_gt4_break, intel_skl_gt4_device, name, "Intel(R) HD Graphics Skylake Halo GT4");
+    case PCI_CHIP_SKYLAKE_SRV_GT4:
+      DECL_INFO_STRING(skl_gt4_break, intel_skl_gt4_device, name, "Intel(R) HD Graphics Skylake Server GT4");
 skl_gt4_break:
-		intel_skl_gt4_device.vendor_id = device_id;
-		intel_skl_gt4_device.platform = intel_platform;
-		ret = &intel_skl_gt4_device;
-		break;
+      cl_intel_platform_enable_fp16_extension(intel_platform);
+      intel_skl_gt4_device.vendor_id = device_id;
+      intel_skl_gt4_device.platform = intel_platform;
+      ret = &intel_skl_gt4_device;
+      break;
 
     case PCI_CHIP_SANDYBRIDGE_BRIDGE:
     case PCI_CHIP_SANDYBRIDGE_GT1:
@@ -626,6 +635,17 @@ cl_get_device_ids(cl_platform_id    platform,
 {
   cl_device_id device;
 
+  /* The spec allows platform to be NULL; if platform
+     is NULL, the behavior is implementation-defined.
+     We cannot init the device before platform init. */
+  if (!platform) {
+    if (num_devices)
+      *num_devices = 0;
+    if (devices)
+      *devices = 0;
+    return CL_DEVICE_NOT_FOUND;
+  }
+
   /* Do we have a usable device? */
   device = cl_get_gt_device();
   if (device && cl_self_test(device)) {
@@ -747,6 +767,7 @@ cl_get_device_info(cl_device_id     device,
     DECL_FIELD(MEM_BASE_ADDR_ALIGN, mem_base_addr_align)
     DECL_FIELD(MIN_DATA_TYPE_ALIGN_SIZE, min_data_type_align_size)
     DECL_FIELD(SINGLE_FP_CONFIG, single_fp_config)
+    DECL_FIELD(HALF_FP_CONFIG, half_fp_config)
     DECL_FIELD(DOUBLE_FP_CONFIG, double_fp_config)
     DECL_FIELD(GLOBAL_MEM_CACHE_TYPE, global_mem_cache_type)
     DECL_FIELD(GLOBAL_MEM_CACHELINE_SIZE, global_mem_cache_line_size)
diff --git a/src/cl_device_id.h b/src/cl_device_id.h
index ee6a8e6..cde0160 100644
--- a/src/cl_device_id.h
+++ b/src/cl_device_id.h
@@ -66,6 +66,7 @@ struct _cl_device_id {
   cl_uint  mem_base_addr_align;
   cl_uint  min_data_type_align_size;
   cl_device_fp_config single_fp_config;
+  cl_device_fp_config half_fp_config;
   cl_device_fp_config double_fp_config;
   cl_device_mem_cache_type global_mem_cache_type;
   cl_uint  global_mem_cache_line_size;
diff --git a/src/cl_extensions.c b/src/cl_extensions.c
index adcf82e..14ac726 100644
--- a/src/cl_extensions.c
+++ b/src/cl_extensions.c
@@ -11,6 +11,7 @@
 
 #include <stdlib.h>
 #include <string.h>
+#include <assert.h>
 
 static struct cl_extensions intel_extensions =
 {
@@ -90,12 +91,31 @@ process_extension_str(cl_extensions_t *extensions)
   }
 }
 
+static int ext_initialized = 0;
+
 LOCAL void
-cl_intel_platform_extension_init(cl_platform_id intel_platform)
+cl_intel_platform_enable_fp16_extension(cl_platform_id intel_platform)
 {
-  static int initialized = 0;
+  cl_extensions_t *extensions = &intel_extensions;
+  int id;
+  assert(ext_initialized);
+
+  for(id = OPT1_EXT_START_ID; id <= OPT1_EXT_END_ID; id++)
+  {
+    if (id == EXT_ID(khr_fp16))
+      extensions->extensions[id].base.ext_enabled = 1;
+  }
 
-  if (initialized) {
+  process_extension_str(extensions);
+  intel_platform->internal_extensions = &intel_extensions;
+  intel_platform->extensions = intel_extensions.ext_str;
+  intel_platform->extensions_sz = strlen(intel_platform->extensions) + 1;
+}
+
+LOCAL void
+cl_intel_platform_extension_init(cl_platform_id intel_platform)
+{
+  if (ext_initialized) {
     intel_platform->internal_extensions = &intel_extensions;
     intel_platform->extensions = intel_extensions.ext_str;
     return;
@@ -108,7 +128,8 @@ cl_intel_platform_extension_init(cl_platform_id intel_platform)
 
   intel_platform->internal_extensions = &intel_extensions;
   intel_platform->extensions = intel_extensions.ext_str;
+  intel_platform->extensions_sz = strlen(intel_platform->extensions) + 1;
 
-  initialized = 1;
+  ext_initialized = 1;
   return;
 }
diff --git a/src/cl_extensions.h b/src/cl_extensions.h
index e6cdce8..b1154a2 100644
--- a/src/cl_extensions.h
+++ b/src/cl_extensions.h
@@ -94,3 +94,5 @@ typedef struct cl_extensions {
 
 extern void
 cl_intel_platform_extension_init(cl_platform_id intel_platform);
+extern void
+cl_intel_platform_enable_fp16_extension(cl_platform_id intel_platform);
diff --git a/src/cl_gt_device.h b/src/cl_gt_device.h
index 0950327..4b43c20 100644
--- a/src/cl_gt_device.h
+++ b/src/cl_gt_device.h
@@ -75,6 +75,7 @@
 .platform = NULL, /* == intel_platform (set when requested) */
 /* IEEE 754, XXX does IVB support CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT? */
 .single_fp_config = CL_FP_INF_NAN | CL_FP_ROUND_TO_NEAREST , /* IEEE 754. */
+.half_fp_config = CL_FP_INF_NAN | CL_FP_ROUND_TO_NEAREST ,
 .printf_buffer_size = 1 * 1024 * 1024,
 .interop_user_sync = CL_TRUE,
 
diff --git a/src/cl_platform_id.c b/src/cl_platform_id.c
index a97c00f..bc2d799 100644
--- a/src/cl_platform_id.c
+++ b/src/cl_platform_id.c
@@ -56,7 +56,7 @@ cl_get_platform_ids(cl_uint          num_entries,
   /* Easy right now, only one platform is supported */
   if(platforms)
     *platforms = intel_platform;
-  intel_platform->extensions_sz = strlen(intel_platform->extensions) + 1;
+
   return CL_SUCCESS;
 }
 
-- 
1.9.1



More information about the Beignet mailing list