[Beignet] [PATCH V2] add another broxton pciid 0x5A85

Guo Yejun yejun.guo at intel.com
Mon Sep 12 07:02:04 UTC 2016


v2: split the code related to CHV into a separate patch
Signed-off-by: Guo Yejun <yejun.guo at intel.com>
---
 src/cl_command_queue_gen7.c | 11 ++++++-----
 src/cl_device_data.h        |  4 +++-
 src/cl_device_id.c          | 23 ++++++++++++++++++++++-
 src/cl_driver.h             |  4 ++++
 src/cl_driver_defs.c        |  1 +
 src/intel/intel_driver.c    | 10 ++++++++++
 6 files changed, 46 insertions(+), 7 deletions(-)

diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c
index 6a9cf1f..b6a5920 100644
--- a/src/cl_command_queue_gen7.c
+++ b/src/cl_command_queue_gen7.c
@@ -272,12 +272,13 @@ cl_bind_stack(cl_gpgpu gpgpu, cl_kernel ker)
   assert(offset >= 0);
   stack_sz *= interp_kernel_get_simd_width(ker->opaque);
   stack_sz *= device->max_compute_unit * ctx->device->max_thread_per_unit;
-  /* Because HSW calc stack offset per thread is relative with half slice, when
-     thread schedule in half slice is not balance, would out of bound. Because
-     the max half slice is 4 in GT4, multiply stack size with 4 for safe.
+
+  /* On some hardware, some EUs are disabled while their EU ids stay reserved,
+   * so an active EU id can exceed the count of EUs within a subslice.
+   * Enlarge the stack size in that case to avoid out-of-range access.
    */
-  if(cl_driver_get_ver(ctx->drv) == 75)
-    stack_sz *= 4;
+  cl_driver_enlarge_stack_size(ctx->drv, &stack_sz);
+
   cl_gpgpu_set_stack(gpgpu, offset, stack_sz, BTI_PRIVATE);
 }
 
diff --git a/src/cl_device_data.h b/src/cl_device_data.h
index f680219..30366ea 100644
--- a/src/cl_device_data.h
+++ b/src/cl_device_data.h
@@ -298,9 +298,11 @@
 
 /* BXT */
 #define PCI_CHIP_BROXTON_P	0x5A84   /* Intel(R) BXT-P for mobile desktop */
+#define PCI_CHIP_BROXTON_1	0x5A85
 
 #define IS_BROXTON(devid)               \
-  (devid == PCI_CHIP_BROXTON_P)
+  (devid == PCI_CHIP_BROXTON_P ||       \
+   devid == PCI_CHIP_BROXTON_1)
 
 #define PCI_CHIP_KABYLAKE_ULT_GT1     0x5906
 #define PCI_CHIP_KABYLAKE_ULT_GT2     0x5916
diff --git a/src/cl_device_id.c b/src/cl_device_id.c
index 34c182c..ce340c1 100644
--- a/src/cl_device_id.c
+++ b/src/cl_device_id.c
@@ -195,6 +195,16 @@ static struct _cl_device_id intel_bxt_device = {
 #include "cl_gen9_device.h"
 };
 
+static struct _cl_device_id intel_bxt1_device = {
+  .max_compute_unit = 12,
+  .max_thread_per_unit = 6,
+  .sub_slice_count = 2,
+  .max_work_item_sizes = {512, 512, 512},
+  .max_work_group_size = 512,
+  .max_clock_frequency = 1000,
+#include "cl_gen9_device.h"
+};
+
 static struct _cl_device_id intel_kbl_gt1_device = {
   .max_compute_unit = 12,
   .max_thread_per_unit = 7,
@@ -615,6 +625,16 @@ bxt_break:
       cl_intel_platform_enable_extension(ret, cl_khr_fp16_ext_id);
       break;
 
+    case PCI_CHIP_BROXTON_1:
+      DECL_INFO_STRING(bxt1_break, intel_bxt1_device, name, "Intel(R) HD Graphics Broxton 1");
+bxt1_break:
+      intel_bxt1_device.device_id = device_id;
+      intel_bxt1_device.platform = cl_get_platform_default();
+      ret = &intel_bxt1_device;
+      cl_intel_platform_get_default_extension(ret);
+      cl_intel_platform_enable_extension(ret, cl_khr_fp16_ext_id);
+      break;
+
     case PCI_CHIP_KABYLAKE_ULT_GT1:
       DECL_INFO_STRING(kbl_gt1_break, intel_kbl_gt1_device, name, "Intel(R) HD Graphics Kabylake ULT GT1");
     case PCI_CHIP_KABYLAKE_DT_GT1:
@@ -931,6 +951,7 @@ LOCAL cl_bool is_gen_device(cl_device_id device) {
          device == &intel_skl_gt3_device ||
          device == &intel_skl_gt4_device ||
          device == &intel_bxt_device     ||
+         device == &intel_bxt1_device    ||
          device == &intel_kbl_gt1_device ||
          device == &intel_kbl_gt15_device ||
          device == &intel_kbl_gt2_device ||
@@ -1074,7 +1095,7 @@ cl_device_get_version(cl_device_id device, cl_int *ver)
     *ver = 8;
   } else if (device == &intel_skl_gt1_device || device == &intel_skl_gt2_device
         || device == &intel_skl_gt3_device || device == &intel_skl_gt4_device
-        || device == &intel_bxt_device || device == &intel_kbl_gt1_device
+        || device == &intel_bxt_device || device == &intel_bxt1_device || device == &intel_kbl_gt1_device
         || device == &intel_kbl_gt2_device || device == &intel_kbl_gt3_device
         || device == &intel_kbl_gt4_device || device == &intel_kbl_gt15_device) {
     *ver = 9;
diff --git a/src/cl_driver.h b/src/cl_driver.h
index 16730db..584be9d 100644
--- a/src/cl_driver.h
+++ b/src/cl_driver.h
@@ -51,6 +51,10 @@ extern cl_driver_get_bufmgr_cb *cl_driver_get_bufmgr;
 typedef uint32_t (cl_driver_get_ver_cb)(cl_driver);
 extern cl_driver_get_ver_cb *cl_driver_get_ver;
 
+/* let the driver enlarge the stack size in place, when the hardware needs it */
+typedef void (cl_driver_enlarge_stack_size_cb)(cl_driver, int32_t*);
+extern cl_driver_enlarge_stack_size_cb *cl_driver_enlarge_stack_size;
+
 typedef enum cl_self_test_res{
   SELF_TEST_PASS = 0,
   SELF_TEST_SLM_FAIL  = 1,
diff --git a/src/cl_driver_defs.c b/src/cl_driver_defs.c
index 31176a4..ea4e90a 100644
--- a/src/cl_driver_defs.c
+++ b/src/cl_driver_defs.c
@@ -25,6 +25,7 @@ LOCAL cl_driver_new_cb *cl_driver_new = NULL;
 LOCAL cl_driver_delete_cb *cl_driver_delete = NULL;
 LOCAL cl_driver_get_bufmgr_cb *cl_driver_get_bufmgr = NULL;
 LOCAL cl_driver_get_ver_cb *cl_driver_get_ver = NULL;
+LOCAL cl_driver_enlarge_stack_size_cb *cl_driver_enlarge_stack_size = NULL;
 LOCAL cl_driver_set_atomic_flag_cb *cl_driver_set_atomic_flag = NULL;
 LOCAL cl_driver_get_device_id_cb *cl_driver_get_device_id = NULL;
 LOCAL cl_driver_update_device_info_cb *cl_driver_update_device_info = NULL;
diff --git a/src/intel/intel_driver.c b/src/intel/intel_driver.c
index e561725..ec2fb31 100644
--- a/src/intel/intel_driver.c
+++ b/src/intel/intel_driver.c
@@ -464,6 +464,15 @@ intel_driver_get_ver(struct intel_driver *drv)
 }
 
 static void
+intel_driver_enlarge_stack_size(struct intel_driver *drv, int32_t *stack_size)
+{
+    if (drv->gen_ver == 75)
+      *stack_size = *stack_size * 4;
+    else if (drv->device_id == PCI_CHIP_BROXTON_1)
+      *stack_size = *stack_size * 2;
+}
+
+static void
 intel_driver_set_atomic_flag(intel_driver_t *drv, int atomic_flag)
 {
   drv->atomic_test_result = atomic_flag;
@@ -921,6 +930,7 @@ intel_setup_callbacks(void)
   cl_driver_new = (cl_driver_new_cb *) cl_intel_driver_new;
   cl_driver_delete = (cl_driver_delete_cb *) cl_intel_driver_delete;
   cl_driver_get_ver = (cl_driver_get_ver_cb *) intel_driver_get_ver;
+  cl_driver_enlarge_stack_size = (cl_driver_enlarge_stack_size_cb *) intel_driver_enlarge_stack_size;
   cl_driver_set_atomic_flag = (cl_driver_set_atomic_flag_cb *) intel_driver_set_atomic_flag;
   cl_driver_get_bufmgr = (cl_driver_get_bufmgr_cb *) intel_driver_get_bufmgr;
   cl_driver_get_device_id = (cl_driver_get_device_id_cb *) intel_get_device_id;
-- 
2.7.4



More information about the Beignet mailing list