[Beignet] [PATCH] add another broxton pciid 0x5A85
Guo, Yejun
yejun.guo at intel.com
Mon Sep 12 03:02:09 UTC 2016
To be precise, I'll separate the patch into two: one for BXT and another for CHV.
-----Original Message-----
From: Pan, Xiuli
Sent: Monday, September 12, 2016 10:57 AM
To: Guo, Yejun; beignet at lists.freedesktop.org
Subject: RE: [Beignet] [PATCH] add another broxton pciid 0x5A85
I think the pciid patch can go first, followed by the stack size one, because the stack size patch affects not only BXT but also CHV.
-----Original Message-----
From: Guo, Yejun
Sent: Monday, September 12, 2016 10:39 AM
To: Pan, Xiuli <xiuli.pan at intel.com>; beignet at lists.freedesktop.org
Subject: RE: [Beignet] [PATCH] add another broxton pciid 0x5A85
Thanks. The stack size bug needs to be fixed to get a 100% pass rate of utest for this pciid; that's the reason I merged them into one patch.
-----Original Message-----
From: Pan, Xiuli
Sent: Monday, September 12, 2016 10:36 AM
To: Guo, Yejun; beignet at lists.freedesktop.org
Cc: Guo, Yejun
Subject: RE: [Beignet] [PATCH] add another broxton pciid 0x5A85
I think this patch can be split into two patches.
One adds this pciid, and the other fixes the stack size bug.
Others LGTM.
-----Original Message-----
From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of Guo Yejun
Sent: Saturday, September 10, 2016 8:49 AM
To: beignet at lists.freedesktop.org
Cc: Guo, Yejun <yejun.guo at intel.com>
Subject: [Beignet] [PATCH] add another broxton pciid 0x5A85
Signed-off-by: Guo Yejun <yejun.guo at intel.com>
---
src/cl_command_queue_gen7.c | 11 ++++++-----
src/cl_device_data.h | 4 +++-
src/cl_device_id.c | 23 ++++++++++++++++++++++-
src/cl_driver.h | 4 ++++
src/cl_driver_defs.c | 1 +
src/intel/intel_driver.c | 10 ++++++++++
6 files changed, 46 insertions(+), 7 deletions(-)
diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c index 6a9cf1f..b6a5920 100644
--- a/src/cl_command_queue_gen7.c
+++ b/src/cl_command_queue_gen7.c
@@ -272,12 +272,13 @@ cl_bind_stack(cl_gpgpu gpgpu, cl_kernel ker)
assert(offset >= 0);
stack_sz *= interp_kernel_get_simd_width(ker->opaque);
stack_sz *= device->max_compute_unit * ctx->device->max_thread_per_unit;
- /* Because HSW calc stack offset per thread is relative with half slice, when
- thread schedule in half slice is not balance, would out of bound. Because
- the max half slice is 4 in GT4, multiply stack size with 4 for safe.
+
+ /* for some hardware, part of EUs are disabled with EU id reserved,
+ * it makes the active EU id larger than count of EUs within a subslice,
+ * need to enlarge stack size for such case to avoid out of range.
*/
- if(cl_driver_get_ver(ctx->drv) == 75)
- stack_sz *= 4;
+ cl_driver_enlarge_stack_size(ctx->drv, &stack_sz);
+
cl_gpgpu_set_stack(gpgpu, offset, stack_sz, BTI_PRIVATE); }
diff --git a/src/cl_device_data.h b/src/cl_device_data.h index f680219..30366ea 100644
--- a/src/cl_device_data.h
+++ b/src/cl_device_data.h
@@ -298,9 +298,11 @@
/* BXT */
#define PCI_CHIP_BROXTON_P 0x5A84 /* Intel(R) BXT-P for mobile desktop */
+#define PCI_CHIP_BROXTON_1 0x5A85
#define IS_BROXTON(devid) \
- (devid == PCI_CHIP_BROXTON_P)
+ (devid == PCI_CHIP_BROXTON_P || \
+ devid == PCI_CHIP_BROXTON_1)
#define PCI_CHIP_KABYLAKE_ULT_GT1 0x5906
#define PCI_CHIP_KABYLAKE_ULT_GT2 0x5916
diff --git a/src/cl_device_id.c b/src/cl_device_id.c index 34c182c..ce340c1 100644
--- a/src/cl_device_id.c
+++ b/src/cl_device_id.c
@@ -195,6 +195,16 @@ static struct _cl_device_id intel_bxt_device = { #include "cl_gen9_device.h"
};
+static struct _cl_device_id intel_bxt1_device = {
+ .max_compute_unit = 12,
+ .max_thread_per_unit = 6,
+ .sub_slice_count = 2,
+ .max_work_item_sizes = {512, 512, 512},
+ .max_work_group_size = 512,
+ .max_clock_frequency = 1000,
+#include "cl_gen9_device.h"
+};
+
static struct _cl_device_id intel_kbl_gt1_device = {
.max_compute_unit = 12,
.max_thread_per_unit = 7,
@@ -615,6 +625,16 @@ bxt_break:
cl_intel_platform_enable_extension(ret, cl_khr_fp16_ext_id);
break;
+ case PCI_CHIP_BROXTON_1:
+ DECL_INFO_STRING(bxt1_break, intel_bxt1_device, name, "Intel(R) HD Graphics Broxton 1");
+bxt1_break:
+ intel_bxt1_device.device_id = device_id;
+ intel_bxt1_device.platform = cl_get_platform_default();
+ ret = &intel_bxt1_device;
+ cl_intel_platform_get_default_extension(ret);
+ cl_intel_platform_enable_extension(ret, cl_khr_fp16_ext_id);
+ break;
+
case PCI_CHIP_KABYLAKE_ULT_GT1:
DECL_INFO_STRING(kbl_gt1_break, intel_kbl_gt1_device, name, "Intel(R) HD Graphics Kabylake ULT GT1");
case PCI_CHIP_KABYLAKE_DT_GT1:
@@ -931,6 +951,7 @@ LOCAL cl_bool is_gen_device(cl_device_id device) {
device == &intel_skl_gt3_device ||
device == &intel_skl_gt4_device ||
device == &intel_bxt_device ||
+ device == &intel_bxt1_device ||
device == &intel_kbl_gt1_device ||
device == &intel_kbl_gt15_device ||
device == &intel_kbl_gt2_device || @@ -1074,7 +1095,7 @@ cl_device_get_version(cl_device_id device, cl_int *ver)
*ver = 8;
} else if (device == &intel_skl_gt1_device || device == &intel_skl_gt2_device
|| device == &intel_skl_gt3_device || device == &intel_skl_gt4_device
- || device == &intel_bxt_device || device == &intel_kbl_gt1_device
+ || device == &intel_bxt_device || device == &intel_bxt1_device
+ || device == &intel_kbl_gt1_device
|| device == &intel_kbl_gt2_device || device == &intel_kbl_gt3_device
|| device == &intel_kbl_gt4_device || device == &intel_kbl_gt15_device) {
*ver = 9;
diff --git a/src/cl_driver.h b/src/cl_driver.h index 16730db..584be9d 100644
--- a/src/cl_driver.h
+++ b/src/cl_driver.h
@@ -51,6 +51,10 @@ extern cl_driver_get_bufmgr_cb *cl_driver_get_bufmgr; typedef uint32_t (cl_driver_get_ver_cb)(cl_driver);
extern cl_driver_get_ver_cb *cl_driver_get_ver;
+/* enlarge stack size from the driver */ typedef void
+(cl_driver_enlarge_stack_size_cb)(cl_driver, int32_t*); extern
+cl_driver_enlarge_stack_size_cb *cl_driver_enlarge_stack_size;
+
typedef enum cl_self_test_res{
SELF_TEST_PASS = 0,
SELF_TEST_SLM_FAIL = 1,
diff --git a/src/cl_driver_defs.c b/src/cl_driver_defs.c index 31176a4..ea4e90a 100644
--- a/src/cl_driver_defs.c
+++ b/src/cl_driver_defs.c
@@ -25,6 +25,7 @@ LOCAL cl_driver_new_cb *cl_driver_new = NULL; LOCAL cl_driver_delete_cb *cl_driver_delete = NULL; LOCAL cl_driver_get_bufmgr_cb *cl_driver_get_bufmgr = NULL; LOCAL cl_driver_get_ver_cb *cl_driver_get_ver = NULL;
+LOCAL cl_driver_enlarge_stack_size_cb *cl_driver_enlarge_stack_size = NULL;
LOCAL cl_driver_set_atomic_flag_cb *cl_driver_set_atomic_flag = NULL; LOCAL cl_driver_get_device_id_cb *cl_driver_get_device_id = NULL; LOCAL cl_driver_update_device_info_cb *cl_driver_update_device_info = NULL; diff --git a/src/intel/intel_driver.c b/src/intel/intel_driver.c index e561725..0766ca3 100644
--- a/src/intel/intel_driver.c
+++ b/src/intel/intel_driver.c
@@ -464,6 +464,15 @@ intel_driver_get_ver(struct intel_driver *drv) }
static void
+intel_driver_enlarge_stack_size(struct intel_driver *drv, int32_t
+*stack_size) {
+ if (drv->gen_ver == 75)
+ *stack_size = *stack_size * 4;
+ else if (drv->device_id == PCI_CHIP_BROXTON_1 || IS_CHERRYVIEW(drv->device_id))
+ *stack_size = *stack_size * 2;
+}
+
+static void
intel_driver_set_atomic_flag(intel_driver_t *drv, int atomic_flag) {
drv->atomic_test_result = atomic_flag; @@ -921,6 +930,7 @@ intel_setup_callbacks(void)
cl_driver_new = (cl_driver_new_cb *) cl_intel_driver_new;
cl_driver_delete = (cl_driver_delete_cb *) cl_intel_driver_delete;
cl_driver_get_ver = (cl_driver_get_ver_cb *) intel_driver_get_ver;
+ cl_driver_enlarge_stack_size = (cl_driver_enlarge_stack_size_cb *) intel_driver_enlarge_stack_size;
cl_driver_set_atomic_flag = (cl_driver_set_atomic_flag_cb *) intel_driver_set_atomic_flag;
cl_driver_get_bufmgr = (cl_driver_get_bufmgr_cb *) intel_driver_get_bufmgr;
cl_driver_get_device_id = (cl_driver_get_device_id_cb *) intel_get_device_id;
--
2.7.4
_______________________________________________
Beignet mailing list
Beignet at lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list