[Beignet] [PATCH V2] add another broxton pciid 0x5A85
Yang, Rong R
rong.r.yang at intel.com
Mon Sep 12 08:47:05 UTC 2016
LGTM.
Because 0x5A85 is a fused-down part, once pooled EU is enabled we will need to take care of the fused-down sub-slice.
> -----Original Message-----
> From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of
> Guo Yejun
> Sent: Monday, September 12, 2016 15:02
> To: beignet at lists.freedesktop.org
> Cc: Guo, Yejun <yejun.guo at intel.com>
> Subject: [Beignet] [PATCH V2] add another broxton pciid 0x5A85
>
> v2: split the code related to chv into another patch
> Signed-off-by: Guo Yejun <yejun.guo at intel.com>
> ---
> src/cl_command_queue_gen7.c | 11 ++++++-----
> src/cl_device_data.h | 4 +++-
> src/cl_device_id.c | 23 ++++++++++++++++++++++-
> src/cl_driver.h | 4 ++++
> src/cl_driver_defs.c | 1 +
> src/intel/intel_driver.c | 10 ++++++++++
> 6 files changed, 46 insertions(+), 7 deletions(-)
>
> diff --git a/src/cl_command_queue_gen7.c
> b/src/cl_command_queue_gen7.c index 6a9cf1f..b6a5920 100644
> --- a/src/cl_command_queue_gen7.c
> +++ b/src/cl_command_queue_gen7.c
> @@ -272,12 +272,13 @@ cl_bind_stack(cl_gpgpu gpgpu, cl_kernel ker)
> assert(offset >= 0);
> stack_sz *= interp_kernel_get_simd_width(ker->opaque);
> stack_sz *= device->max_compute_unit * ctx->device-
> >max_thread_per_unit;
> - /* Because HSW calc stack offset per thread is relative with half slice, when
> - thread schedule in half slice is not balance, would out of bound. Because
> - the max half slice is 4 in GT4, multiply stack size with 4 for safe.
> +
> + /* for some hardware, part of EUs are disabled with EU id reserved,
> + * it makes the active EU id larger than count of EUs within a subslice,
> + * need to enlarge stack size for such case to avoid out of range.
> */
> - if(cl_driver_get_ver(ctx->drv) == 75)
> - stack_sz *= 4;
> + cl_driver_enlarge_stack_size(ctx->drv, &stack_sz);
> +
> cl_gpgpu_set_stack(gpgpu, offset, stack_sz, BTI_PRIVATE); }
>
> diff --git a/src/cl_device_data.h b/src/cl_device_data.h index
> f680219..30366ea 100644
> --- a/src/cl_device_data.h
> +++ b/src/cl_device_data.h
> @@ -298,9 +298,11 @@
>
> /* BXT */
> #define PCI_CHIP_BROXTON_P 0x5A84 /* Intel(R) BXT-P for mobile
> desktop */
> +#define PCI_CHIP_BROXTON_1 0x5A85
>
> #define IS_BROXTON(devid) \
> - (devid == PCI_CHIP_BROXTON_P)
> + (devid == PCI_CHIP_BROXTON_P || \
> + devid == PCI_CHIP_BROXTON_1)
>
> #define PCI_CHIP_KABYLAKE_ULT_GT1 0x5906
> #define PCI_CHIP_KABYLAKE_ULT_GT2 0x5916
> diff --git a/src/cl_device_id.c b/src/cl_device_id.c index 34c182c..ce340c1
> 100644
> --- a/src/cl_device_id.c
> +++ b/src/cl_device_id.c
> @@ -195,6 +195,16 @@ static struct _cl_device_id intel_bxt_device =
> { #include "cl_gen9_device.h"
> };
>
> +static struct _cl_device_id intel_bxt1_device = {
> + .max_compute_unit = 12,
> + .max_thread_per_unit = 6,
> + .sub_slice_count = 2,
> + .max_work_item_sizes = {512, 512, 512},
> + .max_work_group_size = 512,
> + .max_clock_frequency = 1000,
> +#include "cl_gen9_device.h"
> +};
> +
> static struct _cl_device_id intel_kbl_gt1_device = {
> .max_compute_unit = 12,
> .max_thread_per_unit = 7,
> @@ -615,6 +625,16 @@ bxt_break:
> cl_intel_platform_enable_extension(ret, cl_khr_fp16_ext_id);
> break;
>
> + case PCI_CHIP_BROXTON_1:
> + DECL_INFO_STRING(bxt1_break, intel_bxt1_device, name, "Intel(R)
> +HD Graphics Broxton 1");
> +bxt1_break:
> + intel_bxt1_device.device_id = device_id;
> + intel_bxt1_device.platform = cl_get_platform_default();
> + ret = &intel_bxt1_device;
> + cl_intel_platform_get_default_extension(ret);
> + cl_intel_platform_enable_extension(ret, cl_khr_fp16_ext_id);
> + break;
> +
> case PCI_CHIP_KABYLAKE_ULT_GT1:
> DECL_INFO_STRING(kbl_gt1_break, intel_kbl_gt1_device, name, "Intel(R)
> HD Graphics Kabylake ULT GT1");
> case PCI_CHIP_KABYLAKE_DT_GT1:
> @@ -931,6 +951,7 @@ LOCAL cl_bool is_gen_device(cl_device_id device) {
> device == &intel_skl_gt3_device ||
> device == &intel_skl_gt4_device ||
> device == &intel_bxt_device ||
> + device == &intel_bxt1_device ||
> device == &intel_kbl_gt1_device ||
> device == &intel_kbl_gt15_device ||
> device == &intel_kbl_gt2_device || @@ -1074,7 +1095,7 @@
> cl_device_get_version(cl_device_id device, cl_int *ver)
> *ver = 8;
> } else if (device == &intel_skl_gt1_device || device ==
> &intel_skl_gt2_device
> || device == &intel_skl_gt3_device || device == &intel_skl_gt4_device
> - || device == &intel_bxt_device || device == &intel_kbl_gt1_device
> + || device == &intel_bxt_device || device == &intel_bxt1_device
> + || device == &intel_kbl_gt1_device
> || device == &intel_kbl_gt2_device || device == &intel_kbl_gt3_device
> || device == &intel_kbl_gt4_device || device ==
> &intel_kbl_gt15_device) {
> *ver = 9;
> diff --git a/src/cl_driver.h b/src/cl_driver.h index 16730db..584be9d 100644
> --- a/src/cl_driver.h
> +++ b/src/cl_driver.h
> @@ -51,6 +51,10 @@ extern cl_driver_get_bufmgr_cb
> *cl_driver_get_bufmgr; typedef uint32_t (cl_driver_get_ver_cb)(cl_driver);
> extern cl_driver_get_ver_cb *cl_driver_get_ver;
>
> +/* enlarge stack size from the driver */ typedef void
> +(cl_driver_enlarge_stack_size_cb)(cl_driver, int32_t*); extern
> +cl_driver_enlarge_stack_size_cb *cl_driver_enlarge_stack_size;
> +
> typedef enum cl_self_test_res{
> SELF_TEST_PASS = 0,
> SELF_TEST_SLM_FAIL = 1,
> diff --git a/src/cl_driver_defs.c b/src/cl_driver_defs.c index
> 31176a4..ea4e90a 100644
> --- a/src/cl_driver_defs.c
> +++ b/src/cl_driver_defs.c
> @@ -25,6 +25,7 @@ LOCAL cl_driver_new_cb *cl_driver_new = NULL;
> LOCAL cl_driver_delete_cb *cl_driver_delete = NULL; LOCAL
> cl_driver_get_bufmgr_cb *cl_driver_get_bufmgr = NULL; LOCAL
> cl_driver_get_ver_cb *cl_driver_get_ver = NULL;
> +LOCAL cl_driver_enlarge_stack_size_cb *cl_driver_enlarge_stack_size =
> +NULL;
> LOCAL cl_driver_set_atomic_flag_cb *cl_driver_set_atomic_flag = NULL;
> LOCAL cl_driver_get_device_id_cb *cl_driver_get_device_id = NULL; LOCAL
> cl_driver_update_device_info_cb *cl_driver_update_device_info = NULL;
> diff --git a/src/intel/intel_driver.c b/src/intel/intel_driver.c index
> e561725..ec2fb31 100644
> --- a/src/intel/intel_driver.c
> +++ b/src/intel/intel_driver.c
> @@ -464,6 +464,15 @@ intel_driver_get_ver(struct intel_driver *drv) }
>
> static void
> +intel_driver_enlarge_stack_size(struct intel_driver *drv, int32_t
> +*stack_size) {
> + if (drv->gen_ver == 75)
> + *stack_size = *stack_size * 4;
> + else if (drv->device_id == PCI_CHIP_BROXTON_1)
> + *stack_size = *stack_size * 2;
> +}
> +
> +static void
> intel_driver_set_atomic_flag(intel_driver_t *drv, int atomic_flag) {
> drv->atomic_test_result = atomic_flag; @@ -921,6 +930,7 @@
> intel_setup_callbacks(void)
> cl_driver_new = (cl_driver_new_cb *) cl_intel_driver_new;
> cl_driver_delete = (cl_driver_delete_cb *) cl_intel_driver_delete;
> cl_driver_get_ver = (cl_driver_get_ver_cb *) intel_driver_get_ver;
> + cl_driver_enlarge_stack_size = (cl_driver_enlarge_stack_size_cb *)
> + intel_driver_enlarge_stack_size;
> cl_driver_set_atomic_flag = (cl_driver_set_atomic_flag_cb *)
> intel_driver_set_atomic_flag;
> cl_driver_get_bufmgr = (cl_driver_get_bufmgr_cb *)
> intel_driver_get_bufmgr;
> cl_driver_get_device_id = (cl_driver_get_device_id_cb *)
> intel_get_device_id;
> --
> 2.7.4
>
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list