[Beignet] [PATCH V2] add another broxton pciid 0x5A85

Yang, Rong R rong.r.yang at intel.com
Mon Sep 12 08:47:05 UTC 2016


LGTM.
Because 0x5A85 is a fused-down part, we will need to take care of the fused-down sub-slice once pooled EU is enabled.

> -----Original Message-----
> From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of
> Guo Yejun
> Sent: Monday, September 12, 2016 15:02
> To: beignet at lists.freedesktop.org
> Cc: Guo, Yejun <yejun.guo at intel.com>
> Subject: [Beignet] [PATCH V2] add another broxton pciid 0x5A85
> 
> v2: split the code related to chv into another patch
> Signed-off-by: Guo Yejun <yejun.guo at intel.com>
> ---
>  src/cl_command_queue_gen7.c | 11 ++++++-----
>  src/cl_device_data.h        |  4 +++-
>  src/cl_device_id.c          | 23 ++++++++++++++++++++++-
>  src/cl_driver.h             |  4 ++++
>  src/cl_driver_defs.c        |  1 +
>  src/intel/intel_driver.c    | 10 ++++++++++
>  6 files changed, 46 insertions(+), 7 deletions(-)
> 
> diff --git a/src/cl_command_queue_gen7.c
> b/src/cl_command_queue_gen7.c index 6a9cf1f..b6a5920 100644
> --- a/src/cl_command_queue_gen7.c
> +++ b/src/cl_command_queue_gen7.c
> @@ -272,12 +272,13 @@ cl_bind_stack(cl_gpgpu gpgpu, cl_kernel ker)
>    assert(offset >= 0);
>    stack_sz *= interp_kernel_get_simd_width(ker->opaque);
>    stack_sz *= device->max_compute_unit * ctx->device-
> >max_thread_per_unit;
> -  /* Because HSW calc stack offset per thread is relative with half slice, when
> -     thread schedule in half slice is not balance, would out of bound. Because
> -     the max half slice is 4 in GT4, multiply stack size with 4 for safe.
> +
> +  /* for some hardware, part of EUs are disabled with EU id reserved,
> +   * it makes the active EU id larger than count of EUs within a subslice,
> +   * need to enlarge stack size for such case to avoid out of range.
>     */
> -  if(cl_driver_get_ver(ctx->drv) == 75)
> -    stack_sz *= 4;
> +  cl_driver_enlarge_stack_size(ctx->drv, &stack_sz);
> +
>    cl_gpgpu_set_stack(gpgpu, offset, stack_sz, BTI_PRIVATE);  }
> 
> diff --git a/src/cl_device_data.h b/src/cl_device_data.h index
> f680219..30366ea 100644
> --- a/src/cl_device_data.h
> +++ b/src/cl_device_data.h
> @@ -298,9 +298,11 @@
> 
>  /* BXT */
>  #define PCI_CHIP_BROXTON_P	0x5A84   /* Intel(R) BXT-P for mobile
> desktop */
> +#define PCI_CHIP_BROXTON_1	0x5A85
> 
>  #define IS_BROXTON(devid)               \
> -  (devid == PCI_CHIP_BROXTON_P)
> +  (devid == PCI_CHIP_BROXTON_P ||       \
> +   devid == PCI_CHIP_BROXTON_1)
> 
>  #define PCI_CHIP_KABYLAKE_ULT_GT1     0x5906
>  #define PCI_CHIP_KABYLAKE_ULT_GT2     0x5916
> diff --git a/src/cl_device_id.c b/src/cl_device_id.c index 34c182c..ce340c1
> 100644
> --- a/src/cl_device_id.c
> +++ b/src/cl_device_id.c
> @@ -195,6 +195,16 @@ static struct _cl_device_id intel_bxt_device =
> {  #include "cl_gen9_device.h"
>  };
> 
> +static struct _cl_device_id intel_bxt1_device = {
> +  .max_compute_unit = 12,
> +  .max_thread_per_unit = 6,
> +  .sub_slice_count = 2,
> +  .max_work_item_sizes = {512, 512, 512},
> +  .max_work_group_size = 512,
> +  .max_clock_frequency = 1000,
> +#include "cl_gen9_device.h"
> +};
> +
>  static struct _cl_device_id intel_kbl_gt1_device = {
>    .max_compute_unit = 12,
>    .max_thread_per_unit = 7,
> @@ -615,6 +625,16 @@ bxt_break:
>        cl_intel_platform_enable_extension(ret, cl_khr_fp16_ext_id);
>        break;
> 
> +    case PCI_CHIP_BROXTON_1:
> +      DECL_INFO_STRING(bxt1_break, intel_bxt1_device, name, "Intel(R)
> +HD Graphics Broxton 1");
> +bxt1_break:
> +      intel_bxt1_device.device_id = device_id;
> +      intel_bxt1_device.platform = cl_get_platform_default();
> +      ret = &intel_bxt1_device;
> +      cl_intel_platform_get_default_extension(ret);
> +      cl_intel_platform_enable_extension(ret, cl_khr_fp16_ext_id);
> +      break;
> +
>      case PCI_CHIP_KABYLAKE_ULT_GT1:
>        DECL_INFO_STRING(kbl_gt1_break, intel_kbl_gt1_device, name, "Intel(R)
> HD Graphics Kabylake ULT GT1");
>      case PCI_CHIP_KABYLAKE_DT_GT1:
> @@ -931,6 +951,7 @@ LOCAL cl_bool is_gen_device(cl_device_id device) {
>           device == &intel_skl_gt3_device ||
>           device == &intel_skl_gt4_device ||
>           device == &intel_bxt_device     ||
> +         device == &intel_bxt1_device    ||
>           device == &intel_kbl_gt1_device ||
>           device == &intel_kbl_gt15_device ||
>           device == &intel_kbl_gt2_device || @@ -1074,7 +1095,7 @@
> cl_device_get_version(cl_device_id device, cl_int *ver)
>      *ver = 8;
>    } else if (device == &intel_skl_gt1_device || device ==
> &intel_skl_gt2_device
>          || device == &intel_skl_gt3_device || device == &intel_skl_gt4_device
> -        || device == &intel_bxt_device || device == &intel_kbl_gt1_device
> +        || device == &intel_bxt_device || device == &intel_bxt1_device
> + || device == &intel_kbl_gt1_device
>          || device == &intel_kbl_gt2_device || device == &intel_kbl_gt3_device
>          || device == &intel_kbl_gt4_device || device ==
> &intel_kbl_gt15_device) {
>      *ver = 9;
> diff --git a/src/cl_driver.h b/src/cl_driver.h index 16730db..584be9d 100644
> --- a/src/cl_driver.h
> +++ b/src/cl_driver.h
> @@ -51,6 +51,10 @@ extern cl_driver_get_bufmgr_cb
> *cl_driver_get_bufmgr;  typedef uint32_t (cl_driver_get_ver_cb)(cl_driver);
>  extern cl_driver_get_ver_cb *cl_driver_get_ver;
> 
> +/* enlarge stack size from the driver */ typedef void
> +(cl_driver_enlarge_stack_size_cb)(cl_driver, int32_t*); extern
> +cl_driver_enlarge_stack_size_cb *cl_driver_enlarge_stack_size;
> +
>  typedef enum cl_self_test_res{
>    SELF_TEST_PASS = 0,
>    SELF_TEST_SLM_FAIL  = 1,
> diff --git a/src/cl_driver_defs.c b/src/cl_driver_defs.c index
> 31176a4..ea4e90a 100644
> --- a/src/cl_driver_defs.c
> +++ b/src/cl_driver_defs.c
> @@ -25,6 +25,7 @@ LOCAL cl_driver_new_cb *cl_driver_new = NULL;
> LOCAL cl_driver_delete_cb *cl_driver_delete = NULL;  LOCAL
> cl_driver_get_bufmgr_cb *cl_driver_get_bufmgr = NULL;  LOCAL
> cl_driver_get_ver_cb *cl_driver_get_ver = NULL;
> +LOCAL cl_driver_enlarge_stack_size_cb *cl_driver_enlarge_stack_size =
> +NULL;
>  LOCAL cl_driver_set_atomic_flag_cb *cl_driver_set_atomic_flag = NULL;
> LOCAL cl_driver_get_device_id_cb *cl_driver_get_device_id = NULL;  LOCAL
> cl_driver_update_device_info_cb *cl_driver_update_device_info = NULL;
> diff --git a/src/intel/intel_driver.c b/src/intel/intel_driver.c index
> e561725..ec2fb31 100644
> --- a/src/intel/intel_driver.c
> +++ b/src/intel/intel_driver.c
> @@ -464,6 +464,15 @@ intel_driver_get_ver(struct intel_driver *drv)  }
> 
>  static void
> +intel_driver_enlarge_stack_size(struct intel_driver *drv, int32_t
> +*stack_size) {
> +    if (drv->gen_ver == 75)
> +      *stack_size = *stack_size * 4;
> +    else if (drv->device_id == PCI_CHIP_BROXTON_1)
> +      *stack_size = *stack_size * 2;
> +}
> +
> +static void
>  intel_driver_set_atomic_flag(intel_driver_t *drv, int atomic_flag)  {
>    drv->atomic_test_result = atomic_flag; @@ -921,6 +930,7 @@
> intel_setup_callbacks(void)
>    cl_driver_new = (cl_driver_new_cb *) cl_intel_driver_new;
>    cl_driver_delete = (cl_driver_delete_cb *) cl_intel_driver_delete;
>    cl_driver_get_ver = (cl_driver_get_ver_cb *) intel_driver_get_ver;
> +  cl_driver_enlarge_stack_size = (cl_driver_enlarge_stack_size_cb *)
> + intel_driver_enlarge_stack_size;
>    cl_driver_set_atomic_flag = (cl_driver_set_atomic_flag_cb *)
> intel_driver_set_atomic_flag;
>    cl_driver_get_bufmgr = (cl_driver_get_bufmgr_cb *)
> intel_driver_get_bufmgr;
>    cl_driver_get_device_id = (cl_driver_get_device_id_cb *)
> intel_get_device_id;
> --
> 2.7.4
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list