[Beignet] [PATCH v3] use self test to determine enable/or disable atomics in L3 for HSW.

Yang, Rong R rong.r.yang at intel.com
Mon Jun 29 23:08:20 PDT 2015


LGTM, thanks, pushed.

> -----Original Message-----
> From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of
> xionghu.luo at intel.com
> Sent: Monday, June 29, 2015 15:22
> To: beignet at lists.freedesktop.org
> Cc: Luo, Xionghu
> Subject: [Beignet] [PATCH v3] use self test to determine enable/or disable
> atomics in L3 for HSW.
> 
> From: Luo Xionghu <xionghu.luo at intel.com>
> 
> check the selftest kernel return value, if enqueue kernel failed, set the flag
> to not enable atomics the L3 for HSW.
> 
> This reverts commit 83f8739b6fc4893fac60145326052ccb5cf653dc.
> v2: don't use global variable to pass value from runtime to driver.
> v3: add type SELF_TEST_OTHER_FAIL to differentiate from
> SELF_TEST_ATOMIC_FAIL; seperate the ATOMIC_FAIL from SLM_FAIL, only
> SLM_FAIL can be control by env OCL_IGNORE_SELF_TEST.
> 
> Signed-off-by: Luo Xionghu <xionghu.luo at intel.com>
> ---
>  src/cl_context.c          |  1 +
>  src/cl_device_id.c        | 48 +++++++++++++++++++++++++---------------------
> -
>  src/cl_device_id.h        |  1 +
>  src/cl_driver.h           |  9 +++++++++
>  src/cl_driver_defs.c      |  1 +
>  src/intel/intel_defines.h |  3 +++
>  src/intel/intel_driver.c  |  7 +++++++
>  src/intel/intel_driver.h  |  1 +
>  src/intel/intel_gpgpu.c   | 18 +++++++++++++++++-
>  9 files changed, 66 insertions(+), 23 deletions(-)
> 
> diff --git a/src/cl_context.c b/src/cl_context.c index 0f08e6a..773f545 100644
> --- a/src/cl_context.c
> +++ b/src/cl_context.c
> @@ -149,6 +149,7 @@ cl_create_context(const cl_context_properties *
> properties,
>    /* Save the user callback and user data*/
>    ctx->pfn_notify = pfn_notify;
>    ctx->user_data = user_data;
> +  cl_driver_set_atomic_flag(ctx->drv, ctx->device->atomic_test_result);
> 
>  exit:
>    if (errcode_ret != NULL)
> diff --git a/src/cl_device_id.c b/src/cl_device_id.c index 215f7f2..9d3ab2f
> 100644
> --- a/src/cl_device_id.c
> +++ b/src/cl_device_id.c
> @@ -197,7 +197,6 @@ static struct _cl_device_id intel_skl_gt4_device =
> {  #include "cl_gen75_device.h"
>  };
> 
> -
>  LOCAL cl_device_id
>  cl_get_gt_device(void)
>  {
> @@ -546,8 +545,11 @@ skl_gt4_break:
>  }
> 
>  /* Runs a small kernel to check that the device works; returns
> - * 0 for success, 1 for silently wrong result, 2 for error */ -LOCAL cl_int
> + * SELF_TEST_PASS: for success.
> + * SELF_TEST_SLM_FAIL: for SLM results mismatch;
> + * SELF_TEST_ATOMIC_FAIL: for hsw enqueue  kernel failure to not enable
> atomics in L3.
> + * SELF_TEST_OTHER_FAIL: other fail like runtime API fail.*/ LOCAL
> +cl_self_test_res
>  cl_self_test(cl_device_id device)
>  {
>    cl_int status;
> @@ -566,7 +568,7 @@ cl_self_test(cl_device_id device)
>    "  buf[get_global_id(0)] = tmp[2 - get_local_id(0)] + buf[get_global_id(0)];"
>    "}"; // using __local to catch the "no SLM on Haswell" problem
>    static int tested = 0;
> -  static cl_int ret = 2;
> +  static cl_self_test_res ret = SELF_TEST_OTHER_FAIL;
>    if (tested != 0)
>      return ret;
>    tested = 1;
> @@ -589,14 +591,16 @@ cl_self_test(cl_device_id device)
>                    status = clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, n*4,
> test_data, 1, &kernel_finished, NULL);
>                    if (status == CL_SUCCESS) {
>                      if (test_data[0] == 8 && test_data[1] == 14 && test_data[2] == 8){
> -                      ret = 0;
> +                      ret = SELF_TEST_PASS;
>                      } else {
> -                      ret = 1;
> +                      ret = SELF_TEST_SLM_FAIL;
>                        printf("Beignet: self-test failed: (3, 7, 5) + (5, 7, 3) returned
> (%i, %i, %i)\n"
>                               "See README.md or
> http://www.freedesktop.org/wiki/Software/Beignet/\n",
>                               test_data[0], test_data[1], test_data[2]);
>                      }
>                    }
> +                } else{
> +                  ret = SELF_TEST_ATOMIC_FAIL;
>                  }
>                }
>              }
> @@ -610,10 +614,6 @@ cl_self_test(cl_device_id device)
>      clReleaseCommandQueue(queue);
>    }
>    clReleaseContext(ctx);
> -  if (ret == 2) {
> -    printf("Beignet: self-test failed: error %i\n"
> -           "See README.md or
> http://www.freedesktop.org/wiki/Software/Beignet/\n", status);
> -  }
>    return ret;
>  }
> 
> @@ -628,18 +628,22 @@ cl_get_device_ids(cl_platform_id    platform,
> 
>    /* Do we have a usable device? */
>    device = cl_get_gt_device();
> -  if (device && cl_self_test(device)) {
> -    int disable_self_test = 0;
> -    // can't use BVAR (backend/src/sys/cvar.hpp) here as it's C++
> -    const char *env = getenv("OCL_IGNORE_SELF_TEST");
> -    if (env != NULL) {
> -      sscanf(env, "%i", &disable_self_test);
> -    }
> -    if (disable_self_test) {
> -      printf("Beignet: Warning - overriding self-test failure\n");
> -    } else {
> -      printf("Beignet: disabling non-working device\n");
> -      device = 0;
> +  if (device) {
> +    cl_self_test_res ret = cl_self_test(device);
> +    device->atomic_test_result = ret;
> +    if(ret == SELF_TEST_SLM_FAIL) {
> +      int disable_self_test = 0;
> +      // can't use BVAR (backend/src/sys/cvar.hpp) here as it's C++
> +      const char *env = getenv("OCL_IGNORE_SELF_TEST");
> +      if (env != NULL) {
> +        sscanf(env, "%i", &disable_self_test);
> +      }
> +      if (disable_self_test) {
> +        printf("Beignet: Warning - overriding self-test failure\n");
> +      } else {
> +        printf("Beignet: disabling non-working device\n");
> +        device = 0;
> +      }
>      }
>    }
>    if (!device) {
> diff --git a/src/cl_device_id.h b/src/cl_device_id.h index ee6a8e6..1bd5806
> 100644
> --- a/src/cl_device_id.h
> +++ b/src/cl_device_id.h
> @@ -113,6 +113,7 @@ struct _cl_device_id {
>    cl_device_affinity_domain    affinity_domain;
>    cl_device_partition_property partition_type[3];
>    cl_uint      device_reference_count;
> +  uint32_t atomic_test_result;
>  };
> 
>  /* Get a device from the given platform */ diff --git a/src/cl_driver.h
> b/src/cl_driver.h index b2510de..1ab4dff 100644
> --- a/src/cl_driver.h
> +++ b/src/cl_driver.h
> @@ -49,6 +49,15 @@ extern cl_driver_get_bufmgr_cb
> *cl_driver_get_bufmgr;  typedef uint32_t (cl_driver_get_ver_cb)(cl_driver);
>  extern cl_driver_get_ver_cb *cl_driver_get_ver;
> 
> +typedef enum cl_self_test_res{
> +  SELF_TEST_PASS = 0,
> +  SELF_TEST_SLM_FAIL  = 1,
> +  SELF_TEST_ATOMIC_FAIL = 2,
> +  SELF_TEST_OTHER_FAIL = 3,
> +} cl_self_test_res;
> +/* Set the atomic enable/disable flag in the driver */ typedef void
> +(cl_driver_set_atomic_flag_cb)(cl_driver, int); extern
> +cl_driver_set_atomic_flag_cb *cl_driver_set_atomic_flag;
> 
> /**********************************************************
> ****************
>   * GPGPU command streamer
> 
> **********************************************************
> ****************/
> diff --git a/src/cl_driver_defs.c b/src/cl_driver_defs.c index 9a47210..b77acdc
> 100644
> --- a/src/cl_driver_defs.c
> +++ b/src/cl_driver_defs.c
> @@ -25,6 +25,7 @@ LOCAL cl_driver_new_cb *cl_driver_new = NULL;
> LOCAL cl_driver_delete_cb *cl_driver_delete = NULL;  LOCAL
> cl_driver_get_bufmgr_cb *cl_driver_get_bufmgr = NULL;  LOCAL
> cl_driver_get_ver_cb *cl_driver_get_ver = NULL;
> +LOCAL cl_driver_set_atomic_flag_cb *cl_driver_set_atomic_flag = NULL;
>  LOCAL cl_driver_get_device_id_cb *cl_driver_get_device_id = NULL;  LOCAL
> cl_driver_update_device_info_cb *cl_driver_update_device_info = NULL;
> 
> diff --git a/src/intel/intel_defines.h b/src/intel/intel_defines.h index
> 1080a91..6ada30c 100644
> --- a/src/intel/intel_defines.h
> +++ b/src/intel/intel_defines.h
> @@ -304,6 +304,9 @@
> 
>  #define URB_SIZE(intel)         (IS_IGDNG(intel->device_id) ? 1024 : \
>                                   IS_G4X(intel->device_id) ? 384 : 256)
> +// HSW
> +#define HSW_SCRATCH1_OFFSET                      (0xB038)
> +#define HSW_ROW_CHICKEN3_HDC_OFFSET              (0xE49C)
> 
>  // L3 cache stuff
>  #define GEN7_L3_SQC_REG1_ADDRESS_OFFSET          (0XB010)
> diff --git a/src/intel/intel_driver.c b/src/intel/intel_driver.c index
> 1bebd9a..9c72777 100644
> --- a/src/intel/intel_driver.c
> +++ b/src/intel/intel_driver.c
> @@ -448,6 +448,12 @@ intel_driver_get_ver(struct intel_driver *drv)
>    return drv->gen_ver;
>  }
> 
> +static void
> +intel_driver_set_atomic_flag(intel_driver_t *drv, int atomic_flag) {
> +  drv->atomic_test_result = atomic_flag; }
> +
>  static size_t drm_intel_bo_get_size(drm_intel_bo *bo) { return bo->size; }
> static void* drm_intel_bo_get_virtual(drm_intel_bo *bo) { return bo-
> >virtual; }
> 
> @@ -834,6 +840,7 @@ intel_setup_callbacks(void)
>    cl_driver_new = (cl_driver_new_cb *) cl_intel_driver_new;
>    cl_driver_delete = (cl_driver_delete_cb *) cl_intel_driver_delete;
>    cl_driver_get_ver = (cl_driver_get_ver_cb *) intel_driver_get_ver;
> +  cl_driver_set_atomic_flag = (cl_driver_set_atomic_flag_cb *)
> + intel_driver_set_atomic_flag;
>    cl_driver_get_bufmgr = (cl_driver_get_bufmgr_cb *)
> intel_driver_get_bufmgr;
>    cl_driver_get_device_id = (cl_driver_get_device_id_cb *)
> intel_get_device_id;
>    cl_driver_update_device_info = (cl_driver_update_device_info_cb *)
> intel_update_device_info; diff --git a/src/intel/intel_driver.h
> b/src/intel/intel_driver.h index f972ec8..51f0e0d 100644
> --- a/src/intel/intel_driver.h
> +++ b/src/intel/intel_driver.h
> @@ -89,6 +89,7 @@ typedef struct intel_driver
>    Display *x11_display;
>    struct dri_state *dri_ctx;
>    struct intel_gpgpu_node *gpgpu_list;
> +  int atomic_test_result;
>  } intel_driver_t;
> 
>  #define SET_BLOCKED_SIGSET(DRIVER)   do {                     \
> diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c index
> b083dab..901bd98 100644
> --- a/src/intel/intel_gpgpu.c
> +++ b/src/intel/intel_gpgpu.c
> @@ -719,7 +719,23 @@ static void
>  intel_gpgpu_set_L3_gen75(intel_gpgpu_t *gpgpu, uint32_t use_slm)  {
>    /* still set L3 in batch buffer for fulsim. */
> -  BEGIN_BATCH(gpgpu->batch, 9);
> +  if(gpgpu->drv->atomic_test_result != SELF_TEST_ATOMIC_FAIL)  {
> +    BEGIN_BATCH(gpgpu->batch, 15);
> +    OUT_BATCH(gpgpu->batch, CMD_LOAD_REGISTER_IMM | 1); /* length -
> 2 */
> +    /* FIXME: KMD always disable the atomic in L3 for some reason.
> +       I checked the spec, and don't think we need that workaround now.
> +       Before I send a patch to kernel, let's just enable it here. */
> +    OUT_BATCH(gpgpu->batch, HSW_SCRATCH1_OFFSET);
> +    OUT_BATCH(gpgpu->batch, 0);                         /* enable atomic in L3 */
> +    OUT_BATCH(gpgpu->batch, CMD_LOAD_REGISTER_IMM | 1); /* length -
> 2 */
> +    OUT_BATCH(gpgpu->batch, HSW_ROW_CHICKEN3_HDC_OFFSET);
> +    OUT_BATCH(gpgpu->batch, (1 << 6ul) << 16);          /* enable atomic in L3
> */
> +  }
> +  else
> +  {
> +    BEGIN_BATCH(gpgpu->batch, 9);
> +  }
>    OUT_BATCH(gpgpu->batch, CMD_LOAD_REGISTER_IMM | 1); /* length - 2
> */
>    OUT_BATCH(gpgpu->batch, GEN7_L3_SQC_REG1_ADDRESS_OFFSET);
>    OUT_BATCH(gpgpu->batch, 0x08800000);
> --
> 1.9.1
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list