[Beignet] [PATCH] Utest: Add test for half type subgroup functions

Yang, Rong R rong.r.yang at intel.com
Wed Aug 31 06:48:54 UTC 2016


LGTM, will push it later, thanks.

> -----Original Message-----
> From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of
> Xiuli Pan
> Sent: Thursday, August 18, 2016 12:57
> To: beignet at lists.freedesktop.org
> Cc: Pan, Xiuli <xiuli.pan at intel.com>
> Subject: [Beignet] [PATCH] Utest: Add test for half type subgroup functions
> 
> From: Pan Xiuli <xiuli.pan at intel.com>
> 
> Check if device support subgroup and half first, use build options to hide
> code for unsported device.
> V2: Fix half part test case for utest multithread.
> 
> Signed-off-by: Pan Xiuli <xiuli.pan at intel.com>
> ---
>  kernels/compiler_subgroup_broadcast.cl      |  16 ++++-
>  kernels/compiler_subgroup_reduce.cl         |  19 +++++
>  kernels/compiler_subgroup_scan_exclusive.cl |  19 +++++
> kernels/compiler_subgroup_scan_inclusive.cl |  19 +++++
>  utests/compiler_subgroup_broadcast.cpp      |  27 +++++--
>  utests/compiler_subgroup_reduce.cpp         | 104
> +++++++++++++++++++++++----
>  utests/compiler_subgroup_scan_exclusive.cpp | 107
> ++++++++++++++++++++++++----
> utests/compiler_subgroup_scan_inclusive.cpp | 100
> ++++++++++++++++++++++----
>  8 files changed, 367 insertions(+), 44 deletions(-)
> 
> diff --git a/kernels/compiler_subgroup_broadcast.cl
> b/kernels/compiler_subgroup_broadcast.cl
> index 4f21cf5..8c155ee 100644
> --- a/kernels/compiler_subgroup_broadcast.cl
> +++ b/kernels/compiler_subgroup_broadcast.cl
> @@ -1,7 +1,7 @@
>  /*
>   * Subgroup broadcast 1D functions
>   */
> -
> +#ifndef HALF
>  kernel void compiler_subgroup_broadcast_imm_int(global int *src,
>                                                  global int *dst,
>                                                  uint simd_id) @@ -32,3 +32,17 @@ kernel void
> compiler_subgroup_broadcast_long(global long *src,
>    long broadcast_val = sub_group_broadcast(val, simd_id);
>    dst[index] = broadcast_val;
>  }
> +#else
> +#pragma OPENCL EXTENSION cl_khr_fp16 : enable kernel void
> +compiler_subgroup_broadcast_half(global half *src,
> +                                                global half *dst,
> +                                                uint simd_id) {
> +  uint index = get_global_id(0);
> +
> +  half val = src[index];
> +  half broadcast_val = sub_group_broadcast(val, simd_id);
> +  printf("%d val %d is %d\n",index,as_ushort(val),
> +as_ushort(broadcast_val));
> +  dst[index] = broadcast_val;
> +}
> +#endif
> diff --git a/kernels/compiler_subgroup_reduce.cl
> b/kernels/compiler_subgroup_reduce.cl
> index 77ffb07..6d7ecfd 100644
> --- a/kernels/compiler_subgroup_reduce.cl
> +++ b/kernels/compiler_subgroup_reduce.cl
> @@ -1,6 +1,7 @@
>  /*
>   * Subgroup any all functions
>   */
> +#ifndef HALF
>  kernel void compiler_subgroup_any(global int *src, global int *dst) {
>    int val = src[get_global_id(0)];
>    int predicate = sub_group_any(val);
> @@ -134,3 +135,21 @@ kernel void
> compiler_subgroup_reduce_min_float(global float *src, global float *
>    float sum = sub_group_reduce_min(val);
>    dst[get_global_id(0)] = sum;
>  }
> +#else
> +#pragma OPENCL EXTENSION cl_khr_fp16 : enable kernel void
> +compiler_subgroup_reduce_add_half(global half *src, global half *dst) {
> +  half val = src[get_global_id(0)];
> +  half sum = sub_group_reduce_add(val);
> +  dst[get_global_id(0)] = sum;
> +}
> +kernel void compiler_subgroup_reduce_max_half(global half *src, global
> +half *dst) {
> +  half val = src[get_global_id(0)];
> +  half sum = sub_group_reduce_max(val);
> +  dst[get_global_id(0)] = sum;
> +}
> +kernel void compiler_subgroup_reduce_min_half(global half *src, global
> +half *dst) {
> +  half val = src[get_global_id(0)];
> +  half sum = sub_group_reduce_min(val);
> +  dst[get_global_id(0)] = sum;
> +}
> +#endif
> diff --git a/kernels/compiler_subgroup_scan_exclusive.cl
> b/kernels/compiler_subgroup_scan_exclusive.cl
> index afc00d0..ca0ada2 100644
> --- a/kernels/compiler_subgroup_scan_exclusive.cl
> +++ b/kernels/compiler_subgroup_scan_exclusive.cl
> @@ -1,6 +1,7 @@
>  /*
>   * Subgroup scan exclusive add functions
>   */
> +#ifndef HALF
>  kernel void compiler_subgroup_scan_exclusive_add_int(global int *src,
> global int *dst) {
>    int val = src[get_global_id(0)];
>    int sum = sub_group_scan_exclusive_add(val);
> @@ -96,3 +97,21 @@ kernel void
> compiler_subgroup_scan_exclusive_min_float(global float *src, global
>    float sum = sub_group_scan_exclusive_min(val);
>    dst[get_global_id(0)] = sum;
>  }
> +#else
> +#pragma OPENCL EXTENSION cl_khr_fp16 : enable kernel void
> +compiler_subgroup_scan_exclusive_add_half(global half *src, global half
> +*dst) {
> +  half val = src[get_global_id(0)];
> +  half sum = sub_group_scan_exclusive_add(val);
> +  dst[get_global_id(0)] = sum;
> +}
> +kernel void compiler_subgroup_scan_exclusive_max_half(global half *src,
> +global half *dst) {
> +  half val = src[get_global_id(0)];
> +  half sum = sub_group_scan_exclusive_max(val);
> +  dst[get_global_id(0)] = sum;
> +}
> +kernel void compiler_subgroup_scan_exclusive_min_half(global half *src,
> +global half *dst) {
> +  half val = src[get_global_id(0)];
> +  half sum = sub_group_scan_exclusive_min(val);
> +  dst[get_global_id(0)] = sum;
> +}
> +#endif
> diff --git a/kernels/compiler_subgroup_scan_inclusive.cl
> b/kernels/compiler_subgroup_scan_inclusive.cl
> index da1a6e6..e97521c 100644
> --- a/kernels/compiler_subgroup_scan_inclusive.cl
> +++ b/kernels/compiler_subgroup_scan_inclusive.cl
> @@ -1,6 +1,7 @@
>  /*
>   * Subgroup scan inclusive add functions
>   */
> +#ifndef HALF
>  kernel void compiler_subgroup_scan_inclusive_add_int(global int *src,
> global int *dst) {
>    int val = src[get_global_id(0)];
>    int sum = sub_group_scan_inclusive_add(val);
> @@ -96,3 +97,21 @@ kernel void
> compiler_subgroup_scan_inclusive_min_float(global float *src, global
>    float sum = sub_group_scan_inclusive_min(val);
>    dst[get_global_id(0)] = sum;
>  }
> +#else
> +#pragma OPENCL EXTENSION cl_khr_fp16 : enable kernel void
> +compiler_subgroup_scan_inclusive_add_half(global half *src, global half
> +*dst) {
> +  half val = src[get_global_id(0)];
> +  half sum = sub_group_scan_inclusive_add(val);
> +  dst[get_global_id(0)] = sum;
> +}
> +kernel void compiler_subgroup_scan_inclusive_max_half(global half *src,
> +global half *dst) {
> +  half val = src[get_global_id(0)];
> +  half sum = sub_group_scan_inclusive_max(val);
> +  dst[get_global_id(0)] = sum;
> +}
> +kernel void compiler_subgroup_scan_inclusive_min_half(global half *src,
> +global half *dst) {
> +  half val = src[get_global_id(0)];
> +  half sum = sub_group_scan_inclusive_min(val);
> +  dst[get_global_id(0)] = sum;
> +}
> +#endif
> diff --git a/utests/compiler_subgroup_broadcast.cpp
> b/utests/compiler_subgroup_broadcast.cpp
> index 2835161..9a7979c 100644
> --- a/utests/compiler_subgroup_broadcast.cpp
> +++ b/utests/compiler_subgroup_broadcast.cpp
> @@ -59,10 +59,15 @@ static void generate_data(T* &input,
>        /* initially 0, augment after */
>        input[gid + lid] = 0;
> 
> -      /* check all data types, test ideal for QWORD types */
> -      input[gid + lid] += ((rand() % 2 - 1) * base_val);
> -      /* add trailing random bits, tests GENERAL cases */
> -      input[gid + lid] += (rand() % 112);
> +      if(sizeof(T) == 2) {
> +        input[gid + lid] = __float_to_half(as_uint((float)(gid + lid)));
> +      }
> +      else {
> +        /* check all data types, test ideal for QWORD types */
> +        input[gid + lid] += ((rand() % 2 - 1) * base_val);
> +        /* add trailing random bits, tests GENERAL cases */
> +        input[gid + lid] += (rand() % 112);
> +      }
> 
>  #if DEBUG_STDOUT
>        /* output generated input */
> @@ -185,3 +190,17 @@ void compiler_subgroup_broadcast_long(void)
>    subgroup_generic(input, expected);
>  }
> 
> MAKE_UTEST_FROM_FUNCTION_WITH_ISSUE(compiler_subgroup_broadca
> st_long);
> +void compiler_subgroup_broadcast_half(void)
> +{
> +  if(!cl_check_subgroups())
> +    return;
> +  if(!cl_check_half())
> +    return;
> +  cl_half *input = NULL;
> +  cl_half *expected = NULL;
> +  OCL_CALL(cl_kernel_init, "compiler_subgroup_broadcast.cl",
> +                           "compiler_subgroup_broadcast_half",
> +                           SOURCE, "-DHALF");
> +  subgroup_generic(input, expected);
> +}
> +MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_broadcast_half);
> diff --git a/utests/compiler_subgroup_reduce.cpp
> b/utests/compiler_subgroup_reduce.cpp
> index 3c3df06..ff545c6 100644
> --- a/utests/compiler_subgroup_reduce.cpp
> +++ b/utests/compiler_subgroup_reduce.cpp
> @@ -33,7 +33,8 @@ template<class T>
>  static void compute_expected(WG_FUNCTION wg_func,
>                      T* input,
>                      T* expected,
> -                    size_t SIMD_SIZE)
> +                    size_t SIMD_SIZE,
> +                    bool IS_HALF)
>  {
>    if(wg_func == WG_ANY)
>    {
> @@ -54,24 +55,43 @@ static void compute_expected(WG_FUNCTION
> wg_func,
>    else if(wg_func == WG_REDUCE_ADD)
>    {
>      T wg_sum = input[0];
> -    for(uint32_t i = 1; i < SIMD_SIZE; i++)
> -      wg_sum += input[i];
> +    if(IS_HALF) {
> +      float wg_sum_tmp = 0.0f;
> +      for(uint32_t i = 0; i < SIMD_SIZE; i++) {
> +        wg_sum_tmp += as_float(__half_to_float(input[i]));
> +      }
> +      wg_sum = __float_to_half(as_uint(wg_sum_tmp));
> +    }
> +    else {
> +      for(uint32_t i = 1; i < SIMD_SIZE; i++)
> +        wg_sum += input[i];
> +    }
>      for(uint32_t i = 0; i < SIMD_SIZE; i++)
>        expected[i] = wg_sum;
>    }
>    else if(wg_func == WG_REDUCE_MAX)
>    {
>      T wg_max = input[0];
> -    for(uint32_t i = 1; i < SIMD_SIZE; i++)
> -      wg_max = max(input[i], wg_max);
> +    for(uint32_t i = 1; i < SIMD_SIZE; i++) {
> +      if (IS_HALF) {
> +        wg_max = (as_float(__half_to_float(input[i])) >
> as_float(__half_to_float(wg_max))) ? input[i] : wg_max;
> +      }
> +      else
> +        wg_max = max(input[i], wg_max);
> +    }
>      for(uint32_t i = 0; i < SIMD_SIZE; i++)
>        expected[i] = wg_max;
>    }
>    else if(wg_func == WG_REDUCE_MIN)
>    {
>      T wg_min = input[0];
> -    for(uint32_t i = 1; i < SIMD_SIZE; i++)
> -      wg_min = min(input[i], wg_min);
> +    for(uint32_t i = 1; i < SIMD_SIZE; i++) {
> +      if (IS_HALF) {
> +        wg_min= (as_float(__half_to_float(input[i])) <
> as_float(__half_to_float(wg_min))) ? input[i] : wg_min;
> +      }
> +      else
> +        wg_min = min(input[i], wg_min);
> +    }
>      for(uint32_t i = 0; i < SIMD_SIZE; i++)
>        expected[i] = wg_min;
>    }
> @@ -85,7 +105,8 @@ template<class T>
>  static void generate_data(WG_FUNCTION wg_func,
>                     T* &input,
>                     T* &expected,
> -                   size_t SIMD_SIZE)
> +                   size_t SIMD_SIZE,
> +                   bool IS_HALF)
>  {
>    input = new T[WG_GLOBAL_SIZE];
>    expected = new T[WG_GLOBAL_SIZE];
> @@ -115,6 +136,8 @@ static void generate_data(WG_FUNCTION wg_func,
>          /* add trailing random bits, tests GENERAL cases */
>          input[gid + lid] += (rand() % 112);
>          /* always last bit is 1, ideal test ALL/ANY */
> +        if (IS_HALF)
> +          input[gid + lid] = __float_to_half(as_uint((float)input[gid +
> + lid]/2));
>        } else {
>          input[gid + lid] += rand();
>          input[gid + lid] += rand() / ((float)RAND_MAX + 1); @@ -129,7 +152,7
> @@ static void generate_data(WG_FUNCTION wg_func,
>      }
> 
>      /* expected values */
> -    compute_expected(wg_func, input + gid, expected + gid, SIMD_SIZE);
> +    compute_expected(wg_func, input + gid, expected + gid, SIMD_SIZE,
> + IS_HALF);
> 
>  #if DEBUG_STDOUT
>      /* output expected input */
> @@ -152,7 +175,8 @@ static void generate_data(WG_FUNCTION wg_func,
> template<class T>  static void subgroup_generic(WG_FUNCTION wg_func,
>                         T* input,
> -                       T* expected)
> +                       T* expected,
> +                       bool IS_HALF = false)
>  {
>    /* get simd size */
>    globals[0] = WG_GLOBAL_SIZE;
> @@ -161,7 +185,7 @@ static void subgroup_generic(WG_FUNCTION
> wg_func,
> 
> OCL_CALL(utestclGetKernelSubGroupInfoKHR,kernel,device,CL_KERNEL_M
> AX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR,sizeof(size_t)*1,locals,sizeof(si
> ze_t),&SIMD_SIZE,NULL);
> 
>    /* input and expected data */
> -  generate_data(wg_func, input, expected, SIMD_SIZE);
> +  generate_data(wg_func, input, expected, SIMD_SIZE, IS_HALF);
> 
>    /* prepare input for data type */
>    OCL_CREATE_BUFFER(buf[0], 0, WG_GLOBAL_SIZE * sizeof(T), NULL); @@
> -185,8 +209,22 @@ static void subgroup_generic(WG_FUNCTION wg_func,
>    for (uint32_t i = 0; i < WG_GLOBAL_SIZE; i++)
>      if(((T *)buf_data[1])[i] != *(expected + i))
>      {
> +      if (IS_HALF) {
> +        float num_computed = as_float(__half_to_float(((T *)buf_data[1])[i]));
> +        float num_expected = as_float(__half_to_float(*(expected + i)));
> +        float num_diff = abs(num_computed - num_expected) /
> abs(num_expected);
> +        if (num_diff > 0.03f) {
> +          mismatches++;
> +        }
> +#if DEBUG_STDOUT
> +          /* output mismatch */
> +          cout << "Err at " << i << ", " << num_computed
> +               << " != " << num_expected << " diff: " <<num_diff
> +<<endl; #endif
> +        //}
> +      }
>        /* found mismatch on integer, increment */
> -      if (numeric_limits<T>::is_integer) {
> +      else if (numeric_limits<T>::is_integer) {
>          mismatches++;
> 
>  #if DEBUG_STDOUT
> @@ -305,6 +343,20 @@ void compiler_subgroup_reduce_add_float(void)
>    subgroup_generic(WG_REDUCE_ADD, input, expected);  }
> MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_reduce_add_float);
> +void compiler_subgroup_reduce_add_half(void)
> +{
> +  if(!cl_check_subgroups())
> +    return;
> +  if(!cl_check_half())
> +    return;
> +  cl_half *input = NULL;
> +  cl_half *expected = NULL;
> +  OCL_CALL(cl_kernel_init, "compiler_subgroup_reduce.cl",
> +                           "compiler_subgroup_reduce_add_half",
> +                           SOURCE, "-DHALF");
> +  subgroup_generic(WG_REDUCE_ADD, input, expected, true); }
> +MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_reduce_add_half);
> 
>  /*
>   * Workgroup reduce max utest functions @@ -364,6 +416,20 @@ void
> compiler_subgroup_reduce_max_float(void)
>    subgroup_generic(WG_REDUCE_MAX, input, expected);  }
> MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_reduce_max_float);
> +void compiler_subgroup_reduce_max_half(void)
> +{
> +  if(!cl_check_subgroups())
> +    return;
> +  if(!cl_check_half())
> +    return;
> +  cl_half *input = NULL;
> +  cl_half *expected = NULL;
> +  OCL_CALL(cl_kernel_init, "compiler_subgroup_reduce.cl",
> +                           "compiler_subgroup_reduce_max_half",
> +                           SOURCE, "-DHALF");
> +  subgroup_generic(WG_REDUCE_MAX, input, expected, true); }
> +MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_reduce_max_half);
> 
>  /*
>   * Workgroup reduce min utest functions @@ -423,3 +489,17 @@ void
> compiler_subgroup_reduce_min_float(void)
>    subgroup_generic(WG_REDUCE_MIN, input, expected);  }
> MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_reduce_min_float);
> +void compiler_subgroup_reduce_min_half(void)
> +{
> +  if(!cl_check_subgroups())
> +    return;
> +  if(!cl_check_half())
> +    return;
> +  cl_half *input = NULL;
> +  cl_half *expected = NULL;
> +  OCL_CALL(cl_kernel_init, "compiler_subgroup_reduce.cl",
> +                           "compiler_subgroup_reduce_min_half",
> +                           SOURCE, "-DHALF");
> +  subgroup_generic(WG_REDUCE_MIN, input, expected, true); }
> +MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_reduce_min_half);
> diff --git a/utests/compiler_subgroup_scan_exclusive.cpp
> b/utests/compiler_subgroup_scan_exclusive.cpp
> index 1a21b59..e51b78d 100644
> --- a/utests/compiler_subgroup_scan_exclusive.cpp
> +++ b/utests/compiler_subgroup_scan_exclusive.cpp
> @@ -32,36 +32,56 @@ template<class T>
>  static void compute_expected(WG_FUNCTION wg_func,
>                      T* input,
>                      T* expected,
> -                    size_t SIMD_SIZE)
> +                    size_t SIMD_SIZE,
> +                    bool IS_HALF)
>  {
>    if(wg_func == WG_SCAN_EXCLUSIVE_ADD)
>    {
>      expected[0] = 0;
>      expected[1] = input[0];
> -    for(uint32_t i = 2; i < SIMD_SIZE; i++)
> -      expected[i] = input[i - 1] + expected[i - 1];
> +    for(uint32_t i = 2; i < SIMD_SIZE; i++) {
> +      if (IS_HALF)
> +        expected[i] = __float_to_half(as_uint(as_float(__half_to_float(input[i
> - 1])) +
> +                                              as_float(__half_to_float(expected[i - 1]))));
> +      else
> +        expected[i] = input[i - 1] + expected[i - 1];
> +    }
>    }
>    else if(wg_func == WG_SCAN_EXCLUSIVE_MAX)
>    {
> -    if(numeric_limits<T>::is_integer)
> +    if(IS_HALF)
> +      expected[0] = 0xFC00;
> +    else if(numeric_limits<T>::is_integer)
>        expected[0] = numeric_limits<T>::min();
>      else
>        expected[0] = - numeric_limits<T>::infinity();
> 
>      expected[1] = input[0];
> -    for(uint32_t i = 2; i < SIMD_SIZE; i++)
> -      expected[i] = max(input[i - 1], expected[i - 1]);
> +    for(uint32_t i = 2; i < SIMD_SIZE; i++) {
> +      if (IS_HALF)
> +        expected[i] = (as_float(__half_to_float(input[i - 1])) >
> as_float(__half_to_float(expected[i - 1]))) ?
> +                      input[i - 1] : expected[i - 1];
> +      else
> +        expected[i] = max(input[i - 1], expected[i - 1]);
> +    }
>    }
>    else if(wg_func == WG_SCAN_EXCLUSIVE_MIN)
>    {
> -    if(numeric_limits<T>::is_integer)
> +    if(IS_HALF)
> +      expected[0] = 0x7C00;
> +    else if(numeric_limits<T>::is_integer)
>        expected[0] = numeric_limits<T>::max();
>      else
>        expected[0] = numeric_limits<T>::infinity();
> 
>      expected[1] = input[0];
> -    for(uint32_t i = 2; i < SIMD_SIZE; i++)
> -      expected[i] = min(input[i - 1], expected[i - 1]);
> +    for(uint32_t i = 2; i < SIMD_SIZE; i++) {
> +      if (IS_HALF)
> +        expected[i] = (as_float(__half_to_float(input[i - 1])) <
> as_float(__half_to_float(expected[i - 1]))) ?
> +                      input[i - 1] : expected[i - 1];
> +      else
> +        expected[i] = min(input[i - 1], expected[i - 1]);
> +    }
>    }
>  }
> 
> @@ -73,7 +93,8 @@ template<class T>
>  static void generate_data(WG_FUNCTION wg_func,
>                     T* &input,
>                     T* &expected,
> -                   size_t SIMD_SIZE)
> +                   size_t SIMD_SIZE,
> +                   bool IS_HALF)
>  {
>    input = new T[WG_GLOBAL_SIZE];
>    expected = new T[WG_GLOBAL_SIZE];
> @@ -101,6 +122,8 @@ static void generate_data(WG_FUNCTION wg_func,
>        input[gid + lid] += ((rand() % 2 - 1) * base_val);
>        /* add trailing random bits, tests GENERAL cases */
>        input[gid + lid] += (rand() % 112);
> +      if (IS_HALF)
> +        input[gid + lid] = __float_to_half(as_uint((float)input[gid +
> + lid]/2));
> 
>  #if DEBUG_STDOUT
>        /* output generated input */
> @@ -111,7 +134,7 @@ static void generate_data(WG_FUNCTION wg_func,
>      }
> 
>      /* expected values */
> -    compute_expected(wg_func, input + gid, expected + gid, SIMD_SIZE);
> +    compute_expected(wg_func, input + gid, expected + gid, SIMD_SIZE,
> + IS_HALF);
> 
>  #if DEBUG_STDOUT
>      /* output expected input */
> @@ -134,7 +157,8 @@ static void generate_data(WG_FUNCTION wg_func,
> template<class T>  static void subgroup_generic(WG_FUNCTION wg_func,
>                         T* input,
> -                       T* expected)
> +                       T* expected,
> +                       bool IS_HALF = false)
>  {
>    /* get simd size */
>    globals[0] = WG_GLOBAL_SIZE;
> @@ -143,7 +167,7 @@ static void subgroup_generic(WG_FUNCTION
> wg_func,
> 
> OCL_CALL(utestclGetKernelSubGroupInfoKHR,kernel,device,CL_KERNEL_M
> AX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR,sizeof(size_t)*1,locals,sizeof(si
> ze_t),&SIMD_SIZE,NULL);
> 
>    /* input and expected data */
> -  generate_data(wg_func, input, expected, SIMD_SIZE);
> +  generate_data(wg_func, input, expected, SIMD_SIZE, IS_HALF);
> 
>    /* prepare input for data type */
>    OCL_CREATE_BUFFER(buf[0], 0, WG_GLOBAL_SIZE * sizeof(T), NULL); @@
> -166,8 +190,21 @@ static void subgroup_generic(WG_FUNCTION wg_func,
>    for (uint32_t i = 0; i < WG_GLOBAL_SIZE; i++)
>      if(((T *)buf_data[1])[i] != *(expected + i))
>      {
> +      if (IS_HALF) {
> +        float num_computed = as_float(__half_to_float(((T *)buf_data[1])[i]));
> +        float num_expected = as_float(__half_to_float(*(expected + i)));
> +        float num_diff = abs(num_computed - num_expected) /
> abs(num_expected);
> +        if (num_diff > 0.03f) {
> +          mismatches++;
> +#if DEBUG_STDOUT
> +          /* output mismatch */
> +          cout << "Err at " << i << ", " << num_computed
> +               << " != " << num_expected <<" diff: " <<num_diff <<endl;
> +#endif
> +        }
> +      }
>        /* found mismatch on integer, increment */
> -      if(numeric_limits<T>::is_integer){
> +      else if (numeric_limits<T>::is_integer) {
>          mismatches++;
> 
>  #if DEBUG_STDOUT
> @@ -261,6 +298,20 @@ void
> compiler_subgroup_scan_exclusive_add_float(void)
>    subgroup_generic(WG_SCAN_EXCLUSIVE_ADD, input, expected);  }
> MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_scan_exclusive_add_f
> loat);
> +void compiler_subgroup_scan_exclusive_add_half(void)
> +{
> +  if(!cl_check_subgroups())
> +    return;
> +  if(!cl_check_half())
> +    return;
> +  cl_half *input = NULL;
> +  cl_half *expected = NULL;
> +  OCL_CALL(cl_kernel_init, "compiler_subgroup_scan_exclusive.cl",
> +                           "compiler_subgroup_scan_exclusive_add_half",
> +                           SOURCE, "-DHALF");
> +  subgroup_generic(WG_SCAN_EXCLUSIVE_ADD, input, expected, true); }
> +MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_scan_exclusive_add
> _half);
> 
>  /*
>   * Workgroup scan_exclusive max utest functions @@ -320,6 +371,20 @@
> void compiler_subgroup_scan_exclusive_max_float(void)
>    subgroup_generic(WG_SCAN_EXCLUSIVE_MAX, input, expected);  }
> MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_scan_exclusive_max_
> float);
> +void compiler_subgroup_scan_exclusive_max_half(void)
> +{
> +  if(!cl_check_subgroups())
> +    return;
> +  if(!cl_check_half())
> +    return;
> +  cl_half *input = NULL;
> +  cl_half *expected = NULL;
> +  OCL_CALL(cl_kernel_init, "compiler_subgroup_scan_exclusive.cl",
> +                           "compiler_subgroup_scan_exclusive_max_half",
> +                           SOURCE, "-DHALF");
> +  subgroup_generic(WG_SCAN_EXCLUSIVE_MAX, input, expected, true); }
> +MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_scan_exclusive_max
> _half);
> 
>  /*
>   * Workgroup scan_exclusive min utest functions @@ -379,3 +444,17 @@
> void compiler_subgroup_scan_exclusive_min_float(void)
>    subgroup_generic(WG_SCAN_EXCLUSIVE_MIN, input, expected);  }
> MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_scan_exclusive_min_f
> loat);
> +void compiler_subgroup_scan_exclusive_min_half(void)
> +{
> +  if(!cl_check_subgroups())
> +    return;
> +  if(!cl_check_half())
> +    return;
> +  cl_half *input = NULL;
> +  cl_half *expected = NULL;
> +  OCL_CALL(cl_kernel_init, "compiler_subgroup_scan_exclusive.cl",
> +                           "compiler_subgroup_scan_exclusive_min_half",
> +                           SOURCE, "-DHALF");
> +  subgroup_generic(WG_SCAN_EXCLUSIVE_MIN, input, expected, true); }
> +MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_scan_exclusive_min
> _half);
> diff --git a/utests/compiler_subgroup_scan_inclusive.cpp
> b/utests/compiler_subgroup_scan_inclusive.cpp
> index fa32855..0f0df1c 100644
> --- a/utests/compiler_subgroup_scan_inclusive.cpp
> +++ b/utests/compiler_subgroup_scan_inclusive.cpp
> @@ -32,25 +32,41 @@ template<class T>
>  static void compute_expected(WG_FUNCTION wg_func,
>                      T* input,
>                      T* expected,
> -                    size_t SIMD_SIZE)
> +                    size_t SIMD_SIZE,
> +                    bool IS_HALF)
>  {
>    if(wg_func == WG_SCAN_INCLUSIVE_ADD)
>    {
>      expected[0] = input[0];
> -    for(uint32_t i = 1; i < SIMD_SIZE; i++)
> -      expected[i] = input[i] + expected[i - 1];
> +    for(uint32_t i = 1; i < SIMD_SIZE; i++) {
> +      if (IS_HALF)
> +        expected[i] =
> __float_to_half(as_uint(as_float(__half_to_float(input[i])) +
> +                                              as_float(__half_to_float(expected[i - 1]))));
> +      else
> +        expected[i] = input[i] + expected[i - 1];
> +    }
>    }
>    else if(wg_func == WG_SCAN_INCLUSIVE_MAX)
>    {
>      expected[0] = input[0];
> -    for(uint32_t i = 1; i < SIMD_SIZE; i++)
> -      expected[i] = max(input[i], expected[i - 1]);
> +    for(uint32_t i = 1; i < SIMD_SIZE; i++) {
> +      if (IS_HALF)
> +        expected[i] = (as_float(__half_to_float(input[i])) >
> as_float(__half_to_float(expected[i - 1]))) ?
> +                      input[i] : expected[i - 1];
> +      else
> +        expected[i] = max(input[i], expected[i - 1]);
> +    }
>    }
>    else if(wg_func == WG_SCAN_INCLUSIVE_MIN)
>    {
>      expected[0] = input[0];
> -    for(uint32_t i = 1; i < SIMD_SIZE; i++)
> -      expected[i] = min(input[i], expected[i - 1]);
> +    for(uint32_t i = 1; i < SIMD_SIZE; i++) {
> +      if (IS_HALF)
> +        expected[i] = (as_float(__half_to_float(input[i])) <
> as_float(__half_to_float(expected[i - 1]))) ?
> +                      input[i] : expected[i - 1];
> +      else
> +        expected[i] = min(input[i], expected[i - 1]);
> +    }
>    }
>  }
> 
> @@ -62,7 +78,8 @@ template<class T>
>  static void generate_data(WG_FUNCTION wg_func,
>                     T* &input,
>                     T* &expected,
> -                   size_t SIMD_SIZE)
> +                   size_t SIMD_SIZE,
> +                   bool IS_HALF)
>  {
>    input = new T[WG_GLOBAL_SIZE];
>    expected = new T[WG_GLOBAL_SIZE];
> @@ -91,6 +108,8 @@ static void generate_data(WG_FUNCTION wg_func,
>        input[gid + lid] += ((rand() % 2 - 1) * base_val);
>        /* add trailing random bits, tests GENERAL cases */
>        input[gid + lid] += (rand() % 112);
> +      if (IS_HALF)
> +        input[gid + lid] = __float_to_half(as_uint((float)input[gid +
> + lid]/2));
> 
>  #if DEBUG_STDOUT
>        /* output generated input */
> @@ -101,7 +120,7 @@ static void generate_data(WG_FUNCTION wg_func,
>      }
> 
>      /* expected values */
> -    compute_expected(wg_func, input + gid, expected + gid, SIMD_SIZE);
> +    compute_expected(wg_func, input + gid, expected + gid, SIMD_SIZE,
> + IS_HALF);
> 
>  #if DEBUG_STDOUT
>      /* output expected input */
> @@ -124,7 +143,8 @@ static void generate_data(WG_FUNCTION wg_func,
> template<class T>  static void subgroup_generic(WG_FUNCTION wg_func,
>                         T* input,
> -                       T* expected)
> +                       T* expected,
> +                       bool IS_HALF = false)
>  {
>    /* get simd size */
>    globals[0] = WG_GLOBAL_SIZE;
> @@ -133,7 +153,7 @@ static void subgroup_generic(WG_FUNCTION
> wg_func,
> 
> OCL_CALL(utestclGetKernelSubGroupInfoKHR,kernel,device,CL_KERNEL_M
> AX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR,sizeof(size_t)*1,locals,sizeof(si
> ze_t),&SIMD_SIZE,NULL);
> 
>    /* input and expected data */
> -  generate_data(wg_func, input, expected, SIMD_SIZE);
> +  generate_data(wg_func, input, expected, SIMD_SIZE, IS_HALF);
> 
>    /* prepare input for data type */
>    OCL_CREATE_BUFFER(buf[0], 0, WG_GLOBAL_SIZE * sizeof(T), NULL); @@
> -156,8 +176,21 @@ static void subgroup_generic(WG_FUNCTION wg_func,
>    for (uint32_t i = 0; i < WG_GLOBAL_SIZE; i++)
>      if(((T *)buf_data[1])[i] != *(expected + i))
>      {
> +      if (IS_HALF) {
> +        float num_computed = as_float(__half_to_float(((T *)buf_data[1])[i]));
> +        float num_expected = as_float(__half_to_float(*(expected + i)));
> +        float num_diff = abs(num_computed - num_expected) /
> abs(num_expected);
> +        if (num_diff > 0.03f) {
> +          mismatches++;
> +#if DEBUG_STDOUT
> +          /* output mismatch */
> +          cout << "Err at " << i << ", " << num_computed
> +               << " != " << num_expected <<" diff: " <<num_diff <<endl;
> +#endif
> +        }
> +      }
>        /* found mismatch on integer, increment */
> -      if(numeric_limits<T>::is_integer){
> +      else if (numeric_limits<T>::is_integer) {
>          mismatches++;
> 
>  #if DEBUG_STDOUT
> @@ -251,6 +284,20 @@ void
> compiler_subgroup_scan_inclusive_add_float(void)
>    subgroup_generic(WG_SCAN_INCLUSIVE_ADD, input, expected);  }
> MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_scan_inclusive_add_fl
> oat);
> +void compiler_subgroup_scan_inclusive_add_half(void)
> +{
> +  if(!cl_check_subgroups())
> +    return;
> +  if(!cl_check_half())
> +    return;
> +  cl_half *input = NULL;
> +  cl_half *expected = NULL;
> +  OCL_CALL(cl_kernel_init, "compiler_subgroup_scan_inclusive.cl",
> +                           "compiler_subgroup_scan_inclusive_add_half",
> +                           SOURCE, "-DHALF");
> +  subgroup_generic(WG_SCAN_INCLUSIVE_ADD, input, expected, true); }
> +MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_scan_inclusive_add_
> half);
> 
>  /*
>   * Workgroup scan_inclusive max utest functions @@ -310,6 +357,20 @@
> void compiler_subgroup_scan_inclusive_max_float(void)
>    subgroup_generic(WG_SCAN_INCLUSIVE_MAX, input, expected);  }
> MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_scan_inclusive_max_f
> loat);
> +void compiler_subgroup_scan_inclusive_max_half(void)
> +{
> +  if(!cl_check_subgroups())
> +    return;
> +  if(!cl_check_half())
> +    return;
> +  cl_half *input = NULL;
> +  cl_half *expected = NULL;
> +  OCL_CALL(cl_kernel_init, "compiler_subgroup_scan_inclusive.cl",
> +                           "compiler_subgroup_scan_inclusive_max_half",
> +                           SOURCE, "-DHALF");
> +  subgroup_generic(WG_SCAN_INCLUSIVE_MAX, input, expected, true); }
> +MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_scan_inclusive_max
> _half);
> 
>  /*
>   * Workgroup scan_inclusive min utest functions @@ -369,4 +430,17 @@
> void compiler_subgroup_scan_inclusive_min_float(void)
>    subgroup_generic(WG_SCAN_INCLUSIVE_MIN, input, expected);  }
> MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_scan_inclusive_min_fl
> oat);
> -
> +void compiler_subgroup_scan_inclusive_min_half(void)
> +{
> +  if(!cl_check_subgroups())
> +    return;
> +  if(!cl_check_half())
> +    return;
> +  cl_half *input = NULL;
> +  cl_half *expected = NULL;
> +  OCL_CALL(cl_kernel_init, "compiler_subgroup_scan_inclusive.cl",
> +                           "compiler_subgroup_scan_inclusive_min_half",
> +                           SOURCE, "-DHALF");
> +  subgroup_generic(WG_SCAN_INCLUSIVE_MIN, input, expected, true); }
> +MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_scan_inclusive_min_
> half);
> --
> 2.7.4
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list