[Beignet] [PATCH] Utest: Add test for half type subgroup functions
Xiuli Pan
xiuli.pan at intel.com
Thu Aug 18 04:56:37 UTC 2016
From: Pan Xiuli <xiuli.pan at intel.com>
Check if device support subgroup and half first, use build options
to hide code for unsported device.
V2: Fix half part test case for utest multithread.
Signed-off-by: Pan Xiuli <xiuli.pan at intel.com>
---
kernels/compiler_subgroup_broadcast.cl | 16 ++++-
kernels/compiler_subgroup_reduce.cl | 19 +++++
kernels/compiler_subgroup_scan_exclusive.cl | 19 +++++
kernels/compiler_subgroup_scan_inclusive.cl | 19 +++++
utests/compiler_subgroup_broadcast.cpp | 27 +++++--
utests/compiler_subgroup_reduce.cpp | 104 +++++++++++++++++++++++----
utests/compiler_subgroup_scan_exclusive.cpp | 107 ++++++++++++++++++++++++----
utests/compiler_subgroup_scan_inclusive.cpp | 100 ++++++++++++++++++++++----
8 files changed, 367 insertions(+), 44 deletions(-)
diff --git a/kernels/compiler_subgroup_broadcast.cl b/kernels/compiler_subgroup_broadcast.cl
index 4f21cf5..8c155ee 100644
--- a/kernels/compiler_subgroup_broadcast.cl
+++ b/kernels/compiler_subgroup_broadcast.cl
@@ -1,7 +1,7 @@
/*
* Subgroup broadcast 1D functions
*/
-
+#ifndef HALF
kernel void compiler_subgroup_broadcast_imm_int(global int *src,
global int *dst,
uint simd_id)
@@ -32,3 +32,17 @@ kernel void compiler_subgroup_broadcast_long(global long *src,
long broadcast_val = sub_group_broadcast(val, simd_id);
dst[index] = broadcast_val;
}
+#else
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+kernel void compiler_subgroup_broadcast_half(global half *src,
+ global half *dst,
+ uint simd_id)
+{
+ uint index = get_global_id(0);
+
+ half val = src[index];
+ half broadcast_val = sub_group_broadcast(val, simd_id);
+ printf("%d val %d is %d\n",index,as_ushort(val), as_ushort(broadcast_val));
+ dst[index] = broadcast_val;
+}
+#endif
diff --git a/kernels/compiler_subgroup_reduce.cl b/kernels/compiler_subgroup_reduce.cl
index 77ffb07..6d7ecfd 100644
--- a/kernels/compiler_subgroup_reduce.cl
+++ b/kernels/compiler_subgroup_reduce.cl
@@ -1,6 +1,7 @@
/*
* Subgroup any all functions
*/
+#ifndef HALF
kernel void compiler_subgroup_any(global int *src, global int *dst) {
int val = src[get_global_id(0)];
int predicate = sub_group_any(val);
@@ -134,3 +135,21 @@ kernel void compiler_subgroup_reduce_min_float(global float *src, global float *
float sum = sub_group_reduce_min(val);
dst[get_global_id(0)] = sum;
}
+#else
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+kernel void compiler_subgroup_reduce_add_half(global half *src, global half *dst) {
+ half val = src[get_global_id(0)];
+ half sum = sub_group_reduce_add(val);
+ dst[get_global_id(0)] = sum;
+}
+kernel void compiler_subgroup_reduce_max_half(global half *src, global half *dst) {
+ half val = src[get_global_id(0)];
+ half sum = sub_group_reduce_max(val);
+ dst[get_global_id(0)] = sum;
+}
+kernel void compiler_subgroup_reduce_min_half(global half *src, global half *dst) {
+ half val = src[get_global_id(0)];
+ half sum = sub_group_reduce_min(val);
+ dst[get_global_id(0)] = sum;
+}
+#endif
diff --git a/kernels/compiler_subgroup_scan_exclusive.cl b/kernels/compiler_subgroup_scan_exclusive.cl
index afc00d0..ca0ada2 100644
--- a/kernels/compiler_subgroup_scan_exclusive.cl
+++ b/kernels/compiler_subgroup_scan_exclusive.cl
@@ -1,6 +1,7 @@
/*
* Subgroup scan exclusive add functions
*/
+#ifndef HALF
kernel void compiler_subgroup_scan_exclusive_add_int(global int *src, global int *dst) {
int val = src[get_global_id(0)];
int sum = sub_group_scan_exclusive_add(val);
@@ -96,3 +97,21 @@ kernel void compiler_subgroup_scan_exclusive_min_float(global float *src, global
float sum = sub_group_scan_exclusive_min(val);
dst[get_global_id(0)] = sum;
}
+#else
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+kernel void compiler_subgroup_scan_exclusive_add_half(global half *src, global half *dst) {
+ half val = src[get_global_id(0)];
+ half sum = sub_group_scan_exclusive_add(val);
+ dst[get_global_id(0)] = sum;
+}
+kernel void compiler_subgroup_scan_exclusive_max_half(global half *src, global half *dst) {
+ half val = src[get_global_id(0)];
+ half sum = sub_group_scan_exclusive_max(val);
+ dst[get_global_id(0)] = sum;
+}
+kernel void compiler_subgroup_scan_exclusive_min_half(global half *src, global half *dst) {
+ half val = src[get_global_id(0)];
+ half sum = sub_group_scan_exclusive_min(val);
+ dst[get_global_id(0)] = sum;
+}
+#endif
diff --git a/kernels/compiler_subgroup_scan_inclusive.cl b/kernels/compiler_subgroup_scan_inclusive.cl
index da1a6e6..e97521c 100644
--- a/kernels/compiler_subgroup_scan_inclusive.cl
+++ b/kernels/compiler_subgroup_scan_inclusive.cl
@@ -1,6 +1,7 @@
/*
* Subgroup scan inclusive add functions
*/
+#ifndef HALF
kernel void compiler_subgroup_scan_inclusive_add_int(global int *src, global int *dst) {
int val = src[get_global_id(0)];
int sum = sub_group_scan_inclusive_add(val);
@@ -96,3 +97,21 @@ kernel void compiler_subgroup_scan_inclusive_min_float(global float *src, global
float sum = sub_group_scan_inclusive_min(val);
dst[get_global_id(0)] = sum;
}
+#else
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+kernel void compiler_subgroup_scan_inclusive_add_half(global half *src, global half *dst) {
+ half val = src[get_global_id(0)];
+ half sum = sub_group_scan_inclusive_add(val);
+ dst[get_global_id(0)] = sum;
+}
+kernel void compiler_subgroup_scan_inclusive_max_half(global half *src, global half *dst) {
+ half val = src[get_global_id(0)];
+ half sum = sub_group_scan_inclusive_max(val);
+ dst[get_global_id(0)] = sum;
+}
+kernel void compiler_subgroup_scan_inclusive_min_half(global half *src, global half *dst) {
+ half val = src[get_global_id(0)];
+ half sum = sub_group_scan_inclusive_min(val);
+ dst[get_global_id(0)] = sum;
+}
+#endif
diff --git a/utests/compiler_subgroup_broadcast.cpp b/utests/compiler_subgroup_broadcast.cpp
index 2835161..9a7979c 100644
--- a/utests/compiler_subgroup_broadcast.cpp
+++ b/utests/compiler_subgroup_broadcast.cpp
@@ -59,10 +59,15 @@ static void generate_data(T* &input,
/* initially 0, augment after */
input[gid + lid] = 0;
- /* check all data types, test ideal for QWORD types */
- input[gid + lid] += ((rand() % 2 - 1) * base_val);
- /* add trailing random bits, tests GENERAL cases */
- input[gid + lid] += (rand() % 112);
+ if(sizeof(T) == 2) {
+ input[gid + lid] = __float_to_half(as_uint((float)(gid + lid)));
+ }
+ else {
+ /* check all data types, test ideal for QWORD types */
+ input[gid + lid] += ((rand() % 2 - 1) * base_val);
+ /* add trailing random bits, tests GENERAL cases */
+ input[gid + lid] += (rand() % 112);
+ }
#if DEBUG_STDOUT
/* output generated input */
@@ -185,3 +190,17 @@ void compiler_subgroup_broadcast_long(void)
subgroup_generic(input, expected);
}
MAKE_UTEST_FROM_FUNCTION_WITH_ISSUE(compiler_subgroup_broadcast_long);
+void compiler_subgroup_broadcast_half(void)
+{
+ if(!cl_check_subgroups())
+ return;
+ if(!cl_check_half())
+ return;
+ cl_half *input = NULL;
+ cl_half *expected = NULL;
+ OCL_CALL(cl_kernel_init, "compiler_subgroup_broadcast.cl",
+ "compiler_subgroup_broadcast_half",
+ SOURCE, "-DHALF");
+ subgroup_generic(input, expected);
+}
+MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_broadcast_half);
diff --git a/utests/compiler_subgroup_reduce.cpp b/utests/compiler_subgroup_reduce.cpp
index 3c3df06..ff545c6 100644
--- a/utests/compiler_subgroup_reduce.cpp
+++ b/utests/compiler_subgroup_reduce.cpp
@@ -33,7 +33,8 @@ template<class T>
static void compute_expected(WG_FUNCTION wg_func,
T* input,
T* expected,
- size_t SIMD_SIZE)
+ size_t SIMD_SIZE,
+ bool IS_HALF)
{
if(wg_func == WG_ANY)
{
@@ -54,24 +55,43 @@ static void compute_expected(WG_FUNCTION wg_func,
else if(wg_func == WG_REDUCE_ADD)
{
T wg_sum = input[0];
- for(uint32_t i = 1; i < SIMD_SIZE; i++)
- wg_sum += input[i];
+ if(IS_HALF) {
+ float wg_sum_tmp = 0.0f;
+ for(uint32_t i = 0; i < SIMD_SIZE; i++) {
+ wg_sum_tmp += as_float(__half_to_float(input[i]));
+ }
+ wg_sum = __float_to_half(as_uint(wg_sum_tmp));
+ }
+ else {
+ for(uint32_t i = 1; i < SIMD_SIZE; i++)
+ wg_sum += input[i];
+ }
for(uint32_t i = 0; i < SIMD_SIZE; i++)
expected[i] = wg_sum;
}
else if(wg_func == WG_REDUCE_MAX)
{
T wg_max = input[0];
- for(uint32_t i = 1; i < SIMD_SIZE; i++)
- wg_max = max(input[i], wg_max);
+ for(uint32_t i = 1; i < SIMD_SIZE; i++) {
+ if (IS_HALF) {
+ wg_max = (as_float(__half_to_float(input[i])) > as_float(__half_to_float(wg_max))) ? input[i] : wg_max;
+ }
+ else
+ wg_max = max(input[i], wg_max);
+ }
for(uint32_t i = 0; i < SIMD_SIZE; i++)
expected[i] = wg_max;
}
else if(wg_func == WG_REDUCE_MIN)
{
T wg_min = input[0];
- for(uint32_t i = 1; i < SIMD_SIZE; i++)
- wg_min = min(input[i], wg_min);
+ for(uint32_t i = 1; i < SIMD_SIZE; i++) {
+ if (IS_HALF) {
+ wg_min= (as_float(__half_to_float(input[i])) < as_float(__half_to_float(wg_min))) ? input[i] : wg_min;
+ }
+ else
+ wg_min = min(input[i], wg_min);
+ }
for(uint32_t i = 0; i < SIMD_SIZE; i++)
expected[i] = wg_min;
}
@@ -85,7 +105,8 @@ template<class T>
static void generate_data(WG_FUNCTION wg_func,
T* &input,
T* &expected,
- size_t SIMD_SIZE)
+ size_t SIMD_SIZE,
+ bool IS_HALF)
{
input = new T[WG_GLOBAL_SIZE];
expected = new T[WG_GLOBAL_SIZE];
@@ -115,6 +136,8 @@ static void generate_data(WG_FUNCTION wg_func,
/* add trailing random bits, tests GENERAL cases */
input[gid + lid] += (rand() % 112);
/* always last bit is 1, ideal test ALL/ANY */
+ if (IS_HALF)
+ input[gid + lid] = __float_to_half(as_uint((float)input[gid + lid]/2));
} else {
input[gid + lid] += rand();
input[gid + lid] += rand() / ((float)RAND_MAX + 1);
@@ -129,7 +152,7 @@ static void generate_data(WG_FUNCTION wg_func,
}
/* expected values */
- compute_expected(wg_func, input + gid, expected + gid, SIMD_SIZE);
+ compute_expected(wg_func, input + gid, expected + gid, SIMD_SIZE, IS_HALF);
#if DEBUG_STDOUT
/* output expected input */
@@ -152,7 +175,8 @@ static void generate_data(WG_FUNCTION wg_func,
template<class T>
static void subgroup_generic(WG_FUNCTION wg_func,
T* input,
- T* expected)
+ T* expected,
+ bool IS_HALF = false)
{
/* get simd size */
globals[0] = WG_GLOBAL_SIZE;
@@ -161,7 +185,7 @@ static void subgroup_generic(WG_FUNCTION wg_func,
OCL_CALL(utestclGetKernelSubGroupInfoKHR,kernel,device,CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR,sizeof(size_t)*1,locals,sizeof(size_t),&SIMD_SIZE,NULL);
/* input and expected data */
- generate_data(wg_func, input, expected, SIMD_SIZE);
+ generate_data(wg_func, input, expected, SIMD_SIZE, IS_HALF);
/* prepare input for data type */
OCL_CREATE_BUFFER(buf[0], 0, WG_GLOBAL_SIZE * sizeof(T), NULL);
@@ -185,8 +209,22 @@ static void subgroup_generic(WG_FUNCTION wg_func,
for (uint32_t i = 0; i < WG_GLOBAL_SIZE; i++)
if(((T *)buf_data[1])[i] != *(expected + i))
{
+ if (IS_HALF) {
+ float num_computed = as_float(__half_to_float(((T *)buf_data[1])[i]));
+ float num_expected = as_float(__half_to_float(*(expected + i)));
+ float num_diff = abs(num_computed - num_expected) / abs(num_expected);
+ if (num_diff > 0.03f) {
+ mismatches++;
+ }
+#if DEBUG_STDOUT
+ /* output mismatch */
+ cout << "Err at " << i << ", " << num_computed
+ << " != " << num_expected << " diff: " <<num_diff <<endl;
+#endif
+ //}
+ }
/* found mismatch on integer, increment */
- if (numeric_limits<T>::is_integer) {
+ else if (numeric_limits<T>::is_integer) {
mismatches++;
#if DEBUG_STDOUT
@@ -305,6 +343,20 @@ void compiler_subgroup_reduce_add_float(void)
subgroup_generic(WG_REDUCE_ADD, input, expected);
}
MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_reduce_add_float);
+void compiler_subgroup_reduce_add_half(void)
+{
+ if(!cl_check_subgroups())
+ return;
+ if(!cl_check_half())
+ return;
+ cl_half *input = NULL;
+ cl_half *expected = NULL;
+ OCL_CALL(cl_kernel_init, "compiler_subgroup_reduce.cl",
+ "compiler_subgroup_reduce_add_half",
+ SOURCE, "-DHALF");
+ subgroup_generic(WG_REDUCE_ADD, input, expected, true);
+}
+MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_reduce_add_half);
/*
* Workgroup reduce max utest functions
@@ -364,6 +416,20 @@ void compiler_subgroup_reduce_max_float(void)
subgroup_generic(WG_REDUCE_MAX, input, expected);
}
MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_reduce_max_float);
+void compiler_subgroup_reduce_max_half(void)
+{
+ if(!cl_check_subgroups())
+ return;
+ if(!cl_check_half())
+ return;
+ cl_half *input = NULL;
+ cl_half *expected = NULL;
+ OCL_CALL(cl_kernel_init, "compiler_subgroup_reduce.cl",
+ "compiler_subgroup_reduce_max_half",
+ SOURCE, "-DHALF");
+ subgroup_generic(WG_REDUCE_MAX, input, expected, true);
+}
+MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_reduce_max_half);
/*
* Workgroup reduce min utest functions
@@ -423,3 +489,17 @@ void compiler_subgroup_reduce_min_float(void)
subgroup_generic(WG_REDUCE_MIN, input, expected);
}
MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_reduce_min_float);
+void compiler_subgroup_reduce_min_half(void)
+{
+ if(!cl_check_subgroups())
+ return;
+ if(!cl_check_half())
+ return;
+ cl_half *input = NULL;
+ cl_half *expected = NULL;
+ OCL_CALL(cl_kernel_init, "compiler_subgroup_reduce.cl",
+ "compiler_subgroup_reduce_min_half",
+ SOURCE, "-DHALF");
+ subgroup_generic(WG_REDUCE_MIN, input, expected, true);
+}
+MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_reduce_min_half);
diff --git a/utests/compiler_subgroup_scan_exclusive.cpp b/utests/compiler_subgroup_scan_exclusive.cpp
index 1a21b59..e51b78d 100644
--- a/utests/compiler_subgroup_scan_exclusive.cpp
+++ b/utests/compiler_subgroup_scan_exclusive.cpp
@@ -32,36 +32,56 @@ template<class T>
static void compute_expected(WG_FUNCTION wg_func,
T* input,
T* expected,
- size_t SIMD_SIZE)
+ size_t SIMD_SIZE,
+ bool IS_HALF)
{
if(wg_func == WG_SCAN_EXCLUSIVE_ADD)
{
expected[0] = 0;
expected[1] = input[0];
- for(uint32_t i = 2; i < SIMD_SIZE; i++)
- expected[i] = input[i - 1] + expected[i - 1];
+ for(uint32_t i = 2; i < SIMD_SIZE; i++) {
+ if (IS_HALF)
+ expected[i] = __float_to_half(as_uint(as_float(__half_to_float(input[i - 1])) +
+ as_float(__half_to_float(expected[i - 1]))));
+ else
+ expected[i] = input[i - 1] + expected[i - 1];
+ }
}
else if(wg_func == WG_SCAN_EXCLUSIVE_MAX)
{
- if(numeric_limits<T>::is_integer)
+ if(IS_HALF)
+ expected[0] = 0xFC00;
+ else if(numeric_limits<T>::is_integer)
expected[0] = numeric_limits<T>::min();
else
expected[0] = - numeric_limits<T>::infinity();
expected[1] = input[0];
- for(uint32_t i = 2; i < SIMD_SIZE; i++)
- expected[i] = max(input[i - 1], expected[i - 1]);
+ for(uint32_t i = 2; i < SIMD_SIZE; i++) {
+ if (IS_HALF)
+ expected[i] = (as_float(__half_to_float(input[i - 1])) > as_float(__half_to_float(expected[i - 1]))) ?
+ input[i - 1] : expected[i - 1];
+ else
+ expected[i] = max(input[i - 1], expected[i - 1]);
+ }
}
else if(wg_func == WG_SCAN_EXCLUSIVE_MIN)
{
- if(numeric_limits<T>::is_integer)
+ if(IS_HALF)
+ expected[0] = 0x7C00;
+ else if(numeric_limits<T>::is_integer)
expected[0] = numeric_limits<T>::max();
else
expected[0] = numeric_limits<T>::infinity();
expected[1] = input[0];
- for(uint32_t i = 2; i < SIMD_SIZE; i++)
- expected[i] = min(input[i - 1], expected[i - 1]);
+ for(uint32_t i = 2; i < SIMD_SIZE; i++) {
+ if (IS_HALF)
+ expected[i] = (as_float(__half_to_float(input[i - 1])) < as_float(__half_to_float(expected[i - 1]))) ?
+ input[i - 1] : expected[i - 1];
+ else
+ expected[i] = min(input[i - 1], expected[i - 1]);
+ }
}
}
@@ -73,7 +93,8 @@ template<class T>
static void generate_data(WG_FUNCTION wg_func,
T* &input,
T* &expected,
- size_t SIMD_SIZE)
+ size_t SIMD_SIZE,
+ bool IS_HALF)
{
input = new T[WG_GLOBAL_SIZE];
expected = new T[WG_GLOBAL_SIZE];
@@ -101,6 +122,8 @@ static void generate_data(WG_FUNCTION wg_func,
input[gid + lid] += ((rand() % 2 - 1) * base_val);
/* add trailing random bits, tests GENERAL cases */
input[gid + lid] += (rand() % 112);
+ if (IS_HALF)
+ input[gid + lid] = __float_to_half(as_uint((float)input[gid + lid]/2));
#if DEBUG_STDOUT
/* output generated input */
@@ -111,7 +134,7 @@ static void generate_data(WG_FUNCTION wg_func,
}
/* expected values */
- compute_expected(wg_func, input + gid, expected + gid, SIMD_SIZE);
+ compute_expected(wg_func, input + gid, expected + gid, SIMD_SIZE, IS_HALF);
#if DEBUG_STDOUT
/* output expected input */
@@ -134,7 +157,8 @@ static void generate_data(WG_FUNCTION wg_func,
template<class T>
static void subgroup_generic(WG_FUNCTION wg_func,
T* input,
- T* expected)
+ T* expected,
+ bool IS_HALF = false)
{
/* get simd size */
globals[0] = WG_GLOBAL_SIZE;
@@ -143,7 +167,7 @@ static void subgroup_generic(WG_FUNCTION wg_func,
OCL_CALL(utestclGetKernelSubGroupInfoKHR,kernel,device,CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR,sizeof(size_t)*1,locals,sizeof(size_t),&SIMD_SIZE,NULL);
/* input and expected data */
- generate_data(wg_func, input, expected, SIMD_SIZE);
+ generate_data(wg_func, input, expected, SIMD_SIZE, IS_HALF);
/* prepare input for data type */
OCL_CREATE_BUFFER(buf[0], 0, WG_GLOBAL_SIZE * sizeof(T), NULL);
@@ -166,8 +190,21 @@ static void subgroup_generic(WG_FUNCTION wg_func,
for (uint32_t i = 0; i < WG_GLOBAL_SIZE; i++)
if(((T *)buf_data[1])[i] != *(expected + i))
{
+ if (IS_HALF) {
+ float num_computed = as_float(__half_to_float(((T *)buf_data[1])[i]));
+ float num_expected = as_float(__half_to_float(*(expected + i)));
+ float num_diff = abs(num_computed - num_expected) / abs(num_expected);
+ if (num_diff > 0.03f) {
+ mismatches++;
+#if DEBUG_STDOUT
+ /* output mismatch */
+ cout << "Err at " << i << ", " << num_computed
+ << " != " << num_expected <<" diff: " <<num_diff <<endl;
+#endif
+ }
+ }
/* found mismatch on integer, increment */
- if(numeric_limits<T>::is_integer){
+ else if (numeric_limits<T>::is_integer) {
mismatches++;
#if DEBUG_STDOUT
@@ -261,6 +298,20 @@ void compiler_subgroup_scan_exclusive_add_float(void)
subgroup_generic(WG_SCAN_EXCLUSIVE_ADD, input, expected);
}
MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_scan_exclusive_add_float);
+void compiler_subgroup_scan_exclusive_add_half(void)
+{
+ if(!cl_check_subgroups())
+ return;
+ if(!cl_check_half())
+ return;
+ cl_half *input = NULL;
+ cl_half *expected = NULL;
+ OCL_CALL(cl_kernel_init, "compiler_subgroup_scan_exclusive.cl",
+ "compiler_subgroup_scan_exclusive_add_half",
+ SOURCE, "-DHALF");
+ subgroup_generic(WG_SCAN_EXCLUSIVE_ADD, input, expected, true);
+}
+MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_scan_exclusive_add_half);
/*
* Workgroup scan_exclusive max utest functions
@@ -320,6 +371,20 @@ void compiler_subgroup_scan_exclusive_max_float(void)
subgroup_generic(WG_SCAN_EXCLUSIVE_MAX, input, expected);
}
MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_scan_exclusive_max_float);
+void compiler_subgroup_scan_exclusive_max_half(void)
+{
+ if(!cl_check_subgroups())
+ return;
+ if(!cl_check_half())
+ return;
+ cl_half *input = NULL;
+ cl_half *expected = NULL;
+ OCL_CALL(cl_kernel_init, "compiler_subgroup_scan_exclusive.cl",
+ "compiler_subgroup_scan_exclusive_max_half",
+ SOURCE, "-DHALF");
+ subgroup_generic(WG_SCAN_EXCLUSIVE_MAX, input, expected, true);
+}
+MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_scan_exclusive_max_half);
/*
* Workgroup scan_exclusive min utest functions
@@ -379,3 +444,17 @@ void compiler_subgroup_scan_exclusive_min_float(void)
subgroup_generic(WG_SCAN_EXCLUSIVE_MIN, input, expected);
}
MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_scan_exclusive_min_float);
+void compiler_subgroup_scan_exclusive_min_half(void)
+{
+ if(!cl_check_subgroups())
+ return;
+ if(!cl_check_half())
+ return;
+ cl_half *input = NULL;
+ cl_half *expected = NULL;
+ OCL_CALL(cl_kernel_init, "compiler_subgroup_scan_exclusive.cl",
+ "compiler_subgroup_scan_exclusive_min_half",
+ SOURCE, "-DHALF");
+ subgroup_generic(WG_SCAN_EXCLUSIVE_MIN, input, expected, true);
+}
+MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_scan_exclusive_min_half);
diff --git a/utests/compiler_subgroup_scan_inclusive.cpp b/utests/compiler_subgroup_scan_inclusive.cpp
index fa32855..0f0df1c 100644
--- a/utests/compiler_subgroup_scan_inclusive.cpp
+++ b/utests/compiler_subgroup_scan_inclusive.cpp
@@ -32,25 +32,41 @@ template<class T>
static void compute_expected(WG_FUNCTION wg_func,
T* input,
T* expected,
- size_t SIMD_SIZE)
+ size_t SIMD_SIZE,
+ bool IS_HALF)
{
if(wg_func == WG_SCAN_INCLUSIVE_ADD)
{
expected[0] = input[0];
- for(uint32_t i = 1; i < SIMD_SIZE; i++)
- expected[i] = input[i] + expected[i - 1];
+ for(uint32_t i = 1; i < SIMD_SIZE; i++) {
+ if (IS_HALF)
+ expected[i] = __float_to_half(as_uint(as_float(__half_to_float(input[i])) +
+ as_float(__half_to_float(expected[i - 1]))));
+ else
+ expected[i] = input[i] + expected[i - 1];
+ }
}
else if(wg_func == WG_SCAN_INCLUSIVE_MAX)
{
expected[0] = input[0];
- for(uint32_t i = 1; i < SIMD_SIZE; i++)
- expected[i] = max(input[i], expected[i - 1]);
+ for(uint32_t i = 1; i < SIMD_SIZE; i++) {
+ if (IS_HALF)
+ expected[i] = (as_float(__half_to_float(input[i])) > as_float(__half_to_float(expected[i - 1]))) ?
+ input[i] : expected[i - 1];
+ else
+ expected[i] = max(input[i], expected[i - 1]);
+ }
}
else if(wg_func == WG_SCAN_INCLUSIVE_MIN)
{
expected[0] = input[0];
- for(uint32_t i = 1; i < SIMD_SIZE; i++)
- expected[i] = min(input[i], expected[i - 1]);
+ for(uint32_t i = 1; i < SIMD_SIZE; i++) {
+ if (IS_HALF)
+ expected[i] = (as_float(__half_to_float(input[i])) < as_float(__half_to_float(expected[i - 1]))) ?
+ input[i] : expected[i - 1];
+ else
+ expected[i] = min(input[i], expected[i - 1]);
+ }
}
}
@@ -62,7 +78,8 @@ template<class T>
static void generate_data(WG_FUNCTION wg_func,
T* &input,
T* &expected,
- size_t SIMD_SIZE)
+ size_t SIMD_SIZE,
+ bool IS_HALF)
{
input = new T[WG_GLOBAL_SIZE];
expected = new T[WG_GLOBAL_SIZE];
@@ -91,6 +108,8 @@ static void generate_data(WG_FUNCTION wg_func,
input[gid + lid] += ((rand() % 2 - 1) * base_val);
/* add trailing random bits, tests GENERAL cases */
input[gid + lid] += (rand() % 112);
+ if (IS_HALF)
+ input[gid + lid] = __float_to_half(as_uint((float)input[gid + lid]/2));
#if DEBUG_STDOUT
/* output generated input */
@@ -101,7 +120,7 @@ static void generate_data(WG_FUNCTION wg_func,
}
/* expected values */
- compute_expected(wg_func, input + gid, expected + gid, SIMD_SIZE);
+ compute_expected(wg_func, input + gid, expected + gid, SIMD_SIZE, IS_HALF);
#if DEBUG_STDOUT
/* output expected input */
@@ -124,7 +143,8 @@ static void generate_data(WG_FUNCTION wg_func,
template<class T>
static void subgroup_generic(WG_FUNCTION wg_func,
T* input,
- T* expected)
+ T* expected,
+ bool IS_HALF = false)
{
/* get simd size */
globals[0] = WG_GLOBAL_SIZE;
@@ -133,7 +153,7 @@ static void subgroup_generic(WG_FUNCTION wg_func,
OCL_CALL(utestclGetKernelSubGroupInfoKHR,kernel,device,CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR,sizeof(size_t)*1,locals,sizeof(size_t),&SIMD_SIZE,NULL);
/* input and expected data */
- generate_data(wg_func, input, expected, SIMD_SIZE);
+ generate_data(wg_func, input, expected, SIMD_SIZE, IS_HALF);
/* prepare input for data type */
OCL_CREATE_BUFFER(buf[0], 0, WG_GLOBAL_SIZE * sizeof(T), NULL);
@@ -156,8 +176,21 @@ static void subgroup_generic(WG_FUNCTION wg_func,
for (uint32_t i = 0; i < WG_GLOBAL_SIZE; i++)
if(((T *)buf_data[1])[i] != *(expected + i))
{
+ if (IS_HALF) {
+ float num_computed = as_float(__half_to_float(((T *)buf_data[1])[i]));
+ float num_expected = as_float(__half_to_float(*(expected + i)));
+ float num_diff = abs(num_computed - num_expected) / abs(num_expected);
+ if (num_diff > 0.03f) {
+ mismatches++;
+#if DEBUG_STDOUT
+ /* output mismatch */
+ cout << "Err at " << i << ", " << num_computed
+ << " != " << num_expected <<" diff: " <<num_diff <<endl;
+#endif
+ }
+ }
/* found mismatch on integer, increment */
- if(numeric_limits<T>::is_integer){
+ else if (numeric_limits<T>::is_integer) {
mismatches++;
#if DEBUG_STDOUT
@@ -251,6 +284,20 @@ void compiler_subgroup_scan_inclusive_add_float(void)
subgroup_generic(WG_SCAN_INCLUSIVE_ADD, input, expected);
}
MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_scan_inclusive_add_float);
+void compiler_subgroup_scan_inclusive_add_half(void)
+{
+ if(!cl_check_subgroups())
+ return;
+ if(!cl_check_half())
+ return;
+ cl_half *input = NULL;
+ cl_half *expected = NULL;
+ OCL_CALL(cl_kernel_init, "compiler_subgroup_scan_inclusive.cl",
+ "compiler_subgroup_scan_inclusive_add_half",
+ SOURCE, "-DHALF");
+ subgroup_generic(WG_SCAN_INCLUSIVE_ADD, input, expected, true);
+}
+MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_scan_inclusive_add_half);
/*
* Workgroup scan_inclusive max utest functions
@@ -310,6 +357,20 @@ void compiler_subgroup_scan_inclusive_max_float(void)
subgroup_generic(WG_SCAN_INCLUSIVE_MAX, input, expected);
}
MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_scan_inclusive_max_float);
+void compiler_subgroup_scan_inclusive_max_half(void)
+{
+ if(!cl_check_subgroups())
+ return;
+ if(!cl_check_half())
+ return;
+ cl_half *input = NULL;
+ cl_half *expected = NULL;
+ OCL_CALL(cl_kernel_init, "compiler_subgroup_scan_inclusive.cl",
+ "compiler_subgroup_scan_inclusive_max_half",
+ SOURCE, "-DHALF");
+ subgroup_generic(WG_SCAN_INCLUSIVE_MAX, input, expected, true);
+}
+MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_scan_inclusive_max_half);
/*
* Workgroup scan_inclusive min utest functions
@@ -369,4 +430,17 @@ void compiler_subgroup_scan_inclusive_min_float(void)
subgroup_generic(WG_SCAN_INCLUSIVE_MIN, input, expected);
}
MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_scan_inclusive_min_float);
-
+void compiler_subgroup_scan_inclusive_min_half(void)
+{
+ if(!cl_check_subgroups())
+ return;
+ if(!cl_check_half())
+ return;
+ cl_half *input = NULL;
+ cl_half *expected = NULL;
+ OCL_CALL(cl_kernel_init, "compiler_subgroup_scan_inclusive.cl",
+ "compiler_subgroup_scan_inclusive_min_half",
+ SOURCE, "-DHALF");
+ subgroup_generic(WG_SCAN_INCLUSIVE_MIN, input, expected, true);
+}
+MAKE_UTEST_FROM_FUNCTION(compiler_subgroup_scan_inclusive_min_half);
--
2.7.4
More information about the Beignet
mailing list