[Beignet] [PATCH 7/8] Add utest case for half float basic.
junyan.he at inbox.com
junyan.he at inbox.com
Thu May 21 01:26:05 PDT 2015
From: Junyan He <junyan.he at linux.intel.com>
Signed-off-by: Junyan He <junyan.he at linux.intel.com>
---
kernels/compiler_half.cl | 7 +++
utests/CMakeLists.txt | 1 +
utests/compiler_half.cpp | 158 +++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 166 insertions(+)
create mode 100644 kernels/compiler_half.cl
create mode 100644 utests/compiler_half.cpp
diff --git a/kernels/compiler_half.cl b/kernels/compiler_half.cl
new file mode 100644
index 0000000..03b2891
--- /dev/null
+++ b/kernels/compiler_half.cl
@@ -0,0 +1,7 @@
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+kernel void compiler_half(global half *src, global half *dst) {
+ int i = get_global_id(0);
+ half hf = 2.5;
+ dst[i] = src[i] + hf;
+}
+
diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
index d5bf14a..791b375 100644
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -183,6 +183,7 @@ set (utests_sources
compiler_long_mult.cpp
compiler_long_cmp.cpp
compiler_long_bitcast.cpp
+ compiler_half.cpp
compiler_function_argument3.cpp
compiler_function_qualifiers.cpp
compiler_bool_cross_basic_block.cpp
diff --git a/utests/compiler_half.cpp b/utests/compiler_half.cpp
new file mode 100644
index 0000000..079bcd5
--- /dev/null
+++ b/utests/compiler_half.cpp
@@ -0,0 +1,158 @@
+#include <cstdint>
+#include <cstring>
+#include <iostream>
+#include "utest_helper.hpp"
+
+static uint32_t __half_to_float(uint16_t h)
+{
+ struct __FP32 {
+ uint32_t mantissa:23;
+ uint32_t exponent:8;
+ uint32_t sign:1;
+ };
+ struct __FP16 {
+ uint32_t mantissa:10;
+ uint32_t exponent:5;
+ uint32_t sign:1;
+ };
+ uint32_t f;
+ __FP32 o;
+ memset(&o, 0, sizeof(o));
+ __FP16 i;
+ memcpy(&i, &h, sizeof(uint16_t));
+
+ if (i.exponent == 0 && i.mantissa == 0) // (Signed) zero
+ o.sign = i.sign;
+ else {
+ if (i.exponent == 0) { // Denormal (converts to normalized)
+ // Adjust mantissa so it's normalized (and keep
+ // track of exponent adjustment)
+ int e = -1;
+ uint m = i.mantissa;
+ do {
+ e++;
+ m <<= 1;
+ } while ((m & 0x400) == 0);
+
+ o.mantissa = (m & 0x3ff) << 13;
+ o.exponent = 127 - 15 - e;
+ o.sign = i.sign;
+ } else if (i.exponent == 0x1f) { // Inf/NaN
+ // NOTE: Both can be handled with same code path
+ // since we just pass through mantissa bits.
+ o.mantissa = i.mantissa << 13;
+ o.exponent = 255;
+ o.sign = i.sign;
+ } else { // Normalized number
+ o.mantissa = i.mantissa << 13;
+ o.exponent = 127 - 15 + i.exponent;
+ o.sign = i.sign;
+ }
+ }
+
+ memcpy(&f, &o, sizeof(uint32_t));
+ return f;
+}
+
+
+static uint16_t __float_to_half(uint32_t x)
+{
+ uint16_t bits = (x >> 16) & 0x8000; /* Get the sign */
+ uint16_t m = (x >> 12) & 0x07ff; /* Keep one extra bit for rounding */
+ unsigned int e = (x >> 23) & 0xff; /* Using int is faster here */
+
+ /* If zero, or denormal, or exponent underflows too much for a denormal
+ * half, return signed zero. */
+ if (e < 103)
+ return bits;
+
+ /* If NaN, return NaN. If Inf or exponent overflow, return Inf. */
+ if (e > 142) {
+ bits |= 0x7c00u;
+ /* If exponent was 0xff and one mantissa bit was set, it means NaN,
+ * not Inf, so make sure we set one mantissa bit too. */
+ bits |= e == 255 && (x & 0x007fffffu);
+ return bits;
+ }
+
+ /* If exponent underflows but not too much, return a denormal */
+ if (e < 113) {
+ m |= 0x0800u;
+ /* Extra rounding may overflow and set mantissa to 0 and exponent
+ * to 1, which is OK. */
+ bits |= (m >> (114 - e)) + ((m >> (113 - e)) & 1);
+ return bits;
+ }
+
+ bits |= ((e - 112) << 10) | (m >> 1);
+ /* Extra rounding. An overflow will set mantissa to 0 and increment
+ * the exponent, which is OK. */
+ bits += m & 1;
+ return bits;
+}
+
+void compiler_half(void)
+{
+ const size_t n = 16;
+ uint16_t hsrc[n];
+ float fsrc[n], fdst[n];
+ float f = 2.5;
+ uint32_t tmp_f;
+
+ std::string extStr;
+ size_t param_value_size;
+ OCL_CALL(clGetDeviceInfo, device, CL_DEVICE_EXTENSIONS, 0, 0, ¶m_value_size);
+ std::vector<char> param_value(param_value_size);
+ OCL_CALL(clGetDeviceInfo, device, CL_DEVICE_EXTENSIONS, param_value_size,
+ param_value.empty() ? NULL : ¶m_value.front(), ¶m_value_size);
+ if (!param_value.empty())
+ extStr = std::string(¶m_value.front(), param_value_size-1);
+
+ if (std::strstr(extStr.c_str(), "cl_khr_fp16") == false) {
+ printf("No cl_khr_fp16, Skip!");
+ return;
+ }
+
+ memcpy(&tmp_f, &f, sizeof(float));
+
+ // Setup kernel and buffers
+ OCL_CREATE_KERNEL("compiler_half");
+ OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint16_t), NULL);
+ OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint16_t), NULL);
+ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+ OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+ globals[0] = n;
+ locals[0] = 16;
+
+ for (int32_t i = 0; i < (int32_t) n; ++i) {
+ fsrc[i] = 0.1 * i;
+ memcpy(&tmp_f, &fsrc[i], sizeof(float));
+ hsrc[i] = __float_to_half(tmp_f);
+ }
+
+ for (int32_t i = 0; i < (int32_t) n; ++i) {
+ fdst[i] = fsrc[i] + f;
+ }
+
+ OCL_MAP_BUFFER(0);
+ OCL_MAP_BUFFER(1);
+ memcpy(buf_data[0], hsrc, sizeof(hsrc));
+ memset(buf_data[1], 0, sizeof(hsrc));
+ OCL_UNMAP_BUFFER(0);
+ OCL_UNMAP_BUFFER(1);
+
+ // Run the kernel on GPU
+ OCL_NDRANGE(1);
+
+ // Compare
+ OCL_MAP_BUFFER(1);
+ for (int32_t i = 0; i < (int32_t) n; ++i) {
+ tmp_f = __half_to_float(((uint16_t *)buf_data[1])[i]);
+ memcpy(&f, &tmp_f, sizeof(float));
+ //printf("%f %f\n", f, fdst[i]);
+ OCL_ASSERT((f - fdst[i]) <= 0.001 && (f - fdst[i]) >= -0.001);
+ }
+ OCL_UNMAP_BUFFER(1);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_half);
--
1.9.1
More information about the Beignet
mailing list