[Beignet] [PATCH 7/8] Add utest case for half float basic.

Thu May 21 01:26:05 PDT 2015

From: Junyan He <junyan.he at linux.intel.com>

Signed-off-by: Junyan He <junyan.he at linux.intel.com>
---
 kernels/compiler_half.cl |   7 +++
 utests/CMakeLists.txt    |   1 +
 utests/compiler_half.cpp | 158 +++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 166 insertions(+)
 create mode 100644 kernels/compiler_half.cl
 create mode 100644 utests/compiler_half.cpp

diff --git a/kernels/compiler_half.cl b/kernels/compiler_half.cl
new file mode 100644
index 0000000..03b2891
--- /dev/null
+++ b/kernels/compiler_half.cl
@@ -0,0 +1,7 @@
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+kernel void compiler_half(global half *src, global half *dst) {
+  int i = get_global_id(0);
+  half hf = 2.5;
+  dst[i] = src[i] + hf;
+}
+
diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
index d5bf14a..791b375 100644
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -183,6 +183,7 @@ set (utests_sources
   compiler_long_mult.cpp
   compiler_long_cmp.cpp
   compiler_long_bitcast.cpp
+  compiler_half.cpp
   compiler_function_argument3.cpp
   compiler_function_qualifiers.cpp
   compiler_bool_cross_basic_block.cpp
diff --git a/utests/compiler_half.cpp b/utests/compiler_half.cpp
new file mode 100644
index 0000000..079bcd5
--- /dev/null
+++ b/utests/compiler_half.cpp
@@ -0,0 +1,158 @@
+#include <cstdint>
+#include <cstring>
+#include <iostream>
+#include "utest_helper.hpp"
+
+static uint32_t __half_to_float(uint16_t h)
+{
+  struct __FP32 {
+    uint32_t mantissa:23;
+    uint32_t exponent:8;
+    uint32_t sign:1;
+  };
+  struct __FP16 {
+    uint32_t mantissa:10;
+    uint32_t exponent:5;
+    uint32_t sign:1;
+  };
+  uint32_t f;
+  __FP32 o;
+  memset(&o, 0, sizeof(o));
+  __FP16 i;
+  memcpy(&i, &h, sizeof(uint16_t));
+
+  if (i.exponent == 0 && i.mantissa == 0) // (Signed) zero
+    o.sign = i.sign;
+  else {
+    if (i.exponent == 0) { // Denormal (converts to normalized)
+      // Adjust mantissa so it's normalized (and keep
+      // track of exponent adjustment)
+      int e = -1;
+      uint m = i.mantissa;
+      do {
+        e++;
+        m <<= 1;
+      } while ((m & 0x400) == 0);
+
+      o.mantissa = (m & 0x3ff) << 13;
+      o.exponent = 127 - 15 - e;
+      o.sign = i.sign;
+    } else if (i.exponent == 0x1f) { // Inf/NaN
+      // NOTE: Both can be handled with same code path
+      // since we just pass through mantissa bits.
+      o.mantissa = i.mantissa << 13;
+      o.exponent = 255;
+      o.sign = i.sign;
+    } else { // Normalized number
+      o.mantissa = i.mantissa << 13;
+      o.exponent = 127 - 15 + i.exponent;
+      o.sign = i.sign;
+    }
+  }
+
+  memcpy(&f, &o, sizeof(uint32_t));
+  return f;
+}
+
+
+static uint16_t __float_to_half(uint32_t x)
+{
+  uint16_t bits = (x >> 16) & 0x8000; /* Get the sign */
+  uint16_t m = (x >> 12) & 0x07ff; /* Keep one extra bit for rounding */
+  unsigned int e = (x >> 23) & 0xff; /* Using int is faster here */
+
+  /* If zero, or denormal, or exponent underflows too much for a denormal
+   * half, return signed zero. */
+  if (e < 103)
+    return bits;
+
+  /* If NaN, return NaN. If Inf or exponent overflow, return Inf. */
+  if (e > 142) {
+    bits |= 0x7c00u;
+    /* If exponent was 0xff and one mantissa bit was set, it means NaN,
+     * not Inf, so make sure we set one mantissa bit too. */
+    bits |= e == 255 && (x & 0x007fffffu);
+    return bits;
+  }
+
+  /* If exponent underflows but not too much, return a denormal */
+  if (e < 113) {
+    m |= 0x0800u;
+    /* Extra rounding may overflow and set mantissa to 0 and exponent
+     * to 1, which is OK. */
+    bits |= (m >> (114 - e)) + ((m >> (113 - e)) & 1);
+    return bits;
+  }
+
+  bits |= ((e - 112) << 10) | (m >> 1);
+  /* Extra rounding. An overflow will set mantissa to 0 and increment
+   * the exponent, which is OK. */
+  bits += m & 1;
+  return bits;
+}
+
+void compiler_half(void)
+{
+  const size_t n = 16;
+  uint16_t hsrc[n];
+  float fsrc[n], fdst[n];
+  float f = 2.5;
+  uint32_t tmp_f;
+
+  std::string extStr;
+  size_t param_value_size;
+  OCL_CALL(clGetDeviceInfo, device, CL_DEVICE_EXTENSIONS, 0, 0, &param_value_size);
+  std::vector<char> param_value(param_value_size);
+  OCL_CALL(clGetDeviceInfo, device, CL_DEVICE_EXTENSIONS, param_value_size,
+      param_value.empty() ? NULL : &param_value.front(), &param_value_size);
+  if (!param_value.empty())
+    extStr = std::string(&param_value.front(), param_value_size-1); 
+
+  if (std::strstr(extStr.c_str(), "cl_khr_fp16") == false) {
+    printf("No cl_khr_fp16, Skip!");
+    return;
+  }
+
+  memcpy(&tmp_f, &f, sizeof(float));
+
+  // Setup kernel and buffers
+  OCL_CREATE_KERNEL("compiler_half");
+  OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint16_t), NULL);
+  OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint16_t), NULL);
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+  globals[0] = n;
+  locals[0] = 16;
+
+  for (int32_t i = 0; i < (int32_t) n; ++i) {
+    fsrc[i] = 0.1 * i;
+    memcpy(&tmp_f, &fsrc[i], sizeof(float));
+    hsrc[i] = __float_to_half(tmp_f);
+  }
+
+  for (int32_t i = 0; i < (int32_t) n; ++i) {
+    fdst[i] = fsrc[i] + f;
+  }
+
+  OCL_MAP_BUFFER(0);
+  OCL_MAP_BUFFER(1);
+  memcpy(buf_data[0], hsrc, sizeof(hsrc));
+  memset(buf_data[1], 0, sizeof(hsrc));
+  OCL_UNMAP_BUFFER(0);
+  OCL_UNMAP_BUFFER(1);
+
+  // Run the kernel on GPU
+  OCL_NDRANGE(1);
+
+  // Compare
+  OCL_MAP_BUFFER(1);
+  for (int32_t i = 0; i < (int32_t) n; ++i) {
+    tmp_f = __half_to_float(((uint16_t *)buf_data[1])[i]);
+    memcpy(&f, &tmp_f, sizeof(float));
+    //printf("%f %f\n", f, fdst[i]);
+    OCL_ASSERT((f - fdst[i]) <= 0.001 && (f - fdst[i]) >= -0.001);
+  }
+  OCL_UNMAP_BUFFER(1);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_half);
-- 
1.9.1