[Beignet] [PATCH 2/2] Utest: Add test cases for half geometric functions.

junyan.he at inbox.com junyan.he at inbox.com
Mon Jul 6 03:26:20 PDT 2015


From: Junyan He <junyan.he at linux.intel.com>

Signed-off-by: Junyan He <junyan.he at linux.intel.com>
---
 kernels/compiler_half_geometry.cl |  12 ++++
 utests/compiler_half.cpp          | 124 +++++++++++++++++++++++++++++++++++++-
 2 files changed, 135 insertions(+), 1 deletion(-)
 create mode 100644 kernels/compiler_half_geometry.cl

diff --git a/kernels/compiler_half_geometry.cl b/kernels/compiler_half_geometry.cl
new file mode 100644
index 0000000..687ad11
--- /dev/null
+++ b/kernels/compiler_half_geometry.cl
@@ -0,0 +1,12 @@
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+kernel void compiler_half_dot(global half4 *src0, global half4 *src1, global half *dst) {
+  // Each work-item reduces one half4 pair to a single half and stores it at its own index.
+  const int gid = get_global_id(0);
+  dst[gid] = dot(src0[gid], src1[gid]);
+}
+
+kernel void compiler_half_length(global half4 *src, global half *dst) {
+  // Each work-item stores the length() of one half4 element at its own index.
+  const int gid = get_global_id(0);
+  dst[gid] = length(src[gid]);
+}
diff --git a/utests/compiler_half.cpp b/utests/compiler_half.cpp
index ce0f7da..63f1483 100644
--- a/utests/compiler_half.cpp
+++ b/utests/compiler_half.cpp
@@ -172,7 +172,7 @@ void compiler_half_basic(void)
   for (int32_t i = 0; i < (int32_t) n; ++i) {
     tmp_f = __half_to_float(((uint16_t *)buf_data[1])[i]);
     memcpy(&f, &tmp_f, sizeof(float));
-    printf("%f %f\n", f, fdst[i]);
+    //printf("%f %f\n", f, fdst[i]);
     OCL_ASSERT(fabs(f - fdst[i]) <= 0.01 * fabs(fdst[i]) || (fdst[i] == 0.0 && f == 0.0));
   }
   OCL_UNMAP_BUFFER(1);
@@ -922,3 +922,125 @@ void compiler_half_to_long_sat(void)
   OCL_UNMAP_BUFFER(1);
 }
 MAKE_UTEST_FROM_FUNCTION(compiler_half_to_long_sat);
+
+// Utest: the kernel's half4 dot() results must match a float reference within 1%.
+void compiler_half_dot(void)
+{
+  const size_t n = 16*4;  // half scalars per input buffer (m vectors of 4)
+  const size_t m = 16;    // half4 vectors per input == number of results
+  uint16_t hsrc0[n], hsrc1[n];
+  float fsrc0[n], fsrc1[n], fdst[m];
+  uint32_t tmp_f;
+  float f;
+
+  if (!check_half_device())
+    return;
+
+  // Setup kernel and buffers
+  OCL_CREATE_KERNEL_FROM_FILE("compiler_half_geometry", "compiler_half_dot");
+  OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint16_t), NULL);
+  OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint16_t), NULL);
+  OCL_CREATE_BUFFER(buf[2], 0, m * sizeof(uint16_t), NULL);
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+  OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]);
+  globals[0] = m;  // one work-item per half4; launching n would index past all three buffers
+  locals[0] = 16;
+
+  for (int32_t i = 0; i < (int32_t) n; ++i) {
+    fsrc0[i] = 3.2 * i;
+    memcpy(&tmp_f, &fsrc0[i], sizeof(float));  // pass the float's bit pattern to the converter
+    hsrc0[i] = __float_to_half(tmp_f);
+    fsrc1[i] = 0.52 * i;
+    memcpy(&tmp_f, &fsrc1[i], sizeof(float));
+    hsrc1[i] = __float_to_half(tmp_f);
+  }
+
+  for (int32_t i = 0; i < (int32_t) m; ++i) {  // float reference: 4-component dot product
+    fdst[i] = fsrc0[i*4] * fsrc1[i*4] +
+        fsrc0[i*4 + 1] * fsrc1[i*4 + 1] +
+        fsrc0[i*4 + 2] * fsrc1[i*4 + 2] +
+        fsrc0[i*4 + 3] * fsrc1[i*4 + 3];
+  }
+
+  OCL_MAP_BUFFER(0);
+  OCL_MAP_BUFFER(1);
+  OCL_MAP_BUFFER(2);
+  memcpy(buf_data[0], hsrc0, sizeof(hsrc0));
+  memcpy(buf_data[1], hsrc1, sizeof(hsrc1));
+  memset(buf_data[2], 0, m * sizeof(uint16_t));
+  OCL_UNMAP_BUFFER(0);
+  OCL_UNMAP_BUFFER(1);
+  OCL_UNMAP_BUFFER(2);
+
+  // Run the kernel on GPU
+  OCL_NDRANGE(1);
+
+  // Compare: each result within 1% of the reference, or both exactly zero
+  OCL_MAP_BUFFER(2);
+  for (int32_t i = 0; i < (int32_t) m; ++i) {
+    tmp_f = __half_to_float(((uint16_t *)buf_data[2])[i]);
+    memcpy(&f, &tmp_f, sizeof(float));
+    OCL_ASSERT(fabs(f - fdst[i]) <= 0.01 * fabs(fdst[i]) || (fdst[i] == 0.0 && f == 0.0));
+  }
+  OCL_UNMAP_BUFFER(2);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_half_dot);
+
+// Utest: the kernel's half4 length() results must match a float reference within 1%.
+void compiler_half_length(void)
+{
+  const size_t n = 16*4;  // half scalars in the input buffer (m vectors of 4)
+  const size_t m = 16;    // half4 vectors in the input == number of results
+  uint16_t hsrc[n];
+  float fsrc[n], fdst[m];
+  uint32_t tmp_f;
+  float f;
+
+  if (!check_half_device())
+    return;
+
+  // Setup kernel and buffers
+  OCL_CREATE_KERNEL_FROM_FILE("compiler_half_geometry", "compiler_half_length");
+  OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint16_t), NULL);
+  OCL_CREATE_BUFFER(buf[1], 0, m * sizeof(uint16_t), NULL);
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+  globals[0] = m;  // one work-item per half4; launching n would index past both buffers
+  locals[0] = 16;
+
+  for (int32_t i = 0; i < (int32_t) n; ++i) {
+    fsrc[i] = 3.2 * i;
+    memcpy(&tmp_f, &fsrc[i], sizeof(float));  // pass the float's bit pattern to the converter
+    hsrc[i] = __float_to_half(tmp_f);
+  }
+
+  for (int32_t i = 0; i < (int32_t) m; ++i) {  // float reference: Euclidean norm of 4 components
+    fdst[i] = sqrtf(fsrc[i*4] * fsrc[i*4] +
+        fsrc[i*4 + 1] * fsrc[i*4 + 1] +
+        fsrc[i*4 + 2] * fsrc[i*4 + 2] +
+        fsrc[i*4 + 3] * fsrc[i*4 + 3]);
+  }
+
+  OCL_MAP_BUFFER(0);
+  OCL_MAP_BUFFER(1);
+  memcpy(buf_data[0], hsrc, sizeof(hsrc));
+  memset(buf_data[1], 0, m * sizeof(uint16_t));
+  OCL_UNMAP_BUFFER(0);
+  OCL_UNMAP_BUFFER(1);
+
+  // Run the kernel on GPU
+  OCL_NDRANGE(1);
+
+  // Compare: each result within 1% of the reference, or both exactly zero
+  OCL_MAP_BUFFER(1);
+  for (int32_t i = 0; i < (int32_t) m; ++i) {
+    tmp_f = __half_to_float(((uint16_t *)buf_data[1])[i]);
+    memcpy(&f, &tmp_f, sizeof(float));
+    OCL_ASSERT(fabs(f - fdst[i]) <= 0.01 * fabs(fdst[i]) || (fdst[i] == 0.0 && f == 0.0));
+  }
+  OCL_UNMAP_BUFFER(1);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_half_length);
-- 
1.9.1



More information about the Beignet mailing list