[Beignet] [PATCH 4/5] Utests: Add test cases for double conversion.

junyan.he at inbox.com junyan.he at inbox.com
Tue Oct 27 03:36:17 PDT 2015


From: Junyan He <junyan.he at linux.intel.com>

Signed-off-by: Junyan He <junyan.he at linux.intel.com>
---
 kernels/compiler_double_2.cl       |   9 -
 kernels/compiler_double_convert.cl | 102 ++++++
 kernels/compiler_half_convert.cl   |  11 +-
 utests/CMakeLists.txt              |   1 +
 utests/compiler_double_2.cpp       |  47 ---
 utests/compiler_double_convert.cpp | 622 +++++++++++++++++++++++++++++++++++++
 utests/compiler_half.cpp           | 102 ++++++
 7 files changed, 837 insertions(+), 57 deletions(-)
 delete mode 100644 kernels/compiler_double_2.cl
 create mode 100644 kernels/compiler_double_convert.cl
 delete mode 100644 utests/compiler_double_2.cpp
 create mode 100644 utests/compiler_double_convert.cpp

diff --git a/kernels/compiler_double_2.cl b/kernels/compiler_double_2.cl
deleted file mode 100644
index 20ee614..0000000
--- a/kernels/compiler_double_2.cl
+++ /dev/null
@@ -1,9 +0,0 @@
-#pragma OPENCL EXTENSION cl_khr_fp64 : enable
-kernel void compiler_double_2(global float *src, global double *dst) {
-  int i = get_global_id(0);
-  float d = 1.234567890123456789f;
-  if (i < 14)
-    dst[i] = d * (d + src[i]);
-  else
-    dst[i] = 14;
-}
diff --git a/kernels/compiler_double_convert.cl b/kernels/compiler_double_convert.cl
new file mode 100644
index 0000000..98c5c19
--- /dev/null
+++ b/kernels/compiler_double_convert.cl
@@ -0,0 +1,102 @@
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+kernel void compiler_double_convert_int(global double *src, global int *dst0, global uint* dst1) {
+  int i = get_global_id(0);
+  // Work-items whose id is a multiple of 3 write nothing, so dst0/dst1 keep their old contents there.
+  if (i%3) {
+    int i32 = src[i];  // implicit double -> int conversion on initialisation
+    dst0[i] = i32;
+    // Same source element again, this time converted implicitly to uint.
+    uint u32 = src[i];
+    dst1[i] = u32;
+  }
+}
+
+kernel void compiler_double_convert_float(global double *src, global float *dst) {
+  int i = get_global_id(0);
+  // Every work-item narrows its double to float through an implicit conversion.
+  float f = src[i];
+  dst[i] = f;
+}
+
+kernel void compiler_double_convert_short(global double *src, global short *dst0, global ushort * dst1) {
+  int i = get_global_id(0);
+  // Work-items whose id is a multiple of 3 write nothing, so dst0/dst1 keep their old contents there.
+  if (i%3) {
+    short i16 = src[i];  // implicit double -> short conversion on initialisation
+    dst0[i] = i16;
+    // Same source element again, this time converted implicitly to ushort.
+    ushort u16 = src[i];
+    dst1[i] = u16;
+  }
+}
+
+kernel void compiler_double_convert_long(global double *src, global long *dst0, global ulong * dst1) {
+  int i = get_global_id(0);
+  // Work-items whose id is a multiple of 3 write nothing, so dst0/dst1 keep their old contents there.
+  if (i%3) {
+    long i64 = src[i];  // implicit double -> long conversion on initialisation
+    dst0[i] = i64;
+    // Same source element again, this time converted implicitly to ulong.
+    ulong u64 = src[i];
+    dst1[i] = u64;
+  }
+}
+
+kernel void compiler_double_convert_char(global double *src, global char *dst0, global uchar * dst1) {
+  int i = get_global_id(0);
+  // Work-items whose id is a multiple of 3 write nothing, so dst0/dst1 keep their old contents there.
+  if (i%3) {
+    char i8 = src[i];  // implicit double -> char conversion on initialisation
+    dst0[i] = i8;
+    // Same source element again, this time converted implicitly to uchar.
+    uchar u8 = src[i];
+    dst1[i] = u8;
+  }
+}
+
+kernel void compiler_long_convert_double(global long *src0, global ulong *src1, global double * dst0, global double *dst1) {
+  int i = get_global_id(0);
+  // Signed path: src0[i] is converted implicitly to double and stored in dst0[i].
+  double d = src0[i];
+  dst0[i] = d;
+  // Unsigned path: the same local is reused for src1[i], stored in dst1[i].
+  d = src1[i];
+  dst1[i] = d;
+}
+
+kernel void compiler_int_convert_double(global int *src0, global uint *src1, global double * dst0, global double *dst1) {
+  int i = get_global_id(0);
+  // Signed path: src0[i] is converted implicitly to double and stored in dst0[i].
+  double d = src0[i];
+  dst0[i] = d;
+  // Unsigned path: the same local is reused for src1[i], stored in dst1[i].
+  d = src1[i];
+  dst1[i] = d;
+}
+
+kernel void compiler_short_convert_double(global short *src0, global ushort *src1, global double * dst0, global double *dst1) {
+  int i = get_global_id(0);
+  // Signed path: src0[i] is converted implicitly to double and stored in dst0[i].
+  double d = src0[i];
+  dst0[i] = d;
+  // Unsigned path: the same local is reused for src1[i], stored in dst1[i].
+  d = src1[i];
+  dst1[i] = d;
+}
+
+kernel void compiler_char_convert_double(global char *src0, global uchar *src1, global double * dst0, global double *dst1) {
+  int i = get_global_id(0);
+  // Signed path: src0[i] is converted implicitly to double and stored in dst0[i].
+  double d = src0[i];
+  dst0[i] = d;
+  // Unsigned path: the same local is reused for src1[i], stored in dst1[i].
+  d = src1[i];
+  dst1[i] = d;
+}
+
+kernel void compiler_float_convert_double(global float *src, global double *dst) {
+  int i = get_global_id(0);
+  // Every work-item widens its float to double through an implicit conversion.
+  double d = src[i];
+  dst[i] = d;
+}
diff --git a/kernels/compiler_half_convert.cl b/kernels/compiler_half_convert.cl
index c28921e..3587e19 100644
--- a/kernels/compiler_half_convert.cl
+++ b/kernels/compiler_half_convert.cl
@@ -1,5 +1,4 @@
 #pragma OPENCL EXTENSION cl_khr_fp16 : enable
-
 kernel void compiler_half_to_long_sat(global half *src, global long *dst) {
   int i = get_global_id(0);
   dst[i] = convert_long_sat(src[i]);
@@ -54,3 +53,13 @@ kernel void compiler_half_to_float(global half4 *src, global float4 *dst) {
   int i = get_global_id(0);
   dst[i] = convert_float4(src[i]);
 }
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+kernel void compiler_half_to_double(global half *src, global double *dst) {
+  int i = get_global_id(0);
+  dst[i] = src[i];  // implicit half -> double conversion on assignment, one element per work-item
+}
+kernel void compiler_double_to_half(global double *src, global half *dst) {
+  int i = get_global_id(0);
+  dst[i] = src[i];  // implicit double -> half conversion on assignment, one element per work-item
+}
diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
index 18337fa..f44fe19 100644
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -196,6 +196,7 @@ set (utests_sources
   compiler_double_precision.cpp
   compiler_double.cpp
   compiler_double_div.cpp
+  compiler_double_convert.cpp
   load_program_from_gen_bin.cpp
   load_program_from_spir.cpp
   get_arg_info.cpp
diff --git a/utests/compiler_double_2.cpp b/utests/compiler_double_2.cpp
deleted file mode 100644
index 7e3ae4b..0000000
--- a/utests/compiler_double_2.cpp
+++ /dev/null
@@ -1,47 +0,0 @@
-#include <cmath>
-#include "utest_helper.hpp"
-
-static void cpu(int global_id, float *src, double *dst) {
-  float f = src[global_id];
-  float d = 1.234567890123456789;
-  dst[global_id] = global_id < 14 ? d * (d + f) : 14;
-}
-
-void compiler_double_2(void)
-{
-  const size_t n = 16;
-  float cpu_src[n];
-  double cpu_dst[n];
-
-  // Setup kernel and buffers
-  OCL_CREATE_KERNEL("compiler_double_2");
-  OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(float), NULL);
-  OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(double), NULL);
-  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
-  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
-  globals[0] = n;
-  locals[0] = 16;
-
-  // Run random tests
-  for (uint32_t pass = 0; pass < 1; ++pass) {
-    OCL_MAP_BUFFER(0);
-    for (int32_t i = 0; i < (int32_t) n; ++i)
-      cpu_src[i] = ((float*)buf_data[0])[i] = .1f * (rand() & 15) - .75f;
-    OCL_UNMAP_BUFFER(0);
-
-    // Run the kernel on GPU
-    OCL_NDRANGE(1);
-
-    // Run on CPU
-    for (int32_t i = 0; i < (int32_t) n; ++i)
-      cpu(i, cpu_src, cpu_dst);
-
-    // Compare
-    OCL_MAP_BUFFER(1);
-    for (int32_t i = 0; i < (int32_t) n; ++i)
-      OCL_ASSERT(fabs(((double*)buf_data[1])[i] - cpu_dst[i]) < 1e-4);
-    OCL_UNMAP_BUFFER(1);
-  }
-}
-
-MAKE_UTEST_FROM_FUNCTION(compiler_double_2);
diff --git a/utests/compiler_double_convert.cpp b/utests/compiler_double_convert.cpp
new file mode 100644
index 0000000..f7e962b
--- /dev/null
+++ b/utests/compiler_double_convert.cpp
@@ -0,0 +1,622 @@
+#include <cmath>
+#include <string.h>
+#include "utest_helper.hpp"
+// Runs the compiler_double_convert_int kernel and checks its int32/uint32 outputs
+// against host-side C++ casts of the same doubles (elements with i % 3 == 0 stay 0).
+void compiler_double_convert_int(void)
+{
+  const size_t n = 16;
+  double src[n];
+  int32_t cpu_dst0[n];
+  uint32_t cpu_dst1[n];
+
+  if (!cl_check_double())
+    return;
+
+  memset(cpu_dst0, 0, sizeof(cpu_dst0));
+  memset(cpu_dst1, 0, sizeof(cpu_dst1));
+  // Setup kernel and buffers
+  OCL_CREATE_KERNEL_FROM_FILE("compiler_double_convert", "compiler_double_convert_int");
+  OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(double), NULL);
+  OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(int32_t), NULL);
+  OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(uint32_t), NULL);
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+  OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]);
+  globals[0] = n;
+  locals[0] = 16;
+
+  // Fill the source with random doubles and zero both destination buffers
+  OCL_MAP_BUFFER(0);
+  OCL_MAP_BUFFER(1);
+  OCL_MAP_BUFFER(2);
+  for (int32_t i = 0; i < (int32_t) n; ++i) {
+    src[i] = ((double*)buf_data[0])[i] = 32.1 * (rand() & 1324135) + 1434342.73209855531;
+    ((int32_t*)buf_data[1])[i] = 0;
+    ((uint32_t*)buf_data[2])[i] = 0;
+  }
+  OCL_UNMAP_BUFFER(0);
+  OCL_UNMAP_BUFFER(1);
+  OCL_UNMAP_BUFFER(2);
+
+  // Run the kernel on GPU
+  OCL_NDRANGE(1);
+
+  // Run on CPU, skipping the same indices the kernel skips
+  for (int32_t i = 0; i < (int32_t) n; ++i) {
+    if (i%3 == 0) continue;
+    cpu_dst0[i] = (int32_t)src[i];
+    cpu_dst1[i] = (uint32_t)src[i];
+  }
+
+  // Compare
+  OCL_MAP_BUFFER(1);
+  OCL_MAP_BUFFER(2);
+  for (int32_t i = 0; i < (int32_t) n; ++i) {
+    // Skipped elements (i % 3 == 0) were zeroed on both sides, so they must compare equal too.
+    OCL_ASSERT(((int32_t*)buf_data[1])[i] == cpu_dst0[i]);
+    OCL_ASSERT(((uint32_t*)buf_data[2])[i] == cpu_dst1[i]);
+  }
+  OCL_UNMAP_BUFFER(1);
+  OCL_UNMAP_BUFFER(2);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_double_convert_int);
+// Runs the compiler_double_convert_float kernel and checks every float output
+// against the host-side (float) cast of the same double.
+void compiler_double_convert_float(void)
+{
+  const size_t n = 16;
+  double src[n];
+  float cpu_dst[n];
+
+  if (!cl_check_double())
+    return;
+
+  memset(cpu_dst, 0, sizeof(cpu_dst));
+  // Setup kernel and buffers (only buf[0] and buf[1] exist in this test)
+  OCL_CREATE_KERNEL_FROM_FILE("compiler_double_convert", "compiler_double_convert_float");
+  OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(double), NULL);
+  OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(float), NULL);
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+  globals[0] = n;
+  locals[0] = 16;
+
+  // Fill the source with random doubles and zero the destination buffer
+  OCL_MAP_BUFFER(0);
+  OCL_MAP_BUFFER(1);
+  for (int32_t i = 0; i < (int32_t) n; ++i) {
+    src[i] = ((double*)buf_data[0])[i] = 1332.1 * (rand() & 1324135) - 1434342.73209855531 * (rand() & 135);
+    ((float*)buf_data[1])[i] = 0;
+  }
+  OCL_UNMAP_BUFFER(0);
+  OCL_UNMAP_BUFFER(1);
+
+  // Run the kernel on GPU
+  OCL_NDRANGE(1);
+
+  // Run on CPU
+  for (int32_t i = 0; i < (int32_t) n; ++i) {
+    cpu_dst[i] = (float)src[i];
+  }
+
+  // Compare
+  OCL_MAP_BUFFER(1);
+  for (int32_t i = 0; i < (int32_t) n; ++i) {
+    // Exact equality is intended; it assumes device and host round double -> float identically.
+    OCL_ASSERT(((float*)buf_data[1])[i] == cpu_dst[i]);
+  }
+  OCL_UNMAP_BUFFER(1);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_double_convert_float);
+// Runs the compiler_double_convert_short kernel and checks its int16/uint16 outputs
+// against host-side C++ casts of the same doubles (elements with i % 3 == 0 stay 0).
+void compiler_double_convert_short(void)
+{
+  const size_t n = 16;
+  double src[n];
+  int16_t cpu_dst0[n];
+  uint16_t cpu_dst1[n];
+
+  if (!cl_check_double())
+    return;
+
+  memset(cpu_dst0, 0, sizeof(cpu_dst0));
+  memset(cpu_dst1, 0, sizeof(cpu_dst1));
+  // Setup kernel and buffers
+  OCL_CREATE_KERNEL_FROM_FILE("compiler_double_convert", "compiler_double_convert_short");
+  OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(double), NULL);
+  OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(int16_t), NULL);
+  OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(uint16_t), NULL);
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+  OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]);
+  globals[0] = n;
+  locals[0] = 16;
+
+  // Fill the source with random doubles and zero both destination buffers
+  OCL_MAP_BUFFER(0);
+  OCL_MAP_BUFFER(1);
+  OCL_MAP_BUFFER(2);
+  for (int32_t i = 0; i < (int32_t) n; ++i) {
+    src[i] = ((double*)buf_data[0])[i] = 10.3443 * (rand() & 15) + 14.8924323;
+    ((int16_t*)buf_data[1])[i] = 0;
+    ((uint16_t*)buf_data[2])[i] = 0;
+  }
+  OCL_UNMAP_BUFFER(0);
+  OCL_UNMAP_BUFFER(1);
+  OCL_UNMAP_BUFFER(2);
+
+  // Run the kernel on GPU
+  OCL_NDRANGE(1);
+
+  // Run on CPU, skipping the same indices the kernel skips
+  for (int32_t i = 0; i < (int32_t) n; ++i) {
+    if (i%3 == 0) continue;
+    cpu_dst0[i] = (int16_t)src[i];
+    cpu_dst1[i] = (uint16_t)src[i];
+  }
+
+  // Compare
+  OCL_MAP_BUFFER(1);
+  OCL_MAP_BUFFER(2);
+  for (int32_t i = 0; i < (int32_t) n; ++i) {
+    // Skipped elements (i % 3 == 0) were zeroed on both sides, so they must compare equal too.
+    OCL_ASSERT(((int16_t*)buf_data[1])[i] == cpu_dst0[i]);
+    OCL_ASSERT(((uint16_t*)buf_data[2])[i] == cpu_dst1[i]);
+  }
+  OCL_UNMAP_BUFFER(1);
+  OCL_UNMAP_BUFFER(2);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_double_convert_short);
+// Runs the compiler_double_convert_char kernel and checks its int8/uint8 outputs
+// against host-side C++ casts of the same doubles (elements with i % 3 == 0 stay 0).
+void compiler_double_convert_char(void)
+{
+  const size_t n = 16;
+  double src[n];
+  int8_t cpu_dst0[n];
+  uint8_t cpu_dst1[n];
+
+  if (!cl_check_double())
+    return;
+
+  memset(cpu_dst0, 0, sizeof(cpu_dst0));
+  memset(cpu_dst1, 0, sizeof(cpu_dst1));
+  // Setup kernel and buffers
+  OCL_CREATE_KERNEL_FROM_FILE("compiler_double_convert", "compiler_double_convert_char");
+  OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(double), NULL);
+  OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(int8_t), NULL);
+  OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(uint8_t), NULL);
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+  OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]);
+  globals[0] = n;
+  locals[0] = 16;
+
+  // Fill the source with random doubles and zero both destination buffers
+  OCL_MAP_BUFFER(0);
+  OCL_MAP_BUFFER(1);
+  OCL_MAP_BUFFER(2);
+  for (int32_t i = 0; i < (int32_t) n; ++i) {
+    src[i] = ((double*)buf_data[0])[i] = 10.3443 * (rand() & 7) + 2.8924323;
+    ((int8_t*)buf_data[1])[i] = 0;
+    ((uint8_t*)buf_data[2])[i] = 0;
+  }
+  OCL_UNMAP_BUFFER(0);
+  OCL_UNMAP_BUFFER(1);
+  OCL_UNMAP_BUFFER(2);
+
+  // Run the kernel on GPU
+  OCL_NDRANGE(1);
+
+  // Run on CPU, skipping the same indices the kernel skips
+  for (int32_t i = 0; i < (int32_t) n; ++i) {
+    if (i%3 == 0) continue;
+    cpu_dst0[i] = (int8_t)src[i];
+    cpu_dst1[i] = (uint8_t)src[i];
+  }
+
+  // Compare
+  OCL_MAP_BUFFER(1);
+  OCL_MAP_BUFFER(2);
+  for (int32_t i = 0; i < (int32_t) n; ++i) {
+    // Skipped elements (i % 3 == 0) were zeroed on both sides, so they must compare equal too.
+    OCL_ASSERT(((int8_t*)buf_data[1])[i] == cpu_dst0[i]);
+    OCL_ASSERT(((uint8_t*)buf_data[2])[i] == cpu_dst1[i]);
+  }
+  OCL_UNMAP_BUFFER(1);
+  OCL_UNMAP_BUFFER(2);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_double_convert_char);
+// Runs the compiler_double_convert_long kernel and checks its int64/uint64 outputs
+// against host-side C++ casts of the same doubles (elements with i % 3 == 0 stay 0).
+void compiler_double_convert_long(void)
+{
+  const size_t n = 16;
+  double src[n];
+  int64_t cpu_dst0[n];
+  uint64_t cpu_dst1[n];
+
+  if (!cl_check_double())
+    return;
+
+  memset(cpu_dst0, 0, sizeof(cpu_dst0));
+  memset(cpu_dst1, 0, sizeof(cpu_dst1));
+  // Setup kernel and buffers
+  OCL_CREATE_KERNEL_FROM_FILE("compiler_double_convert", "compiler_double_convert_long");
+  OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(double), NULL);
+  OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(int64_t), NULL);
+  OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(uint64_t), NULL);
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+  OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]);
+  globals[0] = n;
+  locals[0] = 16;
+
+  // Fill the source with random doubles and zero both destination buffers
+  OCL_MAP_BUFFER(0);
+  OCL_MAP_BUFFER(1);
+  OCL_MAP_BUFFER(2);
+  for (int32_t i = 0; i < (int32_t) n; ++i) {
+    src[i] = ((double*)buf_data[0])[i] = 10.3443 * (rand() & 7) + 2.8924323;
+    ((int64_t*)buf_data[1])[i] = 0;
+    ((uint64_t*)buf_data[2])[i] = 0;
+  }
+  OCL_UNMAP_BUFFER(0);
+  OCL_UNMAP_BUFFER(1);
+  OCL_UNMAP_BUFFER(2);
+
+  // Run the kernel on GPU
+  OCL_NDRANGE(1);
+
+  // Run on CPU, skipping the same indices the kernel skips
+  for (int32_t i = 0; i < (int32_t) n; ++i) {
+    if (i%3 == 0) continue;
+    cpu_dst0[i] = (int64_t)src[i];
+    cpu_dst1[i] = (uint64_t)src[i];
+  }
+
+  // Compare
+  OCL_MAP_BUFFER(1);
+  OCL_MAP_BUFFER(2);
+  for (int32_t i = 0; i < (int32_t) n; ++i) {
+    // Skipped elements (i % 3 == 0) were zeroed on both sides, so they must compare equal too.
+    OCL_ASSERT(((int64_t*)buf_data[1])[i] == cpu_dst0[i]);
+    OCL_ASSERT(((uint64_t*)buf_data[2])[i] == cpu_dst1[i]);
+  }
+  OCL_UNMAP_BUFFER(1);
+  OCL_UNMAP_BUFFER(2);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_double_convert_long);
+// Runs the compiler_long_convert_double kernel and checks int64/uint64 -> double
+// results against host-side casts. The sources can exceed 2^53, where not every
+// integer is representable as a double, so the exact comparison also covers rounding.
+void compiler_long_convert_double(void)
+{
+  const size_t n = 16;
+  int64_t src0[n];
+  uint64_t src1[n];
+  double cpu_dst0[n];
+  double cpu_dst1[n];
+
+  if (!cl_check_double())
+    return;
+
+  memset(cpu_dst0, 0, sizeof(cpu_dst0));
+  memset(cpu_dst1, 0, sizeof(cpu_dst1));
+
+  // Setup kernel and buffers
+  OCL_CREATE_KERNEL_FROM_FILE("compiler_double_convert", "compiler_long_convert_double");
+  OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(int64_t), NULL);
+  OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint64_t), NULL);
+  OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(double), NULL);
+  OCL_CREATE_BUFFER(buf[3], 0, n * sizeof(double), NULL);
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+  OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]);
+  OCL_SET_ARG(3, sizeof(cl_mem), &buf[3]);
+  globals[0] = n;
+  locals[0] = 16;
+
+  // Fill both sources with random multiples of a large constant and zero the destinations
+  OCL_MAP_BUFFER(0);
+  OCL_MAP_BUFFER(1);
+  OCL_MAP_BUFFER(2);
+  OCL_MAP_BUFFER(3);
+  for (int32_t i = 0; i < (int32_t) n; ++i) {
+    src0[i] = ((int64_t*)buf_data[0])[i] = 0xABC8ABACDA00C * (rand() & 7);
+    src1[i] = ((uint64_t*)buf_data[1])[i] = 0xCABC8ABACDA00C * (rand() & 15);
+    ((double*)buf_data[2])[i] = 0.0;
+    ((double*)buf_data[3])[i] = 0.0;
+  }
+  OCL_UNMAP_BUFFER(0);
+  OCL_UNMAP_BUFFER(1);
+  OCL_UNMAP_BUFFER(2);
+  OCL_UNMAP_BUFFER(3);
+
+  // Run the kernel on GPU
+  OCL_NDRANGE(1);
+
+  // Run on CPU
+  for (int32_t i = 0; i < (int32_t) n; ++i) {
+    cpu_dst0[i] = (double)src0[i];
+    cpu_dst1[i] = (double)src1[i];
+  }
+
+  // Compare
+  OCL_MAP_BUFFER(2);
+  OCL_MAP_BUFFER(3);
+  for (int32_t i = 0; i < (int32_t) n; ++i) {
+    // Exact equality is intended; it assumes device and host use the same
+    // rounding when an integer is not exactly representable as a double.
+    OCL_ASSERT(((double*)buf_data[2])[i] == cpu_dst0[i]);
+    OCL_ASSERT(((double*)buf_data[3])[i] == cpu_dst1[i]);
+  }
+  OCL_UNMAP_BUFFER(2);
+  OCL_UNMAP_BUFFER(3);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_long_convert_double);
+// Runs the compiler_int_convert_double kernel and checks int32/uint32 -> double
+// results against host-side casts. Every 32-bit integer is exactly representable
+// as a double, so the comparison is exact.
+void compiler_int_convert_double(void)
+{
+  const size_t n = 16;
+  int32_t src0[n];
+  uint32_t src1[n];
+  double cpu_dst0[n];
+  double cpu_dst1[n];
+
+  if (!cl_check_double())
+    return;
+
+  memset(cpu_dst0, 0, sizeof(cpu_dst0));
+  memset(cpu_dst1, 0, sizeof(cpu_dst1));
+
+  // Setup kernel and buffers
+  OCL_CREATE_KERNEL_FROM_FILE("compiler_double_convert", "compiler_int_convert_double");
+  OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(int32_t), NULL);
+  OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint32_t), NULL);
+  OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(double), NULL);
+  OCL_CREATE_BUFFER(buf[3], 0, n * sizeof(double), NULL);
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+  OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]);
+  OCL_SET_ARG(3, sizeof(cl_mem), &buf[3]);
+  globals[0] = n;
+  locals[0] = 16;
+
+  // Fill both sources with random multiples of a large constant and zero the destinations
+  OCL_MAP_BUFFER(0);
+  OCL_MAP_BUFFER(1);
+  OCL_MAP_BUFFER(2);
+  OCL_MAP_BUFFER(3);
+  for (int32_t i = 0; i < (int32_t) n; ++i) {
+    src0[i] = ((int32_t*)buf_data[0])[i] = 0xCABC8A0C * (rand() & 7);
+    src1[i] = ((uint32_t*)buf_data[1])[i] = 0xCACDA00C * (rand() & 15);
+    ((double*)buf_data[2])[i] = 0.0;
+    ((double*)buf_data[3])[i] = 0.0;
+  }
+  OCL_UNMAP_BUFFER(0);
+  OCL_UNMAP_BUFFER(1);
+  OCL_UNMAP_BUFFER(2);
+  OCL_UNMAP_BUFFER(3);
+
+  // Run the kernel on GPU
+  OCL_NDRANGE(1);
+
+  // Run on CPU
+  for (int32_t i = 0; i < (int32_t) n; ++i) {
+    cpu_dst0[i] = (double)src0[i];
+    cpu_dst1[i] = (double)src1[i];
+  }
+
+  // Compare
+  OCL_MAP_BUFFER(2);
+  OCL_MAP_BUFFER(3);
+  for (int32_t i = 0; i < (int32_t) n; ++i) {
+    // buf[2] holds the signed results and buf[3] the unsigned ones,
+    // matching the kernel's dst0/dst1 arguments.
+    OCL_ASSERT(((double*)buf_data[2])[i] == cpu_dst0[i]);
+    OCL_ASSERT(((double*)buf_data[3])[i] == cpu_dst1[i]);
+  }
+  OCL_UNMAP_BUFFER(2);
+  OCL_UNMAP_BUFFER(3);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_int_convert_double);
+// Runs the compiler_short_convert_double kernel and checks int16/uint16 -> double
+// results against host-side casts. Every 16-bit integer is exactly representable
+// as a double, so the comparison is exact.
+void compiler_short_convert_double(void)
+{
+  const size_t n = 16;
+  int16_t src0[n];
+  uint16_t src1[n];
+  double cpu_dst0[n];
+  double cpu_dst1[n];
+
+  if (!cl_check_double())
+    return;
+
+  memset(cpu_dst0, 0, sizeof(cpu_dst0));
+  memset(cpu_dst1, 0, sizeof(cpu_dst1));
+
+  // Setup kernel and buffers
+  OCL_CREATE_KERNEL_FROM_FILE("compiler_double_convert", "compiler_short_convert_double");
+  OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(int16_t), NULL);
+  OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint16_t), NULL);
+  OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(double), NULL);
+  OCL_CREATE_BUFFER(buf[3], 0, n * sizeof(double), NULL);
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+  OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]);
+  OCL_SET_ARG(3, sizeof(cl_mem), &buf[3]);
+  globals[0] = n;
+  locals[0] = 16;
+
+  // Fill both sources (the int products are narrowed to 16 bits on store) and zero the destinations
+  OCL_MAP_BUFFER(0);
+  OCL_MAP_BUFFER(1);
+  OCL_MAP_BUFFER(2);
+  OCL_MAP_BUFFER(3);
+  for (int32_t i = 0; i < (int32_t) n; ++i) {
+    src0[i] = ((int16_t*)buf_data[0])[i] = 0x8A0C * (rand() & 7);
+    src1[i] = ((uint16_t*)buf_data[1])[i] = 0xC00C * (rand() & 15);
+    ((double*)buf_data[2])[i] = 0.0;
+    ((double*)buf_data[3])[i] = 0.0;
+  }
+  OCL_UNMAP_BUFFER(0);
+  OCL_UNMAP_BUFFER(1);
+  OCL_UNMAP_BUFFER(2);
+  OCL_UNMAP_BUFFER(3);
+
+  // Run the kernel on GPU
+  OCL_NDRANGE(1);
+
+  // Run on CPU
+  for (int32_t i = 0; i < (int32_t) n; ++i) {
+    cpu_dst0[i] = (double)src0[i];
+    cpu_dst1[i] = (double)src1[i];
+  }
+
+  // Compare
+  OCL_MAP_BUFFER(2);
+  OCL_MAP_BUFFER(3);
+  for (int32_t i = 0; i < (int32_t) n; ++i) {
+    // buf[2] holds the signed results and buf[3] the unsigned ones,
+    // matching the kernel's dst0/dst1 arguments.
+    OCL_ASSERT(((double*)buf_data[2])[i] == cpu_dst0[i]);
+    OCL_ASSERT(((double*)buf_data[3])[i] == cpu_dst1[i]);
+  }
+  OCL_UNMAP_BUFFER(2);
+  OCL_UNMAP_BUFFER(3);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_short_convert_double);
+// Runs the compiler_char_convert_double kernel and checks int8/uint8 -> double
+// results against host-side casts. Every 8-bit integer is exactly representable
+// as a double, so the comparison is exact.
+void compiler_char_convert_double(void)
+{
+  const size_t n = 16;
+  int8_t src0[n];
+  uint8_t src1[n];
+  double cpu_dst0[n];
+  double cpu_dst1[n];
+
+  if (!cl_check_double())
+    return;
+
+  memset(cpu_dst0, 0, sizeof(cpu_dst0));
+  memset(cpu_dst1, 0, sizeof(cpu_dst1));
+
+  // Setup kernel and buffers
+  OCL_CREATE_KERNEL_FROM_FILE("compiler_double_convert", "compiler_char_convert_double");
+  OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(int8_t), NULL);
+  OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint8_t), NULL);
+  OCL_CREATE_BUFFER(buf[2], 0, n * sizeof(double), NULL);
+  OCL_CREATE_BUFFER(buf[3], 0, n * sizeof(double), NULL);
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+  OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]);
+  OCL_SET_ARG(3, sizeof(cl_mem), &buf[3]);
+  globals[0] = n;
+  locals[0] = 16;
+
+  // Fill both sources (the int products are narrowed to 8 bits on store) and zero the destinations
+  OCL_MAP_BUFFER(0);
+  OCL_MAP_BUFFER(1);
+  OCL_MAP_BUFFER(2);
+  OCL_MAP_BUFFER(3);
+  for (int32_t i = 0; i < (int32_t) n; ++i) {
+    src0[i] = ((int8_t*)buf_data[0])[i] = 0x8C * (rand() & 7);
+    src1[i] = ((uint8_t*)buf_data[1])[i] = 0xC0 * (rand() & 15);
+    ((double*)buf_data[2])[i] = 0.0;
+    ((double*)buf_data[3])[i] = 0.0;
+  }
+  OCL_UNMAP_BUFFER(0);
+  OCL_UNMAP_BUFFER(1);
+  OCL_UNMAP_BUFFER(2);
+  OCL_UNMAP_BUFFER(3);
+
+  // Run the kernel on GPU
+  OCL_NDRANGE(1);
+
+  // Run on CPU
+  for (int32_t i = 0; i < (int32_t) n; ++i) {
+    cpu_dst0[i] = (double)src0[i];
+    cpu_dst1[i] = (double)src1[i];
+  }
+
+  // Compare
+  OCL_MAP_BUFFER(2);
+  OCL_MAP_BUFFER(3);
+  for (int32_t i = 0; i < (int32_t) n; ++i) {
+    // buf[2] holds the signed results and buf[3] the unsigned ones,
+    // matching the kernel's dst0/dst1 arguments.
+    OCL_ASSERT(((double*)buf_data[2])[i] == cpu_dst0[i]);
+    OCL_ASSERT(((double*)buf_data[3])[i] == cpu_dst1[i]);
+  }
+  OCL_UNMAP_BUFFER(2);
+  OCL_UNMAP_BUFFER(3);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_char_convert_double);
+// Runs the compiler_float_convert_double kernel and checks every double output
+// against the host-side (double) cast of the same float.
+void compiler_float_convert_double(void)
+{
+  const size_t n = 16;
+  float src[n];
+  double cpu_dst[n];
+
+  if (!cl_check_double())
+    return;
+
+  memset(cpu_dst, 0, sizeof(cpu_dst));
+
+  // Setup kernel and buffers
+  OCL_CREATE_KERNEL_FROM_FILE("compiler_double_convert", "compiler_float_convert_double");
+  OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(float), NULL);
+  OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(double), NULL);
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+  globals[0] = n;
+  locals[0] = 16;
+
+  // Fill the source with random floats and zero the destination buffer
+  OCL_MAP_BUFFER(0);
+  OCL_MAP_BUFFER(1);
+  for (int32_t i = 0; i < (int32_t) n; ++i) {
+    src[i] = ((float*)buf_data[0])[i] = (float)(0x8C * (rand() & 7)) * 1342.42f;
+    ((double*)buf_data[1])[i] = 0.0;
+  }
+  OCL_UNMAP_BUFFER(0);
+  OCL_UNMAP_BUFFER(1);
+
+  // Run the kernel on GPU
+  OCL_NDRANGE(1);
+
+  // Run on CPU
+  for (int32_t i = 0; i < (int32_t) n; ++i) {
+    cpu_dst[i] = (double)src[i];
+  }
+
+  // Compare
+  OCL_MAP_BUFFER(1);
+  for (int32_t i = 0; i < (int32_t) n; ++i) {
+    // float -> double widening is exact, so assert equality instead of only printing the values.
+    OCL_ASSERT(((double*)buf_data[1])[i] == cpu_dst[i]);
+  }
+  OCL_UNMAP_BUFFER(1);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_float_convert_double);
diff --git a/utests/compiler_half.cpp b/utests/compiler_half.cpp
index e8ed286..163573f 100644
--- a/utests/compiler_half.cpp
+++ b/utests/compiler_half.cpp
@@ -922,3 +922,105 @@ void compiler_half_to_long_sat(void)
   OCL_UNMAP_BUFFER(1);
 }
 MAKE_UTEST_FROM_FUNCTION(compiler_half_to_long_sat);
+// Feeds 16 half values to the compiler_half_to_double kernel and compares each double result with the float the half was made from.
+void compiler_half_to_double(void)
+{
+  const size_t n = 16;
+  uint16_t hsrc[n];
+  double ddst[n];
+  uint32_t tmp_f;
+  float f;
+
+//  NOTE(review): the check_half_device() guard is commented out here -- confirm
+//  that this is intended. Disabled code was: if (!check_half_device()) return;
+  if (!cl_check_double())
+    return;
+
+  // Setup kernel and buffers
+  OCL_CREATE_KERNEL_FROM_FILE("compiler_half_convert", "compiler_half_to_double");
+  OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint16_t), NULL);
+  OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(double), NULL);
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+  globals[0] = n;
+  locals[0] = 16;
+
+  for (int32_t i = 0; i < (int32_t) n; ++i) {
+    f = -100.1f + 10.3f * i;
+    memcpy(&tmp_f, &f, sizeof(float));  // copy the float's bytes into a uint32_t
+    hsrc[i] = __float_to_half(tmp_f);  // presumably returns the half bit pattern -- TODO confirm
+    ddst[i] = (double)f;  // reference is the float before it was converted to half
+  }
+
+  OCL_MAP_BUFFER(0);
+  OCL_MAP_BUFFER(1);
+  memcpy(buf_data[0], hsrc, sizeof(hsrc));
+  memset(buf_data[1], 0, n*sizeof(double));
+  OCL_UNMAP_BUFFER(0);
+  OCL_UNMAP_BUFFER(1);
+
+  // Run the kernel on GPU
+  OCL_NDRANGE(1);
+
+  // Compare
+  OCL_MAP_BUFFER(1);
+  for (int32_t i = 0; i < (int32_t) n; ++i) {
+    double dd = ((double *)(buf_data[1]))[i];
+    // The reference never went through half, so allow a 0.1% relative error instead of equality.
+    OCL_ASSERT(fabs(dd - ddst[i]) < 0.001f * fabs(ddst[i]));
+  }
+  OCL_UNMAP_BUFFER(1);
+}
+MAKE_UTEST_FROM_FUNCTION(compiler_half_to_double);
+// Feeds 16 doubles to the compiler_double_to_half kernel and compares each half result with the host-side __float_to_half() value.
+void compiler_double_to_half(void)
+{
+  const size_t n = 16;
+  uint16_t hdst[n];
+  double src[n];
+  uint32_t tmp_f;
+  float f;
+
+//  NOTE(review): the check_half_device() guard is commented out here -- confirm
+//  that this is intended. Disabled code was: if (!check_half_device()) return;
+  if (!cl_check_double())
+    return;
+
+  // Setup kernel and buffers
+  OCL_CREATE_KERNEL_FROM_FILE("compiler_half_convert", "compiler_double_to_half");
+  OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(double), NULL);
+  OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint16_t), NULL);
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+  globals[0] = n;
+  locals[0] = 16;
+
+  for (int32_t i = 0; i < (int32_t) n; ++i) {
+    f = -100.1f + 10.3f * i;
+    src[i] = (double)f;  // the device input is the float widened to double
+    memcpy(&tmp_f, &f, sizeof(float));  // copy the float's bytes into a uint32_t
+    hdst[i] = __float_to_half(tmp_f);  // presumably returns the half bit pattern -- TODO confirm
+  }
+
+  OCL_MAP_BUFFER(0);
+  OCL_MAP_BUFFER(1);
+  memcpy(buf_data[0], src, sizeof(src));
+  memset(buf_data[1], 0, n*sizeof(uint16_t));
+  OCL_UNMAP_BUFFER(0);
+  OCL_UNMAP_BUFFER(1);
+
+  // Run the kernel on GPU
+  OCL_NDRANGE(1);
+
+  // Compare
+  OCL_MAP_BUFFER(1);
+  for (int32_t i = 0; i < (int32_t) n; ++i) {
+    uint16_t hf = ((uint16_t *)(buf_data[1]))[i];
+    // The raw 16-bit patterns are compared, so the device result has to match
+    // the host helper bit-for-bit.
+    // NOTE(review): assumes device rounding equals __float_to_half() -- confirm.
+    OCL_ASSERT(hf == hdst[i]);
+  }
+  OCL_UNMAP_BUFFER(1);
+}
+MAKE_UTEST_FROM_FUNCTION(compiler_double_to_half);
-- 
1.9.1





More information about the Beignet mailing list