[Beignet] [PATCH] utests: add a utest for int8-to-double conversion. The check is very simple: for convert_double_rte|rtz|rtp|rtn(int8 x), every input in -128 ~ 127 (char) or 0 ~ 255 (uchar) should give the same result in all four rounding modes, equal to the input value.

Thu Mar 16 08:04:57 UTC 2017

Signed-off-by: rander <rander.wang at intel.com>
---
 backend/src/libocl/script/ocl_convert.sh | 72 +++++++++++++++++++++++++++
 kernels/builtin_convert_int8toDouble.cl  | 36 ++++++++++++++
 utests/CMakeLists.txt                    |  3 +-
 utests/builtin_convert_int8toDouble.cpp  | 85 ++++++++++++++++++++++++++++++++
 4 files changed, 195 insertions(+), 1 deletion(-)
 create mode 100644 kernels/builtin_convert_int8toDouble.cl
 create mode 100644 utests/builtin_convert_int8toDouble.cpp

diff --git a/backend/src/libocl/script/ocl_convert.sh b/backend/src/libocl/script/ocl_convert.sh
index 3ef283b..ed9abeb 100755
--- a/backend/src/libocl/script/ocl_convert.sh
+++ b/backend/src/libocl/script/ocl_convert.sh
@@ -1033,6 +1033,78 @@ for vector_length in $VECTOR_LENGTHS; do
     done
 done
 
+# Emit convert_double_<rounding mode>() overloads for the 32-, 16- and 8-bit integer source types (int32, int16, int8).
+ITYPES=" int:4 uint:4 short:2 ushort:2 char:1 uchar:1"
+for vector_length in $VECTOR_LENGTHS; do
+    for ftype in $ITYPES; do
+	fbasetype=`IFS=:; set -- dummy $ftype; echo $2`  # type name: the field before the ':'
+	# Scalar source: every rounding variant is declared, or defined as a plain convert_double() call.
+	    if test $vector_length -eq 1; then
+		if [ $1"a" = "-pa" ]; then  # first argument is -p: emit prototypes only
+		    echo "OVERLOADABLE double convert_double_rte($fbasetype x);"
+		    echo "OVERLOADABLE double convert_double_rtz($fbasetype x);"
+		    echo "OVERLOADABLE double convert_double_rtp($fbasetype x);"
+		    echo "OVERLOADABLE double convert_double_rtn($fbasetype x);"
+		else
+		    echo "OVERLOADABLE double convert_double_rte($fbasetype x)"
+			echo "{ return convert_double(x); }"
+
+		    echo "OVERLOADABLE double convert_double_rtz($fbasetype x)"
+			echo "{ return convert_double(x); }"
+
+		    echo "OVERLOADABLE double convert_double_rtp($fbasetype x)"
+			echo "{ return convert_double(x); }"
+
+		    echo "OVERLOADABLE double convert_double_rtn($fbasetype x)"
+			echo "{ return convert_double(x); }"
+		fi
+		continue  # next source type; the vector code below is skipped
+	    fi
+	    # Vector source: convert each component with the scalar function of the same rounding mode.
+	    for rounding in $ROUNDING_MODES; do
+		fvectortype=$fbasetype$vector_length
+		tvectortype=double$vector_length
+		conv="convert_double_${rounding}"
+
+		construct="$conv(v.s0)"  # comma-separated argument list, grown one component at a time
+		if test $vector_length -gt 1; then
+		    construct="$construct, $conv(v.s1)"
+		fi
+		if test $vector_length -gt 2; then
+		    construct="$construct, $conv(v.s2)"
+		fi
+		if test $vector_length -gt 3; then
+		    construct="$construct, $conv(v.s3)"
+		fi
+		if test $vector_length -gt 4; then  # components s4..s7
+		    construct="$construct, $conv(v.s4)"
+		    construct="$construct, $conv(v.s5)"
+		    construct="$construct, $conv(v.s6)"
+		    construct="$construct, $conv(v.s7)"
+		fi
+		if test $vector_length -gt 8; then  # components s8..sF
+		    construct="$construct, $conv(v.s8)"
+		    construct="$construct, $conv(v.s9)"
+		    construct="$construct, $conv(v.sA)"
+		    construct="$construct, $conv(v.sB)"
+		    construct="$construct, $conv(v.sC)"
+		    construct="$construct, $conv(v.sD)"
+		    construct="$construct, $conv(v.sE)"
+		    construct="$construct, $conv(v.sF)"
+		fi
+
+		if [ $1"a" = "-pa" ]; then  # first argument is -p: emit prototypes only
+		    echo "OVERLOADABLE $tvectortype convert_${tvectortype}_${rounding}($fvectortype v);"
+		else
+		    echo "OVERLOADABLE $tvectortype convert_${tvectortype}_${rounding}($fvectortype v) {"
+		    echo "  return ($tvectortype)($construct);"
+		    echo "}"
+		    echo
+		fi
+	done
+    done
+done
+
 if [ $1"a" = "-pa" ]; then
     echo "#endif /* __OCL_CONVERT_H__ */"
 fi
diff --git a/kernels/builtin_convert_int8toDouble.cl b/kernels/builtin_convert_int8toDouble.cl
new file mode 100644
index 0000000..4fce238
--- /dev/null
+++ b/kernels/builtin_convert_int8toDouble.cl
@@ -0,0 +1,36 @@
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+/*
+ * Converts the first max_input elements of X (char) and uX (uchar) to double
+ * with each explicit rounding mode.  Results are written back to back into Z,
+ * starting at this work-item's global id, one max_input-sized slice per call:
+ * X with rtz, rtn, rte, rtp, then uX with rtz, rtn, rte, rtp.
+ */
+__kernel void builtin_convert_int8toDouble(__global char *X,
+                                           __global uchar *uX,
+                                           __global double *Z,
+                                           int max_input)
+{
+	__global double *out = Z + (int)get_global_id(0);
+
+	/* char inputs */
+	for (int n = 0; n < max_input; ++n)
+		*out++ = convert_double_rtz(X[n]);
+	for (int n = 0; n < max_input; ++n)
+		*out++ = convert_double_rtn(X[n]);
+	for (int n = 0; n < max_input; ++n)
+		*out++ = convert_double_rte(X[n]);
+	for (int n = 0; n < max_input; ++n)
+		*out++ = convert_double_rtp(X[n]);
+
+	/* uchar inputs */
+	for (int n = 0; n < max_input; ++n)
+		*out++ = convert_double_rtz(uX[n]);
+	for (int n = 0; n < max_input; ++n)
+		*out++ = convert_double_rtn(uX[n]);
+	for (int n = 0; n < max_input; ++n)
+		*out++ = convert_double_rte(uX[n]);
+	for (int n = 0; n < max_input; ++n)
+		*out++ = convert_double_rtp(uX[n]);
+}
+
diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
index 299831a..8f006c7 100644
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -303,7 +303,8 @@ set (utests_sources
   builtin_convert_double2int8.cpp
   builtin_convert_double2int16.cpp
   builtin_convert_double2int32.cpp
-  builtin_convert_double2int64.cpp)
+  builtin_convert_double2int64.cpp
+  builtin_convert_int8toDouble.cpp)
 
 if (LLVM_VERSION_NODOT VERSION_GREATER 34)
   SET(utests_sources
diff --git a/utests/builtin_convert_int8toDouble.cpp b/utests/builtin_convert_int8toDouble.cpp
new file mode 100644
index 0000000..7dc4dc1
--- /dev/null
+++ b/utests/builtin_convert_int8toDouble.cpp
@@ -0,0 +1,85 @@
+#include "utest_helper.hpp"
+#include <cmath>
+#include <algorithm>
+
+namespace{
+
+// Built-in exercised by each count_input-sized slice of the kernel output, in
+// the order the kernel writes them; used only to label failure messages.
+const char*  testFunc[] =
+{
+    " double convert_double_rtz(char x)",
+    " double convert_double_rtn(char x)",
+    " double convert_double_rte(char x)",
+    " double convert_double_rtp(char x)",
+
+    " double convert_double_rtz(uchar x)",
+    " double convert_double_rtn(uchar x)",
+    " double convert_double_rte(uchar x)",
+    " double convert_double_rtp(uchar x)",
+};
+
+const int count_input = 256;  // one input per 8-bit value
+const int max_function = 8;   // number of entries in testFunc
+
+// Runs the kernel over every char and uchar value and checks that each rounding
+// mode returns exactly the input; all mismatches are printed before failing.
+static void builtin_convert_int8toDouble(void)
+{
+  const int half = count_input * max_function / 2;  // results per input type
+  double gpu_data[max_function * count_input] = {0};
+  signed char signed_input[count_input];
+  unsigned char unsigned_input[count_input];
+  bool passed = true;
+
+  // Setup kernel and buffers
+  OCL_CREATE_KERNEL("builtin_convert_int8toDouble");
+
+  OCL_CREATE_BUFFER(buf[0], CL_MEM_READ_WRITE, count_input * sizeof(char), NULL);
+  OCL_CREATE_BUFFER(buf[1], CL_MEM_READ_WRITE, count_input * sizeof(char), NULL);
+  OCL_CREATE_BUFFER(buf[2], CL_MEM_READ_WRITE, count_input * max_function * sizeof(double), NULL);
+
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+  OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]);
+  OCL_SET_ARG(3, sizeof(int), &count_input);
+
+  globals[0] = 1;
+  locals[0] = 1;
+
+  // buf[0] receives -128..127 as char, buf[1] receives 0..255 as uchar.
+  for (int i = 0; i < count_input; i++) {
+    signed_input[i] = (signed char)(i - 128);
+    unsigned_input[i] = (unsigned char)i;
+  }
+  OCL_ASSERT(clEnqueueWriteBuffer(queue, buf[0], CL_TRUE, 0, sizeof(signed_input), signed_input, 0, NULL, NULL) == CL_SUCCESS);
+  OCL_ASSERT(clEnqueueWriteBuffer(queue, buf[1], CL_TRUE, 0, sizeof(unsigned_input), unsigned_input, 0, NULL, NULL) == CL_SUCCESS);
+
+  // Run the kernel
+  OCL_NDRANGE( 1 );
+
+  OCL_ASSERT(clEnqueueReadBuffer(queue, buf[2], CL_TRUE, 0, sizeof(gpu_data), gpu_data, 0, NULL, NULL) == CL_SUCCESS);
+
+  // First half of the output: the four char conversions.
+  for (int k = 0; k < half; k++) {
+    const int expect = k % count_input - 128;
+    if (gpu_data[k] != (double)expect) {
+      printf("failed at function:%s, index:%d  expect value: %d, but get :%lf \n", testFunc[k/count_input], k%count_input, expect, gpu_data[k]);
+      passed = false;
+    }
+  }
+
+  // Second half of the output: the four uchar conversions.
+  for (int k = 0; k < half; k++) {
+    const int expect = k % count_input;
+    if (gpu_data[half + k] != (double)expect) {
+      printf("failed at function:%s, index:%d expect value: %d, but get :%lf \n", testFunc[k/count_input + max_function/2], k%count_input, expect, gpu_data[half + k]);
+      passed = false;
+    }
+  }
+
+  OCL_ASSERT(passed);
+}
+
+MAKE_UTEST_FROM_FUNCTION(builtin_convert_int8toDouble)
+}
-- 
2.7.4