[Beignet] [PATCH 2/2] Utest: Add test cases for half geometric functions.
junyan.he at inbox.com
junyan.he at inbox.com
Mon Jul 6 03:26:20 PDT 2015
From: Junyan He <junyan.he at linux.intel.com>
Signed-off-by: Junyan He <junyan.he at linux.intel.com>
---
kernels/compiler_half_geometry.cl | 12 ++++
utests/compiler_half.cpp | 124 +++++++++++++++++++++++++++++++++++++-
2 files changed, 135 insertions(+), 1 deletion(-)
create mode 100644 kernels/compiler_half_geometry.cl
diff --git a/kernels/compiler_half_geometry.cl b/kernels/compiler_half_geometry.cl
new file mode 100644
index 0000000..687ad11
--- /dev/null
+++ b/kernels/compiler_half_geometry.cl
@@ -0,0 +1,12 @@
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+kernel void compiler_half_dot(global half4 *src0, global half4 *src1, global half *dst) {
+  /* Each work-item reduces one pair of half4 vectors to a single half via dot(). */
+  const int gid = get_global_id(0);
+  dst[gid] = dot(src0[gid], src1[gid]);
+}
+
+kernel void compiler_half_length(global half4 *src, global half *dst) {
+  /* Each work-item stores the length() of one half4 vector as a single half. */
+  const int gid = get_global_id(0);
+  dst[gid] = length(src[gid]);
+}
diff --git a/utests/compiler_half.cpp b/utests/compiler_half.cpp
index ce0f7da..63f1483 100644
--- a/utests/compiler_half.cpp
+++ b/utests/compiler_half.cpp
@@ -172,7 +172,7 @@ void compiler_half_basic(void)
for (int32_t i = 0; i < (int32_t) n; ++i) {
tmp_f = __half_to_float(((uint16_t *)buf_data[1])[i]);
memcpy(&f, &tmp_f, sizeof(float));
- printf("%f %f\n", f, fdst[i]);
+ //printf("%f %f\n", f, fdst[i]);
OCL_ASSERT(fabs(f - fdst[i]) <= 0.01 * fabs(fdst[i]) || (fdst[i] == 0.0 && f == 0.0));
}
OCL_UNMAP_BUFFER(1);
@@ -922,3 +922,125 @@ void compiler_half_to_long_sat(void)
OCL_UNMAP_BUFFER(1);
}
MAKE_UTEST_FROM_FUNCTION(compiler_half_to_long_sat);
+
+void compiler_half_dot(void) // utest: GPU half4 dot() checked against a float reference built on the host
+{
+  const size_t n = 16*4; // half scalars held by each source buffer
+  const size_t m = 16;   // half4 vectors per source buffer == number of results == work-items
+  uint16_t hsrc0[n], hsrc1[n];
+  float fsrc0[n], fsrc1[n], fdst[m];
+  uint32_t tmp_f;
+  float f;
+
+  if (!check_half_device()) // nothing is run when check_half_device() reports false
+    return;
+
+  // Setup kernel and buffers: two sources of n halves each, one destination of m halves
+  OCL_CREATE_KERNEL_FROM_FILE("compiler_half_geometry", "compiler_half_dot");
+  OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint16_t), NULL);
+  OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint16_t), NULL);
+  OCL_CREATE_BUFFER(buf[2], 0, m * sizeof(uint16_t), NULL);
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+  OCL_SET_ARG(2, sizeof(cl_mem), &buf[2]);
+  globals[0] = m; // kernel indexes half4 elements: launching n items would read/write past all three buffers
+  locals[0] = 16;
+
+  for (int32_t i = 0; i < (int32_t) n; ++i) { // fill both sources, keeping float and half copies
+    fsrc0[i] = 3.2 * i;
+    memcpy(&tmp_f, &fsrc0[i], sizeof(float)); // __float_to_half() is handed the raw float bits
+    hsrc0[i] = __float_to_half(tmp_f);
+    fsrc1[i] = 0.52 * i;
+    memcpy(&tmp_f, &fsrc1[i], sizeof(float));
+    hsrc1[i] = __float_to_half(tmp_f);
+  }
+
+  for (int32_t i = 0; i < (int32_t) m; ++i) { // host reference: 4-component dot product in float
+    fdst[i] = fsrc0[i*4] * fsrc1[i*4] +
+              fsrc0[i*4 + 1] * fsrc1[i*4 + 1] +
+              fsrc0[i*4 + 2] * fsrc1[i*4 + 2] +
+              fsrc0[i*4 + 3] * fsrc1[i*4 + 3];
+  }
+
+  OCL_MAP_BUFFER(0);
+  OCL_MAP_BUFFER(1);
+  OCL_MAP_BUFFER(2);
+  memcpy(buf_data[0], hsrc0, sizeof(hsrc0));
+  memcpy(buf_data[1], hsrc1, sizeof(hsrc1));
+  memset(buf_data[2], 0, m * sizeof(uint16_t)); // clear the destination before the kernel runs
+  OCL_UNMAP_BUFFER(0);
+  OCL_UNMAP_BUFFER(1);
+  OCL_UNMAP_BUFFER(2);
+
+  // Run the kernel on GPU
+  OCL_NDRANGE(1);
+
+  // Compare every half result, widened back to float, with the host reference
+  OCL_MAP_BUFFER(2);
+  for (int32_t i = 0; i < (int32_t) m; ++i) {
+    tmp_f = __half_to_float(((uint16_t *)buf_data[2])[i]);
+    memcpy(&f, &tmp_f, sizeof(float)); // reinterpret the returned bits as a float
+    // Accept a 1% relative error against the float reference.
+    OCL_ASSERT(fabs(f - fdst[i]) <= 0.01 * fabs(fdst[i]) || (fdst[i] == 0.0 && f == 0.0));
+  }
+  OCL_UNMAP_BUFFER(2);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_half_dot);
+
+void compiler_half_length(void) // utest: GPU half4 length() checked against a float reference built on the host
+{
+  const size_t n = 16*4; // half scalars held by the source buffer
+  const size_t m = 16;   // half4 vectors in the source == number of results == work-items
+  uint16_t hsrc[n];
+  float fsrc[n], fdst[m];
+  uint32_t tmp_f;
+  float f;
+
+  if (!check_half_device()) // nothing is run when check_half_device() reports false
+    return;
+
+  // Setup kernel and buffers: one source of n halves, one destination of m halves
+  OCL_CREATE_KERNEL_FROM_FILE("compiler_half_geometry", "compiler_half_length");
+  OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint16_t), NULL);
+  OCL_CREATE_BUFFER(buf[1], 0, m * sizeof(uint16_t), NULL);
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+  globals[0] = m; // kernel indexes half4 elements: launching n items would read/write past both buffers
+  locals[0] = 16;
+
+  for (int32_t i = 0; i < (int32_t) n; ++i) { // fill the source, keeping float and half copies
+    fsrc[i] = 3.2 * i;
+    memcpy(&tmp_f, &fsrc[i], sizeof(float)); // __float_to_half() is handed the raw float bits
+    hsrc[i] = __float_to_half(tmp_f);
+  }
+
+  for (int32_t i = 0; i < (int32_t) m; ++i) { // host reference: Euclidean norm of each 4-vector in float
+    fdst[i] = sqrtf(fsrc[i*4] * fsrc[i*4] +
+                    fsrc[i*4 + 1] * fsrc[i*4 + 1] +
+                    fsrc[i*4 + 2] * fsrc[i*4 + 2] +
+                    fsrc[i*4 + 3] * fsrc[i*4 + 3]);
+  }
+
+  OCL_MAP_BUFFER(0);
+  OCL_MAP_BUFFER(1);
+  memcpy(buf_data[0], hsrc, sizeof(hsrc));
+  memset(buf_data[1], 0, m * sizeof(uint16_t)); // clear the destination before the kernel runs
+  OCL_UNMAP_BUFFER(0);
+  OCL_UNMAP_BUFFER(1);
+
+  // Run the kernel on GPU
+  OCL_NDRANGE(1);
+
+  // Compare every half result, widened back to float, with the host reference
+  OCL_MAP_BUFFER(1);
+  for (int32_t i = 0; i < (int32_t) m; ++i) {
+    tmp_f = __half_to_float(((uint16_t *)buf_data[1])[i]);
+    memcpy(&f, &tmp_f, sizeof(float)); // reinterpret the returned bits as a float
+    // Accept a 1% relative error against the float reference.
+    OCL_ASSERT(fabs(f - fdst[i]) <= 0.01 * fabs(fdst[i]) || (fdst[i] == 0.0 && f == 0.0));
+  }
+  OCL_UNMAP_BUFFER(1);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_half_length);
--
1.9.1
More information about the Beignet
mailing list