[Beignet] [PATCH 25/27] Add test case for long mul_sat and mul_hi
junyan.he at inbox.com
junyan.he at inbox.com
Tue Jan 6 02:02:59 PST 2015
From: Junyan He <junyan.he at linux.intel.com>
Signed-off-by: Junyan He <junyan.he at linux.intel.com>
---
kernels/compiler_long_hi_sat.cl | 19 ++++
utests/CMakeLists.txt | 1 +
utests/compiler_long_hi_sat.cpp | 188 ++++++++++++++++++++++++++++++++++++++++
3 files changed, 208 insertions(+)
create mode 100644 kernels/compiler_long_hi_sat.cl
create mode 100644 utests/compiler_long_hi_sat.cpp
diff --git a/kernels/compiler_long_hi_sat.cl b/kernels/compiler_long_hi_sat.cl
new file mode 100644
index 0000000..66e4ab9
--- /dev/null
+++ b/kernels/compiler_long_hi_sat.cl
@@ -0,0 +1,19 @@
+kernel void compiler_long_mul_hi(global long *src, global long *dst, long num0, long num1) {
+ int i = get_local_id(0);
+ long c;
+
+ if (i % 2 == 0) {
+ c = mul_hi(src[i], num0);
+ } else {
+ c = mul_hi(src[i], num1);
+ }
+ dst[i] = c;
+}
+
+kernel void compiler_long_mul_sat(global long *src, global long *dst, long num0, long num1) {
+ int i = get_local_id(0);
+ long c;
+
+ c = mad_sat(src[i], num0, num1);
+ dst[i] = c;
+}
diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
index 5b29c0b..c8d87e2 100644
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -162,6 +162,7 @@ set (utests_sources
runtime_compile_link.cpp
compiler_long.cpp
compiler_long_2.cpp
+ compiler_long_hi_sat.cpp
compiler_long_convert.cpp
compiler_long_shl.cpp
compiler_long_shr.cpp
diff --git a/utests/compiler_long_hi_sat.cpp b/utests/compiler_long_hi_sat.cpp
new file mode 100644
index 0000000..e1d5f3b
--- /dev/null
+++ b/utests/compiler_long_hi_sat.cpp
@@ -0,0 +1,188 @@
+#include <cstdint>
+#include <cstring>
+#include <iostream>
+#include "utest_helper.hpp"
+
+static void __u64_mul_u64(uint64_t sourceA, uint64_t sourceB, uint64_t &destLow, uint64_t &destHi)
+{
+ uint64_t lowA, lowB;
+ uint64_t highA, highB;
+
+ lowA = sourceA & 0xffffffff;
+ highA = sourceA >> 32;
+ lowB = sourceB & 0xffffffff;
+ highB = sourceB >> 32;
+
+ uint64_t aHibHi = highA * highB;
+ uint64_t aHibLo = highA * lowB;
+ uint64_t aLobHi = lowA * highB;
+ uint64_t aLobLo = lowA * lowB;
+
+ uint64_t aLobLoHi = aLobLo >> 32;
+ uint64_t aLobHiLo = aLobHi & 0xFFFFFFFFULL;
+ aHibLo += aLobLoHi + aLobHiLo;
+
+ destHi = aHibHi + (aHibLo >> 32 ) + (aLobHi >> 32); // Cant overflow
+ destLow = (aHibLo << 32) | ( aLobLo & 0xFFFFFFFFULL);
+}
+
+static void __64_mul_64(int64_t sourceA, int64_t sourceB, uint64_t &destLow, int64_t &destHi)
+{
+ int64_t aSign = sourceA >> 63;
+ int64_t bSign = sourceB >> 63;
+ int64_t resultSign = aSign ^ bSign;
+
+ // take absolute values of the argument
+ sourceA = (sourceA ^ aSign) - aSign;
+ sourceB = (sourceB ^ bSign) - bSign;
+
+ uint64_t hi;
+ __u64_mul_u64( (uint64_t) sourceA, (uint64_t) sourceB, destLow, hi );
+
+ // Fix the sign
+ if( resultSign ) {
+ destLow ^= resultSign;
+ hi ^= resultSign;
+ destLow -= resultSign;
+ //carry if necessary
+ if( 0 == destLow )
+ hi -= resultSign;
+ }
+
+ destHi = (int64_t) hi;
+}
+
+static void __mad_sat(int64_t sourceA, int64_t sourceB, int64_t sourceC, int64_t& dst)
+{
+ cl_long multHi;
+ cl_ulong multLo;
+ __64_mul_64(sourceA, sourceB, multLo, multHi);
+
+ cl_ulong sum = multLo + sourceC;
+
+ // carry if overflow
+ if(sourceC >= 0) {
+ if(multLo > sum) {
+ multHi++;
+ if(CL_LONG_MIN == multHi) {
+ multHi = CL_LONG_MAX;
+ sum = CL_ULONG_MAX;
+ }
+ }
+ } else {
+ if( multLo < sum ) {
+ multHi--;
+ if( CL_LONG_MAX == multHi ) {
+ multHi = CL_LONG_MIN;
+ sum = 0;
+ }
+ }
+ }
+
+ // saturate
+ if( multHi > 0 )
+ sum = CL_LONG_MAX;
+ else if( multHi < -1 )
+ sum = CL_LONG_MIN;
+
+ dst = (cl_long) sum;
+}
+
+void compiler_long_mul_hi(void)
+{
+ const size_t n = 32;
+ int64_t src[n];
+ int64_t num0 = 0xF00A00CED0090B0CUL;
+ int64_t num1 = 0x7FABCD57FC098FC1UL;
+ memset(src, 0, sizeof(int64_t) * n);
+
+ // Setup kernel and buffers
+ OCL_CREATE_KERNEL_FROM_FILE("compiler_long_hi_sat", "compiler_long_mul_hi");
+ OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint64_t), NULL);
+ OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint64_t), NULL);
+ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+ OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+ OCL_SET_ARG(2, sizeof(cl_long), &num0);
+ OCL_SET_ARG(3, sizeof(cl_long), &num1);
+ globals[0] = n;
+ locals[0] = 32;
+
+ for (int32_t i = 0; i < (int32_t) n; ++i) {
+ uint64_t a = rand();
+ a = a <<32 | a;
+ src[i] = a;
+// printf(" 0x%lx", src[i]);
+ }
+
+ OCL_MAP_BUFFER(0);
+ memcpy(buf_data[0], src, sizeof(uint64_t) * n);
+ OCL_UNMAP_BUFFER(0);
+
+ uint64_t res_lo;
+ int64_t res_hi;
+
+ // Run the kernel on GPU
+ OCL_NDRANGE(1);
+
+ // Compare
+ OCL_MAP_BUFFER(1);
+ for (int32_t i = 0; i < (int32_t) n; ++i) {
+ if (i % 2 == 0)
+ __64_mul_64(src[i], num0, res_lo, res_hi);
+ else
+ __64_mul_64(src[i], num1, res_lo, res_hi);
+
+ OCL_ASSERT(((int64_t *)(buf_data[1]))[i] == res_hi);
+// printf("hi is 0x%lx, result is 0x%lx\n", res_hi, ((int64_t *)(buf_data[1]))[i]);
+ }
+ OCL_UNMAP_BUFFER(1);
+}
+
+void compiler_long_mul_sat(void)
+{
+ const size_t n = 32;
+ int64_t src[n];
+ int64_t num0 = 0xF00000CED8090B0CUL;
+ int64_t num1 = 0x0000000000098FC1UL;
+ memset(src, 0, sizeof(int64_t) * n);
+
+ // Setup kernel and buffers
+ OCL_CREATE_KERNEL_FROM_FILE("compiler_long_hi_sat", "compiler_long_mul_sat");
+ OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(uint64_t), NULL);
+ OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(uint64_t), NULL);
+ OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+ OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+ OCL_SET_ARG(2, sizeof(cl_long), &num0);
+ OCL_SET_ARG(3, sizeof(cl_long), &num1);
+ globals[0] = n;
+ locals[0] = 32;
+
+ for (int32_t i = 0; i < (int32_t) n; ++i) {
+ uint64_t a = rand();
+ a = a <<32 | a;
+ src[i] = a;
+// printf(" 0x%lx", src[i]);
+ }
+
+ OCL_MAP_BUFFER(0);
+ memcpy(buf_data[0], src, sizeof(uint64_t) * n);
+ OCL_UNMAP_BUFFER(0);
+
+ int64_t res;
+
+ // Run the kernel on GPU
+ OCL_NDRANGE(1);
+
+ // Compare
+ OCL_MAP_BUFFER(1);
+ for (int32_t i = 0; i < (int32_t) n; ++i) {
+ __mad_sat(src[i], num0, num1, res);
+
+ OCL_ASSERT(((int64_t *)(buf_data[1]))[i] == res);
+ //printf("ref is 0x%lx, result is 0x%lx\n", res, ((int64_t *)(buf_data[1]))[i]);
+ }
+ OCL_UNMAP_BUFFER(1);
+}
+
+MAKE_UTEST_FROM_FUNCTION(compiler_long_mul_hi);
+MAKE_UTEST_FROM_FUNCTION(compiler_long_mul_sat);
--
1.9.1
More information about the Beignet
mailing list