[Beignet] [PATCH] utests: add utests for load/store optimization
rander.wang
rander.wang at intel.com
Mon Jun 5 01:30:02 UTC 2017
Add a unit test for the load/store optimization: the 8 scalar loads in the test kernel can be merged into 4.
Signed-off-by: rander.wang <rander.wang at intel.com>
---
kernels/compiler_load_store_merging.cl | 18 ++++++++++++
utests/CMakeLists.txt | 3 +-
utests/compiler_load_store_merging.cpp | 51 ++++++++++++++++++++++++++++++++++
3 files changed, 71 insertions(+), 1 deletion(-)
create mode 100644 kernels/compiler_load_store_merging.cl
create mode 100644 utests/compiler_load_store_merging.cpp
diff --git a/kernels/compiler_load_store_merging.cl b/kernels/compiler_load_store_merging.cl
new file mode 100644
index 0000000..4d78ec8
--- /dev/null
+++ b/kernels/compiler_load_store_merging.cl
@@ -0,0 +1,18 @@
+kernel void compiler_load_store_merging(global float *src, global float *dst) {
+  // Eight scalar loads, deliberately left out of address order; the patch
+  // description states that they can be merged from 8 loads into 4.
+  int idx = get_global_id(0);
+  float p2 = src[idx+1];
+  float p4 = src[idx+32+4];
+  float p5 = src[idx+32+6];
+  float p3 = src[idx+2];
+  float p8 = src[idx+32*2+10];
+  float p6 = src[idx+32*2+8];
+  float p1 = src[idx];
+  float p7 = src[idx+32*2+9];
+
+  float dx = mad(2.0f, p5 - p4, p3 - p1 + p8 - p6);
+  float dy = mad(2.0f, p2 - p7, p3 - p8 + p1 - p6);
+
+  dst[idx] = dx*dy;
+}
diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
index cd061b2..cd5c4fb 100644
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -304,7 +304,8 @@ set (utests_sources
runtime_pipe_query.cpp
compiler_pipe_builtin.cpp
compiler_device_enqueue.cpp
- compiler_global_immediate_optimized)
+ compiler_global_immediate_optimized.cpp
+ compiler_load_store_merging.cpp)
if (LLVM_VERSION_NODOT VERSION_GREATER 34)
SET(utests_sources
diff --git a/utests/compiler_load_store_merging.cpp b/utests/compiler_load_store_merging.cpp
new file mode 100644
index 0000000..d89342d
--- /dev/null
+++ b/utests/compiler_load_store_merging.cpp
@@ -0,0 +1,51 @@
+#include <cmath>
+#include "utest_helper.hpp"
+
+// Runs the kernel for one work-item and checks dst[0] against a host reference.
+void compiler_load_store_merging(void) {
+  const int n = 128;
+  float src[n];
+
+  // Setup kernel and buffers
+  OCL_CREATE_KERNEL("compiler_load_store_merging");
+  OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(float), NULL);
+  OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(float), NULL);
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+  globals[0] = 1;
+  locals[0] = 1;
+
+  for (int j = 0; j < n ; j++) {
+    // Refill the input with a new ramp on every pass; src keeps a host copy.
+    OCL_MAP_BUFFER(0);
+    for (int i = 0; i < n; ++i) {
+      src[i] = ((float*) buf_data[0])[i] = (j * n + i + 1) * 0.001f;
+    }
+    OCL_UNMAP_BUFFER(0);
+
+    OCL_NDRANGE(1);
+
+    // Host reference: same loads as the kernel, mad(2, a, b) as 2.0f * a + b.
+    const int idx = 0;
+    float p2 = src[idx+1];
+    float p4 = src[idx+32+4];
+    float p5 = src[idx+32+6];
+    float p3 = src[idx+2];
+    float p8 = src[idx+32*2+10];
+    float p6 = src[idx+32*2+8];
+    float p1 = src[idx];
+    float p7 = src[idx+32*2+9];
+
+    float dx = 2.0f * (p5 - p4) + (p3 - p1 + p8 - p6);
+    float dy = 2.0f * (p2 - p7) + (p3 - p8 + p1 - p6);
+    const float result = dx*dy;
+
+    // The kernel writes to buf[1], so the result is read from buf_data[1].
+    OCL_MAP_BUFFER(1);
+    const float *dst = (const float*) buf_data[1];
+    OCL_ASSERT(dst[0] == result);
+    OCL_UNMAP_BUFFER(1);
+  }
+}
+
+MAKE_UTEST_FROM_FUNCTION (compiler_load_store_merging);
--
2.7.4
More information about the Beignet
mailing list