[Beignet] [PATCH] utests: add utests for load/store optimization

rander.wang rander.wang at intel.com
Mon Jun 5 01:30:02 UTC 2017


	The 8 loads in this test kernel can be merged into 4.

Signed-off-by: rander.wang <rander.wang at intel.com>
---
 kernels/compiler_load_store_merging.cl | 18 ++++++++++++
 utests/CMakeLists.txt                  |  3 +-
 utests/compiler_load_store_merging.cpp | 51 ++++++++++++++++++++++++++++++++++
 3 files changed, 71 insertions(+), 1 deletion(-)
 create mode 100644 kernels/compiler_load_store_merging.cl
 create mode 100644 utests/compiler_load_store_merging.cpp

diff --git a/kernels/compiler_load_store_merging.cl b/kernels/compiler_load_store_merging.cl
new file mode 100644
index 0000000..4d78ec8
--- /dev/null
+++ b/kernels/compiler_load_store_merging.cl
@@ -0,0 +1,18 @@
+kernel void compiler_load_store_merging(global float *src, global float *dst) {
+      float result ;  // NOTE(review): declared but never used below
+      // Eight scalar loads from src at offsets {0,1,2}, {36,38}, {72,73,74} from idx, issued out of address order.
+      int idx = get_global_id(0);
+      float p2 = src[idx+1];
+      float p4 = src[idx+32+4];
+      float p5 = src[idx+32+6];
+      float p3 = src[idx+2];
+      float p8 = src[idx+32*2+10];
+      float p6 = src[idx+32*2+8];
+      float p1 = src[idx];
+      float p7 = src[idx+32*2+9];
+      // Combine all eight values so that every load feeds the stored result.
+      float dx = mad(2, p5 - p4, p3 - p1 + p8 - p6);
+      float dy = mad(2, p2 - p7, p3 - p8 + p1 - p6);
+      // One float is written per work-item, at the work-item's own index.
+      dst[idx] =  dx*dy;
+}
diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
index cd061b2..cd5c4fb 100644
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -304,7 +304,8 @@ set (utests_sources
   runtime_pipe_query.cpp
   compiler_pipe_builtin.cpp
   compiler_device_enqueue.cpp
-  compiler_global_immediate_optimized)
+  compiler_global_immediate_optimized.cpp
+  compiler_load_store_merging.cpp)
 
 if (LLVM_VERSION_NODOT VERSION_GREATER 34)
   SET(utests_sources
diff --git a/utests/compiler_load_store_merging.cpp b/utests/compiler_load_store_merging.cpp
new file mode 100644
index 0000000..d89342d
--- /dev/null
+++ b/utests/compiler_load_store_merging.cpp
@@ -0,0 +1,51 @@
+#include <cmath>
+#include "utest_helper.hpp"
+
+// Host-side check for the compiler_load_store_merging kernel: on each of n iterations it refills
+// the input, runs the kernel with globals[0] = locals[0] = 1 and compares dst[0] with a CPU result.
+void compiler_load_store_merging(void) {
+	const int n = 128;
+	float src[n];
+
+	// Setup kernel and buffers
+	OCL_CREATE_KERNEL("compiler_load_store_merging");
+	OCL_CREATE_BUFFER(buf[0], 0, n * sizeof(float), NULL);
+	OCL_CREATE_BUFFER(buf[1], 0, n * sizeof(float), NULL);
+	OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+	OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+	globals[0] = 1;
+	locals[0] = 1;
+
+	for (int j = 0; j < n ; j++) {
+		OCL_MAP_BUFFER(0);
+		for (int i = 0; i < n; ++i) {
+			src[i] = ((float*) buf_data[0])[i] = (j * n + i + 1) * 0.001f;
+		}
+		OCL_UNMAP_BUFFER(0);
+
+		OCL_NDRANGE(1);
+
+		OCL_MAP_BUFFER(1);
+		// The kernel writes through its second argument (buf[1]), so read the result from there.
+		const float *dst = (const float*) buf_data[1];
+
+		// Mirror the kernel's loads and arithmetic on the host; globals[0] is 1, so the index is 0.
+		int idx = 0;
+		float p2 = src[idx+1];
+		float p4 = src[idx+32+4];
+		float p5 = src[idx+32+6];
+		float p3 = src[idx+2];
+		float p8 = src[idx+32*2+10];
+		float p6 = src[idx+32*2+8];
+		float p1 = src[idx];
+		float p7 = src[idx+32*2+9];
+
+		float dx = 2.0f * (p5 - p4) + (p3 - p1 + p8 - p6);
+		float dy = 2.0f * (p2 - p7) + (p3 - p8 + p1 - p6);
+		const float result = dx * dy;
+		OCL_ASSERT(dst[0] == result);
+		OCL_UNMAP_BUFFER(1);
+	}
+}
+
+MAKE_UTEST_FROM_FUNCTION (compiler_load_store_merging);
-- 
2.7.4



More information about the Beignet mailing list