[Beignet] [PATCH] Add benchmark for evaluating performance of math operations

Grigore Lupescu grigore.lupescu at intel.com
Sun Feb 21 15:36:12 UTC 2016


Signed-off-by: Grigore Lupescu <grigore.lupescu at intel.com>
---
 benchmark/CMakeLists.txt     |  3 ++-
 benchmark/benchmark_math.cpp | 60 ++++++++++++++++++++++++++++++++++++++++++++
 kernels/bench_math.cl        | 13 ++++++++++
 3 files changed, 75 insertions(+), 1 deletion(-)
 create mode 100644 benchmark/benchmark_math.cpp
 create mode 100644 kernels/bench_math.cl

diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt
index dd33829..4c3c933 100644
--- a/benchmark/CMakeLists.txt
+++ b/benchmark/CMakeLists.txt
@@ -18,7 +18,8 @@ set (benchmark_sources
   benchmark_copy_buffer_to_image.cpp
   benchmark_copy_image_to_buffer.cpp
   benchmark_copy_buffer.cpp
-  benchmark_copy_image.cpp)
+  benchmark_copy_image.cpp
+  benchmark_math.cpp)
 
 
 SET(CMAKE_CXX_FLAGS "-DBUILD_BENCHMARK ${CMAKE_CXX_FLAGS}")
diff --git a/benchmark/benchmark_math.cpp b/benchmark/benchmark_math.cpp
new file mode 100644
index 0000000..b93a4f3
--- /dev/null
+++ b/benchmark/benchmark_math.cpp
@@ -0,0 +1,60 @@
+#include "utests/utest_helper.hpp"
+#include <sys/time.h>
+
+#include <cstdint>
+#include <cstdlib>
+#include <cstring>
+#include <iostream>
+#include "utest_helper.hpp"
+#include <sys/time.h>
+
+/* Benchmark: uploads a generated float input, enqueues the bench_math_exp
+ * kernel and times it with gettimeofday() until clFinish() returns.
+ * Returns BANDWIDTH(global_size * reduce_loop, elapsed). */
+double benchmark_math_exp(void)
+{
+  double elapsed = 0;
+  struct timeval start,stop;
+  const size_t global_size = 1024 * 1024;
+  const size_t local_size = 128;
+  const uint32_t reduce_loop = 10000;
+
+  /* Generate the input set: values cycle through 0 .. local_size - 1 */
+  float* src = (float*)calloc(global_size, sizeof(float));
+  OCL_ASSERT(src != NULL);
+  for(size_t i = 0; i < global_size; i++)
+    src[i] = i % local_size;
+
+  /* Setup kernel and buffers */
+  OCL_CREATE_KERNEL_FROM_FILE("bench_math",
+    "bench_math_exp");
+
+  OCL_CREATE_BUFFER(buf[0], 0, (global_size) * sizeof(float), NULL);
+  OCL_CREATE_BUFFER(buf[1], 0, (global_size) * sizeof(float), NULL);
+
+  OCL_MAP_BUFFER(0);
+  memcpy(buf_data[0], src, global_size * sizeof(float));
+  OCL_UNMAP_BUFFER(0);
+  free(src); /* host staging copy is not used again after the upload */
+
+  globals[0] = global_size;
+  locals[0] = local_size;
+
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+  OCL_SET_ARG(2, sizeof(cl_uint), &reduce_loop);
+
+  /* Measure performance: wall-clock time from enqueue to queue completion */
+  gettimeofday(&start,0);
+  OCL_NDRANGE(1);
+  clFinish(queue);
+  gettimeofday(&stop,0);
+  elapsed = time_subtract(&stop, &start, 0);
+
+  /* Map and unmap the output buffer; its values are not validated here */
+  OCL_MAP_BUFFER(1);
+  OCL_UNMAP_BUFFER(1);
+
+  return BANDWIDTH(global_size * reduce_loop, elapsed);
+}
+MAKE_BENCHMARK_FROM_FUNCTION(benchmark_math_exp, "Mflops/sec");
diff --git a/kernels/bench_math.cl b/kernels/bench_math.cl
new file mode 100644
index 0000000..75da4d2
--- /dev/null
+++ b/kernels/bench_math.cl
@@ -0,0 +1,13 @@
+/* Applies exp() reduce_loop + 1 times to src[gid] and stores it in dst[gid]. */
+kernel void bench_math_exp(
+  global float *src,
+  global float *dst,
+  uint reduce_loop)
+{
+  float val = src[get_global_id(0)];
+  float result = exp(val);  /* seed the chain from the loaded input value */
+  for(; reduce_loop > 0; reduce_loop--)
+    result = exp(result);   /* each call consumes the previous result */
+
+  dst[get_global_id(0)] = result;
+}
-- 
2.5.0



More information about the Beignet mailing list