[Beignet] [PATCH] Benchmark: evaluate internal and native math functions
Grigore Lupescu
grigore.lupescu at intel.com
Thu Mar 3 15:11:58 UTC 2016
Signed-off-by: Grigore Lupescu <grigore.lupescu at intel.com>
---
benchmark/CMakeLists.txt | 3 +-
benchmark/benchmark_math.cpp | 151 ++++++++++++++++++++++++
kernels/bench_math.cl | 269 +++++++++++++++++++++++++++++++++++++++++++
3 files changed, 422 insertions(+), 1 deletion(-)
create mode 100644 benchmark/benchmark_math.cpp
create mode 100644 kernels/bench_math.cl
diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt
index dd33829..4c3c933 100644
--- a/benchmark/CMakeLists.txt
+++ b/benchmark/CMakeLists.txt
@@ -18,7 +18,8 @@ set (benchmark_sources
benchmark_copy_buffer_to_image.cpp
benchmark_copy_image_to_buffer.cpp
benchmark_copy_buffer.cpp
- benchmark_copy_image.cpp)
+ benchmark_copy_image.cpp
+ benchmark_math.cpp)
SET(CMAKE_CXX_FLAGS "-DBUILD_BENCHMARK ${CMAKE_CXX_FLAGS}")
diff --git a/benchmark/benchmark_math.cpp b/benchmark/benchmark_math.cpp
new file mode 100644
index 0000000..969aa08
--- /dev/null
+++ b/benchmark/benchmark_math.cpp
@@ -0,0 +1,151 @@
+#include "utests/utest_helper.hpp"
+#include <sys/time.h>
+#include <cstdint>
+#include <cstdlib>
+#include <cstring>
+#include <iostream>
+#include "utest_helper.hpp"
+#include <sys/time.h>
+
+/*
+ * Build and run one math benchmark kernel and return its throughput.
+ *
+ * str_filename: OpenCL source file that contains the kernel.
+ * str_kernel:   name of the kernel to build and launch.
+ *
+ * The kernel is launched once over global_size work items and receives the
+ * input buffer, the output buffer, the float `pwr` and the iteration count
+ * `loop` as arguments 0..3. A sample of the output buffer is printed after
+ * the run.
+ *
+ * Returns BANDWIDTH(global_size * loop, elapsed), where elapsed is the value
+ * time_subtract() reports for the launch; the callers label it "Mop/s".
+ */
+double benchmark_generic_math(const char* str_filename,
+                              const char* str_kernel)
+{
+  double elapsed = 0;
+  struct timeval start, stop;
+  const size_t global_size = 1024 * 1024;
+  const size_t local_size = 64;
+
+  /* Math OP parameters: the kernel receives pwr and the repeat count loop */
+  cl_float base = 1.000002f;
+  cl_float pwr = 1.0102003f;
+  uint32_t loop = 1000;
+
+  /* Setup kernel and buffers */
+  OCL_CALL(cl_kernel_init, str_filename, str_kernel, SOURCE, NULL);
+
+  OCL_CREATE_BUFFER(buf[0], 0, global_size * sizeof(float), NULL);
+  OCL_CREATE_BUFFER(buf[1], 0, global_size * sizeof(float), NULL);
+
+  /*
+   * Generate the input set directly in the mapped input buffer. No host
+   * staging allocation is made, so nothing can leak if a later OCL_* macro
+   * bails out.
+   */
+  OCL_MAP_BUFFER(0);
+  float* src = (float*)buf_data[0];
+  for (size_t i = 0; i < global_size; i++)
+    src[i] = base + i * (base - 1);
+  OCL_UNMAP_BUFFER(0);
+
+  globals[0] = global_size;
+  locals[0] = local_size;
+
+  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
+  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]);
+  OCL_SET_ARG(2, sizeof(cl_float), &pwr);
+  OCL_SET_ARG(3, sizeof(cl_uint), &loop);
+
+  /* Measure performance; a failing clFinish must not be timed as a success */
+  gettimeofday(&start, 0);
+  OCL_NDRANGE(1);
+  OCL_CALL(clFinish, queue);
+  gettimeofday(&stop, 0);
+  elapsed = time_subtract(&stop, &start, 0);
+
+  /* Show compute results: one value out of every 8192 */
+  OCL_MAP_BUFFER(1);
+  const float* dst = (const float*)buf_data[1];
+  for (size_t i = 0; i < global_size; i += 8192)
+    printf("\t%.3f", dst[i]);
+  OCL_UNMAP_BUFFER(1);
+
+  return BANDWIDTH(global_size * loop, elapsed);
+}
+
+/* Benchmark entry: runs kernel "bench_math_pow" through benchmark_generic_math. */
+double benchmark_math_pow(void)
+{
+  const char* kernel_name = "bench_math_pow";
+  return benchmark_generic_math("bench_math.cl", kernel_name);
+}
+MAKE_BENCHMARK_FROM_FUNCTION(benchmark_math_pow, "Mop/s");
+
+/* Benchmark entry: runs kernel "bench_math_native_powr" through benchmark_generic_math. */
+double benchmark_math_native_powr(void)
+{
+  const char* kernel_name = "bench_math_native_powr";
+  return benchmark_generic_math("bench_math.cl", kernel_name);
+}
+MAKE_BENCHMARK_FROM_FUNCTION(benchmark_math_native_powr, "Mop/s");
+
+/* Benchmark entry: runs kernel "bench_math_exp2" through benchmark_generic_math. */
+double benchmark_math_exp2(void)
+{
+  const char* kernel_name = "bench_math_exp2";
+  return benchmark_generic_math("bench_math.cl", kernel_name);
+}
+MAKE_BENCHMARK_FROM_FUNCTION(benchmark_math_exp2, "Mop/s");
+
+/* Benchmark entry: runs kernel "bench_math_exp10" through benchmark_generic_math. */
+double benchmark_math_exp10(void)
+{
+  const char* kernel_name = "bench_math_exp10";
+  return benchmark_generic_math("bench_math.cl", kernel_name);
+}
+MAKE_BENCHMARK_FROM_FUNCTION(benchmark_math_exp10, "Mop/s");
+
+/* Benchmark entry: runs kernel "bench_math_native_exp10" through benchmark_generic_math. */
+double benchmark_math_native_exp10(void)
+{
+  const char* kernel_name = "bench_math_native_exp10";
+  return benchmark_generic_math("bench_math.cl", kernel_name);
+}
+MAKE_BENCHMARK_FROM_FUNCTION(benchmark_math_native_exp10, "Mop/s");
+
+/* Benchmark entry: runs kernel "bench_math_log2" through benchmark_generic_math. */
+double benchmark_math_log2(void)
+{
+  const char* kernel_name = "bench_math_log2";
+  return benchmark_generic_math("bench_math.cl", kernel_name);
+}
+MAKE_BENCHMARK_FROM_FUNCTION(benchmark_math_log2, "Mop/s");
+
+/* Benchmark entry: runs kernel "bench_math_native_log2" through benchmark_generic_math. */
+double benchmark_math_native_log2(void)
+{
+  const char* kernel_name = "bench_math_native_log2";
+  return benchmark_generic_math("bench_math.cl", kernel_name);
+}
+MAKE_BENCHMARK_FROM_FUNCTION(benchmark_math_native_log2, "Mop/s");
+
+/* Benchmark entry: runs kernel "bench_math_log10" through benchmark_generic_math. */
+double benchmark_math_log10(void)
+{
+  const char* kernel_name = "bench_math_log10";
+  return benchmark_generic_math("bench_math.cl", kernel_name);
+}
+MAKE_BENCHMARK_FROM_FUNCTION(benchmark_math_log10, "Mop/s");
+
+/* Benchmark entry: runs kernel "bench_math_native_log10" through benchmark_generic_math. */
+double benchmark_math_native_log10(void)
+{
+  const char* kernel_name = "bench_math_native_log10";
+  return benchmark_generic_math("bench_math.cl", kernel_name);
+}
+MAKE_BENCHMARK_FROM_FUNCTION(benchmark_math_native_log10, "Mop/s");
+
+/* Benchmark entry: runs kernel "bench_math_sqrt" through benchmark_generic_math. */
+double benchmark_math_sqrt(void)
+{
+  const char* kernel_name = "bench_math_sqrt";
+  return benchmark_generic_math("bench_math.cl", kernel_name);
+}
+MAKE_BENCHMARK_FROM_FUNCTION(benchmark_math_sqrt, "Mop/s");
+
+/* Benchmark entry: runs kernel "bench_math_sin" through benchmark_generic_math. */
+double benchmark_math_sin(void)
+{
+  const char* kernel_name = "bench_math_sin";
+  return benchmark_generic_math("bench_math.cl", kernel_name);
+}
+MAKE_BENCHMARK_FROM_FUNCTION(benchmark_math_sin, "Mop/s");
+
+/* Benchmark entry: runs kernel "bench_math_native_sin" through benchmark_generic_math. */
+double benchmark_math_native_sin(void)
+{
+  const char* kernel_name = "bench_math_native_sin";
+  return benchmark_generic_math("bench_math.cl", kernel_name);
+}
+MAKE_BENCHMARK_FROM_FUNCTION(benchmark_math_native_sin, "Mop/s");
+
+/* Benchmark entry: runs kernel "bench_math_cos" through benchmark_generic_math. */
+double benchmark_math_cos(void)
+{
+  const char* kernel_name = "bench_math_cos";
+  return benchmark_generic_math("bench_math.cl", kernel_name);
+}
+MAKE_BENCHMARK_FROM_FUNCTION(benchmark_math_cos, "Mop/s");
+
+/* Benchmark entry: runs kernel "bench_math_native_cos" through benchmark_generic_math. */
+double benchmark_math_native_cos(void)
+{
+  const char* kernel_name = "bench_math_native_cos";
+  return benchmark_generic_math("bench_math.cl", kernel_name);
+}
+MAKE_BENCHMARK_FROM_FUNCTION(benchmark_math_native_cos, "Mop/s");
+
+/* Benchmark entry: runs kernel "bench_math_tan" through benchmark_generic_math. */
+double benchmark_math_tan(void)
+{
+  const char* kernel_name = "bench_math_tan";
+  return benchmark_generic_math("bench_math.cl", kernel_name);
+}
+MAKE_BENCHMARK_FROM_FUNCTION(benchmark_math_tan, "Mop/s");
+
+/* Benchmark entry: runs kernel "bench_math_native_tan" through benchmark_generic_math. */
+double benchmark_math_native_tan(void)
+{
+  const char* kernel_name = "bench_math_native_tan";
+  return benchmark_generic_math("bench_math.cl", kernel_name);
+}
+MAKE_BENCHMARK_FROM_FUNCTION(benchmark_math_native_tan, "Mop/s");
+
+/* Benchmark entry: runs kernel "bench_math_asin" through benchmark_generic_math. */
+double benchmark_math_asin(void)
+{
+  const char* kernel_name = "bench_math_asin";
+  return benchmark_generic_math("bench_math.cl", kernel_name);
+}
+MAKE_BENCHMARK_FROM_FUNCTION(benchmark_math_asin, "Mop/s");
+
+/* Benchmark entry: runs kernel "bench_math_acos" through benchmark_generic_math. */
+double benchmark_math_acos(void)
+{
+  const char* kernel_name = "bench_math_acos";
+  return benchmark_generic_math("bench_math.cl", kernel_name);
+}
+MAKE_BENCHMARK_FROM_FUNCTION(benchmark_math_acos, "Mop/s");
diff --git a/kernels/bench_math.cl b/kernels/bench_math.cl
new file mode 100644
index 0000000..9a689be
--- /dev/null
+++ b/kernels/bench_math.cl
@@ -0,0 +1,269 @@
+/* pow() throughput: each work item applies pow(x, pwr) to its value loop times */
+kernel void bench_math_pow(
+  global float *src,
+  global float *dst,
+  float pwr,
+  uint loop)
+{
+  const size_t gid = get_global_id(0);
+  float acc = src[gid];
+
+  /* Each call consumes the previous result, forming a dependent chain */
+  while (loop > 0) {
+    acc = pow(acc, pwr);
+    loop--;
+  }
+
+  dst[gid] = acc;
+}
+
+/* native_powr() throughput: each work item applies native_powr(x, pwr) loop times */
+kernel void bench_math_native_powr(
+  global float *src,
+  global float *dst,
+  float pwr,
+  uint loop)
+{
+  const size_t gid = get_global_id(0);
+  float acc = src[gid];
+
+  /* Each call consumes the previous result, forming a dependent chain */
+  while (loop > 0) {
+    acc = native_powr(acc, pwr);
+    loop--;
+  }
+
+  dst[gid] = acc;
+}
+
+/* exp2() throughput: each work item iterates x = exp2(pwr) - exp2(x) loop times */
+kernel void bench_math_exp2(
+  global float *src,
+  global float *dst,
+  float pwr,
+  uint loop)
+{
+  const size_t gid = get_global_id(0);
+  float acc = src[gid];
+
+  /* The second exp2() consumes the previous result on every pass */
+  while (loop > 0) {
+    acc = exp2(pwr) - exp2(acc);
+    loop--;
+  }
+
+  dst[gid] = acc;
+}
+
+/* exp10() throughput: each work item iterates x = exp10(pwr) - exp10(x) loop times */
+kernel void bench_math_exp10(
+  global float *src,
+  global float *dst,
+  float pwr,
+  uint loop)
+{
+  const size_t gid = get_global_id(0);
+  float acc = src[gid];
+
+  /* The second exp10() consumes the previous result on every pass */
+  while (loop > 0) {
+    acc = exp10(pwr) - exp10(acc);
+    loop--;
+  }
+
+  dst[gid] = acc;
+}
+
+/* native_exp10() throughput: iterates x = native_exp10(pwr) - native_exp10(x) loop times */
+kernel void bench_math_native_exp10(
+  global float *src,
+  global float *dst,
+  float pwr,
+  uint loop)
+{
+  const size_t gid = get_global_id(0);
+  float acc = src[gid];
+
+  /* The second native_exp10() consumes the previous result on every pass */
+  while (loop > 0) {
+    acc = native_exp10(pwr) - native_exp10(acc);
+    loop--;
+  }
+
+  dst[gid] = acc;
+}
+
+/* log2() throughput: each work item iterates x = log2(x) + pwr loop times */
+kernel void bench_math_log2(
+  global float *src,
+  global float *dst,
+  float pwr,
+  uint loop)
+{
+  const size_t gid = get_global_id(0);
+  float acc = src[gid];
+
+  /* Each call consumes the previous result, forming a dependent chain */
+  while (loop > 0) {
+    acc = log2(acc) + pwr;
+    loop--;
+  }
+
+  dst[gid] = acc;
+}
+
+/* native_log2() throughput: each work item iterates x = native_log2(x) + pwr loop times */
+kernel void bench_math_native_log2(
+  global float *src,
+  global float *dst,
+  float pwr,
+  uint loop)
+{
+  const size_t gid = get_global_id(0);
+  float acc = src[gid];
+
+  /* Each call consumes the previous result, forming a dependent chain */
+  while (loop > 0) {
+    acc = native_log2(acc) + pwr;
+    loop--;
+  }
+
+  dst[gid] = acc;
+}
+
+/* log10() throughput: each work item iterates x = log10(x) + pwr loop times */
+kernel void bench_math_log10(
+  global float *src,
+  global float *dst,
+  float pwr,
+  uint loop)
+{
+  const size_t gid = get_global_id(0);
+  float acc = src[gid];
+
+  /* Each call consumes the previous result, forming a dependent chain */
+  while (loop > 0) {
+    acc = log10(acc) + pwr;
+    loop--;
+  }
+
+  dst[gid] = acc;
+}
+
+/* native_log10() throughput: each work item iterates x = native_log10(x) + pwr loop times */
+kernel void bench_math_native_log10(
+  global float *src,
+  global float *dst,
+  float pwr,
+  uint loop)
+{
+  const size_t gid = get_global_id(0);
+  float acc = src[gid];
+
+  /* Each call consumes the previous result, forming a dependent chain */
+  while (loop > 0) {
+    acc = native_log10(acc) + pwr;
+    loop--;
+  }
+
+  dst[gid] = acc;
+}
+
+/* sqrt() throughput: each work item iterates x = sqrt(x) + sqrt(pwr + x) loop times */
+kernel void bench_math_sqrt(
+  global float *src,
+  global float *dst,
+  float pwr,
+  uint loop)
+{
+  const size_t gid = get_global_id(0);
+  float acc = src[gid];
+
+  /* Both sqrt() calls consume the previous result on every pass */
+  while (loop > 0) {
+    acc = sqrt(acc) + sqrt(pwr + acc);
+    loop--;
+  }
+
+  dst[gid] = acc;
+}
+
+/* sin() throughput: each work item iterates x = sin(x) loop times; pwr is unused */
+kernel void bench_math_sin(
+  global float *src,
+  global float *dst,
+  float pwr,
+  uint loop)
+{
+  const size_t gid = get_global_id(0);
+  float acc = src[gid];
+
+  /* Each call consumes the previous result, forming a dependent chain */
+  while (loop > 0) {
+    acc = sin(acc);
+    loop--;
+  }
+
+  dst[gid] = acc;
+}
+
+/* native_sin() throughput: each work item iterates x = native_sin(x) loop times; pwr is unused */
+kernel void bench_math_native_sin(
+  global float *src,
+  global float *dst,
+  float pwr,
+  uint loop)
+{
+  const size_t gid = get_global_id(0);
+  float acc = src[gid];
+
+  /* Each call consumes the previous result, forming a dependent chain */
+  while (loop > 0) {
+    acc = native_sin(acc);
+    loop--;
+  }
+
+  dst[gid] = acc;
+}
+
+/* cos() throughput: each work item iterates x = cos(x) loop times; pwr is unused */
+kernel void bench_math_cos(
+  global float *src,
+  global float *dst,
+  float pwr,
+  uint loop)
+{
+  const size_t gid = get_global_id(0);
+  float acc = src[gid];
+
+  /* Each call consumes the previous result, forming a dependent chain */
+  while (loop > 0) {
+    acc = cos(acc);
+    loop--;
+  }
+
+  dst[gid] = acc;
+}
+
+/* native_cos() throughput: each work item iterates x = native_cos(x) loop times; pwr is unused */
+kernel void bench_math_native_cos(
+  global float *src,
+  global float *dst,
+  float pwr,
+  uint loop)
+{
+  const size_t gid = get_global_id(0);
+  float acc = src[gid];
+
+  /* Each call consumes the previous result, forming a dependent chain */
+  while (loop > 0) {
+    acc = native_cos(acc);
+    loop--;
+  }
+
+  dst[gid] = acc;
+}
+
+/* tan() throughput: each work item iterates x = tan(x) loop times; pwr is unused */
+kernel void bench_math_tan(
+  global float *src,
+  global float *dst,
+  float pwr,
+  uint loop)
+{
+  const size_t gid = get_global_id(0);
+  float acc = src[gid];
+
+  /* Each call consumes the previous result, forming a dependent chain */
+  while (loop > 0) {
+    acc = tan(acc);
+    loop--;
+  }
+
+  dst[gid] = acc;
+}
+
+/* native_tan() throughput: each work item iterates x = native_tan(x) loop times; pwr is unused */
+kernel void bench_math_native_tan(
+  global float *src,
+  global float *dst,
+  float pwr,
+  uint loop)
+{
+  const size_t gid = get_global_id(0);
+  float acc = src[gid];
+
+  /* Each call consumes the previous result, forming a dependent chain */
+  while (loop > 0) {
+    acc = native_tan(acc);
+    loop--;
+  }
+
+  dst[gid] = acc;
+}
+
+/*
+ * benchmark asin performance
+ *
+ * Each work item starts from its src element and iterates
+ * x = asin((pwr - 1) + 0.25f * x) loop times, then stores x in dst.
+ */
+kernel void bench_math_asin(
+  global float *src,
+  global float *dst,
+  float pwr,
+  uint loop)
+{
+  const size_t gid = get_global_id(0);
+  float result = src[gid];
+
+  /*
+   * Feed the previous result back into the argument. With a loop-invariant
+   * argument such as (pwr - 1) the compiler is free to evaluate asin() once
+   * and drop the loop, while the host still counts `loop` calls per item.
+   *
+   * The 0.25f scale keeps the argument inside asin()'s [-1, 1] domain as
+   * long as |pwr - 1| + 0.25 * |src value| <= 1 on the first pass; later
+   * passes are bounded because |asin(x)| <= pi/2.
+   * NOTE(review): assumes the host keeps pwr - 1 near 0.01 and src values
+   * below ~3.9, as benchmark_generic_math currently does - confirm if the
+   * host inputs change.
+   */
+  for(; loop > 0; loop--)
+    result = asin((pwr - 1) + 0.25f * result);
+
+  dst[gid] = result;
+}
+
+/*
+ * benchmark acos performance
+ *
+ * Each work item starts from its src element and iterates
+ * x = acos((pwr - 1) + 0.25f * x) loop times, then stores x in dst.
+ */
+kernel void bench_math_acos(
+  global float *src,
+  global float *dst,
+  float pwr,
+  uint loop)
+{
+  const size_t gid = get_global_id(0);
+  float result = src[gid];
+
+  /*
+   * Feed the previous result back into the argument. With a loop-invariant
+   * argument such as (pwr - 1) the compiler is free to evaluate acos() once
+   * and drop the loop, while the host still counts `loop` calls per item.
+   *
+   * The 0.25f scale keeps the argument inside acos()'s [-1, 1] domain as
+   * long as |pwr - 1| + 0.25 * |src value| <= 1 on the first pass; later
+   * passes are bounded because 0 <= acos(x) <= pi.
+   * NOTE(review): assumes the host keeps pwr - 1 near 0.01 and src values
+   * below ~3.9, as benchmark_generic_math currently does - confirm if the
+   * host inputs change.
+   */
+  for(; loop > 0; loop--)
+    result = acos((pwr - 1) + 0.25f * result);
+
+  dst[gid] = result;
+}
--
2.5.0
More information about the Beignet mailing list