[Beignet] [PATCH] benchmark test for global read and write bandwidth

Mon Aug 24 00:49:53 PDT 2015

Some comments, thanks.

> -----Original Message-----
> From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of
> Meng Mengmeng
> Sent: Saturday, August 15, 2015 22:48
> To: beignet at lists.freedesktop.org
> Cc: Meng, Mengmeng
> Subject: [Beignet] [PATCH] benchmark test for global read and write
> bandwidth
> 
> ---
>  benchmark/CMakeLists.txt        |  3 ++-
>  benchmark/benchmark_io_test.cpp | 48
> +++++++++++++++++++++++++++++++++++++++++
>  kernels/benchmark_io.cl         | 26 ++++++++++++++++++++++
>  3 files changed, 76 insertions(+), 1 deletion(-)  create mode 100644
> benchmark/benchmark_io_test.cpp  create mode 100644
> kernels/benchmark_io.cl
> 
> diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt index
> 3e43a21..1b9fe54 100644
> --- a/benchmark/CMakeLists.txt
> +++ b/benchmark/CMakeLists.txt
> @@ -16,7 +16,8 @@ set (benchmark_sources
>    benchmark_read_buffer.cpp
>    benchmark_read_image.cpp
>    benchmark_copy_buffer_to_image.cpp
> -  benchmark_copy_image_to_buffer.cpp)
> +  benchmark_copy_image_to_buffer.cpp
> +  benchmark_io_test)
> 
> 
>  SET(CMAKE_CXX_FLAGS "-DBUILD_BENCHMARK ${CMAKE_CXX_FLAGS}")
> diff --git a/benchmark/benchmark_io_test.cpp
> b/benchmark/benchmark_io_test.cpp new file mode 100644 index
> 0000000..5c95ce3
> --- /dev/null
> +++ b/benchmark/benchmark_io_test.cpp
> @@ -0,0 +1,48 @@
> +#include "utests/utest_helper.hpp"
> +#include <sys/time.h>
> +
> +struct timeval start,stop;
> +const size_t n = 1024 * 1024;
> +int count = 16;
> +const size_t sz = 4 * n * count;
Why is the size 4 * n * count?

> +
> +#define BENCH_address(V,T) \
> +static double benchmark ##V(void) \
> +{ \
> + \
> +  OCL_CREATE_BUFFER(buf[0], CL_MEM_READ_ONLY, sz * sizeof(float),
> +NULL); \ 
The CPU also accesses buf[0], so the cl_mem_flags should not be CL_MEM_READ_ONLY, right?

> +  OCL_CREATE_BUFFER(buf[1], CL_MEM_READ_ONLY, sz * sizeof(float),
> +NULL); \  \
Should this be CL_MEM_WRITE_ONLY?

> +  OCL_CREATE_KERNEL_FROM_FILE("benchmark_io",T); \  \
> +  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);\
> +  OCL_SET_ARG(1, sizeof(cl_mem), &buf[1]); \  \
> +  OCL_MAP_BUFFER(0); \
> +  for (size_t i = 0; i < sz; i ++) { \
> +    ((float *)(buf_data[0]))[i] = rand(); \
> +  } \
> +  OCL_UNMAP_BUFFER(0);\
> + \
> +  globals[0] = n; \
> +  locals[0] = 256; \
> + \
> +  gettimeofday(&start,0); \
> +  for (size_t i=0; i<100; i++) { \
> +    OCL_NDRANGE(1);\
> +  } \
> +  OCL_FINISH();\
> +  gettimeofday(&stop,0);\
> + \
> +  clReleaseMemObject(buf[0]);\
> +  free(buf_data[0]);\
> +  buf_data[0] = NULL; \
buf[1] also needs to be released.

> +\
> +  double elapsed = time_subtract(&stop, &start, 0);\ \
> +  return BANDWIDTH(sz * sizeof(float) * 1 * 100, elapsed);\ } \
> +MAKE_BENCHMARK_FROM_FUNCTION_KEEP_PROGRAM(benchmark
> ##V,true);
> +
> +BENCH_address(_global_write,"benchmark_global_write")
> +BENCH_address(_global_read,"benchmark_global_read")
> diff --git a/kernels/benchmark_io.cl b/kernels/benchmark_io.cl new file
> mode 100644 index 0000000..6a3150b
> --- /dev/null
> +++ b/kernels/benchmark_io.cl
> @@ -0,0 +1,26 @@
> +#define COUNT 100
> +__kernel void
> +benchmark_global_write(__global float * src,  __global float* dst) {
> +  float sum = 0 ;
> +  int id = (int)get_global_id(0);
> +
> +  if (id%10 == 1)
> +    dst[id] = src[id]/2 + 1;
> +  else
> +    dst[id] = src[id]/2 - 1;
Each write is paired with a read, so this does not measure write performance alone.

> +}
> +__kernel void
> +benchmark_global_read(__global float * src,  __global float* dst) {
> +  float sum = 0 ;
> +  int id = (int)get_global_id(0);
> +
> +  for (int i=1; i<COUNT; i++) {
> +    sum = sum + src[id%i];
This only reads elements 0 to i-1, so there will be a lot of cache hits; I'm afraid it does not reflect realistic read performance. It would be better to read from different memory locations.

> +  }
> +
> +  if (id%10 == 1)
> +    dst[id] = sum;
> +}
> +
> --
> 1.9.1
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet