[Beignet] [PATCH] add test of copy_image_1d into benchmark

Zhigang Gong zhigang.gong at linux.intel.com
Sun Dec 14 20:08:09 PST 2014


On Tue, Dec 09, 2014 at 02:32:23PM +0800, Zhu Bingbing wrote:
> Signed-off-by: Zhu Bingbing <bingbingx.zhu at intel.com>
> ---
>  benchmark/CMakeLists.txt                   |  3 +-
>  benchmark/benchmark_copy_image_1d.cpp      | 55 ++++++++++++++++++++++++++++++
>  kernels/runtime_benchmark_copy_image_1d.cl |  8 +++++
>  3 files changed, 65 insertions(+), 1 deletion(-)
>  create mode 100644 benchmark/benchmark_copy_image_1d.cpp
>  create mode 100644 kernels/runtime_benchmark_copy_image_1d.cl
> 
> diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt
> index ac2d8aa..86ac922 100644
> --- a/benchmark/CMakeLists.txt
> +++ b/benchmark/CMakeLists.txt
> @@ -12,7 +12,8 @@ set (benchmark_sources
>    ../utests/utest_helper.cpp
>    ../utests/vload_bench.cpp
>    enqueue_copy_buf.cpp
> -  benchmark_use_host_ptr_buffer.cpp)
> +  benchmark_use_host_ptr_buffer.cpp
> +  benchmark_copy_image_1d.cpp)
>  
>  
>  SET(CMAKE_CXX_FLAGS "-DBUILD_BENCHMARK ${CMAKE_CXX_FLAGS}")
> diff --git a/benchmark/benchmark_copy_image_1d.cpp b/benchmark/benchmark_copy_image_1d.cpp
> new file mode 100644
> index 0000000..07cf1b8
> --- /dev/null
> +++ b/benchmark/benchmark_copy_image_1d.cpp
> @@ -0,0 +1,55 @@
> +#include <sys/time.h>
> +#include <string.h>
> +#include "utests/utest_helper.hpp"
> +
> +static int benchmark_copy_image_1d(void)
> +{
> +  size_t i = 0;
> +  const size_t w = 512;
> +  cl_image_format format;
> +  cl_image_desc desc;
> +  cl_sampler sampler;
> +  struct timeval start;
> +  struct timeval stop;
> +
> +  memset(&desc, 0x0, sizeof(cl_image_desc));
> +  memset(&format, 0x0, sizeof(cl_image_format));
> +
> +  // Setup kernel and images
> +  OCL_CREATE_KERNEL("runtime_benchmark_copy_image_1d");
> +  buf_data[0] = (uint32_t*) malloc(sizeof(uint32_t) * w);
> +  for (uint32_t i = 0; i < w; i++)
> +      ((uint32_t*)buf_data[0])[i] = i;
> +
> +  format.image_channel_order = CL_RGBA;
> +  format.image_channel_data_type = CL_UNSIGNED_INT8;
> +  desc.image_type = CL_MEM_OBJECT_IMAGE1D;
> +  desc.image_width = w;
> +  desc.image_row_pitch = w * sizeof(uint32_t);
> +  OCL_CREATE_IMAGE(buf[0], CL_MEM_COPY_HOST_PTR, &format, &desc, buf_data[0]);
> +
> +  desc.image_row_pitch = 0;
> +  OCL_CREATE_SAMPLER(sampler, CL_ADDRESS_REPEAT, CL_FILTER_NEAREST);
> +
> +  // Run the kernel
> +  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);
> +  OCL_SET_ARG(1, sizeof(sampler), &sampler);
> +  globals[0] = w;
> +  locals[0] = 16;
> +
> +  gettimeofday(&start,0);
> +  for (i = 0; i < 10000; i++)
> +  {
> +    OCL_NDRANGE(1);
> +    OCL_FINISH();
> +  }
> +  gettimeofday(&stop,0);
> +
> +  clReleaseMemObject(buf[0]);
> +  free(buf_data[0]);
> +  buf_data[0] = NULL;
> +
> +  return time_subtract(&stop, &start, 0);
> +}
> +
> +MAKE_BENCHMARK_FROM_FUNCTION(benchmark_copy_image_1d);
> diff --git a/kernels/runtime_benchmark_copy_image_1d.cl b/kernels/runtime_benchmark_copy_image_1d.cl
> new file mode 100644
> index 0000000..3aa0429
> --- /dev/null
> +++ b/kernels/runtime_benchmark_copy_image_1d.cl
> @@ -0,0 +1,8 @@
> +__kernel void
> +runtime_benchmark_copy_image_1d(__read_only image1d_t src,  sampler_t sampler)
> +{
> +  int coord;
> +  int4 color;
> +  coord = (int)get_global_id(0);
> +  color = read_imagei(src, sampler, coord);

This is not the right method to measure sampler performance. It has the following 3 problems:

1. Each work item reads only one pixel, which is very inefficient.
2. The color value is never used, so the read_imagei call may be optimized away by the compiler.
3. Using a dynamic sampler is very inefficient on the Gen platform. Please use a static sampler that is defined in the kernel.
   You can refer to the usage in kernels/test_copy_image1.cl.

> +}
> -- 
> 1.9.3
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list