[Beignet] [PATCH] Add read_imagef benchmark for optimization.

Wed Sep 14 05:46:47 UTC 2016

On Mon, 2016-09-12 at 06:53 +0000, Yang, Rong R wrote:
> 
> > -----Original Message-----
> > From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On
> > Behalf Of
> > yan.wang at linux.intel.com
> > Sent: Monday, September 5, 2016 14:52
> > To: beignet at lists.freedesktop.org
> > Cc: Yan Wang <yan.wang at linux.intel.com>
> > Subject: [Beignet] [PATCH] Add read_imagef benchmark for
> > optimization.
> > 
> > From: Yan Wang <yan.wang at linux.intel.com>
> > 
> > ---
> >  benchmark/CMakeLists.txt                 |  1 +
> >  benchmark/benchmark_read_image_float.cpp | 65
> > ++++++++++++++++++++++++++++++++
> >  kernels/compiler_read_image_float.cl     |  9 +++++
> >  3 files changed, 75 insertions(+)
> >  create mode 100644 benchmark/benchmark_read_image_float.cpp
> >  create mode 100644 kernels/compiler_read_image_float.cl
> > 
> > diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt
> > index
> > f9b246b..556275e 100644
> > --- a/benchmark/CMakeLists.txt
> > +++ b/benchmark/CMakeLists.txt
> > @@ -15,6 +15,7 @@ set (benchmark_sources
> >    benchmark_use_host_ptr_buffer.cpp
> >    benchmark_read_buffer.cpp
> >    benchmark_read_image.cpp
> > +  benchmark_read_image_float.cpp
> >    benchmark_copy_buffer_to_image.cpp
> >    benchmark_copy_image_to_buffer.cpp
> >    benchmark_copy_buffer.cpp
> > diff --git a/benchmark/benchmark_read_image_float.cpp
> > b/benchmark/benchmark_read_image_float.cpp
> > new file mode 100644
> > index 0000000..b0c2fb4
> > --- /dev/null
> > +++ b/benchmark/benchmark_read_image_float.cpp
> > @@ -0,0 +1,65 @@
> > +#include <string.h>
> > +#include "utests/utest_helper.hpp"
> > +#include <sys/time.h>
> > +
> > +#define NUM 400
> > +
> > +double benchmark_read_image_float(void) {
> > +  struct timeval start,stop;
> > +
> > +  const size_t w = 128;
> > +  const size_t h = 128;
> > +  const size_t sz = w * h;
> > +  cl_image_format format;
> > +  cl_image_desc desc;
> > +
> > +  memset(&desc, 0x0, sizeof(cl_image_desc));  memset(&format, 0x0,
> > + sizeof(cl_image_format));
> > +
> > +  // Setup kernel and images
> > +  OCL_CREATE_KERNEL("compiler_read_image_float");
> > +  buf_data[0] = (uint32_t*) malloc(sizeof(float) * sz);  for
> > (uint32_t
> > + i = 0; i < sz; ++i) {
> > +    ((float*)buf_data[0])[i] = rand();
> > +  }
> > +
> > +  format.image_channel_order = CL_R;
> > +  format.image_channel_data_type = CL_FLOAT;  desc.image_type =
> > + CL_MEM_OBJECT_IMAGE2D;  desc.image_width = w;  desc.image_height
> > =
> > h;
> > + OCL_CREATE_IMAGE(buf[0], CL_MEM_COPY_HOST_PTR, &format, &desc,
> > + buf_data[0]);  OCL_CREATE_BUFFER(buf[1], 0, sz * sizeof(float),
> > NULL);
> > +
> > +  free(buf_data[0]);
> > +  buf_data[0] = NULL;
> > +
> > +  // Run the kernel
> > +  OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]);  OCL_SET_ARG(1,
> > + sizeof(cl_mem), &buf[1]);  OCL_SET_ARG(2, sizeof(cl_int), &w);
> > + globals[0] = w;  globals[1] = h;  locals[0] = 16;  locals[1] =
> > 16;
> > +
> > +  OCL_NDRANGE(2);
> > +  OCL_FINISH();
> > +
> > +  gettimeofday(&start,0);
> > +  for (size_t i=0; i<NUM; i++) {
> > +    OCL_NDRANGE(2);
> > +  }
> > +  OCL_FINISH();
> > +  gettimeofday(&stop,0);
> > +
> > +  free(buf_data[0]);
> > +  buf_data[0] = NULL;
> > +
> > +  double elapsed = time_subtract(&stop, &start, 0);
> > +
> > +  return BANDWIDTH(sz * sizeof(float) * NUM, elapsed); }
> > +
> > +MAKE_BENCHMARK_FROM_FUNCTION(benchmark_read_image_float,
> > "GB/S");
> > diff --git a/kernels/compiler_read_image_float.cl
> > b/kernels/compiler_read_image_float.cl
> > new file mode 100644
> > index 0000000..f581438
> > --- /dev/null
> > +++ b/kernels/compiler_read_image_float.cl
> > @@ -0,0 +1,9 @@
> > +__constant sampler_t sampler    = CLK_NORMALIZED_COORDS_FALSE |
> > CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_LINEAR;
> Do you really need CLK_FILTER_LINEAR?
> The image filter/cache behavior of CLK_FILTER_LINEAR is more complicated
> than that of CLK_FILTER_NEAREST. For a benchmark, I think
> CLK_FILTER_NEAREST is better.
This benchmark is meant for comparison with the OpenCV optical flow cases,
which use CLK_FILTER_LINEAR. That said, I think CLK_FILTER_NEAREST is OK too.
I can submit the patch again with that filter.
> 
> > +
> > +__kernel void compiler_read_image_float(__read_only image2d_t src,
> > +__global float* dst, int w) {
> > +    int xi = get_global_id(0);
> > +    int yi = get_global_id(1);
> > +    float4 v = read_imagef(src, sampler, (float2)(xi, yi));
> > +    *(dst + yi * w + xi) = v.x;
> > +}
> > --
> > 1.9.1
> > 
> > _______________________________________________
> > Beignet mailing list
> > Beignet at lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/beignet
> > 
> > _______________________________________________
> > Beignet mailing list
> > Beignet at lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/beignet