[Beignet] [PATCH] Add read_imagef benchmark for optimization.
Yan Wang
yan.wang at linux.intel.com
Wed Sep 14 05:46:47 UTC 2016
On Mon, 2016-09-12 at 06:53 +0000, Yang, Rong R wrote:
>
> > -----Original Message-----
> > From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On
> > Behalf Of
> > yan.wang at linux.intel.com
> > Sent: Monday, September 5, 2016 14:52
> > To: beignet at lists.freedesktop.org
> > Cc: Yan Wang <yan.wang at linux.intel.com>
> > Subject: [Beignet] [PATCH] Add read_imagef benchmark for
> > optimization.
> >
> > From: Yan Wang <yan.wang at linux.intel.com>
> >
> > ---
> > benchmark/CMakeLists.txt | 1 +
> > benchmark/benchmark_read_image_float.cpp | 65
> > ++++++++++++++++++++++++++++++++
> > kernels/compiler_read_image_float.cl | 9 +++++
> > 3 files changed, 75 insertions(+)
> > create mode 100644 benchmark/benchmark_read_image_float.cpp
> > create mode 100644 kernels/compiler_read_image_float.cl
> >
> > diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt
> > index
> > f9b246b..556275e 100644
> > --- a/benchmark/CMakeLists.txt
> > +++ b/benchmark/CMakeLists.txt
> > @@ -15,6 +15,7 @@ set (benchmark_sources
> > benchmark_use_host_ptr_buffer.cpp
> > benchmark_read_buffer.cpp
> > benchmark_read_image.cpp
> > + benchmark_read_image_float.cpp
> > benchmark_copy_buffer_to_image.cpp
> > benchmark_copy_image_to_buffer.cpp
> > benchmark_copy_buffer.cpp
> > diff --git a/benchmark/benchmark_read_image_float.cpp
> > b/benchmark/benchmark_read_image_float.cpp
> > new file mode 100644
> > index 0000000..b0c2fb4
> > --- /dev/null
> > +++ b/benchmark/benchmark_read_image_float.cpp
> > @@ -0,0 +1,65 @@
> > +#include <string.h>
> > +#include "utests/utest_helper.hpp"
> > +#include <sys/time.h>
> > +
> > +#define NUM 400
> > +
> > +double benchmark_read_image_float(void) {
> > + struct timeval start,stop;
> > +
> > + const size_t w = 128;
> > + const size_t h = 128;
> > + const size_t sz = w * h;
> > + cl_image_format format;
> > + cl_image_desc desc;
> > +
> > + memset(&desc, 0x0, sizeof(cl_image_desc)); memset(&format, 0x0,
> > + sizeof(cl_image_format));
> > +
> > + // Setup kernel and images
> > + OCL_CREATE_KERNEL("compiler_read_image_float");
> > + buf_data[0] = (uint32_t*) malloc(sizeof(float) * sz); for
> > (uint32_t
> > + i = 0; i < sz; ++i) {
> > + ((float*)buf_data[0])[i] = rand();
> > + }
> > +
> > + format.image_channel_order = CL_R;
> > + format.image_channel_data_type = CL_FLOAT; desc.image_type =
> > + CL_MEM_OBJECT_IMAGE2D; desc.image_width = w; desc.image_height
> > =
> > h;
> > + OCL_CREATE_IMAGE(buf[0], CL_MEM_COPY_HOST_PTR, &format, &desc,
> > + buf_data[0]); OCL_CREATE_BUFFER(buf[1], 0, sz * sizeof(float),
> > NULL);
> > +
> > + free(buf_data[0]);
> > + buf_data[0] = NULL;
> > +
> > + // Run the kernel
> > + OCL_SET_ARG(0, sizeof(cl_mem), &buf[0]); OCL_SET_ARG(1,
> > + sizeof(cl_mem), &buf[1]); OCL_SET_ARG(2, sizeof(cl_int), &w);
> > + globals[0] = w; globals[1] = h; locals[0] = 16; locals[1] =
> > 16;
> > +
> > + OCL_NDRANGE(2);
> > + OCL_FINISH();
> > +
> > + gettimeofday(&start,0);
> > + for (size_t i=0; i<NUM; i++) {
> > + OCL_NDRANGE(2);
> > + }
> > + OCL_FINISH();
> > + gettimeofday(&stop,0);
> > +
> > + free(buf_data[0]);
> > + buf_data[0] = NULL;
> > +
> > + double elapsed = time_subtract(&stop, &start, 0);
> > +
> > + return BANDWIDTH(sz * sizeof(float) * NUM, elapsed); }
> > +
> > +MAKE_BENCHMARK_FROM_FUNCTION(benchmark_read_image_float,
> > "GB/S");
> > diff --git a/kernels/compiler_read_image_float.cl
> > b/kernels/compiler_read_image_float.cl
> > new file mode 100644
> > index 0000000..f581438
> > --- /dev/null
> > +++ b/kernels/compiler_read_image_float.cl
> > @@ -0,0 +1,9 @@
> > +__constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE |
> > CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_LINEAR;
> Do you really need CLK_FILTER_LINEAR?
> The image filter/cache behavior of CLK_FILTER_LINEAR is more complicated
> than that of CLK_FILTER_NEAREST. For a benchmark, I think
> CLK_FILTER_NEAREST is better.
This benchmark is meant for comparison with the OpenCV optical flow cases,
which use CLK_FILTER_LINEAR. For now, though, I think CLK_FILTER_NEAREST is
OK too. I can resubmit the patch.
>
> > +
> > +__kernel void compiler_read_image_float(__read_only image2d_t src,
> > +__global float* dst, int w) {
> > + int xi = get_global_id(0);
> > + int yi = get_global_id(1);
> > + float4 v = read_imagef(src, sampler, (float2)(xi, yi));
> > + *(dst + yi * w + xi) = v.x;
> > +}
> > --
> > 1.9.1
> >
> > _______________________________________________
> > Beignet mailing list
> > Beignet at lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/beignet
> >
> > _______________________________________________
> > Beignet mailing list
> > Beignet at lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet mailing list