[Beignet] [PATCH 4/5] runtime: set CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE to kernel's SIMD_WIDTH.
Yang, Rong R
rong.r.yang at intel.com
Tue Nov 24 19:31:39 PST 2015
Pushed.
> -----Original Message-----
> From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of
> Zhigang Gong
> Sent: Friday, November 13, 2015 7:26
> To: Gong, Zhigang
> Cc: beignet at lists.freedesktop.org
> Subject: Re: [Beignet] [PATCH 4/5] runtime: set
> CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE to kernel's
> SIMD_WIDTH.
>
> On Thu, Nov 12, 2015 at 04:47:04PM +0800, Zhigang Gong wrote:
> > It makes sense to set CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE to
> > the corresponding SIMD size. This gives Intel OpenCL applications a
> > way to get the SIMD width at runtime and makes some SIMD-width-dependent
> > optimizations possible.
> >
> > Signed-off-by: Zhigang Gong <zhigang.gong at intel.com>
> > ---
> > src/cl_api.c | 3 ++-
> > src/cl_command_queue_gen7.c | 2 +-
> > src/cl_device_id.c | 11 ++++++++++-
> > src/cl_device_id.h | 2 --
> > src/cl_gt_device.h | 1 -
> > 5 files changed, 13 insertions(+), 6 deletions(-)
> >
> > diff --git a/src/cl_api.c b/src/cl_api.c index a18bc99..64206eb 100644
> > --- a/src/cl_api.c
> > +++ b/src/cl_api.c
> > @@ -3001,6 +3001,7 @@ clEnqueueNDRangeKernel(cl_command_queue
> command_queue,
> > err = cl_command_queue_flush(command_queue);
> > }
> >
> > +error:
> > if(b_output_kernel_perf)
> > {
> > if(kernel->program->build_opts != NULL) @@ -3008,7 +3009,7 @@
> > clEnqueueNDRangeKernel(cl_command_queue command_queue,
> > else
> > time_end(command_queue->ctx, cl_kernel_get_name(kernel), "",
> command_queue);
> > }
> > -error:
> > +
>
> The above change fixes a deadlock that occurs when kernel performance
> measurement is enabled and an error occurs in cl_command_queue_ND_range().
> I forgot to mention it in the commit log.
>
> Thanks,
> Zhigang Gong.
>
> > return err;
> > }
> >
> > diff --git a/src/cl_command_queue_gen7.c
> b/src/cl_command_queue_gen7.c
> > index 2edc3be..f0ee20a 100644
> > --- a/src/cl_command_queue_gen7.c
> > +++ b/src/cl_command_queue_gen7.c
> > @@ -329,7 +329,7 @@
> cl_command_queue_ND_range_gen7(cl_command_queue
> > queue,
> >
> > /* Compute the number of HW threads we need */
> > if(UNLIKELY(err = cl_kernel_work_group_sz(ker, local_wk_sz, 3,
> &local_sz) != CL_SUCCESS)) {
> > - fprintf(stderr, "Beignet: Work group size exceed Kerne's work group
> size.\n");
> > + fprintf(stderr, "Beignet: Work group size exceed Kernel's work
> > + group size.\n");
> > return err;
> > }
> > kernel.thread_n = thread_n = (local_sz + simd_sz - 1) / simd_sz;
> > diff --git a/src/cl_device_id.c b/src/cl_device_id.c index
> > 4551aa8..8186ac8 100644
> > --- a/src/cl_device_id.c
> > +++ b/src/cl_device_id.c
> > @@ -966,7 +966,16 @@ cl_get_kernel_workgroup_info(cl_kernel kernel,
> > return CL_SUCCESS;
> > }
> > }
> > - DECL_FIELD(PREFERRED_WORK_GROUP_SIZE_MULTIPLE, device-
> >preferred_wg_sz_mul)
> > + case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE:
> > + {
> > + if (param_value && param_value_size < sizeof(size_t))
> > + return CL_INVALID_VALUE;
> > + if (param_value_size_ret != NULL)
> > + *param_value_size_ret = sizeof(size_t);
> > + if (param_value)
> > + *(size_t*)param_value = interp_kernel_get_simd_width(kernel-
> >opaque);
> > + return CL_SUCCESS;
> > + }
> > case CL_KERNEL_LOCAL_MEM_SIZE:
> > {
> > size_t local_mem_sz =
> > interp_kernel_get_slm_size(kernel->opaque) + kernel->local_mem_sz;
> > diff --git a/src/cl_device_id.h b/src/cl_device_id.h index
> > 4a923ef..c5f9e57 100644
> > --- a/src/cl_device_id.h
> > +++ b/src/cl_device_id.h
> > @@ -108,8 +108,6 @@ struct _cl_device_id {
> > size_t driver_version_sz;
> > size_t spir_versions_sz;
> > size_t built_in_kernels_sz;
> > - /* Kernel specific info that we're assigning statically */
> > - size_t preferred_wg_sz_mul;
> > /* SubDevice specific info */
> > cl_device_id parent_device;
> > cl_uint partition_max_sub_device;
> > diff --git a/src/cl_gt_device.h b/src/cl_gt_device.h index
> > de7a636..12987b7 100644
> > --- a/src/cl_gt_device.h
> > +++ b/src/cl_gt_device.h
> > @@ -39,7 +39,6 @@
> > .native_vector_width_float = 4,
> > .native_vector_width_double = 2,
> > .native_vector_width_half = 8,
> > -.preferred_wg_sz_mul = 16,
> > .address_bits = 32,
> > .max_mem_alloc_size = 512 * 1024 * 1024, .image_support = CL_TRUE,
> > --
> > 1.9.1
> >
> > _______________________________________________
> > Beignet mailing list
> > Beignet at lists.freedesktop.org
> > http://lists.freedesktop.org/mailman/listinfo/beignet
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list