[Beignet] [PATCH 4/5] runtime: set CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE to kernel's SIMD_WIDTH.

Yang, Rong R rong.r.yang at intel.com
Tue Nov 24 19:31:39 PST 2015


Pushed.

> -----Original Message-----
> From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of
> Zhigang Gong
> Sent: Friday, November 13, 2015 7:26
> To: Gong, Zhigang
> Cc: beignet at lists.freedesktop.org
> Subject: Re: [Beignet] [PATCH 4/5] runtime: set
> CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE to kernel's
> SIMD_WIDTH.
> 
> On Thu, Nov 12, 2015 at 04:47:04PM +0800, Zhigang Gong wrote:
> > It makes sense to set CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE to
> > the corresponding SIMD size. This gives Intel's OCL applications a way
> > to get the SIMD width at runtime and makes some SIMD-width-dependent
> > optimizations possible.
> >
> > Signed-off-by: Zhigang Gong <zhigang.gong at intel.com>
> > ---
> >  src/cl_api.c                |  3 ++-
> >  src/cl_command_queue_gen7.c |  2 +-
> >  src/cl_device_id.c          | 11 ++++++++++-
> >  src/cl_device_id.h          |  2 --
> >  src/cl_gt_device.h          |  1 -
> >  5 files changed, 13 insertions(+), 6 deletions(-)
> >
> > diff --git a/src/cl_api.c b/src/cl_api.c index a18bc99..64206eb 100644
> > --- a/src/cl_api.c
> > +++ b/src/cl_api.c
> > @@ -3001,6 +3001,7 @@ clEnqueueNDRangeKernel(cl_command_queue
> command_queue,
> >      err = cl_command_queue_flush(command_queue);
> >    }
> >
> > +error:
> >    if(b_output_kernel_perf)
> >    {
> >      if(kernel->program->build_opts != NULL) @@ -3008,7 +3009,7 @@
> > clEnqueueNDRangeKernel(cl_command_queue  command_queue,
> >      else
> >        time_end(command_queue->ctx, cl_kernel_get_name(kernel), "",
> command_queue);
> >    }
> > -error:
> > +
> 
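> The above change fixes a deadlock that occurs when kernel performance
> measurement is enabled and cl_command_queue_ND_range() returns an error.
> It moves the "error:" label above the b_output_kernel_perf block so that
> the block also runs on the error path. I forgot to mention it in the
> commit log.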
> 
> Thanks,
> Zhigang Gong.
> 
> >    return err;
> >  }
> >
> > diff --git a/src/cl_command_queue_gen7.c
> b/src/cl_command_queue_gen7.c
> > index 2edc3be..f0ee20a 100644
> > --- a/src/cl_command_queue_gen7.c
> > +++ b/src/cl_command_queue_gen7.c
> > @@ -329,7 +329,7 @@
> cl_command_queue_ND_range_gen7(cl_command_queue
> > queue,
> >
> >    /* Compute the number of HW threads we need */
> >    if(UNLIKELY(err = cl_kernel_work_group_sz(ker, local_wk_sz, 3,
> &local_sz) != CL_SUCCESS)) {
> > -    fprintf(stderr, "Beignet: Work group size exceed Kerne's work group
> size.\n");
> > +    fprintf(stderr, "Beignet: Work group size exceed Kernel's work
> > + group size.\n");
> >      return err;
> >    }
> >    kernel.thread_n = thread_n = (local_sz + simd_sz - 1) / simd_sz;
> > diff --git a/src/cl_device_id.c b/src/cl_device_id.c index
> > 4551aa8..8186ac8 100644
> > --- a/src/cl_device_id.c
> > +++ b/src/cl_device_id.c
> > @@ -966,7 +966,16 @@ cl_get_kernel_workgroup_info(cl_kernel kernel,
> >          return CL_SUCCESS;
> >        }
> >      }
> > -    DECL_FIELD(PREFERRED_WORK_GROUP_SIZE_MULTIPLE, device-
> >preferred_wg_sz_mul)
> > +    case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE:
> > +    {
> > +      if (param_value && param_value_size < sizeof(size_t))
> > +        return CL_INVALID_VALUE;
> > +      if (param_value_size_ret != NULL)
> > +        *param_value_size_ret = sizeof(size_t);
> > +      if (param_value)
> > +        *(size_t*)param_value = interp_kernel_get_simd_width(kernel-
> >opaque);
> > +      return CL_SUCCESS;
> > +    }
> >      case CL_KERNEL_LOCAL_MEM_SIZE:
> >      {
> >        size_t local_mem_sz =
> > interp_kernel_get_slm_size(kernel->opaque) + kernel->local_mem_sz;
> > diff --git a/src/cl_device_id.h b/src/cl_device_id.h index
> > 4a923ef..c5f9e57 100644
> > --- a/src/cl_device_id.h
> > +++ b/src/cl_device_id.h
> > @@ -108,8 +108,6 @@ struct _cl_device_id {
> >    size_t driver_version_sz;
> >    size_t spir_versions_sz;
> >    size_t built_in_kernels_sz;
> > -  /* Kernel specific info that we're assigning statically */
> > -  size_t preferred_wg_sz_mul;
> >    /* SubDevice specific info */
> >    cl_device_id parent_device;
> >    cl_uint      partition_max_sub_device;
> > diff --git a/src/cl_gt_device.h b/src/cl_gt_device.h index
> > de7a636..12987b7 100644
> > --- a/src/cl_gt_device.h
> > +++ b/src/cl_gt_device.h
> > @@ -39,7 +39,6 @@
> >  .native_vector_width_float = 4,
> >  .native_vector_width_double = 2,
> >  .native_vector_width_half = 8,
> > -.preferred_wg_sz_mul = 16,
> >  .address_bits = 32,
> >  .max_mem_alloc_size = 512 * 1024 * 1024,  .image_support = CL_TRUE,
> > --
> > 1.9.1
> >
> > _______________________________________________
> > Beignet mailing list
> > Beignet at lists.freedesktop.org
> > http://lists.freedesktop.org/mailman/listinfo/beignet
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list