[Beignet] [Patch V2] BDW: Change BDW's max work group size to 512.

Zhigang Gong zhigang.gong at linux.intel.com
Wed Oct 15 23:14:01 PDT 2014


The first version has already been pushed. Please split the error code fix into
a separate patch and send it again. Thanks.

On Wed, Oct 15, 2014 at 04:44:16PM +0800, Yang Rong wrote:
> OpenCV only queries and uses the device's max work group size. When SLM/barrier is
> enabled, BDW can't fit a work group of size 1024 in one subslice, even in SIMD16.
> Temporarily change the device's max work group size.
> 
> V2: Return an error code instead of exiting when the work group size check fails.
> Signed-off-by: Yang Rong <rong.r.yang at intel.com>
> ---
>  src/cl_command_queue_gen7.c | 5 ++++-
>  src/cl_device_id.c          | 8 +++++---
>  2 files changed, 9 insertions(+), 4 deletions(-)
> 
> diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c
> index b020540..c11e25d 100644
> --- a/src/cl_command_queue_gen7.c
> +++ b/src/cl_command_queue_gen7.c
> @@ -317,7 +317,10 @@ cl_command_queue_ND_range_gen7(cl_command_queue queue,
>    kernel.use_slm = interp_kernel_use_slm(ker->opaque);
>  
>    /* Compute the number of HW threads we need */
> -  TRY (cl_kernel_work_group_sz, ker, local_wk_sz, 3, &local_sz);
> +  if(UNLIKELY(err = cl_kernel_work_group_sz(ker, local_wk_sz, 3, &local_sz) != CL_SUCCESS)) {
> +    fprintf(stderr, "Beignet: Work group size exceed Kerne's work group size.\n");
> +    return err;
> +  }
>    kernel.thread_n = thread_n = (local_sz + simd_sz - 1) / simd_sz;
>    kernel.curbe_sz = cst_sz;
>  
> diff --git a/src/cl_device_id.c b/src/cl_device_id.c
> index 7944ca4..c911c23 100644
> --- a/src/cl_device_id.c
> +++ b/src/cl_device_id.c
> @@ -110,7 +110,7 @@ static struct _cl_device_id intel_brw_gt1_device = {
>    .max_thread_per_unit = 7,
>    .sub_slice_count = 2,
>    .max_work_item_sizes = {1024, 1024, 1024},
> -  .max_work_group_size = 1024,
> +  .max_work_group_size = 512,
>    .max_clock_frequency = 1000,
>  #include "cl_gen75_device.h"
>  };
> @@ -121,7 +121,7 @@ static struct _cl_device_id intel_brw_gt2_device = {
>    .max_thread_per_unit = 7,
>    .sub_slice_count = 3,
>    .max_work_item_sizes = {1024, 1024, 1024},
> -  .max_work_group_size = 1024,
> +  .max_work_group_size = 512,
>    .max_clock_frequency = 1000,
>  #include "cl_gen75_device.h"
>  };
> @@ -132,7 +132,7 @@ static struct _cl_device_id intel_brw_gt3_device = {
>    .max_thread_per_unit = 7,
>    .sub_slice_count = 6,
>    .max_work_item_sizes = {1024, 1024, 1024},
> -  .max_work_group_size = 1024,
> +  .max_work_group_size = 512,
>    .max_clock_frequency = 1000,
>  #include "cl_gen75_device.h"
>  };
> @@ -648,6 +648,8 @@ cl_get_kernel_max_wg_sz(cl_kernel kernel)
>      if(thread_cnt > 64)
>        thread_cnt = 64;
>      work_group_size = thread_cnt * simd_width;
> +    if(work_group_size > kernel->program->ctx->device->max_work_group_size)
> +      work_group_size = kernel->program->ctx->device->max_work_group_size;
>    }
>    return work_group_size;
>  }
> -- 
> 1.9.1
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list