[Beignet] [Patch V2] BDW: Change BDW's max work group size to 512.

Yang, Rong R rong.r.yang at intel.com
Thu Oct 16 00:58:49 PDT 2014


I have sent a new patch.

-----Original Message-----
From: Zhigang Gong [mailto:zhigang.gong at linux.intel.com] 
Sent: Thursday, October 16, 2014 14:14
To: Yang, Rong R
Cc: beignet at lists.freedesktop.org
Subject: Re: [Beignet] [Patch V2] BDW: Change BDW's max work group size to 512.

The first version has been pushed, please split the error code fix into another patch and send again. Thanks.

On Wed, Oct 15, 2014 at 04:44:16PM +0800, Yang Rong wrote:
> OpenCV only queries and uses the device's max work group size. When
> SLM/barrier is enabled, BDW can't fit a 1024-item work group in one
> subslice, even in SIMD16, so temporarily change the device's max work group size.
> 
> V2: Return an error code on a work group size error instead of exiting.
> Signed-off-by: Yang Rong <rong.r.yang at intel.com>
> ---
>  src/cl_command_queue_gen7.c | 5 ++++-
>  src/cl_device_id.c          | 8 +++++---
>  2 files changed, 9 insertions(+), 4 deletions(-)
> 
> diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c 
> index b020540..c11e25d 100644
> --- a/src/cl_command_queue_gen7.c
> +++ b/src/cl_command_queue_gen7.c
> @@ -317,7 +317,10 @@ cl_command_queue_ND_range_gen7(cl_command_queue queue,
>    kernel.use_slm = interp_kernel_use_slm(ker->opaque);
>  
>    /* Compute the number of HW threads we need */
> -  TRY (cl_kernel_work_group_sz, ker, local_wk_sz, 3, &local_sz);
> +  if(UNLIKELY(err = cl_kernel_work_group_sz(ker, local_wk_sz, 3, &local_sz) != CL_SUCCESS)) {
> +    fprintf(stderr, "Beignet: Work group size exceed Kerne's work group size.\n");
> +    return err;
> +  }
>    kernel.thread_n = thread_n = (local_sz + simd_sz - 1) / simd_sz;
>    kernel.curbe_sz = cst_sz;
>  
> diff --git a/src/cl_device_id.c b/src/cl_device_id.c
> index 7944ca4..c911c23 100644
> --- a/src/cl_device_id.c
> +++ b/src/cl_device_id.c
> @@ -110,7 +110,7 @@ static struct _cl_device_id intel_brw_gt1_device = {
>    .max_thread_per_unit = 7,
>    .sub_slice_count = 2,
>    .max_work_item_sizes = {1024, 1024, 1024},
> -  .max_work_group_size = 1024,
> +  .max_work_group_size = 512,
>    .max_clock_frequency = 1000,
>  #include "cl_gen75_device.h"
>  };
> @@ -121,7 +121,7 @@ static struct _cl_device_id intel_brw_gt2_device = {
>    .max_thread_per_unit = 7,
>    .sub_slice_count = 3,
>    .max_work_item_sizes = {1024, 1024, 1024},
> -  .max_work_group_size = 1024,
> +  .max_work_group_size = 512,
>    .max_clock_frequency = 1000,
>  #include "cl_gen75_device.h"
>  };
> @@ -132,7 +132,7 @@ static struct _cl_device_id intel_brw_gt3_device = {
>    .max_thread_per_unit = 7,
>    .sub_slice_count = 6,
>    .max_work_item_sizes = {1024, 1024, 1024},
> -  .max_work_group_size = 1024,
> +  .max_work_group_size = 512,
>    .max_clock_frequency = 1000,
>  #include "cl_gen75_device.h"
>  };
> @@ -648,6 +648,8 @@ cl_get_kernel_max_wg_sz(cl_kernel kernel)
>      if(thread_cnt > 64)
>        thread_cnt = 64;
>      work_group_size = thread_cnt * simd_width;
> +    if(work_group_size > kernel->program->ctx->device->max_work_group_size)
> +      work_group_size = 
> + kernel->program->ctx->device->max_work_group_size;
>    }
>    return work_group_size;
>  }
> --
> 1.9.1
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list