[Beignet] [PATCH v2] runtime: fix potential curbe allocation issue.

Tue Jul 1 00:06:38 PDT 2014

Please ignore this version; I forgot to commit the new code.
A new version was sent a few minutes ago.

On Tue, Jul 01, 2014 at 02:54:07PM +0800, Zhigang Gong wrote:
> According to the spec, different platforms have different curbe
> allocation restrictions. The previous code statically set the curbe
> allocation size to 480, which is not correct.
> 
> This patch changes the code to always set the curbe entry count to 64,
> which is the maximum work group size, and to set a proper curbe
> allocation size according to the platform's hardware limit
> and a relatively reasonable limit on kernel argument usage.
> 
> v2:
> When we call load_vfe_state, we already know the exact constant URB
> size used by the current kernel, so we can choose the smallest valid curbe
> size for this kernel. If the size exceeds the hardware limit,
> we report it as a warning here.
> 
> Signed-off-by: Zhigang Gong <zhigang.gong at intel.com>
> ---
>  src/cl_gt_device.h      |  2 +-
>  src/intel/intel_gpgpu.c | 41 ++++++++++++++++++++++++++++-------------
>  2 files changed, 29 insertions(+), 14 deletions(-)
> 
> diff --git a/src/cl_gt_device.h b/src/cl_gt_device.h
> index 63c9047..97ba7e2 100644
> --- a/src/cl_gt_device.h
> +++ b/src/cl_gt_device.h
> @@ -39,7 +39,7 @@
>  .address_bits = 32,
>  .max_mem_alloc_size = 256 * 1024 * 1024,
>  .image_support = CL_TRUE,
> -.max_read_image_args = 128,
> +.max_read_image_args = 16,
>  .max_write_image_args = 8,
>  .image_max_array_size = 2048,
>  .image2d_max_width = 8192,
> diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c
> index d403aa0..48e2769 100644
> --- a/src/intel/intel_gpgpu.c
> +++ b/src/intel/intel_gpgpu.c
> @@ -116,7 +116,7 @@ struct intel_gpgpu
>    struct {
>      uint32_t num_cs_entries;
>      uint32_t size_cs_entry;  /* size of one entry in 512bit elements */
> -  } urb;
> +  } curb;
>  
>    uint32_t max_threads;      /* max threads requested by the user */
>  };
> @@ -275,6 +275,22 @@ uint32_t intel_gpgpu_get_scratch_index_gen75(uint32_t size) {
>      return index;
>  }
>  
> +#define MAX_KERNEL_ARG_SIZE (32 * 4 + 24 * 4 + 5 * 64) * 64 // 32 integer arguments, 24 uniform special register and 5 vector special register.
> +
> +LOCAL cl_int
> +cl_get_max_curbe_size(uint32_t device_id)
> +{
> +  int max_curbe_size;
> +  if (IS_BAYTRAIL_T(device_id) ||
> +      IS_IVB_GT1(device_id))
> +    max_curbe_size = 992;
> +  else
> +    max_curbe_size = 2016;
> +
> +  return (max_curbe_size*32) > MAX_KERNEL_ARG_SIZE ?
> +         (MAX_KERNEL_ARG_SIZE / 32) : max_curbe_size;
> +}
> +
>  static void
>  intel_gpgpu_load_vfe_state(intel_gpgpu_t *gpgpu)
>  {
> @@ -293,10 +309,10 @@ intel_gpgpu_load_vfe_state(intel_gpgpu_t *gpgpu)
>      OUT_BATCH(gpgpu->batch, 0);
>    }
>    /* max_thread | urb entries | (reset_gateway|bypass_gate_way | gpgpu_mode) */
> -  OUT_BATCH(gpgpu->batch, 0 | ((gpgpu->max_threads - 1) << 16) | (64 << 8) | 0xc4);
> +  OUT_BATCH(gpgpu->batch, 0 | ((gpgpu->max_threads - 1) << 16) | (0 << 8) | 0xc4);
>    OUT_BATCH(gpgpu->batch, 0);
>    /* curbe_size */
> -  OUT_BATCH(gpgpu->batch, 480);
> +  OUT_BATCH(gpgpu->batch, cl_get_max_curbe_size(gpgpu->drv->device_id));
>    OUT_BATCH(gpgpu->batch, 0);
>    OUT_BATCH(gpgpu->batch, 0);
>    OUT_BATCH(gpgpu->batch, 0);
> @@ -306,17 +322,16 @@ intel_gpgpu_load_vfe_state(intel_gpgpu_t *gpgpu)
>  static void
>  intel_gpgpu_load_curbe_buffer(intel_gpgpu_t *gpgpu)
>  {
> +  int curbe_size = gpgpu->curb.size_cs_entry * gpgpu->curb.num_cs_entries;
> +  if (curbe_size > cl_get_max_curbe_size(gpgpu->drv->device_id)) {
> +    curbe_size = cl_get_max_curbe_size(gpgpu->drv->device_id);
> +    fprintf(stderr, "warning, curbe size exceed limitation.\n");
> +  }
>    BEGIN_BATCH(gpgpu->batch, 4);
>    OUT_BATCH(gpgpu->batch, CMD(2,0,1) | (4 - 2));  /* length-2 */
>    OUT_BATCH(gpgpu->batch, 0);                     /* mbz */
> -// XXX
> -#if 1
>    OUT_BATCH(gpgpu->batch,
> -            gpgpu->urb.size_cs_entry*
> -            gpgpu->urb.num_cs_entries*32);
> -#else
> -  OUT_BATCH(gpgpu->batch, 5120);
> -#endif
> +            curbe_size * 32);
>    OUT_RELOC(gpgpu->batch, gpgpu->aux_buf.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, gpgpu->aux_offset.curbe_offset);
>    ADVANCE_BATCH(gpgpu->batch);
>  }
> @@ -577,8 +592,8 @@ intel_gpgpu_state_init(intel_gpgpu_t *gpgpu,
>    gpgpu->sampler_bitmap = ~((1 << max_sampler_n) - 1);
>  
>    /* URB */
> -  gpgpu->urb.num_cs_entries = max_threads;
> -  gpgpu->urb.size_cs_entry = size_cs_entry;
> +  gpgpu->curb.num_cs_entries = 64;
> +  gpgpu->curb.size_cs_entry = size_cs_entry;
>    gpgpu->max_threads = max_threads;
>  
>    if (gpgpu->printf_b.ibo)
> @@ -616,7 +631,7 @@ intel_gpgpu_state_init(intel_gpgpu_t *gpgpu,
>    //curbe must be 32 bytes aligned
>    size_aux = ALIGN(size_aux, 32);
>    gpgpu->aux_offset.curbe_offset = size_aux;
> -  size_aux += gpgpu->urb.num_cs_entries * gpgpu->urb.size_cs_entry * 64;
> +  size_aux += gpgpu->curb.num_cs_entries * gpgpu->curb.size_cs_entry * 32;
>  
>    //idrt must be 32 bytes aligned
>    size_aux = ALIGN(size_aux, 32);
> -- 
> 1.8.3.2
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet