[Beignet] [PATCH v2] runtime: fix potential curbe allocation issue.
Zhigang Gong
zhigang.gong at linux.intel.com
Tue Jul 1 00:06:38 PDT 2014
Please ignore this version. I forgot to commit the new code.
A new version was sent a few minutes ago.
On Tue, Jul 01, 2014 at 02:54:07PM +0800, Zhigang Gong wrote:
> According to the spec, different platforms have different curbe
> allocation restrictions. The previous code statically set the curbe
> allocation size to 480, which is not correct.
>
> This patch changes the code to always set the curbe entry count to 64,
> which is the maximum work group size, and to set a proper curbe
> allocation size according to the platform's hard limit
> and a relatively reasonable limit on kernel argument usage.
>
> v2:
> When we call load_vfe_state, we already know the exact constant urb
> size used by the current kernel, so we can choose the smallest valid curbe
> size for this kernel. If the size exceeds the hardware limit,
> we report it as a warning here.
>
> Signed-off-by: Zhigang Gong <zhigang.gong at intel.com>
> ---
> src/cl_gt_device.h | 2 +-
> src/intel/intel_gpgpu.c | 41 ++++++++++++++++++++++++++++-------------
> 2 files changed, 29 insertions(+), 14 deletions(-)
>
> diff --git a/src/cl_gt_device.h b/src/cl_gt_device.h
> index 63c9047..97ba7e2 100644
> --- a/src/cl_gt_device.h
> +++ b/src/cl_gt_device.h
> @@ -39,7 +39,7 @@
> .address_bits = 32,
> .max_mem_alloc_size = 256 * 1024 * 1024,
> .image_support = CL_TRUE,
> -.max_read_image_args = 128,
> +.max_read_image_args = 16,
> .max_write_image_args = 8,
> .image_max_array_size = 2048,
> .image2d_max_width = 8192,
> diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c
> index d403aa0..48e2769 100644
> --- a/src/intel/intel_gpgpu.c
> +++ b/src/intel/intel_gpgpu.c
> @@ -116,7 +116,7 @@ struct intel_gpgpu
> struct {
> uint32_t num_cs_entries;
> uint32_t size_cs_entry; /* size of one entry in 512bit elements */
> - } urb;
> + } curb;
>
> uint32_t max_threads; /* max threads requested by the user */
> };
> @@ -275,6 +275,22 @@ uint32_t intel_gpgpu_get_scratch_index_gen75(uint32_t size) {
> return index;
> }
>
> +#define MAX_KERNEL_ARG_SIZE (32 * 4 + 24 * 4 + 5 * 64) * 64 // 32 integer arguments, 24 uniform special register and 5 vector special register.
> +
> +LOCAL cl_int
> +cl_get_max_curbe_size(uint32_t device_id)
> +{
> + int max_curbe_size;
> + if (IS_BAYTRAIL_T(device_id) ||
> + IS_IVB_GT1(device_id))
> + max_curbe_size = 992;
> + else
> + max_curbe_size = 2016;
> +
> + return (max_curbe_size*32) > MAX_KERNEL_ARG_SIZE ?
> + (MAX_KERNEL_ARG_SIZE / 32) : max_curbe_size;
> +}
> +
> static void
> intel_gpgpu_load_vfe_state(intel_gpgpu_t *gpgpu)
> {
> @@ -293,10 +309,10 @@ intel_gpgpu_load_vfe_state(intel_gpgpu_t *gpgpu)
> OUT_BATCH(gpgpu->batch, 0);
> }
> /* max_thread | urb entries | (reset_gateway|bypass_gate_way | gpgpu_mode) */
> - OUT_BATCH(gpgpu->batch, 0 | ((gpgpu->max_threads - 1) << 16) | (64 << 8) | 0xc4);
> + OUT_BATCH(gpgpu->batch, 0 | ((gpgpu->max_threads - 1) << 16) | (0 << 8) | 0xc4);
> OUT_BATCH(gpgpu->batch, 0);
> /* curbe_size */
> - OUT_BATCH(gpgpu->batch, 480);
> + OUT_BATCH(gpgpu->batch, cl_get_max_curbe_size(gpgpu->drv->device_id));
> OUT_BATCH(gpgpu->batch, 0);
> OUT_BATCH(gpgpu->batch, 0);
> OUT_BATCH(gpgpu->batch, 0);
> @@ -306,17 +322,16 @@ intel_gpgpu_load_vfe_state(intel_gpgpu_t *gpgpu)
> static void
> intel_gpgpu_load_curbe_buffer(intel_gpgpu_t *gpgpu)
> {
> + int curbe_size = gpgpu->curb.size_cs_entry * gpgpu->curb.num_cs_entries;
> + if (curbe_size > cl_get_max_curbe_size(gpgpu->drv->device_id)) {
> + curbe_size = cl_get_max_curbe_size(gpgpu->drv->device_id);
> + fprintf(stderr, "warning, curbe size exceed limitation.\n");
> + }
> BEGIN_BATCH(gpgpu->batch, 4);
> OUT_BATCH(gpgpu->batch, CMD(2,0,1) | (4 - 2)); /* length-2 */
> OUT_BATCH(gpgpu->batch, 0); /* mbz */
> -// XXX
> -#if 1
> OUT_BATCH(gpgpu->batch,
> - gpgpu->urb.size_cs_entry*
> - gpgpu->urb.num_cs_entries*32);
> -#else
> - OUT_BATCH(gpgpu->batch, 5120);
> -#endif
> + curbe_size * 32);
> OUT_RELOC(gpgpu->batch, gpgpu->aux_buf.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, gpgpu->aux_offset.curbe_offset);
> ADVANCE_BATCH(gpgpu->batch);
> }
> @@ -577,8 +592,8 @@ intel_gpgpu_state_init(intel_gpgpu_t *gpgpu,
> gpgpu->sampler_bitmap = ~((1 << max_sampler_n) - 1);
>
> /* URB */
> - gpgpu->urb.num_cs_entries = max_threads;
> - gpgpu->urb.size_cs_entry = size_cs_entry;
> + gpgpu->curb.num_cs_entries = 64;
> + gpgpu->curb.size_cs_entry = size_cs_entry;
> gpgpu->max_threads = max_threads;
>
> if (gpgpu->printf_b.ibo)
> @@ -616,7 +631,7 @@ intel_gpgpu_state_init(intel_gpgpu_t *gpgpu,
> //curbe must be 32 bytes aligned
> size_aux = ALIGN(size_aux, 32);
> gpgpu->aux_offset.curbe_offset = size_aux;
> - size_aux += gpgpu->urb.num_cs_entries * gpgpu->urb.size_cs_entry * 64;
> + size_aux += gpgpu->curb.num_cs_entries * gpgpu->curb.size_cs_entry * 32;
>
> //idrt must be 32 bytes aligned
> size_aux = ALIGN(size_aux, 32);
> --
> 1.8.3.2
>
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list