[Beignet] [PATCH 2/4] add extensions intel_accelerator and basic intel_motion_estimation

Song, Ruiling ruiling.song at intel.com
Sun Sep 6 00:01:54 PDT 2015


> +    if (kernel->vme) {
> +        fixed_local_sz[0] = 16;
> +        fixed_local_sz[1] = 1;

Why it is 16? Does it work for all cases?

> -  if (global_work_size != NULL)
> +  if (kernel->vme) {
> +    fixed_global_sz[0] = (global_work_size[0]+15) / 16 * 16;
> +    fixed_global_sz[1] = (global_work_size[1]+15) / 16;
> +  } else {
>      for (i = 0; i < work_dim; ++i)
>        fixed_global_sz[i] = global_work_size[i];
> +  }
>    if (global_work_offset != NULL)
>      for (i = 0; i < work_dim; ++i)
>        fixed_global_off[i] = global_work_offset[i];



> @@ -140,10 +140,16 @@
> cl_command_queue_bind_image(cl_command_queue queue, cl_kernel k)
> 
>      image = cl_mem_image(k->args[id].mem);
>      set_image_info(k->curbe, &k->images[i], image);
> -    cl_gpgpu_bind_image(gpgpu, k->images[i].idx, image->base.bo, image-
> >offset,
> -                        image->intel_fmt, image->image_type, image->bpp,
> -                        image->w, image->h, image->depth,
> -                        image->row_pitch, image->slice_pitch, (cl_gpgpu_tiling)image-
> >tiling);

Please check for invalid image format.

> +    if(k->vme)
> +      cl_gpgpu_bind_image_for_vme(gpgpu, k->images[i].idx, image->base.bo,
> image->offset,
> +                          image->intel_fmt, image->image_type, image->bpp,
> +                          image->w, image->h, image->depth,
> +                          image->row_pitch, image->slice_pitch, (cl_gpgpu_tiling)image-
> >tiling);


>    /* Bind all samplers */
> -  cl_gpgpu_bind_sampler(gpgpu, ker->samplers, ker->sampler_sz);
> +  if (ker->vme)
> +    cl_gpgpu_bind_vme_state(gpgpu, ker->accel);
> +  else
> +    cl_gpgpu_bind_sampler(gpgpu, ker->samplers, ker->sampler_sz);
Like sampler state, I hope we can gather the vme count used in the kernel, and don't need to set the state of all VMEs. 


> diff --git a/src/cl_context.h b/src/cl_context.h
> --- a/src/cl_extensions.c
> +++ b/src/cl_extensions.c
> @@ -40,7 +40,7 @@ void check_opt1_extension(cl_extensions_t *extensions)
>    int id;
>    for(id = OPT1_EXT_START_ID; id <= OPT1_EXT_END_ID; id++)
>    {
> -    if (id == EXT_ID(khr_icd))
> +    if (id == EXT_ID(khr_icd) || id == EXT_ID(intel_accelerator) || id ==
> EXT_ID(intel_motion_estimation))
>        extensions->extensions[id].base.ext_enabled = 1;
>  #if  LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 5
>      if (id == EXT_ID(khr_spir))
> @@ -63,7 +63,9 @@ check_gl_extension(cl_extensions_t *extensions) {
>  void
>  check_intel_extension(cl_extensions_t *extensions)
>  {
> -  /* Should put those map/unmap extensions here. */
> +  int id;
> +  for(id = INTEL_EXT_START_ID; id <= INTEL_EXT_END_ID; id++)
> +    extensions->extensions[id].base.ext_enabled = 1;

Is this a duplicate of code in check_op1_extension()?

> +
> +static void
> +intel_gpgpu_bind_vme_state_gen7(intel_gpgpu_t *gpgpu, cl_accelerator_intel
> accel)
> +{
> +  int index;

As I said, setting all the vme states is not a good idea, please refine this in next step.

> +  for(index = 0; index < GEN_MAX_VME_STATES; index++)
> +    intel_gpgpu_insert_vme_state_gen7(gpgpu, accel, index);
> +}
> +
>  static void

I have discussed with Chuanbo, src_grf0, src_grf1,... src_grf4 don't need to be passed in to __gen_ocl_vme(), they are not real arguments.

Thanks!
Ruiling
> +  vme_result = __gen_ocl_vme(src_image, ref_image,
> +                src_grf0, src_grf1, src_grf2, src_grf3, src_grf4,
> +                src_grf0_dw7, src_grf0_dw6, src_grf0_dw5, src_grf0_dw4,
> +                src_grf0_dw3, src_grf0_dw2, src_grf0_dw1, src_grf0_dw0,
> +                src_grf1_dw7, src_grf1_dw6, src_grf1_dw5, src_grf1_dw4,
> +                src_grf1_dw3, src_grf1_dw2, src_grf1_dw1, src_grf1_dw0,
> +                src_grf2_dw7, src_grf2_dw6, src_grf2_dw5, src_grf2_dw4,
> +                src_grf2_dw3, src_grf2_dw2, src_grf2_dw1, src_grf2_dw0,
> +                src_grf3_dw7, src_grf3_dw6, src_grf3_dw5, src_grf3_dw4,
> +                src_grf3_dw3, src_grf3_dw2, src_grf3_dw1, src_grf3_dw0,
> +                src_grf4_dw7, src_grf4_dw6, src_grf4_dw5, src_grf4_dw4,
> +                src_grf4_dw3, src_grf4_dw2, src_grf4_dw1, src_grf4_dw0,
> +                //msg_type, vme_search_path_lut, lut_sub,
> +                1, 0, 0);
> +
> +  barrier(CLK_LOCAL_MEM_FENCE);
> +
> +  int lid_x = get_local_id(0);
> +  uint write_back_grf1_dw0 = __gen_ocl_region(0, vme_result.s1);
> +  short2 val = as_short2( write_back_grf1_dw0 );
> +  int index = gid_1 * get_num_groups(0) + gid_0;
> +  if( lid_x == 0 ){
> +    motion_vector_buffer[index] = val;
> +  }
> +
> +}
> --
> 1.9.1
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list