[Beignet] [PATCH] Fix the bug of 1D array slice pitch

Zhigang Gong zhigang.gong at linux.intel.com
Tue Oct 21 21:57:19 PDT 2014


One minor comment: there is no user of tile_sz.
We can simply remove it, rather than confusingly
using dim 2 for tile_sz. We can just use dim 0 for
horizontal alignment and dim 1 for vertical alignment.

On Tue, Oct 21, 2014 at 09:02:27PM +0800, junyan.he at inbox.com wrote:
> From: Junyan He <junyan.he at linux.intel.com>
> 
> For BDW, the vertical alignment is at least 4.
> This causes the slice pitch to be twice as big as
> on Gen7 for 1D buffer arrays.
> Because the buffer tiling alignment may differ
> between GENs, we move it from the runtime to
> the intel driver.
> 
> Signed-off-by: Junyan He <junyan.he at linux.intel.com>
> ---
>  src/cl_driver.h          |    3 +++
>  src/cl_driver_defs.c     |    1 +
>  src/cl_mem.c             |   19 ++++++-------------
>  src/intel/intel_driver.c |   45 +++++++++++++++++++++++++++++++++++++++++++++
>  src/intel/intel_gpgpu.c  |    2 +-
>  5 files changed, 56 insertions(+), 14 deletions(-)
> 
> diff --git a/src/cl_driver.h b/src/cl_driver.h
> index e973ba5..0603089 100644
> --- a/src/cl_driver.h
> +++ b/src/cl_driver.h
> @@ -360,6 +360,9 @@ extern cl_buffer_wait_rendering_cb *cl_buffer_wait_rendering;
>  typedef int (cl_buffer_get_fd_cb)(cl_buffer, int *fd);
>  extern cl_buffer_get_fd_cb *cl_buffer_get_fd;
>  
> +typedef int (cl_buffer_get_tiling_align_cb)(cl_context ctx, uint32_t tiling_mode, uint32_t dim);
> +extern cl_buffer_get_tiling_align_cb *cl_buffer_get_tiling_align;
> +
>  /* Get the device id */
>  typedef int (cl_driver_get_device_id_cb)(void);
>  extern cl_driver_get_device_id_cb *cl_driver_get_device_id;
> diff --git a/src/cl_driver_defs.c b/src/cl_driver_defs.c
> index 72f25d9..665dad2 100644
> --- a/src/cl_driver_defs.c
> +++ b/src/cl_driver_defs.c
> @@ -48,6 +48,7 @@ LOCAL cl_buffer_wait_rendering_cb *cl_buffer_wait_rendering = NULL;
>  LOCAL cl_buffer_get_buffer_from_libva_cb *cl_buffer_get_buffer_from_libva = NULL;
>  LOCAL cl_buffer_get_image_from_libva_cb *cl_buffer_get_image_from_libva = NULL;
>  LOCAL cl_buffer_get_fd_cb *cl_buffer_get_fd = NULL;
> +LOCAL cl_buffer_get_tiling_align_cb *cl_buffer_get_tiling_align = NULL;
>  
>  /* cl_khr_gl_sharing */
>  LOCAL cl_gl_acquire_texture_cb *cl_gl_acquire_texture = NULL;
> diff --git a/src/cl_mem.c b/src/cl_mem.c
> index 077f1d7..59265a3 100644
> --- a/src/cl_mem.c
> +++ b/src/cl_mem.c
> @@ -610,13 +610,6 @@ cl_mem_copy_image(struct _cl_mem_image *image,
>    cl_mem_unmap_auto((cl_mem)image);
>  }
>  
> -static const uint32_t tile_sz = 4096; /* 4KB per tile */
> -static const uint32_t tilex_w = 512;  /* tileX width in bytes */
> -static const uint32_t tilex_h = 8;    /* tileX height in number of rows */
> -static const uint32_t tiley_w = 128;  /* tileY width in bytes */
> -static const uint32_t tiley_h = 32;   /* tileY height in number of rows */
> -static const uint32_t valign = 2;     /* vertical alignment is 2. */
> -
>  cl_image_tiling_t cl_get_default_tiling(void)
>  {
>    static int initialized = 0;
> @@ -749,13 +742,13 @@ _cl_mem_new_image(cl_context ctx,
>    /* Tiling requires to align both pitch and height */
>    if (tiling == CL_NO_TILE) {
>      aligned_pitch = w * bpp;
> -    aligned_h  = ALIGN(h, valign);
> +    aligned_h  = ALIGN(h, cl_buffer_get_tiling_align(ctx, CL_NO_TILE, 1));
>    } else if (tiling == CL_TILE_X) {
> -    aligned_pitch = ALIGN(w * bpp, tilex_w);
> -    aligned_h     = ALIGN(h, tilex_h);
> +    aligned_pitch = ALIGN(w * bpp, cl_buffer_get_tiling_align(ctx, CL_TILE_X, 0));
> +    aligned_h     = ALIGN(h, cl_buffer_get_tiling_align(ctx, CL_TILE_X, 1));
>    } else if (tiling == CL_TILE_Y) {
> -    aligned_pitch = ALIGN(w * bpp, tiley_w);
> -    aligned_h     = ALIGN(h, tiley_h);
> +    aligned_pitch = ALIGN(w * bpp, cl_buffer_get_tiling_align(ctx, CL_TILE_Y, 0));
> +    aligned_h     = ALIGN(h, cl_buffer_get_tiling_align(ctx, CL_TILE_Y, 1));
>    }
>  
>    sz = aligned_pitch * aligned_h * depth;
> @@ -779,7 +772,7 @@ _cl_mem_new_image(cl_context ctx,
>        image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER)
>      aligned_slice_pitch = 0;
>    else
> -    aligned_slice_pitch = aligned_pitch * ALIGN(h, 2);
> +    aligned_slice_pitch = aligned_pitch * ALIGN(h, cl_buffer_get_tiling_align(ctx, CL_NO_TILE, 1));
>  
>    cl_mem_image_init(cl_mem_image(mem), w, h, image_type, depth, *fmt,
>                      intel_fmt, bpp, aligned_pitch, aligned_slice_pitch, tiling,
> diff --git a/src/intel/intel_driver.c b/src/intel/intel_driver.c
> index 2c2ed5f..cb466ab 100644
> --- a/src/intel/intel_driver.c
> +++ b/src/intel/intel_driver.c
> @@ -476,6 +476,50 @@ static int get_cl_tiling(uint32_t drm_tiling)
>    return CL_NO_TILE;
>  }
>  
> +static uint32_t intel_buffer_get_tiling_align(cl_context ctx, uint32_t tiling_mode, uint32_t dim)
> +{
> +  uint32_t gen_ver = ((intel_driver_t *)ctx->drv)->gen_ver;
> +  uint32_t ret = 0;
> +
> +  switch (tiling_mode) {
> +  case CL_TILE_X:
> +    if (dim == 0) { //tileX width in bytes
> +      ret = 512;
> +    } else if (dim == 1) { //tileX height in number of rows
> +      ret = 8;
> +    } else if (dim == 2) { //tile SZ
> +      ret = 4096;
> +    } else
> +      assert(0);
> +    break;
> +
> +  case CL_TILE_Y:
> +    if (dim == 0) { //tileY width in bytes
> +      ret = 128;
> +    } else if (dim == 1) { //tileY height in number of rows
> +      ret = 32;
> +    } else if (dim == 2) { //tile SZ
> +      ret = 4096;
> +    } else
> +      assert(0);
> +    break;
> +
> +  case CL_NO_TILE:
> +    if (dim == 1) { //vertical alignment
> +      if (gen_ver == 8)
> +        ret = 4;
> +      else
> +        ret = 2;
> +    } else if (dim == 2) { //tile SZ
> +      ret = 4096;
> +    } else
> +      assert(0);
> +    break;
> +  }
> +
> +  return ret;
> +}
> +
>  #if defined(HAS_EGL)
>  #include "intel_dri_resource_sharing.h"
>  #include "cl_image.h"
> @@ -741,5 +785,6 @@ intel_setup_callbacks(void)
>    cl_buffer_subdata = (cl_buffer_subdata_cb *) drm_intel_bo_subdata;
>    cl_buffer_wait_rendering = (cl_buffer_wait_rendering_cb *) drm_intel_bo_wait_rendering;
>    cl_buffer_get_fd = (cl_buffer_get_fd_cb *) drm_intel_bo_gem_export_to_prime;
> +  cl_buffer_get_tiling_align = (cl_buffer_get_tiling_align_cb *)intel_buffer_get_tiling_align;
>    intel_set_gpgpu_callbacks(intel_get_device_id());
>  }
> diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c
> index 167d8d9..d379768 100644
> --- a/src/intel/intel_gpgpu.c
> +++ b/src/intel/intel_gpgpu.c
> @@ -1094,6 +1094,7 @@ intel_gpgpu_bind_image_gen8(intel_gpgpu_t *gpgpu,
>    ss->ss0.surface_format = format;
>    if (intel_is_surface_array(type)) {
>      ss->ss0.surface_array = 1;
> +    ss->ss1.surface_qpitch = 1;
>    }
>    ss->ss0.horizontal_alignment = 1;
>    ss->ss0.vertical_alignment = 1;
> @@ -1117,7 +1118,6 @@ intel_gpgpu_bind_image_gen8(intel_gpgpu_t *gpgpu,
>    ss->ss3.surface_pitch = pitch - 1;
>  
>    ss->ss1.mem_obj_ctrl_state = cl_gpgpu_get_cache_ctrl();
> -  ss->ss7.red_clear_color = 1;
>    ss->ss7.shader_channel_select_red = I965_SURCHAN_SELECT_RED;
>    ss->ss7.shader_channel_select_green = I965_SURCHAN_SELECT_GREEN;
>    ss->ss7.shader_channel_select_blue = I965_SURCHAN_SELECT_BLUE;
> -- 
> 1.7.9.5
> 
> 
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list