[Beignet] [PATCH] Fix the bug of 1D array slice pitch
Zhigang Gong
zhigang.gong at linux.intel.com
Tue Oct 21 21:57:19 PDT 2014
One minor comment: there is no user of tile_sz.
We can simply remove it, which avoids the confusion of
using dim 2 for tile_sz. We can then use just dim 0
for horizontal alignment and dim 1 for vertical alignment.
On Tue, Oct 21, 2014 at 09:02:27PM +0800, junyan.he at inbox.com wrote:
> From: Junyan He <junyan.he at linux.intel.com>
>
> For BDW, the vertical alignment is at least 4.
> This causes the slice pitch to be twice as big as
> on Gen7 for a 1D buffer array.
> Because the buffer tiling alignment may change
> between different GENs, we move it from the runtime
> to the intel driver.
>
> Signed-off-by: Junyan He <junyan.he at linux.intel.com>
> ---
> src/cl_driver.h | 3 +++
> src/cl_driver_defs.c | 1 +
> src/cl_mem.c | 19 ++++++-------------
> src/intel/intel_driver.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
> src/intel/intel_gpgpu.c | 2 +-
> 5 files changed, 56 insertions(+), 14 deletions(-)
>
> diff --git a/src/cl_driver.h b/src/cl_driver.h
> index e973ba5..0603089 100644
> --- a/src/cl_driver.h
> +++ b/src/cl_driver.h
> @@ -360,6 +360,9 @@ extern cl_buffer_wait_rendering_cb *cl_buffer_wait_rendering;
> typedef int (cl_buffer_get_fd_cb)(cl_buffer, int *fd);
> extern cl_buffer_get_fd_cb *cl_buffer_get_fd;
>
> +typedef int (cl_buffer_get_tiling_align_cb)(cl_context ctx, uint32_t tiling_mode, uint32_t dim);
> +extern cl_buffer_get_tiling_align_cb *cl_buffer_get_tiling_align;
> +
> /* Get the device id */
> typedef int (cl_driver_get_device_id_cb)(void);
> extern cl_driver_get_device_id_cb *cl_driver_get_device_id;
> diff --git a/src/cl_driver_defs.c b/src/cl_driver_defs.c
> index 72f25d9..665dad2 100644
> --- a/src/cl_driver_defs.c
> +++ b/src/cl_driver_defs.c
> @@ -48,6 +48,7 @@ LOCAL cl_buffer_wait_rendering_cb *cl_buffer_wait_rendering = NULL;
> LOCAL cl_buffer_get_buffer_from_libva_cb *cl_buffer_get_buffer_from_libva = NULL;
> LOCAL cl_buffer_get_image_from_libva_cb *cl_buffer_get_image_from_libva = NULL;
> LOCAL cl_buffer_get_fd_cb *cl_buffer_get_fd = NULL;
> +LOCAL cl_buffer_get_tiling_align_cb *cl_buffer_get_tiling_align = NULL;
>
> /* cl_khr_gl_sharing */
> LOCAL cl_gl_acquire_texture_cb *cl_gl_acquire_texture = NULL;
> diff --git a/src/cl_mem.c b/src/cl_mem.c
> index 077f1d7..59265a3 100644
> --- a/src/cl_mem.c
> +++ b/src/cl_mem.c
> @@ -610,13 +610,6 @@ cl_mem_copy_image(struct _cl_mem_image *image,
> cl_mem_unmap_auto((cl_mem)image);
> }
>
> -static const uint32_t tile_sz = 4096; /* 4KB per tile */
> -static const uint32_t tilex_w = 512; /* tileX width in bytes */
> -static const uint32_t tilex_h = 8; /* tileX height in number of rows */
> -static const uint32_t tiley_w = 128; /* tileY width in bytes */
> -static const uint32_t tiley_h = 32; /* tileY height in number of rows */
> -static const uint32_t valign = 2; /* vertical alignment is 2. */
> -
> cl_image_tiling_t cl_get_default_tiling(void)
> {
> static int initialized = 0;
> @@ -749,13 +742,13 @@ _cl_mem_new_image(cl_context ctx,
> /* Tiling requires to align both pitch and height */
> if (tiling == CL_NO_TILE) {
> aligned_pitch = w * bpp;
> - aligned_h = ALIGN(h, valign);
> + aligned_h = ALIGN(h, cl_buffer_get_tiling_align(ctx, CL_NO_TILE, 1));
> } else if (tiling == CL_TILE_X) {
> - aligned_pitch = ALIGN(w * bpp, tilex_w);
> - aligned_h = ALIGN(h, tilex_h);
> + aligned_pitch = ALIGN(w * bpp, cl_buffer_get_tiling_align(ctx, CL_TILE_X, 0));
> + aligned_h = ALIGN(h, cl_buffer_get_tiling_align(ctx, CL_TILE_X, 1));
> } else if (tiling == CL_TILE_Y) {
> - aligned_pitch = ALIGN(w * bpp, tiley_w);
> - aligned_h = ALIGN(h, tiley_h);
> + aligned_pitch = ALIGN(w * bpp, cl_buffer_get_tiling_align(ctx, CL_TILE_Y, 0));
> + aligned_h = ALIGN(h, cl_buffer_get_tiling_align(ctx, CL_TILE_Y, 1));
> }
>
> sz = aligned_pitch * aligned_h * depth;
> @@ -779,7 +772,7 @@ _cl_mem_new_image(cl_context ctx,
> image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER)
> aligned_slice_pitch = 0;
> else
> - aligned_slice_pitch = aligned_pitch * ALIGN(h, 2);
> + aligned_slice_pitch = aligned_pitch * ALIGN(h, cl_buffer_get_tiling_align(ctx, CL_NO_TILE, 1));
>
> cl_mem_image_init(cl_mem_image(mem), w, h, image_type, depth, *fmt,
> intel_fmt, bpp, aligned_pitch, aligned_slice_pitch, tiling,
> diff --git a/src/intel/intel_driver.c b/src/intel/intel_driver.c
> index 2c2ed5f..cb466ab 100644
> --- a/src/intel/intel_driver.c
> +++ b/src/intel/intel_driver.c
> @@ -476,6 +476,50 @@ static int get_cl_tiling(uint32_t drm_tiling)
> return CL_NO_TILE;
> }
>
> +static uint32_t intel_buffer_get_tiling_align(cl_context ctx, uint32_t tiling_mode, uint32_t dim)
> +{
> + uint32_t gen_ver = ((intel_driver_t *)ctx->drv)->gen_ver;
> + uint32_t ret = 0;
> +
> + switch (tiling_mode) {
> + case CL_TILE_X:
> + if (dim == 0) { //tileX width in bytes
> + ret = 512;
> + } else if (dim == 1) { //tileX height in number of rows
> + ret = 8;
> + } else if (dim == 2) { //tile SZ
> + ret = 4096;
> + } else
> + assert(0);
> + break;
> +
> + case CL_TILE_Y:
> + if (dim == 0) { //tileY width in bytes
> + ret = 128;
> + } else if (dim == 1) { //tileY height in number of rows
> + ret = 32;
> + } else if (dim == 2) { //tile SZ
> + ret = 4096;
> + } else
> + assert(0);
> + break;
> +
> + case CL_NO_TILE:
> + if (dim == 1) { //vertical alignment
> + if (gen_ver == 8)
> + ret = 4;
> + else
> + ret = 2;
> + } else if (dim == 2) { //tile SZ
> + ret = 4096;
> + } else
> + assert(0);
> + break;
> + }
> +
> + return ret;
> +}
> +
> #if defined(HAS_EGL)
> #include "intel_dri_resource_sharing.h"
> #include "cl_image.h"
> @@ -741,5 +785,6 @@ intel_setup_callbacks(void)
> cl_buffer_subdata = (cl_buffer_subdata_cb *) drm_intel_bo_subdata;
> cl_buffer_wait_rendering = (cl_buffer_wait_rendering_cb *) drm_intel_bo_wait_rendering;
> cl_buffer_get_fd = (cl_buffer_get_fd_cb *) drm_intel_bo_gem_export_to_prime;
> + cl_buffer_get_tiling_align = (cl_buffer_get_tiling_align_cb *)intel_buffer_get_tiling_align;
> intel_set_gpgpu_callbacks(intel_get_device_id());
> }
> diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c
> index 167d8d9..d379768 100644
> --- a/src/intel/intel_gpgpu.c
> +++ b/src/intel/intel_gpgpu.c
> @@ -1094,6 +1094,7 @@ intel_gpgpu_bind_image_gen8(intel_gpgpu_t *gpgpu,
> ss->ss0.surface_format = format;
> if (intel_is_surface_array(type)) {
> ss->ss0.surface_array = 1;
> + ss->ss1.surface_qpitch = 1;
> }
> ss->ss0.horizontal_alignment = 1;
> ss->ss0.vertical_alignment = 1;
> @@ -1117,7 +1118,6 @@ intel_gpgpu_bind_image_gen8(intel_gpgpu_t *gpgpu,
> ss->ss3.surface_pitch = pitch - 1;
>
> ss->ss1.mem_obj_ctrl_state = cl_gpgpu_get_cache_ctrl();
> - ss->ss7.red_clear_color = 1;
> ss->ss7.shader_channel_select_red = I965_SURCHAN_SELECT_RED;
> ss->ss7.shader_channel_select_green = I965_SURCHAN_SELECT_GREEN;
> ss->ss7.shader_channel_select_blue = I965_SURCHAN_SELECT_BLUE;
> --
> 1.7.9.5
>
>
>
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list