[Beignet] [PATCH] Fix the bug of 1D array slice pitch

junyan.he at inbox.com junyan.he at inbox.com
Tue Oct 21 06:02:27 PDT 2014


From: Junyan He <junyan.he at linux.intel.com>

On BDW, the vertical alignment is at least 4.
This makes the slice pitch twice as big as
on Gen7 for 1D buffer arrays.
Because the buffer tiling alignment may differ
between GENs, we move it from the runtime to
the intel driver.

Signed-off-by: Junyan He <junyan.he at linux.intel.com>
---
 src/cl_driver.h          |    3 +++
 src/cl_driver_defs.c     |    1 +
 src/cl_mem.c             |   19 ++++++-------------
 src/intel/intel_driver.c |   45 +++++++++++++++++++++++++++++++++++++++++++++
 src/intel/intel_gpgpu.c  |    2 +-
 5 files changed, 56 insertions(+), 14 deletions(-)

diff --git a/src/cl_driver.h b/src/cl_driver.h
index e973ba5..0603089 100644
--- a/src/cl_driver.h
+++ b/src/cl_driver.h
@@ -360,6 +360,9 @@ extern cl_buffer_wait_rendering_cb *cl_buffer_wait_rendering;
 typedef int (cl_buffer_get_fd_cb)(cl_buffer, int *fd);
 extern cl_buffer_get_fd_cb *cl_buffer_get_fd;
 
+typedef int (cl_buffer_get_tiling_align_cb)(cl_context ctx, uint32_t tiling_mode, uint32_t dim);
+extern cl_buffer_get_tiling_align_cb *cl_buffer_get_tiling_align;
+
 /* Get the device id */
 typedef int (cl_driver_get_device_id_cb)(void);
 extern cl_driver_get_device_id_cb *cl_driver_get_device_id;
diff --git a/src/cl_driver_defs.c b/src/cl_driver_defs.c
index 72f25d9..665dad2 100644
--- a/src/cl_driver_defs.c
+++ b/src/cl_driver_defs.c
@@ -48,6 +48,7 @@ LOCAL cl_buffer_wait_rendering_cb *cl_buffer_wait_rendering = NULL;
 LOCAL cl_buffer_get_buffer_from_libva_cb *cl_buffer_get_buffer_from_libva = NULL;
 LOCAL cl_buffer_get_image_from_libva_cb *cl_buffer_get_image_from_libva = NULL;
 LOCAL cl_buffer_get_fd_cb *cl_buffer_get_fd = NULL;
+LOCAL cl_buffer_get_tiling_align_cb *cl_buffer_get_tiling_align = NULL;
 
 /* cl_khr_gl_sharing */
 LOCAL cl_gl_acquire_texture_cb *cl_gl_acquire_texture = NULL;
diff --git a/src/cl_mem.c b/src/cl_mem.c
index 077f1d7..59265a3 100644
--- a/src/cl_mem.c
+++ b/src/cl_mem.c
@@ -610,13 +610,6 @@ cl_mem_copy_image(struct _cl_mem_image *image,
   cl_mem_unmap_auto((cl_mem)image);
 }
 
-static const uint32_t tile_sz = 4096; /* 4KB per tile */
-static const uint32_t tilex_w = 512;  /* tileX width in bytes */
-static const uint32_t tilex_h = 8;    /* tileX height in number of rows */
-static const uint32_t tiley_w = 128;  /* tileY width in bytes */
-static const uint32_t tiley_h = 32;   /* tileY height in number of rows */
-static const uint32_t valign = 2;     /* vertical alignment is 2. */
-
 cl_image_tiling_t cl_get_default_tiling(void)
 {
   static int initialized = 0;
@@ -749,13 +742,13 @@ _cl_mem_new_image(cl_context ctx,
   /* Tiling requires to align both pitch and height */
   if (tiling == CL_NO_TILE) {
     aligned_pitch = w * bpp;
-    aligned_h  = ALIGN(h, valign);
+    aligned_h  = ALIGN(h, cl_buffer_get_tiling_align(ctx, CL_NO_TILE, 1));
   } else if (tiling == CL_TILE_X) {
-    aligned_pitch = ALIGN(w * bpp, tilex_w);
-    aligned_h     = ALIGN(h, tilex_h);
+    aligned_pitch = ALIGN(w * bpp, cl_buffer_get_tiling_align(ctx, CL_TILE_X, 0));
+    aligned_h     = ALIGN(h, cl_buffer_get_tiling_align(ctx, CL_TILE_X, 1));
   } else if (tiling == CL_TILE_Y) {
-    aligned_pitch = ALIGN(w * bpp, tiley_w);
-    aligned_h     = ALIGN(h, tiley_h);
+    aligned_pitch = ALIGN(w * bpp, cl_buffer_get_tiling_align(ctx, CL_TILE_Y, 0));
+    aligned_h     = ALIGN(h, cl_buffer_get_tiling_align(ctx, CL_TILE_Y, 1));
   }
 
   sz = aligned_pitch * aligned_h * depth;
@@ -779,7 +772,7 @@ _cl_mem_new_image(cl_context ctx,
       image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER)
     aligned_slice_pitch = 0;
   else
-    aligned_slice_pitch = aligned_pitch * ALIGN(h, 2);
+    aligned_slice_pitch = aligned_pitch * ALIGN(h, cl_buffer_get_tiling_align(ctx, CL_NO_TILE, 1));
 
   cl_mem_image_init(cl_mem_image(mem), w, h, image_type, depth, *fmt,
                     intel_fmt, bpp, aligned_pitch, aligned_slice_pitch, tiling,
diff --git a/src/intel/intel_driver.c b/src/intel/intel_driver.c
index 2c2ed5f..cb466ab 100644
--- a/src/intel/intel_driver.c
+++ b/src/intel/intel_driver.c
@@ -476,6 +476,50 @@ static int get_cl_tiling(uint32_t drm_tiling)
   return CL_NO_TILE;
 }
 
+static uint32_t intel_buffer_get_tiling_align(cl_context ctx, uint32_t tiling_mode, uint32_t dim)
+{
+  uint32_t gen_ver = ((intel_driver_t *)ctx->drv)->gen_ver;
+  uint32_t ret = 0;
+
+  switch (tiling_mode) {
+  case CL_TILE_X:
+    if (dim == 0) { //tileX width in bytes
+      ret = 512;
+    } else if (dim == 1) { //tileX height in number of rows
+      ret = 8;
+    } else if (dim == 2) { //tile SZ
+      ret = 4096;
+    } else
+      assert(0);
+    break;
+
+  case CL_TILE_Y:
+    if (dim == 0) { //tileY width in bytes
+      ret = 128;
+    } else if (dim == 1) { //tileY height in number of rows
+      ret = 32;
+    } else if (dim == 2) { //tile SZ
+      ret = 4096;
+    } else
+      assert(0);
+    break;
+
+  case CL_NO_TILE:
+    if (dim == 1) { //vertical alignment
+      if (gen_ver == 8)
+        ret = 4;
+      else
+        ret = 2;
+    } else if (dim == 2) { //tile SZ
+      ret = 4096;
+    } else
+      assert(0);
+    break;
+  }
+
+  return ret;
+}
+
 #if defined(HAS_EGL)
 #include "intel_dri_resource_sharing.h"
 #include "cl_image.h"
@@ -741,5 +785,6 @@ intel_setup_callbacks(void)
   cl_buffer_subdata = (cl_buffer_subdata_cb *) drm_intel_bo_subdata;
   cl_buffer_wait_rendering = (cl_buffer_wait_rendering_cb *) drm_intel_bo_wait_rendering;
   cl_buffer_get_fd = (cl_buffer_get_fd_cb *) drm_intel_bo_gem_export_to_prime;
+  cl_buffer_get_tiling_align = (cl_buffer_get_tiling_align_cb *)intel_buffer_get_tiling_align;
   intel_set_gpgpu_callbacks(intel_get_device_id());
 }
diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c
index 167d8d9..d379768 100644
--- a/src/intel/intel_gpgpu.c
+++ b/src/intel/intel_gpgpu.c
@@ -1094,6 +1094,7 @@ intel_gpgpu_bind_image_gen8(intel_gpgpu_t *gpgpu,
   ss->ss0.surface_format = format;
   if (intel_is_surface_array(type)) {
     ss->ss0.surface_array = 1;
+    ss->ss1.surface_qpitch = 1;
   }
   ss->ss0.horizontal_alignment = 1;
   ss->ss0.vertical_alignment = 1;
@@ -1117,7 +1118,6 @@ intel_gpgpu_bind_image_gen8(intel_gpgpu_t *gpgpu,
   ss->ss3.surface_pitch = pitch - 1;
 
   ss->ss1.mem_obj_ctrl_state = cl_gpgpu_get_cache_ctrl();
-  ss->ss7.red_clear_color = 1;
   ss->ss7.shader_channel_select_red = I965_SURCHAN_SELECT_RED;
   ss->ss7.shader_channel_select_green = I965_SURCHAN_SELECT_GREEN;
   ss->ss7.shader_channel_select_blue = I965_SURCHAN_SELECT_BLUE;
-- 
1.7.9.5





More information about the Beignet mailing list