[Beignet] [PATCH 3/3] GBE/CL: use 2D image to implement large image1D_buffer.

Zhigang Gong zhigang.gong at intel.com
Sun Dec 14 17:02:24 PST 2014


From: Zhigang Gong <zhigang.gong at linux.intel.com>

Per the OpenCL spec, the minimum CL_DEVICE_IMAGE_MAX_BUFFER_SIZE is 65536,
which is too large for a 1D surface on Gen platforms, so we have to use
a 2D surface to implement it. Because the OpenCL spec only allows an
image1d_buffer_t to be accessed via the default sampler, this is doable:
such accesses never use float coordinates and never use a linear
(non-nearest) filter.

Signed-off-by: Zhigang Gong <zhigang.gong at linux.intel.com>
---
 backend/src/libocl/src/ocl_image.cl | 20 +++++++++++---
 src/cl_gt_device.h                  |  2 +-
 src/cl_mem.c                        | 52 ++++++++++++++++++++++++++++++-------
 src/cl_mem.h                        |  6 +++++
 src/intel/intel_gpgpu.c             |  2 +-
 5 files changed, 67 insertions(+), 15 deletions(-)

diff --git a/backend/src/libocl/src/ocl_image.cl b/backend/src/libocl/src/ocl_image.cl
index 8777d9f..8bbd1e2 100644
--- a/backend/src/libocl/src/ocl_image.cl
+++ b/backend/src/libocl/src/ocl_image.cl
@@ -52,7 +52,7 @@
   OVERLOADABLE int __gen_ocl_get_image_depth(image_type image);                           \
 
 DECL_GEN_OCL_RW_IMAGE(image1d_t, 1)
-DECL_GEN_OCL_RW_IMAGE(image1d_buffer_t, 1)
+DECL_GEN_OCL_RW_IMAGE(image1d_buffer_t, 2)
 DECL_GEN_OCL_RW_IMAGE(image1d_array_t, 2)
 DECL_GEN_OCL_RW_IMAGE(image1d_array_t, 4)
 DECL_GEN_OCL_RW_IMAGE(image2d_t, 2)
@@ -370,9 +370,23 @@ DECL_IMAGE_TYPE(image3d_t, 3)
 DECL_IMAGE_TYPE(image2d_array_t, 4)
 DECL_IMAGE_TYPE(image2d_array_t, 3)
 
+#define DECL_READ_IMAGE1D_BUFFER_NOSAMPLER(image_type, image_data_type,       \
+                                  suffix, coord_type)                         \
+  OVERLOADABLE image_data_type read_image ##suffix(image_type cl_image,       \
+                                               coord_type coord)              \
+  {                                                                           \
+    sampler_t defaultSampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE \
+                               | CLK_FILTER_NEAREST;                          \
+    int2 effectCoord;                                                         \
+    effectCoord.s0 = coord % 8192;                                            \
+    effectCoord.s1 = coord / 8192;                                            \
+    return __gen_ocl_read_image ##suffix(                                     \
+             cl_image, defaultSampler, effectCoord, 0);                       \
+  }
+
 #define DECL_IMAGE_1DBuffer(int_clamping_fix, image_data_type, suffix)        \
-  DECL_READ_IMAGE_NOSAMPLER(image1d_buffer_t, image_data_type,                \
-                            suffix, int)                                      \
+  DECL_READ_IMAGE1D_BUFFER_NOSAMPLER(image1d_buffer_t, image_data_type,       \
+                                     suffix, int)                             \
   DECL_WRITE_IMAGE(image1d_buffer_t, image_data_type, suffix, int)
 
 DECL_IMAGE_1DBuffer(GEN_FIX_INT_CLAMPING, int4, i)
diff --git a/src/cl_gt_device.h b/src/cl_gt_device.h
index ed19f10..4faa15a 100644
--- a/src/cl_gt_device.h
+++ b/src/cl_gt_device.h
@@ -50,7 +50,7 @@
 .image3d_max_width = 8192,
 .image3d_max_height = 8192,
 .image3d_max_depth = 2048,
-.image_mem_size = 8192,
+.image_mem_size = 65536,
 .max_samplers = 16,
 .mem_base_addr_align = sizeof(cl_long) * 16 * 8,
 .min_data_type_align_size = sizeof(cl_long) * 16,
diff --git a/src/cl_mem.c b/src/cl_mem.c
index 3055bea..3225fd2 100644
--- a/src/cl_mem.c
+++ b/src/cl_mem.c
@@ -190,10 +190,18 @@ cl_get_image_info(cl_mem mem,
     *(size_t *)param_value = image->slice_pitch;
     break;
   case CL_IMAGE_WIDTH:
-    *(size_t *)param_value = image->w;
+
+    if (mem->type == CL_MEM_BUFFER1D_IMAGE_TYPE) {
+      struct _cl_mem_buffer1d_image *buffer1d_image = (struct _cl_mem_buffer1d_image*) image;
+      *(size_t *)param_value = buffer1d_image->size;
+    } else
+      *(size_t *)param_value = image->w;
     break;
   case CL_IMAGE_HEIGHT:
-    *(size_t *)param_value = IS_1D(image) ? 0 : image->h;
+    if (mem->type == CL_MEM_BUFFER1D_IMAGE_TYPE)
+      *(size_t *)param_value = 0;
+    else
+      *(size_t *)param_value = IS_1D(image) ? 0 : image->h;
     break;
   case CL_IMAGE_DEPTH:
     *(size_t *)param_value = IS_3D(image) ? image->depth : 0;
@@ -243,6 +251,10 @@ cl_mem_allocate(enum cl_mem_type type,
     struct _cl_mem_gl_image *gl_image = NULL;
     TRY_ALLOC (gl_image, CALLOC(struct _cl_mem_gl_image));
     mem = &gl_image->base.base;
+  } else if (type == CL_MEM_BUFFER1D_IMAGE_TYPE) {
+    struct _cl_mem_buffer1d_image *buffer1d_image = NULL;
+    TRY_ALLOC(buffer1d_image, CALLOC(struct _cl_mem_buffer1d_image));
+    mem = &buffer1d_image->base.base;
   } else {
     struct _cl_mem_buffer *buffer = NULL;
     TRY_ALLOC (buffer, CALLOC(struct _cl_mem_buffer));
@@ -678,6 +690,7 @@ _cl_mem_new_image(cl_context ctx,
   cl_mem_object_type image_type = orig_image_type;
   uint32_t bpp = 0, intel_fmt = INTEL_UNSUPPORTED_FORMAT;
   size_t sz = 0, aligned_pitch = 0, aligned_slice_pitch = 0, aligned_h = 0;
+  size_t origin_width = w;  // for image1d buffer work around.
   cl_image_tiling_t tiling = CL_NO_TILE;
 
   /* Check flags consistency */
@@ -710,8 +723,7 @@ _cl_mem_new_image(cl_context ctx,
       image_type != CL_MEM_OBJECT_IMAGE1D_BUFFER)))
     DO_IMAGE_ERROR;
 
-  if (image_type == CL_MEM_OBJECT_IMAGE1D ||
-      image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER) {
+  if (image_type == CL_MEM_OBJECT_IMAGE1D) {
     size_t min_pitch = bpp * w;
     if (data && pitch == 0)
       pitch = min_pitch;
@@ -724,19 +736,30 @@ _cl_mem_new_image(cl_context ctx,
     if (UNLIKELY(!data && pitch != 0)) DO_IMAGE_ERROR;
     if (UNLIKELY(!data && slice_pitch != 0)) DO_IMAGE_ERROR;
     tiling = CL_NO_TILE;
-  } else if (image_type == CL_MEM_OBJECT_IMAGE2D) {
+  } else if (image_type == CL_MEM_OBJECT_IMAGE2D ||
+             image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER) {
+
+    if (image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER) {
+      if (UNLIKELY(w > ctx->device->image_mem_size)) DO_IMAGE_ERROR;
+      /* This is an image1d buffer, which may exceed the normal image size
+         restriction, so we have to use a 2D image to simulate this 1D image. */
+      h = (w + ctx->device->image2d_max_width - 1) / ctx->device->image2d_max_width;
+      w = w > ctx->device->image2d_max_width ? ctx->device->image2d_max_width : w;
+      tiling = CL_NO_TILE;
+    } else if (cl_driver_get_ver(ctx->drv) != 6) {
+      /* Pick up tiling mode (we do only linear on SNB) */
+      tiling = cl_get_default_tiling(ctx->drv);
+    }
+
     size_t min_pitch = bpp * w;
     if (data && pitch == 0)
       pitch = min_pitch;
+
     if (UNLIKELY(w > ctx->device->image2d_max_width)) DO_IMAGE_ERROR;
     if (UNLIKELY(h > ctx->device->image2d_max_height)) DO_IMAGE_ERROR;
     if (UNLIKELY(data && min_pitch > pitch)) DO_IMAGE_ERROR;
     if (UNLIKELY(!data && pitch != 0)) DO_IMAGE_ERROR;
 
-    /* Pick up tiling mode (we do only linear on SNB) */
-    if (cl_driver_get_ver(ctx->drv) != 6)
-      tiling = cl_get_default_tiling(ctx->drv);
-
     depth = 1;
   } else if (image_type == CL_MEM_OBJECT_IMAGE3D ||
              image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY ||
@@ -791,7 +814,16 @@ _cl_mem_new_image(cl_context ctx,
     sz = aligned_pitch * aligned_h * depth;
   }
 
-  mem = cl_mem_allocate(CL_MEM_IMAGE_TYPE, ctx, flags, sz, tiling != CL_NO_TILE, NULL, &err);
+  if (image_type != CL_MEM_OBJECT_IMAGE1D_BUFFER)
+    mem = cl_mem_allocate(CL_MEM_IMAGE_TYPE, ctx, flags, sz, tiling != CL_NO_TILE, NULL, &err);
+  else {
+    mem = cl_mem_allocate(CL_MEM_BUFFER1D_IMAGE_TYPE, ctx, flags, sz, tiling != CL_NO_TILE, NULL, &err);
+    if (mem != NULL && err == CL_SUCCESS) {
+      struct _cl_mem_buffer1d_image *buffer1d_image = (struct _cl_mem_buffer1d_image *)mem;
+      buffer1d_image->size = origin_width;;
+    }
+  }
+
   if (mem == NULL || err != CL_SUCCESS)
     goto error;
 
diff --git a/src/cl_mem.h b/src/cl_mem.h
index 1641dcc..fd50220 100644
--- a/src/cl_mem.h
+++ b/src/cl_mem.h
@@ -72,6 +72,7 @@ enum cl_mem_type {
   CL_MEM_SUBBUFFER_TYPE,
   CL_MEM_IMAGE_TYPE,
   CL_MEM_GL_IMAGE_TYPE,
+  CL_MEM_BUFFER1D_IMAGE_TYPE
 };
 #define IS_IMAGE(mem) (mem->type >= CL_MEM_IMAGE_TYPE)
 #define IS_GL_IMAGE(mem) (mem->type == CL_MEM_GL_IMAGE_TYPE)
@@ -117,6 +118,11 @@ struct _cl_mem_gl_image {
   uint32_t texture;
 };
 
+struct _cl_mem_buffer1d_image {
+  struct _cl_mem_image base;
+  uint32_t size;
+};
+
 inline static void
 cl_mem_image_init(struct _cl_mem_image *image, size_t w, size_t h,
                   cl_mem_object_type image_type,
diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c
index 0df7876..c80a11b 100644
--- a/src/intel/intel_gpgpu.c
+++ b/src/intel/intel_gpgpu.c
@@ -1003,11 +1003,11 @@ static int
 intel_get_surface_type(cl_mem_object_type type)
 {
   switch (type) {
-  case CL_MEM_OBJECT_IMAGE1D_BUFFER:
   case CL_MEM_OBJECT_IMAGE1D:
   case CL_MEM_OBJECT_IMAGE1D_ARRAY:
     return I965_SURFACE_1D;
 
+  case CL_MEM_OBJECT_IMAGE1D_BUFFER:
   case CL_MEM_OBJECT_IMAGE2D:
   case CL_MEM_OBJECT_IMAGE2D_ARRAY:
     return I965_SURFACE_2D;
-- 
1.8.3.2



More information about the Beignet mailing list