[Beignet] [PATCH 3/3] GBE/CL: use 2D image to implement large image1D_buffer.
Zhigang Gong
zhigang.gong at intel.com
Sun Dec 14 17:02:24 PST 2014
From: Zhigang Gong <zhigang.gong at linux.intel.com>
Per the OpenCL spec, the minimum CL_DEVICE_IMAGE_MAX_BUFFER_SIZE is 65536,
which is too large for a 1D surface on Gen platforms.
We have to use a 2D surface to implement it. As the OpenCL spec only allows
an image1d_buffer_t to be accessed via the default sampler, this is doable:
it will never use float coordinates and will never use linear (non-nearest)
filters.
Signed-off-by: Zhigang Gong <zhigang.gong at linux.intel.com>
---
backend/src/libocl/src/ocl_image.cl | 20 +++++++++++---
src/cl_gt_device.h | 2 +-
src/cl_mem.c | 52 ++++++++++++++++++++++++++++++-------
src/cl_mem.h | 6 +++++
src/intel/intel_gpgpu.c | 2 +-
5 files changed, 67 insertions(+), 15 deletions(-)
diff --git a/backend/src/libocl/src/ocl_image.cl b/backend/src/libocl/src/ocl_image.cl
index 8777d9f..8bbd1e2 100644
--- a/backend/src/libocl/src/ocl_image.cl
+++ b/backend/src/libocl/src/ocl_image.cl
@@ -52,7 +52,7 @@
OVERLOADABLE int __gen_ocl_get_image_depth(image_type image); \
DECL_GEN_OCL_RW_IMAGE(image1d_t, 1)
-DECL_GEN_OCL_RW_IMAGE(image1d_buffer_t, 1)
+DECL_GEN_OCL_RW_IMAGE(image1d_buffer_t, 2)
DECL_GEN_OCL_RW_IMAGE(image1d_array_t, 2)
DECL_GEN_OCL_RW_IMAGE(image1d_array_t, 4)
DECL_GEN_OCL_RW_IMAGE(image2d_t, 2)
@@ -370,9 +370,23 @@ DECL_IMAGE_TYPE(image3d_t, 3)
DECL_IMAGE_TYPE(image2d_array_t, 4)
DECL_IMAGE_TYPE(image2d_array_t, 3)
+#define DECL_READ_IMAGE1D_BUFFER_NOSAMPLER(image_type, image_data_type, \
+ suffix, coord_type) \
+ OVERLOADABLE image_data_type read_image ##suffix(image_type cl_image, \
+ coord_type coord) \
+ { \
+ sampler_t defaultSampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE \
+ | CLK_FILTER_NEAREST; \
+ int2 effectCoord; \
+ effectCoord.s0 = coord % 8192; \
+ effectCoord.s1 = coord / 8192; \
+ return __gen_ocl_read_image ##suffix( \
+ cl_image, defaultSampler, effectCoord, 0); \
+ }
+
#define DECL_IMAGE_1DBuffer(int_clamping_fix, image_data_type, suffix) \
- DECL_READ_IMAGE_NOSAMPLER(image1d_buffer_t, image_data_type, \
- suffix, int) \
+ DECL_READ_IMAGE1D_BUFFER_NOSAMPLER(image1d_buffer_t, image_data_type, \
+ suffix, int) \
DECL_WRITE_IMAGE(image1d_buffer_t, image_data_type, suffix, int)
DECL_IMAGE_1DBuffer(GEN_FIX_INT_CLAMPING, int4, i)
diff --git a/src/cl_gt_device.h b/src/cl_gt_device.h
index ed19f10..4faa15a 100644
--- a/src/cl_gt_device.h
+++ b/src/cl_gt_device.h
@@ -50,7 +50,7 @@
.image3d_max_width = 8192,
.image3d_max_height = 8192,
.image3d_max_depth = 2048,
-.image_mem_size = 8192,
+.image_mem_size = 65536,
.max_samplers = 16,
.mem_base_addr_align = sizeof(cl_long) * 16 * 8,
.min_data_type_align_size = sizeof(cl_long) * 16,
diff --git a/src/cl_mem.c b/src/cl_mem.c
index 3055bea..3225fd2 100644
--- a/src/cl_mem.c
+++ b/src/cl_mem.c
@@ -190,10 +190,18 @@ cl_get_image_info(cl_mem mem,
*(size_t *)param_value = image->slice_pitch;
break;
case CL_IMAGE_WIDTH:
- *(size_t *)param_value = image->w;
+
+ if (mem->type == CL_MEM_BUFFER1D_IMAGE_TYPE) {
+ struct _cl_mem_buffer1d_image *buffer1d_image = (struct _cl_mem_buffer1d_image*) image;
+ *(size_t *)param_value = buffer1d_image->size;
+ } else
+ *(size_t *)param_value = image->w;
break;
case CL_IMAGE_HEIGHT:
- *(size_t *)param_value = IS_1D(image) ? 0 : image->h;
+ if (mem->type == CL_MEM_BUFFER1D_IMAGE_TYPE)
+ *(size_t *)param_value = 0;
+ else
+ *(size_t *)param_value = IS_1D(image) ? 0 : image->h;
break;
case CL_IMAGE_DEPTH:
*(size_t *)param_value = IS_3D(image) ? image->depth : 0;
@@ -243,6 +251,10 @@ cl_mem_allocate(enum cl_mem_type type,
struct _cl_mem_gl_image *gl_image = NULL;
TRY_ALLOC (gl_image, CALLOC(struct _cl_mem_gl_image));
mem = &gl_image->base.base;
+ } else if (type == CL_MEM_BUFFER1D_IMAGE_TYPE) {
+ struct _cl_mem_buffer1d_image *buffer1d_image = NULL;
+ TRY_ALLOC(buffer1d_image, CALLOC(struct _cl_mem_buffer1d_image));
+ mem = &buffer1d_image->base.base;
} else {
struct _cl_mem_buffer *buffer = NULL;
TRY_ALLOC (buffer, CALLOC(struct _cl_mem_buffer));
@@ -678,6 +690,7 @@ _cl_mem_new_image(cl_context ctx,
cl_mem_object_type image_type = orig_image_type;
uint32_t bpp = 0, intel_fmt = INTEL_UNSUPPORTED_FORMAT;
size_t sz = 0, aligned_pitch = 0, aligned_slice_pitch = 0, aligned_h = 0;
+ size_t origin_width = w; // for image1d buffer work around.
cl_image_tiling_t tiling = CL_NO_TILE;
/* Check flags consistency */
@@ -710,8 +723,7 @@ _cl_mem_new_image(cl_context ctx,
image_type != CL_MEM_OBJECT_IMAGE1D_BUFFER)))
DO_IMAGE_ERROR;
- if (image_type == CL_MEM_OBJECT_IMAGE1D ||
- image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER) {
+ if (image_type == CL_MEM_OBJECT_IMAGE1D) {
size_t min_pitch = bpp * w;
if (data && pitch == 0)
pitch = min_pitch;
@@ -724,19 +736,30 @@ _cl_mem_new_image(cl_context ctx,
if (UNLIKELY(!data && pitch != 0)) DO_IMAGE_ERROR;
if (UNLIKELY(!data && slice_pitch != 0)) DO_IMAGE_ERROR;
tiling = CL_NO_TILE;
- } else if (image_type == CL_MEM_OBJECT_IMAGE2D) {
+ } else if (image_type == CL_MEM_OBJECT_IMAGE2D ||
+ image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER) {
+
+ if (image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER) {
+ if (UNLIKELY(w > ctx->device->image_mem_size)) DO_IMAGE_ERROR;
+ /* This is an image1d buffer which exceeds the normal image size restriction.
+ We have to use a 2D image to simulate this 1D image. */
+ h = (w + ctx->device->image2d_max_width - 1) / ctx->device->image2d_max_width;
+ w = w > ctx->device->image2d_max_width ? ctx->device->image2d_max_width : w;
+ tiling = CL_NO_TILE;
+ } else if (cl_driver_get_ver(ctx->drv) != 6) {
+ /* Pick up tiling mode (we do only linear on SNB) */
+ tiling = cl_get_default_tiling(ctx->drv);
+ }
+
size_t min_pitch = bpp * w;
if (data && pitch == 0)
pitch = min_pitch;
+
if (UNLIKELY(w > ctx->device->image2d_max_width)) DO_IMAGE_ERROR;
if (UNLIKELY(h > ctx->device->image2d_max_height)) DO_IMAGE_ERROR;
if (UNLIKELY(data && min_pitch > pitch)) DO_IMAGE_ERROR;
if (UNLIKELY(!data && pitch != 0)) DO_IMAGE_ERROR;
- /* Pick up tiling mode (we do only linear on SNB) */
- if (cl_driver_get_ver(ctx->drv) != 6)
- tiling = cl_get_default_tiling(ctx->drv);
-
depth = 1;
} else if (image_type == CL_MEM_OBJECT_IMAGE3D ||
image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY ||
@@ -791,7 +814,16 @@ _cl_mem_new_image(cl_context ctx,
sz = aligned_pitch * aligned_h * depth;
}
- mem = cl_mem_allocate(CL_MEM_IMAGE_TYPE, ctx, flags, sz, tiling != CL_NO_TILE, NULL, &err);
+ if (image_type != CL_MEM_OBJECT_IMAGE1D_BUFFER)
+ mem = cl_mem_allocate(CL_MEM_IMAGE_TYPE, ctx, flags, sz, tiling != CL_NO_TILE, NULL, &err);
+ else {
+ mem = cl_mem_allocate(CL_MEM_BUFFER1D_IMAGE_TYPE, ctx, flags, sz, tiling != CL_NO_TILE, NULL, &err);
+ if (mem != NULL && err == CL_SUCCESS) {
+ struct _cl_mem_buffer1d_image *buffer1d_image = (struct _cl_mem_buffer1d_image *)mem;
+ buffer1d_image->size = origin_width;;
+ }
+ }
+
if (mem == NULL || err != CL_SUCCESS)
goto error;
diff --git a/src/cl_mem.h b/src/cl_mem.h
index 1641dcc..fd50220 100644
--- a/src/cl_mem.h
+++ b/src/cl_mem.h
@@ -72,6 +72,7 @@ enum cl_mem_type {
CL_MEM_SUBBUFFER_TYPE,
CL_MEM_IMAGE_TYPE,
CL_MEM_GL_IMAGE_TYPE,
+ CL_MEM_BUFFER1D_IMAGE_TYPE
};
#define IS_IMAGE(mem) (mem->type >= CL_MEM_IMAGE_TYPE)
#define IS_GL_IMAGE(mem) (mem->type == CL_MEM_GL_IMAGE_TYPE)
@@ -117,6 +118,11 @@ struct _cl_mem_gl_image {
uint32_t texture;
};
+struct _cl_mem_buffer1d_image {
+ struct _cl_mem_image base;
+ uint32_t size;
+};
+
inline static void
cl_mem_image_init(struct _cl_mem_image *image, size_t w, size_t h,
cl_mem_object_type image_type,
diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c
index 0df7876..c80a11b 100644
--- a/src/intel/intel_gpgpu.c
+++ b/src/intel/intel_gpgpu.c
@@ -1003,11 +1003,11 @@ static int
intel_get_surface_type(cl_mem_object_type type)
{
switch (type) {
- case CL_MEM_OBJECT_IMAGE1D_BUFFER:
case CL_MEM_OBJECT_IMAGE1D:
case CL_MEM_OBJECT_IMAGE1D_ARRAY:
return I965_SURFACE_1D;
+ case CL_MEM_OBJECT_IMAGE1D_BUFFER:
case CL_MEM_OBJECT_IMAGE2D:
case CL_MEM_OBJECT_IMAGE2D_ARRAY:
return I965_SURFACE_2D;
--
1.8.3.2
More information about the Beignet
mailing list