[Beignet] [PATCH 2/3] enable create image 2d from buffer in clCreateImage.
xionghu.luo at intel.com
xionghu.luo at intel.com
Wed Jul 22 19:54:13 PDT 2015
From: Luo Xionghu <xionghu.luo at intel.com>
This patch allows creating a 2D image from a CL buffer with zero copy: the image shares the buffer's underlying buffer object (bo) instead of copying the data.
Signed-off-by: Luo Xionghu <xionghu.luo at intel.com>
---
src/cl_api.c | 3 +-
src/cl_device_id.c | 1 +
src/cl_device_id.h | 1 +
src/cl_extensions.c | 2 +
src/cl_gt_device.h | 1 +
src/cl_mem.c | 109 +++++++++++++++++++++++++++++++++++++++-------------
src/cl_mem.h | 1 +
7 files changed, 90 insertions(+), 28 deletions(-)
diff --git a/src/cl_api.c b/src/cl_api.c
index 69eb0bc..28783fd 100644
--- a/src/cl_api.c
+++ b/src/cl_api.c
@@ -549,8 +549,9 @@ clCreateImage(cl_context context,
goto error;
}
/* buffer refers to a valid buffer memory object if image_type is
- CL_MEM_OBJECT_IMAGE1D_BUFFER. Otherwise it must be NULL. */
+ CL_MEM_OBJECT_IMAGE1D_BUFFER or CL_MEM_OBJECT_IMAGE2D. Otherwise it must be NULL. */
if (image_desc->image_type != CL_MEM_OBJECT_IMAGE1D_BUFFER &&
+ image_desc->image_type != CL_MEM_OBJECT_IMAGE2D &&
image_desc->buffer) {
err = CL_INVALID_IMAGE_DESCRIPTOR;
goto error;
diff --git a/src/cl_device_id.c b/src/cl_device_id.c
index e9e2c16..0713456 100644
--- a/src/cl_device_id.c
+++ b/src/cl_device_id.c
@@ -808,6 +808,7 @@ cl_get_device_info(cl_device_id device,
DECL_FIELD(PARTITION_AFFINITY_DOMAIN, affinity_domain)
DECL_FIELD(PARTITION_TYPE, partition_type)
DECL_FIELD(REFERENCE_COUNT, device_reference_count)
+ DECL_FIELD(IMAGE_PITCH_ALIGNMENT, image_pitch_alignment)
case CL_DRIVER_VERSION:
if (param_value_size_ret) {
diff --git a/src/cl_device_id.h b/src/cl_device_id.h
index 6daa31c..a14b3c8 100644
--- a/src/cl_device_id.h
+++ b/src/cl_device_id.h
@@ -115,6 +115,7 @@ struct _cl_device_id {
cl_device_partition_property partition_type[3];
cl_uint device_reference_count;
uint32_t atomic_test_result;
+ uint32_t image_pitch_alignment;
};
/* Get a device from the given platform */
diff --git a/src/cl_extensions.c b/src/cl_extensions.c
index 3eb303f..6cb1579 100644
--- a/src/cl_extensions.c
+++ b/src/cl_extensions.c
@@ -46,6 +46,8 @@ void check_opt1_extension(cl_extensions_t *extensions)
if (id == EXT_ID(khr_spir))
extensions->extensions[id].base.ext_enabled = 1;
#endif
+ if (id == EXT_ID(khr_image2d_from_buffer))
+ extensions->extensions[id].base.ext_enabled = 1;
}
}
diff --git a/src/cl_gt_device.h b/src/cl_gt_device.h
index 26bb8e8..fa63040 100644
--- a/src/cl_gt_device.h
+++ b/src/cl_gt_device.h
@@ -125,4 +125,5 @@ DECL_INFO_STRING(driver_version, LIBCL_DRIVER_VERSION_STRING)
.affinity_domain = 0,
.partition_type = {0},
.device_reference_count = 1,
+.image_pitch_alignment = 1,
diff --git a/src/cl_mem.c b/src/cl_mem.c
index b5671bd..cff2625 100644
--- a/src/cl_mem.c
+++ b/src/cl_mem.c
@@ -266,6 +266,7 @@ cl_mem_allocate(enum cl_mem_type type,
mem->magic = CL_MAGIC_MEM_HEADER;
mem->flags = flags;
mem->is_userptr = 0;
+ mem->is_image_from_buffer = 0;
mem->offset = 0;
if (sz != 0) {
@@ -298,7 +299,7 @@ cl_mem_allocate(enum cl_mem_type type,
}
}
}
- else if (flags & CL_MEM_ALLOC_HOST_PTR) {
+ else if (flags & CL_MEM_ALLOC_HOST_PTR) {
const size_t alignedSZ = ALIGN(sz, page_size);
void* internal_host_ptr = cl_aligned_malloc(alignedSZ, page_size);
mem->host_ptr = internal_host_ptr;
@@ -308,10 +309,19 @@ cl_mem_allocate(enum cl_mem_type type,
}
}
- if (!mem->is_userptr)
+ if(type == CL_MEM_IMAGE_TYPE && host_ptr && ((cl_mem)host_ptr)->magic == CL_MAGIC_MEM_HEADER) {
+ // If the image is created from a buffer, use the buffer's bo directly so both share the same bo.
+ mem->bo = ((cl_mem)host_ptr)->bo;
+ mem->is_image_from_buffer = 1;
+ } else if (!mem->is_userptr)
mem->bo = cl_buffer_alloc(bufmgr, "CL memory object", sz, alignment);
#else
- mem->bo = cl_buffer_alloc(bufmgr, "CL memory object", sz, alignment);
+ if(type == CL_MEM_IMAGE_TYPE && host_ptr && ((cl_mem)host_ptr)->magic == CL_MAGIC_MEM_HEADER) {
+ // If the image is created from a buffer, use the buffer's bo directly so both share the same bo.
+ mem->bo = ((cl_mem)host_ptr)->bo;
+ mem->is_image_from_buffer = 1;
+ } else
+ mem->bo = cl_buffer_alloc(bufmgr, "CL memory object", sz, alignment);
#endif
if (UNLIKELY(mem->bo == NULL)) {
@@ -756,6 +766,8 @@ _cl_mem_new_image(cl_context ctx,
h = (w + ctx->device->image2d_max_width - 1) / ctx->device->image2d_max_width;
w = w > ctx->device->image2d_max_width ? ctx->device->image2d_max_width : w;
tiling = CL_NO_TILE;
+ } else if(image_type == CL_MEM_OBJECT_IMAGE2D && data && ((cl_mem)data)->magic == CL_MAGIC_MEM_HEADER) {
+ tiling = CL_NO_TILE;
} else if (cl_driver_get_ver(ctx->drv) != 6) {
/* Pick up tiling mode (we do only linear on SNB) */
tiling = cl_get_default_tiling(ctx->drv);
@@ -804,7 +816,10 @@ _cl_mem_new_image(cl_context ctx,
/* Tiling requires to align both pitch and height */
if (tiling == CL_NO_TILE) {
aligned_pitch = w * bpp;
- aligned_h = ALIGN(h, cl_buffer_get_tiling_align(ctx, CL_NO_TILE, 1));
+ if(image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
+ aligned_h = ALIGN(h, cl_buffer_get_tiling_align(ctx, CL_NO_TILE, 1));
+ else
+ aligned_h = h;
} else if (tiling == CL_TILE_X) {
aligned_pitch = ALIGN(w * bpp, cl_buffer_get_tiling_align(ctx, CL_TILE_X, 0));
aligned_h = ALIGN(h, cl_buffer_get_tiling_align(ctx, CL_TILE_X, 1));
@@ -814,6 +829,11 @@ _cl_mem_new_image(cl_context ctx,
}
sz = aligned_pitch * aligned_h * depth;
+ if(image_type == CL_MEM_OBJECT_IMAGE2D && data && ((cl_mem)data)->magic == CL_MAGIC_MEM_HEADER) {
+ // 2D image created from a buffer: the buffer size may be larger than the 2D image, so use the larger size.
+ if( ((cl_mem)data)->size > sz)
+ sz = ((cl_mem)data)->size;
+ }
/* If sz is large than 128MB, map gtt may fail in some system.
Because there is no obviours performance drop, disable tiling. */
@@ -825,7 +845,7 @@ _cl_mem_new_image(cl_context ctx,
}
if (image_type != CL_MEM_OBJECT_IMAGE1D_BUFFER)
- mem = cl_mem_allocate(CL_MEM_IMAGE_TYPE, ctx, flags, sz, tiling != CL_NO_TILE, NULL, &err);
+ mem = cl_mem_allocate(CL_MEM_IMAGE_TYPE, ctx, flags, sz, tiling != CL_NO_TILE, data, &err);
else {
mem = cl_mem_allocate(CL_MEM_BUFFER1D_IMAGE_TYPE, ctx, flags, sz, tiling != CL_NO_TILE, NULL, &err);
if (mem != NULL && err == CL_SUCCESS) {
@@ -837,7 +857,12 @@ _cl_mem_new_image(cl_context ctx,
if (mem == NULL || err != CL_SUCCESS)
goto error;
- cl_buffer_set_tiling(mem->bo, tiling, aligned_pitch);
+ if(!(image_type == CL_MEM_OBJECT_IMAGE2D && data && ((cl_mem)data)->magic == CL_MAGIC_MEM_HEADER))
+ {
+ // No need to set tiling when a 2D image is created from a buffer, since both share the same bo.
+ cl_buffer_set_tiling(mem->bo, tiling, aligned_pitch);
+ }
+
if (image_type == CL_MEM_OBJECT_IMAGE1D ||
image_type == CL_MEM_OBJECT_IMAGE2D ||
image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER)
@@ -971,33 +996,53 @@ _cl_mem_new_image_from_buffer(cl_context ctx,
if (UNLIKELY((err = cl_image_byte_per_pixel(image_format, &bpp)) != CL_SUCCESS))
goto error;
- // Per bspec, a image should has a at least 2 line vertical alignment,
- // thus we can't simply attach a buffer to a 1d image surface which has the same size.
- // We have to create a new image, and copy the buffer data to this new image.
- // And replace all the buffer object's reference to this image.
- image = _cl_mem_new_image(ctx, flags, image_format, image_desc->image_type,
+ if(image_desc->image_type == CL_MEM_OBJECT_IMAGE2D) {
+ image = _cl_mem_new_image(ctx, flags, image_format, image_desc->image_type,
+ image_desc->image_width, image_desc->image_height, image_desc->image_depth,
+ image_desc->image_row_pitch, image_desc->image_slice_pitch,
+ image_desc->buffer, errcode_ret);
+ } else if (image_desc->image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER) {
+ // Per bspec, a image should has a at least 2 line vertical alignment,
+ // thus we can't simply attach a buffer to a 1d image surface which has the same size.
+ // We have to create a new image, and copy the buffer data to this new image.
+ // And replace all the buffer object's reference to this image.
+ image = _cl_mem_new_image(ctx, flags, image_format, image_desc->image_type,
mem_buffer->base.size / bpp, 0, 0, 0, 0, NULL, errcode_ret);
+ }
+ else
+ assert(0);
+
if (image == NULL)
return NULL;
- void *src = cl_mem_map(buffer, 0);
- void *dst = cl_mem_map(image, 1);
- //
- // FIXME, we could use copy buffer to image to do this on GPU latter.
- // currently the copy buffer to image function doesn't support 1D image.
- //
- // There is a potential risk that this buffer was mapped and the caller
- // still hold the pointer and want to access it again. This scenario is
- // not explicitly forbidden in the spec, although it should not be permitted.
- memcpy(dst, src, mem_buffer->base.size);
- cl_mem_unmap(buffer);
- cl_mem_unmap(image);
+
+ if(image_desc->image_type == CL_MEM_OBJECT_IMAGE2D)
+ {
+ // No copy is needed since the 2D image and the buffer share the same bo.
+ }
+ else if (image_desc->image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER)
+ {
+ // FIXME, we could use copy buffer to image to do this on GPU latter.
+ // currently the copy buffer to image function doesn't support 1D image.
+ //
+ // There is a potential risk that this buffer was mapped and the caller
+ // still hold the pointer and want to access it again. This scenario is
+ // not explicitly forbidden in the spec, although it should not be permitted.
+ void *src = cl_mem_map(buffer, 0);
+ void *dst = cl_mem_map(image, 1);
+ memcpy(dst, src, mem_buffer->base.size);
+ cl_mem_unmap(image);
+ cl_mem_unmap(buffer);
+ }
+ else
+ assert(0);
if (err != 0)
goto error;
// Now replace buffer's bo to this new bo, need to take care of sub buffer
// case.
- cl_mem_replace_buffer(buffer, image->bo);
+ if (image_desc->image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER)
+ cl_mem_replace_buffer(buffer, image->bo);
/* Now point to the right offset if buffer is a SUB_BUFFER. */
if (buffer->flags & CL_MEM_USE_HOST_PTR)
image->host_ptr = buffer->host_ptr + offset;
@@ -1025,12 +1070,20 @@ cl_mem_new_image(cl_context context,
{
switch (image_desc->image_type) {
case CL_MEM_OBJECT_IMAGE1D:
- case CL_MEM_OBJECT_IMAGE2D:
case CL_MEM_OBJECT_IMAGE3D:
return _cl_mem_new_image(context, flags, image_format, image_desc->image_type,
image_desc->image_width, image_desc->image_height, image_desc->image_depth,
image_desc->image_row_pitch, image_desc->image_slice_pitch,
host_ptr, errcode_ret);
+ case CL_MEM_OBJECT_IMAGE2D:
+ if(image_desc->buffer)
+ return _cl_mem_new_image_from_buffer(context, flags, image_format,
+ image_desc, errcode_ret);
+ else
+ return _cl_mem_new_image(context, flags, image_format, image_desc->image_type,
+ image_desc->image_width, image_desc->image_height, image_desc->image_depth,
+ image_desc->image_row_pitch, image_desc->image_slice_pitch,
+ host_ptr, errcode_ret);
case CL_MEM_OBJECT_IMAGE1D_ARRAY:
case CL_MEM_OBJECT_IMAGE2D_ARRAY:
return _cl_mem_new_image(context, flags, image_format, image_desc->image_type,
@@ -1064,8 +1117,10 @@ cl_mem_delete(cl_mem mem)
/* iff we are a image, delete the 1d buffer if has. */
if (IS_IMAGE(mem)) {
- if (cl_mem_image(mem)->buffer_1d) {
- assert(cl_mem_image(mem)->image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER);
+ if(mem->is_image_from_buffer)
+ mem->bo = NULL;
+ else if (cl_mem_image(mem)->buffer_1d) {
+ assert(cl_mem_image(mem)->image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER ||cl_mem_image(mem)->image_type == CL_MEM_OBJECT_IMAGE2D);
cl_mem_delete(cl_mem_image(mem)->buffer_1d);
cl_mem_image(mem)->buffer_1d = NULL;
}
diff --git a/src/cl_mem.h b/src/cl_mem.h
index e027f15..cdbc6da 100644
--- a/src/cl_mem.h
+++ b/src/cl_mem.h
@@ -94,6 +94,7 @@ typedef struct _cl_mem {
uint8_t mapped_gtt; /* This object has mapped gtt, for unmap. */
cl_mem_dstr_cb *dstr_cb; /* The destroy callback. */
uint8_t is_userptr; /* CL_MEM_USE_HOST_PTR is enabled*/
+ uint8_t is_image_from_buffer; /* IMAGE from Buffer*/
size_t offset; /* offset of host_ptr to the page beginning, only for CL_MEM_USE_HOST_PTR*/
} _cl_mem;
--
1.9.1
More information about the Beignet
mailing list