[Beignet] [PATCH] Implement cl_khr_image2d_from_buffer extension.
Yan Wang
yan.wang at linux.intel.com
Tue Nov 25 03:07:13 PST 2014
Implement cl_khr_image2d_from_buffer extension.
---
CMakeLists.txt | 2 +
kernels/image_2D_buffer.cl | 15 +++++
src/CMakeLists.txt | 5 ++
src/cl_api.c | 9 +++
src/cl_device_id.c | 4 ++
src/cl_device_id.h | 4 ++
src/cl_extensions.c | 2 +-
src/cl_gt_device.h | 4 ++
src/cl_mem.c | 156 +++++++++++++++++++++++++++++++++++++++++++--
utests/CMakeLists.txt | 6 ++
utests/image_2D_buffer.cpp | 89 ++++++++++++++++++++++++++
11 files changed, 290 insertions(+), 6 deletions(-)
create mode 100644 kernels/image_2D_buffer.cl
create mode 100644 utests/image_2D_buffer.cpp
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 49c8929..5ca7d90 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -189,6 +189,8 @@ ELSE(OCLIcd_FOUND)
MESSAGE(STATUS "Looking for OCL ICD header file - not found")
ENDIF(OCLIcd_FOUND)
+SET (OCL_IMAGE2D_BUFFER "true")
+
Find_Package(PythonInterp)
ADD_SUBDIRECTORY(include)
diff --git a/kernels/image_2D_buffer.cl b/kernels/image_2D_buffer.cl
new file mode 100644
index 0000000..6b9060c
--- /dev/null
+++ b/kernels/image_2D_buffer.cl
@@ -0,0 +1,15 @@
+__kernel void image_2D_buffer(image2d_t image1, image2d_t image2, sampler_t sampler, __global int *results)
+{
+ int x = get_global_id(0);
+ int y = get_global_id(1);
+ int w = get_image_width(image1);
+ int offset = mad24(y, w, x);
+
+ int4 pix = read_imagei(image1, (int2)(x, y));
+ int4 test = (pix != read_imagei(image2, sampler, (int2)(x, y)));
+
+ if (test.x || test.y || test.z || test.w)
+ results[offset] = 0;
+ else
+ results[offset] = 1;
+}
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 7182bad..3ca5f1f 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -114,6 +114,11 @@ SET(CMAKE_CXX_FLAGS "-DHAS_USERPTR ${CMAKE_CXX_FLAGS}")
SET(CMAKE_C_FLAGS "-DHAS_USERPTR ${CMAKE_C_FLAGS}")
endif (DRM_INTEL_USERPTR)
+if (OCL_IMAGE2D_BUFFER)
+SET(CMAKE_CXX_FLAGS "-DHAS_OCLImage2dBuffer ${CMAKE_CXX_FLAGS}")
+SET(CMAKE_C_FLAGS "-DHAS_OCLImage2dBuffer ${CMAKE_C_FLAGS}")
+endif (OCL_IMAGE2D_BUFFER)
+
set(GIT_SHA1 "git_sha1.h")
add_custom_target(${GIT_SHA1} ALL
COMMAND chmod +x ${CMAKE_CURRENT_SOURCE_DIR}/git_sha1.sh
diff --git a/src/cl_api.c b/src/cl_api.c
index 972c687..04095a2 100644
--- a/src/cl_api.c
+++ b/src/cl_api.c
@@ -548,6 +548,14 @@ clCreateImage(cl_context context,
err = CL_INVALID_IMAGE_DESCRIPTOR;
goto error;
}
+#ifdef HAS_OCLImage2dBuffer
+ if ((image_desc->image_type != CL_MEM_OBJECT_IMAGE1D_BUFFER &&
+ image_desc->image_type != CL_MEM_OBJECT_IMAGE2D) &&
+ image_desc->buffer) {
+ err = CL_INVALID_IMAGE_DESCRIPTOR;
+ goto error;
+ }
+#else
/* buffer refers to a valid buffer memory object if image_type is
CL_MEM_OBJECT_IMAGE1D_BUFFER. Otherwise it must be NULL. */
if (image_desc->image_type != CL_MEM_OBJECT_IMAGE1D_BUFFER &&
@@ -555,6 +563,7 @@ clCreateImage(cl_context context,
err = CL_INVALID_IMAGE_DESCRIPTOR;
goto error;
}
+#endif
if (image_desc->num_mip_levels || image_desc->num_samples) {
err = CL_INVALID_IMAGE_DESCRIPTOR;
goto error;
diff --git a/src/cl_device_id.c b/src/cl_device_id.c
index 5ef0bde..c47d48c 100644
--- a/src/cl_device_id.c
+++ b/src/cl_device_id.c
@@ -571,6 +571,10 @@ cl_get_device_info(cl_device_id device,
DECL_FIELD(PARTITION_AFFINITY_DOMAIN, affinity_domain)
DECL_FIELD(PARTITION_TYPE, partition_type)
DECL_FIELD(REFERENCE_COUNT, device_reference_count)
+#ifdef HAS_OCLImage2dBuffer
+ DECL_FIELD(IMAGE_PITCH_ALIGNMENT, image_pitch_alignment)
+ DECL_FIELD(IMAGE_BASE_ADDRESS_ALIGNMENT, image_base_address_alignment)
+#endif
case CL_DRIVER_VERSION:
if (param_value_size_ret) {
diff --git a/src/cl_device_id.h b/src/cl_device_id.h
index ee6a8e6..8d8adac 100644
--- a/src/cl_device_id.h
+++ b/src/cl_device_id.h
@@ -113,6 +113,10 @@ struct _cl_device_id {
cl_device_affinity_domain affinity_domain;
cl_device_partition_property partition_type[3];
cl_uint device_reference_count;
+#ifdef HAS_OCLImage2dBuffer
+ cl_uint image_pitch_alignment;
+ cl_uint image_base_address_alignment;
+#endif
};
/* Get a device from the given platform */
diff --git a/src/cl_extensions.c b/src/cl_extensions.c
index d07a525..e31386f 100644
--- a/src/cl_extensions.c
+++ b/src/cl_extensions.c
@@ -34,7 +34,7 @@ void check_opt1_extension(cl_extensions_t *extensions)
{
int id;
for(id = OPT1_EXT_START_ID; id <= OPT1_EXT_END_ID; id++)
- if (id == EXT_ID(khr_icd))
+ if (id == EXT_ID(khr_icd) || id == EXT_ID(khr_image2d_from_buffer))
extensions->extensions[id].base.ext_enabled = 1;
}
diff --git a/src/cl_gt_device.h b/src/cl_gt_device.h
index 37abfd2..f9c5ad4 100644
--- a/src/cl_gt_device.h
+++ b/src/cl_gt_device.h
@@ -124,4 +124,8 @@ DECL_INFO_STRING(driver_version, LIBCL_DRIVER_VERSION_STRING)
.affinity_domain = 0,
.partition_type = {0},
.device_reference_count = 1,
+#ifdef HAS_OCLImage2dBuffer
+.image_pitch_alignment = 4,
+.image_base_address_alignment = 4,
+#endif
diff --git a/src/cl_mem.c b/src/cl_mem.c
index 3323897..ab8efa8 100644
--- a/src/cl_mem.c
+++ b/src/cl_mem.c
@@ -673,6 +673,7 @@ _cl_mem_new_image(cl_context ctx,
size_t pitch,
size_t slice_pitch,
void *data,
+ int force_no_tiling,
cl_int *errcode_ret)
{
cl_int err = CL_SUCCESS;
@@ -736,7 +737,7 @@ _cl_mem_new_image(cl_context ctx,
if (UNLIKELY(!data && pitch != 0)) DO_IMAGE_ERROR;
/* Pick up tiling mode (we do only linear on SNB) */
- if (cl_driver_get_ver(ctx->drv) != 6)
+ if (!force_no_tiling && cl_driver_get_ver(ctx->drv) != 6)
tiling = cl_get_default_tiling(ctx->drv);
depth = 1;
@@ -935,7 +936,7 @@ _cl_mem_new_image_from_buffer(cl_context ctx,
// We have to create a new image, and copy the buffer data to this new image.
// And replace all the buffer object's reference to this image.
image = _cl_mem_new_image(ctx, flags, image_format, image_desc->image_type,
- mem_buffer->base.size / bpp, 0, 0, 0, 0, NULL, errcode_ret);
+ mem_buffer->base.size / bpp, 0, 0, 0, 0, NULL, 1, errcode_ret);
if (image == NULL)
return NULL;
void *src = cl_mem_map(buffer, 0);
@@ -953,7 +954,7 @@ _cl_mem_new_image_from_buffer(cl_context ctx,
if (err != 0)
goto error;
-
+
// Now replace buffer's bo to this new bo, need to take care of sub buffer
// case.
cl_mem_replace_buffer(buffer, image->bo);
@@ -974,6 +975,128 @@ error:
return image;
}
+#ifdef HAS_OCLImage2dBuffer
+static cl_mem
+_cl_mem_new_image2d_from_buffer(cl_context ctx,
+ cl_mem_flags flags,
+ const cl_image_format* image_format,
+ const cl_image_desc *image_desc,
+ cl_int *errcode_ret)
+{
+ cl_mem image = NULL;
+ cl_mem buffer = image_desc->buffer;
+ cl_int err = CL_SUCCESS;
+ *errcode_ret = err;
+ cl_mem_flags merged_flags;
+ uint32_t bpp;
+ uint32_t intel_fmt = INTEL_UNSUPPORTED_FORMAT;
+ size_t offset = 0;
+
+ /* Get the size of each pixel */
+ if (UNLIKELY((err = cl_image_byte_per_pixel(image_format, &bpp)) != CL_SUCCESS))
+ goto error;
+
+ /* Only a sub-set of the formats are supported */
+ intel_fmt = cl_image_get_intel_format(image_format);
+ if (UNLIKELY(intel_fmt == INTEL_UNSUPPORTED_FORMAT)) {
+ err = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
+ goto error;
+ }
+
+ if (!buffer) {
+ err = CL_INVALID_IMAGE_DESCRIPTOR;
+ goto error;
+ }
+
+ if (flags & (CL_MEM_USE_HOST_PTR|CL_MEM_ALLOC_HOST_PTR|CL_MEM_COPY_HOST_PTR)) {
+ err = CL_INVALID_IMAGE_DESCRIPTOR;
+ goto error;
+ }
+
+ /* access check. */
+ if ((buffer->flags & CL_MEM_WRITE_ONLY) &&
+ (flags & (CL_MEM_READ_WRITE|CL_MEM_READ_ONLY))) {
+ err = CL_INVALID_VALUE;
+ goto error;
+ }
+ if ((buffer->flags & CL_MEM_READ_ONLY) &&
+ (flags & (CL_MEM_READ_WRITE|CL_MEM_WRITE_ONLY))) {
+ err = CL_INVALID_VALUE;
+ goto error;
+ }
+ if ((buffer->flags & CL_MEM_HOST_WRITE_ONLY) &&
+ (flags & CL_MEM_HOST_READ_ONLY)) {
+ err = CL_INVALID_VALUE;
+ goto error;
+ }
+ if ((buffer->flags & CL_MEM_HOST_READ_ONLY) &&
+ (flags & CL_MEM_HOST_WRITE_ONLY)) {
+ err = CL_INVALID_VALUE;
+ goto error;
+ }
+ if ((buffer->flags & CL_MEM_HOST_NO_ACCESS) &&
+ (flags & (CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_WRITE_ONLY))) {
+ err = CL_INVALID_VALUE;
+ goto error;
+ }
+
+ if (image_desc->image_width * image_desc->image_height * bpp > buffer->size) {
+ err = CL_INVALID_IMAGE_DESCRIPTOR;
+ goto error;
+ }
+
+ merged_flags = buffer->flags;
+ if (flags & (CL_MEM_READ_WRITE|CL_MEM_READ_WRITE|CL_MEM_WRITE_ONLY)) {
+ merged_flags &= ~(CL_MEM_READ_WRITE|CL_MEM_READ_WRITE|CL_MEM_WRITE_ONLY);
+ merged_flags |= flags & (CL_MEM_READ_WRITE|CL_MEM_READ_WRITE|CL_MEM_WRITE_ONLY);
+ }
+ if (flags & (CL_MEM_HOST_WRITE_ONLY|CL_MEM_HOST_READ_ONLY|CL_MEM_HOST_NO_ACCESS)) {
+ merged_flags &= ~(CL_MEM_HOST_WRITE_ONLY|CL_MEM_HOST_READ_ONLY|CL_MEM_HOST_NO_ACCESS);
+ merged_flags |= flags & (CL_MEM_HOST_WRITE_ONLY|CL_MEM_HOST_READ_ONLY|CL_MEM_HOST_NO_ACCESS);
+ }
+ struct _cl_mem_buffer *mem_buffer = (struct _cl_mem_buffer*)buffer;
+ if (buffer->type == CL_MEM_SUBBUFFER_TYPE) {
+ offset = ((struct _cl_mem_buffer *)buffer)->sub_offset;
+ mem_buffer = mem_buffer->parent;
+ }
+ /* Get the size of each pixel */
+ if (UNLIKELY((err = cl_image_byte_per_pixel(image_format, &bpp)) != CL_SUCCESS))
+ goto error;
+
+ // Per bspec, a image should has a at least 2 line vertical alignment,
+ // thus we can't simply attach a buffer to a 1d image surface which has the same size.
+ // We have to create a new image, and copy the buffer data to this new image.
+ // And replace all the buffer object's reference to this image.
+ image = _cl_mem_new_image(ctx, flags, image_format, image_desc->image_type,
+ image_desc->image_width, image_desc->image_height, 0, 0, 0, NULL, 1, errcode_ret);
+ if (image == NULL)
+ return NULL;
+
+ if (err != 0)
+ goto error;
+
+ // Now replace buffer's bo to this new bo, need to take care of sub buffer
+ // case.
+ cl_mem_replace_buffer(image, buffer->bo);
+ /* Now point to the right offset if buffer is a SUB_BUFFER. */
+ if (buffer->flags & CL_MEM_USE_HOST_PTR)
+ image->host_ptr = buffer->host_ptr + offset;
+ cl_mem_image(image)->offset = offset;
+ cl_mem_image(image)->w = image_desc->image_width;
+ cl_mem_image(image)->h = image_desc->image_height;
+ cl_mem_add_ref(buffer);
+ cl_mem_image(image)->buffer_1d = buffer;
+ return image;
+
+error:
+ if (image)
+ cl_mem_delete(image);
+ image = NULL;
+ *errcode_ret = err;
+ return image;
+}
+#endif
+
LOCAL cl_mem
cl_mem_new_image(cl_context context,
cl_mem_flags flags,
@@ -983,19 +1106,37 @@ cl_mem_new_image(cl_context context,
cl_int *errcode_ret)
{
switch (image_desc->image_type) {
+#ifdef HAS_OCLImage2dBuffer
+ case CL_MEM_OBJECT_IMAGE1D:
+ case CL_MEM_OBJECT_IMAGE3D:
+ return _cl_mem_new_image(context, flags, image_format, image_desc->image_type,
+ image_desc->image_width, image_desc->image_height, image_desc->image_depth,
+ image_desc->image_row_pitch, image_desc->image_slice_pitch,
+ host_ptr, 0, errcode_ret);
+ case CL_MEM_OBJECT_IMAGE2D:
+ if (image_desc->buffer)
+ return _cl_mem_new_image2d_from_buffer(context, flags, image_format,
+ image_desc, errcode_ret);
+ else
+ return _cl_mem_new_image(context, flags, image_format, image_desc->image_type,
+ image_desc->image_width, image_desc->image_height, image_desc->image_depth,
+ image_desc->image_row_pitch, image_desc->image_slice_pitch,
+ host_ptr, 0, errcode_ret);
+#else
case CL_MEM_OBJECT_IMAGE1D:
case CL_MEM_OBJECT_IMAGE2D:
case CL_MEM_OBJECT_IMAGE3D:
return _cl_mem_new_image(context, flags, image_format, image_desc->image_type,
image_desc->image_width, image_desc->image_height, image_desc->image_depth,
image_desc->image_row_pitch, image_desc->image_slice_pitch,
- host_ptr, errcode_ret);
+ host_ptr, 0, errcode_ret);
+#endif
case CL_MEM_OBJECT_IMAGE1D_ARRAY:
case CL_MEM_OBJECT_IMAGE2D_ARRAY:
return _cl_mem_new_image(context, flags, image_format, image_desc->image_type,
image_desc->image_width, image_desc->image_height, image_desc->image_array_size,
image_desc->image_row_pitch, image_desc->image_slice_pitch,
- host_ptr, errcode_ret);
+ host_ptr, 0, errcode_ret);
case CL_MEM_OBJECT_IMAGE1D_BUFFER:
return _cl_mem_new_image_from_buffer(context, flags, image_format,
image_desc, errcode_ret);
@@ -1024,7 +1165,12 @@ cl_mem_delete(cl_mem mem)
/* iff we are a image, delete the 1d buffer if has. */
if (IS_IMAGE(mem)) {
if (cl_mem_image(mem)->buffer_1d) {
+#ifdef HAS_OCLImage2dBuffer
+ assert(cl_mem_image(mem)->image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER ||
+ cl_mem_image(mem)->image_type == CL_MEM_OBJECT_IMAGE2D);
+#else
assert(cl_mem_image(mem)->image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER);
+#endif
cl_mem_delete(cl_mem_image(mem)->buffer_1d);
cl_mem_image(mem)->buffer_1d = NULL;
}
diff --git a/utests/CMakeLists.txt b/utests/CMakeLists.txt
index 8cc8b43..3736652 100644
--- a/utests/CMakeLists.txt
+++ b/utests/CMakeLists.txt
@@ -242,6 +242,12 @@ else()
SET(UTESTS_REQUIRED_EGL_LIB "")
endif()
+if (OCL_IMAGE2D_BUFFER)
+SET(utests_sources ${utests_sources} image_2D_buffer.cpp)
+SET(CMAKE_CXX_FLAGS "-DHAS_OCLImage2dBuffer ${CMAKE_CXX_FLAGS} ${DEF_OCL_PCH_PCM_PATH}")
+SET(CMAKE_C_FLAGS "-DHAS_OCLImage2dBuffer ${CMAKE_C_FLAGS} ${DEF_OCL_PCH_PCM_PATH}")
+endif ()
+
if (COMPILER STREQUAL "CLANG")
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-tautological-compare")
endif ()
diff --git a/utests/image_2D_buffer.cpp b/utests/image_2D_buffer.cpp
new file mode 100644
index 0000000..e6e88d6
--- /dev/null
+++ b/utests/image_2D_buffer.cpp
@@ -0,0 +1,89 @@
+#include <string.h>
+#include "utest_helper.hpp"
+
+#define TEST_SIZE 1024
+
+void image_2D_buffer(void)
+{
+ size_t pix_w = TEST_SIZE;
+ size_t pix_h = TEST_SIZE;
+ size_t buffer_sz = pix_w * pix_h * sizeof(uint32_t);
+ char *buf_content = (char *)malloc(buffer_sz * sizeof(char));
+ int error;
+ cl_image_desc image_desc;
+ cl_image_format image_format;
+ cl_sampler sampler;
+ cl_mem image1, image2;
+ cl_mem ret_mem = NULL;
+
+ OCL_CREATE_KERNEL("image_2D_buffer");
+
+ for (int32_t i = 0; i < (int32_t)buffer_sz; ++i)
+ buf_content[i] = (rand() & 127);
+
+ cl_mem buff = clCreateBuffer(ctx, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
+ buffer_sz, buf_content, &error);
+ OCL_ASSERT(error == CL_SUCCESS);
+
+ memset(&image_desc, 0x0, sizeof(cl_image_desc));
+ memset(&image_format, 0x0, sizeof(cl_image_format));
+
+ image_desc.image_type = CL_MEM_OBJECT_IMAGE2D;
+ image_desc.image_row_pitch = pix_w * sizeof(uint32_t);
+ image_desc.image_width = pix_w;
+ image_desc.image_height = pix_h;
+ image_desc.image_array_size = 1;
+ image_desc.buffer = buff;
+
+ image_format.image_channel_order = CL_RGBA;
+ image_format.image_channel_data_type = CL_UNSIGNED_INT8;
+
+ image1 = clCreateImage(ctx, CL_MEM_READ_ONLY, &image_format,
+ &image_desc, NULL, &error );
+ OCL_ASSERT(error == CL_SUCCESS);
+
+ error = clGetImageInfo(image1, CL_IMAGE_BUFFER, sizeof(ret_mem), &ret_mem, NULL);
+ OCL_ASSERT(error == CL_SUCCESS);
+ OCL_ASSERT(ret_mem == buff);
+
+
+ memset(&image_desc, 0x0, sizeof(cl_image_desc));
+ image_desc.image_type = CL_MEM_OBJECT_IMAGE2D;
+ image_desc.image_width = pix_w;
+ image_desc.image_height = pix_h;
+ image2 = clCreateImage(ctx, CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR,
+ &image_format, &image_desc, buf_content, &error);
+ OCL_ASSERT(error == CL_SUCCESS);
+
+ // Create sampler to use
+ sampler = clCreateSampler(ctx, false, CL_ADDRESS_NONE, CL_FILTER_NEAREST, &error );
+ OCL_ASSERT(error == CL_SUCCESS);
+
+ cl_mem result_buf = buf[0] = clCreateBuffer(ctx, 0, buffer_sz, NULL, &error);
+ OCL_ASSERT(error == CL_SUCCESS);
+
+ OCL_SET_ARG(0, sizeof(cl_mem), &image1);
+ OCL_SET_ARG(1, sizeof(cl_mem), &image2);
+ OCL_SET_ARG(2, sizeof(sampler), &sampler);
+ OCL_SET_ARG(3, sizeof(cl_mem), &result_buf);
+
+ globals[0] = pix_w;
+ globals[1] = pix_h;
+ locals[0] = 16;
+ locals[1] = 16;
+
+ OCL_NDRANGE(2);
+
+ /* Now check the result. */
+ OCL_MAP_BUFFER(0);
+ for (uint32_t i = 0; i < buffer_sz/sizeof(uint32_t); i++)
+ OCL_ASSERT(((uint32_t*)buf_data[0])[i] == 1);
+ OCL_UNMAP_BUFFER(0);
+
+ clReleaseSampler(sampler);
+ clReleaseMemObject(image1);
+ clReleaseMemObject(image2);
+ clReleaseMemObject(buff);
+}
+
+MAKE_UTEST_FROM_FUNCTION(image_2D_buffer);
--
1.9.3
More information about the Beignet
mailing list