[Beignet] [patch v2 2/3] enable create image 2d from buffer in clCreateImage.

xionghu.luo at intel.com xionghu.luo at intel.com
Fri Aug 28 00:52:19 PDT 2015


From: Luo Xionghu <xionghu.luo at intel.com>

this patch allows create 2d image with a cl buffer with zero copy.
v2: should use reference to manage the release the buffer and image.
After being created, the buffer reference count is 2, and image reference
count is 1.
if image is released first, decrease the image reference count and
buffer reference count both, release the bo when the buffer is released
at last;
if buffer is released first, decrease the buffer reference count only,
release the buffer when the image is released.
add CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT in cl_device_info.

Signed-off-by: Luo Xionghu <xionghu.luo at intel.com>
---
 src/cl_api.c        |   3 +-
 src/cl_device_id.c  |   2 +
 src/cl_device_id.h  |   2 +
 src/cl_extensions.c |   2 +
 src/cl_gt_device.h  |   3 +-
 src/cl_mem.c        | 109 +++++++++++++++++++++++++++++++++++++++-------------
 src/cl_mem.h        |   1 +
 7 files changed, 93 insertions(+), 29 deletions(-)

diff --git a/src/cl_api.c b/src/cl_api.c
index 5c9b250..0690af4 100644
--- a/src/cl_api.c
+++ b/src/cl_api.c
@@ -549,8 +549,9 @@ clCreateImage(cl_context context,
     goto error;
   }
   /* buffer refers to a valid buffer memory object if image_type is
-     CL_MEM_OBJECT_IMAGE1D_BUFFER. Otherwise it must be NULL. */
+     CL_MEM_OBJECT_IMAGE1D_BUFFER or CL_MEM_OBJECT_IMAGE2D. Otherwise it must be NULL. */
   if (image_desc->image_type != CL_MEM_OBJECT_IMAGE1D_BUFFER &&
+      image_desc->image_type != CL_MEM_OBJECT_IMAGE2D &&
          image_desc->buffer) {
     err = CL_INVALID_IMAGE_DESCRIPTOR;
     goto error;
diff --git a/src/cl_device_id.c b/src/cl_device_id.c
index 1778292..78d2cf4 100644
--- a/src/cl_device_id.c
+++ b/src/cl_device_id.c
@@ -810,6 +810,8 @@ cl_get_device_info(cl_device_id     device,
     DECL_FIELD(PARTITION_AFFINITY_DOMAIN, affinity_domain)
     DECL_FIELD(PARTITION_TYPE, partition_type)
     DECL_FIELD(REFERENCE_COUNT, device_reference_count)
+    DECL_FIELD(IMAGE_PITCH_ALIGNMENT, image_pitch_alignment)
+    DECL_FIELD(IMAGE_BASE_ADDRESS_ALIGNMENT, image_base_address_alignment)
 
     case CL_DRIVER_VERSION:
       if (param_value_size_ret) {
diff --git a/src/cl_device_id.h b/src/cl_device_id.h
index b5db91c..02d1e0f 100644
--- a/src/cl_device_id.h
+++ b/src/cl_device_id.h
@@ -116,6 +116,8 @@ struct _cl_device_id {
   cl_device_partition_property partition_type[3];
   cl_uint      device_reference_count;
   uint32_t atomic_test_result;
+  uint32_t image_pitch_alignment;
+  uint32_t image_base_address_alignment;
 };
 
 /* Get a device from the given platform */
diff --git a/src/cl_extensions.c b/src/cl_extensions.c
index 3eb303f..6cb1579 100644
--- a/src/cl_extensions.c
+++ b/src/cl_extensions.c
@@ -46,6 +46,8 @@ void check_opt1_extension(cl_extensions_t *extensions)
     if (id == EXT_ID(khr_spir))
       extensions->extensions[id].base.ext_enabled = 1;
 #endif
+    if (id == EXT_ID(khr_image2d_from_buffer))
+      extensions->extensions[id].base.ext_enabled = 1;
   }
 }
 
diff --git a/src/cl_gt_device.h b/src/cl_gt_device.h
index a51843d..c2f9f56 100644
--- a/src/cl_gt_device.h
+++ b/src/cl_gt_device.h
@@ -126,4 +126,5 @@ DECL_INFO_STRING(driver_version, LIBCL_DRIVER_VERSION_STRING)
 .affinity_domain = 0,
 .partition_type = {0},
 .device_reference_count = 1,
-
+.image_pitch_alignment = 1,
+.image_base_address_alignment = 4096,
diff --git a/src/cl_mem.c b/src/cl_mem.c
index b5671bd..bb065f5 100644
--- a/src/cl_mem.c
+++ b/src/cl_mem.c
@@ -264,6 +264,7 @@ cl_mem_allocate(enum cl_mem_type type,
   SET_ICD(mem->dispatch)
   mem->ref_n = 1;
   mem->magic = CL_MAGIC_MEM_HEADER;
+  mem->is_image_from_buffer = 0;
   mem->flags = flags;
   mem->is_userptr = 0;
   mem->offset = 0;
@@ -308,10 +309,19 @@ cl_mem_allocate(enum cl_mem_type type,
       }
     }
 
-    if (!mem->is_userptr)
+    if(type == CL_MEM_IMAGE_TYPE && host_ptr && ((cl_mem)host_ptr)->magic == CL_MAGIC_MEM_HEADER) {
+      // if the image if created from buffer, should use the bo directly to share same bo.
+      mem->bo = ((cl_mem)host_ptr)->bo;
+      mem->is_image_from_buffer = 1;
+    } else  if (!mem->is_userptr)
       mem->bo = cl_buffer_alloc(bufmgr, "CL memory object", sz, alignment);
 #else
-    mem->bo = cl_buffer_alloc(bufmgr, "CL memory object", sz, alignment);
+    if(type == CL_MEM_IMAGE_TYPE && host_ptr && ((cl_mem)host_ptr)->magic == CL_MAGIC_MEM_HEADER) {
+      // if the image if created from buffer, should use the bo directly to share same bo.
+      mem->bo = ((cl_mem)host_ptr)->bo;
+      mem->is_image_from_buffer = 1;
+    } else
+      mem->bo = cl_buffer_alloc(bufmgr, "CL memory object", sz, alignment);
 #endif
 
     if (UNLIKELY(mem->bo == NULL)) {
@@ -756,6 +766,8 @@ _cl_mem_new_image(cl_context ctx,
       h = (w + ctx->device->image2d_max_width - 1) / ctx->device->image2d_max_width;
       w = w > ctx->device->image2d_max_width ? ctx->device->image2d_max_width : w;
       tiling = CL_NO_TILE;
+    } else if(image_type == CL_MEM_OBJECT_IMAGE2D && data && ((cl_mem)data)->magic == CL_MAGIC_MEM_HEADER) {
+      tiling = CL_NO_TILE;
     } else if (cl_driver_get_ver(ctx->drv) != 6) {
       /* Pick up tiling mode (we do only linear on SNB) */
       tiling = cl_get_default_tiling(ctx->drv);
@@ -804,7 +816,10 @@ _cl_mem_new_image(cl_context ctx,
   /* Tiling requires to align both pitch and height */
   if (tiling == CL_NO_TILE) {
     aligned_pitch = w * bpp;
-    aligned_h  = ALIGN(h, cl_buffer_get_tiling_align(ctx, CL_NO_TILE, 1));
+    if(image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY || image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY)
+      aligned_h  = ALIGN(h, cl_buffer_get_tiling_align(ctx, CL_NO_TILE, 1));
+    else
+      aligned_h = h;
   } else if (tiling == CL_TILE_X) {
     aligned_pitch = ALIGN(w * bpp, cl_buffer_get_tiling_align(ctx, CL_TILE_X, 0));
     aligned_h     = ALIGN(h, cl_buffer_get_tiling_align(ctx, CL_TILE_X, 1));
@@ -814,6 +829,11 @@ _cl_mem_new_image(cl_context ctx,
   }
 
   sz = aligned_pitch * aligned_h * depth;
+  if(image_type == CL_MEM_OBJECT_IMAGE2D && data && ((cl_mem)data)->magic == CL_MAGIC_MEM_HEADER)  {
+    //image 2d created from buffer: the buffer sz maybe larger than the image 2d.
+    if( ((cl_mem)data)->size > sz)
+      sz = ((cl_mem)data)->size;
+  }
 
   /* If sz is large than 128MB, map gtt may fail in some system.
      Because there is no obviours performance drop, disable tiling. */
@@ -825,7 +845,7 @@ _cl_mem_new_image(cl_context ctx,
   }
 
   if (image_type != CL_MEM_OBJECT_IMAGE1D_BUFFER)
-    mem = cl_mem_allocate(CL_MEM_IMAGE_TYPE, ctx, flags, sz, tiling != CL_NO_TILE, NULL, &err);
+    mem = cl_mem_allocate(CL_MEM_IMAGE_TYPE, ctx, flags, sz, tiling != CL_NO_TILE, data, &err);
   else {
     mem = cl_mem_allocate(CL_MEM_BUFFER1D_IMAGE_TYPE, ctx, flags, sz, tiling != CL_NO_TILE, NULL, &err);
     if (mem != NULL && err == CL_SUCCESS) {
@@ -837,7 +857,11 @@ _cl_mem_new_image(cl_context ctx,
   if (mem == NULL || err != CL_SUCCESS)
     goto error;
 
-  cl_buffer_set_tiling(mem->bo, tiling, aligned_pitch);
+  if(!(image_type == CL_MEM_OBJECT_IMAGE2D && data && ((cl_mem)data)->magic == CL_MAGIC_MEM_HEADER))  {
+    //no need set tiling if image 2d created from buffer since share same bo.
+    cl_buffer_set_tiling(mem->bo, tiling, aligned_pitch);
+  }
+
   if (image_type == CL_MEM_OBJECT_IMAGE1D ||
       image_type == CL_MEM_OBJECT_IMAGE2D ||
       image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER)
@@ -971,33 +995,51 @@ _cl_mem_new_image_from_buffer(cl_context ctx,
   if (UNLIKELY((err = cl_image_byte_per_pixel(image_format, &bpp)) != CL_SUCCESS))
     goto error;
 
-  // Per bspec, a image should has a at least 2 line vertical alignment,
-  // thus we can't simply attach a buffer to a 1d image surface which has the same size.
-  // We have to create a new image, and copy the buffer data to this new image.
-  // And replace all the buffer object's reference to this image.
-  image = _cl_mem_new_image(ctx, flags, image_format, image_desc->image_type,
+  if(image_desc->image_type == CL_MEM_OBJECT_IMAGE2D) {
+    image = _cl_mem_new_image(ctx, flags, image_format, image_desc->image_type,
+                             image_desc->image_width, image_desc->image_height, image_desc->image_depth,
+                             image_desc->image_row_pitch, image_desc->image_slice_pitch,
+                             image_desc->buffer, errcode_ret);
+  } else if (image_desc->image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER) {
+    // Per bspec, a image should has a at least 2 line vertical alignment,
+    // thus we can't simply attach a buffer to a 1d image surface which has the same size.
+    // We have to create a new image, and copy the buffer data to this new image.
+    // And replace all the buffer object's reference to this image.
+    image = _cl_mem_new_image(ctx, flags, image_format, image_desc->image_type,
                     mem_buffer->base.size / bpp, 0, 0, 0, 0, NULL, errcode_ret);
+  }
+  else
+    assert(0);
+
   if (image == NULL)
     return NULL;
-  void *src = cl_mem_map(buffer, 0);
-  void *dst = cl_mem_map(image, 1);
-  //
-  // FIXME, we could use copy buffer to image to do this on GPU latter.
-  // currently the copy buffer to image function doesn't support 1D image.
-  // 
-  // There is a potential risk that this buffer was mapped and the caller
-  // still hold the pointer and want to access it again. This scenario is
-  // not explicitly forbidden in the spec, although it should not be permitted.
-  memcpy(dst, src, mem_buffer->base.size);
-  cl_mem_unmap(buffer);
-  cl_mem_unmap(image);
+
+  if(image_desc->image_type == CL_MEM_OBJECT_IMAGE2D)  {
+    //no need copy since the image 2d and buffer share same bo.
+  }
+  else if (image_desc->image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER)  {
+    // FIXME, we could use copy buffer to image to do this on GPU latter.
+    // currently the copy buffer to image function doesn't support 1D image.
+    //
+    // There is a potential risk that this buffer was mapped and the caller
+    // still hold the pointer and want to access it again. This scenario is
+    // not explicitly forbidden in the spec, although it should not be permitted.
+    void *src = cl_mem_map(buffer, 0);
+    void *dst = cl_mem_map(image, 1);
+    memcpy(dst, src, mem_buffer->base.size);
+    cl_mem_unmap(image);
+    cl_mem_unmap(buffer);
+  }
+  else
+    assert(0);
 
   if (err != 0)
     goto error;
  
   // Now replace buffer's bo to this new bo, need to take care of sub buffer
   // case. 
-  cl_mem_replace_buffer(buffer, image->bo);
+  if (image_desc->image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER)
+    cl_mem_replace_buffer(buffer, image->bo);
   /* Now point to the right offset if buffer is a SUB_BUFFER. */
   if (buffer->flags & CL_MEM_USE_HOST_PTR)
     image->host_ptr = buffer->host_ptr + offset;
@@ -1025,12 +1067,20 @@ cl_mem_new_image(cl_context context,
 {
   switch (image_desc->image_type) {
   case CL_MEM_OBJECT_IMAGE1D:
-  case CL_MEM_OBJECT_IMAGE2D:
   case CL_MEM_OBJECT_IMAGE3D:
     return _cl_mem_new_image(context, flags, image_format, image_desc->image_type,
                              image_desc->image_width, image_desc->image_height, image_desc->image_depth,
                              image_desc->image_row_pitch, image_desc->image_slice_pitch,
                              host_ptr, errcode_ret);
+  case CL_MEM_OBJECT_IMAGE2D:
+    if(image_desc->buffer)
+      return _cl_mem_new_image_from_buffer(context, flags, image_format,
+                             image_desc, errcode_ret);
+    else
+      return _cl_mem_new_image(context, flags, image_format, image_desc->image_type,
+                             image_desc->image_width, image_desc->image_height, image_desc->image_depth,
+                             image_desc->image_row_pitch, image_desc->image_slice_pitch,
+                             host_ptr, errcode_ret);
   case CL_MEM_OBJECT_IMAGE1D_ARRAY:
   case CL_MEM_OBJECT_IMAGE2D_ARRAY:
     return _cl_mem_new_image(context, flags, image_format, image_desc->image_type,
@@ -1065,9 +1115,14 @@ cl_mem_delete(cl_mem mem)
   /* iff we are a image, delete the 1d buffer if has. */
   if (IS_IMAGE(mem)) {
     if (cl_mem_image(mem)->buffer_1d) {
-      assert(cl_mem_image(mem)->image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER);
-      cl_mem_delete(cl_mem_image(mem)->buffer_1d);
-      cl_mem_image(mem)->buffer_1d = NULL;
+      assert(cl_mem_image(mem)->image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER ||
+          cl_mem_image(mem)->image_type == CL_MEM_OBJECT_IMAGE2D);
+        cl_mem_delete(cl_mem_image(mem)->buffer_1d);
+        if(cl_mem_image(mem)->image_type == CL_MEM_OBJECT_IMAGE2D && mem->is_image_from_buffer == 1)
+        {
+          cl_mem_image(mem)->buffer_1d = NULL;
+          mem->bo = NULL;
+        }
     }
   }
 
diff --git a/src/cl_mem.h b/src/cl_mem.h
index e027f15..cdbc6da 100644
--- a/src/cl_mem.h
+++ b/src/cl_mem.h
@@ -94,6 +94,7 @@ typedef  struct _cl_mem {
   uint8_t mapped_gtt;       /* This object has mapped gtt, for unmap. */
   cl_mem_dstr_cb *dstr_cb;  /* The destroy callback. */
   uint8_t is_userptr;       /* CL_MEM_USE_HOST_PTR is enabled*/
+  uint8_t is_image_from_buffer;       /* IMAGE from Buffer*/
   size_t offset;            /* offset of host_ptr to the page beginning, only for CL_MEM_USE_HOST_PTR*/
 } _cl_mem;
 
-- 
1.9.1



More information about the Beignet mailing list