[Beignet] [PATCH 2/3] Implement the clCreateSubBuffer API

junyan.he at linux.intel.com junyan.he at linux.intel.com
Mon Nov 4 22:43:17 PST 2013


From: Junyan He <junyan.he at linux.intel.com>

---
 src/cl_api.c           |   44 ++++++++++++++----
 src/cl_command_queue.c |    7 ++-
 src/cl_driver.h        |    2 +-
 src/cl_enqueue.c       |   29 ++++++++----
 src/cl_mem.c           |  120 ++++++++++++++++++++++++++++++++++++++++++++++--
 src/cl_mem.h           |   10 +++-
 6 files changed, 189 insertions(+), 23 deletions(-)

diff --git a/src/cl_api.c b/src/cl_api.c
index 70a8d09..acafd09 100644
--- a/src/cl_api.c
+++ b/src/cl_api.c
@@ -482,13 +482,17 @@ clCreateSubBuffer(cl_mem                buffer,
                   const void *          buffer_create_info,
                   cl_int *              errcode_ret)
 {
-#if 0
+  cl_mem mem = NULL;
   cl_int err = CL_SUCCESS;
-  CHECK_MEM (buffer);
-  NOT_IMPLEMENTED;
+
+  CHECK_MEM(buffer);
+
+  mem = cl_mem_new_sub_buffer(buffer, flags, buffer_create_type,
+                       buffer_create_info, &err);
 error:
-#endif
-  return NULL;
+  if (errcode_ret)
+    *errcode_ret = err;
+  return mem;
 }
 
 cl_mem
@@ -1575,7 +1579,7 @@ clEnqueueCopyBuffer(cl_command_queue     command_queue,
     err = CL_INVALID_VALUE;
     goto error;
   }
-  if (dst_offset < 0 || dst_offset + cb > src_buffer->size) {
+  if (dst_offset < 0 || dst_offset + cb > dst_buffer->size) {
     err = CL_INVALID_VALUE;
     goto error;
   }
@@ -1588,7 +1592,22 @@ clEnqueueCopyBuffer(cl_command_queue     command_queue,
     goto error;
   }
 
-  // TODO: Need to check the sub buffer cases.
+  /* Sub-buffers of one parent share its bo: reject a copy whose byte ranges overlap there. */
+  if (src_buffer->type == CL_MEM_SUBBUFFER_TYPE && dst_buffer->type == CL_MEM_SUBBUFFER_TYPE ) {
+    struct _cl_mem_buffer* src_b = (struct _cl_mem_buffer*)src_buffer;
+    struct _cl_mem_buffer* dst_b = (struct _cl_mem_buffer*)dst_buffer;
+    /* First byte of each range, measured from the start of the parent buffer. */
+    size_t src_start = src_offset + src_b->sub_offset;
+    size_t dst_start = dst_offset + dst_b->sub_offset;
+    /* Overlap means either range starts inside the other, hence OR between the two tests. */
+    if (src_b->parent == dst_b->parent
+        && ((src_start <= dst_start && dst_start < src_start + cb)
+         || (dst_start <= src_start && src_start < dst_start + cb))) {
+      err = CL_MEM_COPY_OVERLAP;
+      goto error;
+    }
+  }
+
   err = cl_mem_copy(command_queue, src_buffer, dst_buffer, src_offset, dst_offset, cb);
 
   TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, src_buffer->ctx);
@@ -2047,15 +2066,22 @@ static cl_int _cl_map_mem(cl_mem mem, void **ptr, void **mem_ptr, size_t offset,
 {
   cl_int slot = -1;
   int err = CL_SUCCESS;
+  size_t sub_offset = 0;
+
+  if(mem->type == CL_MEM_SUBBUFFER_TYPE) {
+    struct _cl_mem_buffer* buffer = (struct _cl_mem_buffer*)mem;
+    sub_offset = buffer->sub_offset;
+  }
+
   if (!(*ptr = cl_mem_map_gtt_unsync(mem))) {
     err = CL_MAP_FAILURE;
     goto error;
   }
-  *ptr = (char*)(*ptr) + offset;
+  *ptr = (char*)(*ptr) + offset + sub_offset;
   if(mem->flags & CL_MEM_USE_HOST_PTR) {
     assert(mem->host_ptr);
     //only calc ptr here, will do memcpy in enqueue
-    *mem_ptr = mem->host_ptr + offset;
+    *mem_ptr = mem->host_ptr + offset + sub_offset;
   } else {
     *mem_ptr = *ptr;
   }
diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c
index 768ef0c..3f9d95c 100644
--- a/src/cl_command_queue.c
+++ b/src/cl_command_queue.c
@@ -145,7 +145,12 @@ cl_command_queue_bind_surface(cl_command_queue queue, cl_kernel k)
     if (arg_type != GBE_ARG_GLOBAL_PTR || !k->args[i].mem)
       continue;
     offset = gbe_kernel_get_curbe_offset(k->opaque, GBE_CURBE_KERNEL_ARGUMENT, i);
-    cl_gpgpu_bind_buf(queue->gpgpu, k->args[i].mem->bo, offset, cc_llc_l3);
+    if (k->args[i].mem->type == CL_MEM_SUBBUFFER_TYPE) {
+      struct _cl_mem_buffer* buffer = (struct _cl_mem_buffer*)k->args[i].mem;
+      cl_gpgpu_bind_buf(queue->gpgpu, k->args[i].mem->bo, offset, buffer->sub_offset, cc_llc_l3);
+    } else {
+      cl_gpgpu_bind_buf(queue->gpgpu, k->args[i].mem->bo, offset, 0, cc_llc_l3);
+    }
   }
 
   return CL_SUCCESS;
diff --git a/src/cl_driver.h b/src/cl_driver.h
index 0e9b487..640bce7 100644
--- a/src/cl_driver.h
+++ b/src/cl_driver.h
@@ -99,7 +99,7 @@ typedef cl_gpgpu (cl_gpgpu_sync_cb)(cl_gpgpu);
 extern cl_gpgpu_sync_cb *cl_gpgpu_sync;
 
 /* Bind a regular unformatted buffer */
-typedef void (cl_gpgpu_bind_buf_cb)(cl_gpgpu, cl_buffer, uint32_t offset, uint32_t cchint);
+typedef void (cl_gpgpu_bind_buf_cb)(cl_gpgpu, cl_buffer, uint32_t offset, uint32_t internal_offset, uint32_t cchint);
 extern cl_gpgpu_bind_buf_cb *cl_gpgpu_bind_buf;
 
 /* bind samplers defined in both kernel and kernel args. */
diff --git a/src/cl_enqueue.c b/src/cl_enqueue.c
index 0330691..070fd98 100644
--- a/src/cl_enqueue.c
+++ b/src/cl_enqueue.c
@@ -30,14 +30,18 @@
 cl_int cl_enqueue_read_buffer(enqueue_data* data)
 {
   cl_int err = CL_SUCCESS;
+  cl_mem mem = data->mem_obj;
+  assert(mem->type == CL_MEM_BUFFER_TYPE ||
+         mem->type == CL_MEM_SUBBUFFER_TYPE);
   void* src_ptr;
+  struct _cl_mem_buffer* buffer = (struct _cl_mem_buffer*)mem;
 
   if (!(src_ptr = cl_mem_map_auto(data->mem_obj))) {
     err = CL_MAP_FAILURE;
     goto error;
   }
 
-  memcpy(data->ptr, (char*)src_ptr + data->offset, data->size);
+  memcpy(data->ptr, (char*)src_ptr + data->offset + buffer->sub_offset, data->size);
 
   err = cl_mem_unmap_auto(data->mem_obj);
 
@@ -95,6 +99,10 @@ error:
 cl_int cl_enqueue_write_buffer(enqueue_data *data)
 {
   cl_int err = CL_SUCCESS;
+  cl_mem mem = data->mem_obj;
+  assert(mem->type == CL_MEM_BUFFER_TYPE ||
+         mem->type == CL_MEM_SUBBUFFER_TYPE);
+  struct _cl_mem_buffer* buffer = (struct _cl_mem_buffer*)mem;
   void* dst_ptr;
 
   if (!(dst_ptr = cl_mem_map_auto(data->mem_obj))) {
@@ -102,7 +110,7 @@ cl_int cl_enqueue_write_buffer(enqueue_data *data)
     goto error;
   }
 
-  memcpy((char*)dst_ptr + data->offset, data->const_ptr, data->size);
+  memcpy((char*)dst_ptr + data->offset + buffer->sub_offset, data->const_ptr, data->size);
 
   err = cl_mem_unmap_auto(data->mem_obj);
 
@@ -231,19 +239,23 @@ cl_int cl_enqueue_map_buffer(enqueue_data *data)
 {
   void *ptr = NULL;
   cl_int err = CL_SUCCESS;
-  cl_mem buffer = data->mem_obj;
+  cl_mem mem = data->mem_obj;
+  assert(mem->type == CL_MEM_BUFFER_TYPE ||
+         mem->type == CL_MEM_SUBBUFFER_TYPE);
+  struct _cl_mem_buffer* buffer = (struct _cl_mem_buffer*)mem;
+
   //because using unsync map in clEnqueueMapBuffer, so force use map_gtt here
-  if (!(ptr = cl_mem_map_gtt(buffer))) {
+  if (!(ptr = cl_mem_map_gtt(mem))) {
     err = CL_MAP_FAILURE;
     goto error;
   }
 
-  ptr = (char*)ptr + data->offset;
+  ptr = (char*)ptr + data->offset + buffer->sub_offset;
   assert(data->ptr == ptr);
 
-  if(buffer->flags & CL_MEM_USE_HOST_PTR) {
-    assert(buffer->host_ptr);
-    memcpy(buffer->host_ptr + data->offset, ptr, data->size);
+  if(mem->flags & CL_MEM_USE_HOST_PTR) {
+    assert(mem->host_ptr);
+    memcpy(mem->host_ptr + data->offset + buffer->sub_offset, ptr, data->size); /* same offset as ptr above */
   }
 
 error:
@@ -386,6 +398,7 @@ cl_int cl_enqueue_handle(enqueue_data* data)
     case EnqueueUnmapMemObject:
       return cl_enqueue_unmap_mem_object(data);
     case EnqueueCopyBufferRect:
+    case EnqueueCopyBuffer:
     case EnqueueCopyImage:
     case EnqueueCopyBufferToImage:
     case EnqueueCopyImageToBuffer:
diff --git a/src/cl_mem.c b/src/cl_mem.c
index b4c709d..00ef30d 100644
--- a/src/cl_mem.c
+++ b/src/cl_mem.c
@@ -52,7 +52,9 @@ static cl_mem_object_type
 cl_get_mem_object_type(cl_mem mem)
 {
   switch (mem->type) {
-    case CL_MEM_BUFFER_TYPE: return CL_MEM_OBJECT_BUFFER;
+    case CL_MEM_BUFFER_TYPE:
+    case CL_MEM_SUBBUFFER_TYPE:
+      return CL_MEM_OBJECT_BUFFER;
     case CL_MEM_IMAGE_TYPE:
     case CL_MEM_GL_IMAGE_TYPE:
     {
@@ -329,6 +331,102 @@ error:
   goto exit;
 }
 
+/* Create a sub-buffer of buffer covering the cl_buffer_region in create_info. It shares the
+ * parent's bo, takes a reference on the parent and its context, and inherits the parent's access
+ * flags when flags names none. Returns NULL on failure; *errcode_ret gets the status if non-NULL. */
+LOCAL cl_mem
+cl_mem_new_sub_buffer(cl_mem buffer,
+                      cl_mem_flags flags,
+                      cl_buffer_create_type create_type,
+                      const void *create_info,
+                      cl_int *errcode_ret)
+{
+  cl_int err = CL_SUCCESS;
+  cl_mem mem = NULL;
+  struct _cl_mem_buffer *sub_buf = NULL;
+  struct _cl_mem_buffer *parent = (struct _cl_mem_buffer*)buffer;
+
+  if (buffer->type != CL_MEM_BUFFER_TYPE) {
+    err = CL_INVALID_MEM_OBJECT;
+    goto error;
+  }
+  /* flags may neither contradict the parent's access mode nor carry host-pointer bits. */
+  if (flags && (((buffer->flags & CL_MEM_WRITE_ONLY) && (flags & (CL_MEM_READ_WRITE|CL_MEM_READ_ONLY)))
+          || ((buffer->flags & CL_MEM_READ_ONLY) && (flags & (CL_MEM_READ_WRITE|CL_MEM_WRITE_ONLY)))
+          || (flags & (CL_MEM_USE_HOST_PTR | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR)))) {
+    err = CL_INVALID_VALUE;
+    goto error;
+  }
+
+  if (create_type != CL_BUFFER_CREATE_TYPE_REGION || !create_info) {
+    err = CL_INVALID_VALUE;
+    goto error;
+  }
+
+  cl_buffer_region *info = (cl_buffer_region *)create_info;
+  if (!info->size) {
+    err = CL_INVALID_BUFFER_SIZE;
+    goto error;
+  }
+  /* Test against the space left after origin so that origin + size cannot wrap around. */
+  if (info->origin > buffer->size || info->size > buffer->size - info->origin) {
+    err = CL_INVALID_VALUE;
+    goto error;
+  }
+  /* NOTE(review): mask assumes a power-of-two byte count; CL_DEVICE_MEM_BASE_ADDR_ALIGN is in bits -- confirm. */
+  if (info->origin & (buffer->ctx->device->mem_base_addr_align - 1)) {
+    err = CL_MISALIGNED_SUB_BUFFER_OFFSET;
+    goto error;
+  }
+
+  /* Now create the sub buffer and link it to the buffer. */
+  TRY_ALLOC (sub_buf, CALLOC(struct _cl_mem_buffer));
+  mem = &sub_buf->base;
+  mem->type = CL_MEM_SUBBUFFER_TYPE;
+  SET_ICD(mem->dispatch)
+  mem->ref_n = 1;
+  mem->magic = CL_MAGIC_MEM_HEADER;
+  if (!(flags & (CL_MEM_READ_WRITE | CL_MEM_READ_ONLY | CL_MEM_WRITE_ONLY)))
+    flags |= buffer->flags & (CL_MEM_READ_WRITE | CL_MEM_READ_ONLY | CL_MEM_WRITE_ONLY);
+  mem->flags = flags;
+  sub_buf->parent = parent;
+
+  cl_mem_add_ref(buffer);
+  /* Push the sub-buffer at the head of the parent's sub-buffer list */
+  pthread_mutex_lock(&parent->sub_lock);
+  sub_buf->sub_next = parent->subs;
+  if (parent->subs != NULL)
+    parent->subs->sub_prev = sub_buf;
+  parent->subs = sub_buf;
+  pthread_mutex_unlock(&parent->sub_lock);
+
+  mem->bo = buffer->bo;
+  mem->size = info->size;
+  sub_buf->sub_offset = info->origin;
+  if (buffer->flags & CL_MEM_USE_HOST_PTR || buffer->flags & CL_MEM_COPY_HOST_PTR) {
+    mem->host_ptr = buffer->host_ptr;
+  }
+
+  cl_context_add_ref(buffer->ctx);
+  mem->ctx = buffer->ctx;
+  /* Push the sub-buffer at the head of the context's buffer list */
+  pthread_mutex_lock(&buffer->ctx->buffer_lock);
+  mem->next = buffer->ctx->buffers;
+  if (buffer->ctx->buffers != NULL)
+    buffer->ctx->buffers->prev = mem;
+  buffer->ctx->buffers = mem;
+  pthread_mutex_unlock(&buffer->ctx->buffer_lock);
+
+exit:
+  if (errcode_ret)
+    *errcode_ret = err;
+  return mem;
+error:
+  cl_mem_delete(mem);
+  mem = NULL;
+  goto exit;
+}
+
 void
 cl_mem_copy_image_region(const size_t *origin, const size_t *region,
                          void *dst, size_t dst_row_pitch, size_t dst_slice_pitch,
@@ -546,8 +644,6 @@ cl_mem_delete(cl_mem mem)
      cl_mem_gl_delete(cl_mem_gl_image(mem));
   }
 #endif
-  if (LIKELY(mem->bo != NULL))
-    cl_buffer_unreference(mem->bo);
 
   /* Remove it from the list */
   assert(mem->ctx);
@@ -586,6 +682,24 @@ cl_mem_delete(cl_mem mem)
     }
   }
 
+  /* Iff we are sub, do nothing for bo release. */
+  if (mem->type == CL_MEM_SUBBUFFER_TYPE) {
+    struct _cl_mem_buffer* buffer = (struct _cl_mem_buffer*)mem;
+    /* Remove it from the parent's list */
+    assert(buffer->parent);
+    pthread_mutex_lock(&buffer->parent->sub_lock);
+    if (buffer->sub_prev)
+      buffer->sub_prev->sub_next = buffer->sub_next;
+    if (buffer->sub_next)
+      buffer->sub_next->sub_prev = buffer->sub_prev;
+    if (buffer->parent->subs == buffer)
+      buffer->parent->subs = buffer->sub_next;
+    pthread_mutex_unlock(&buffer->parent->sub_lock);
+    cl_mem_delete((cl_mem )(buffer->parent));
+  } else if (LIKELY(mem->bo != NULL)) {
+    cl_buffer_unreference(mem->bo);
+  }
+
   cl_free(mem);
 }
 
diff --git a/src/cl_mem.h b/src/cl_mem.h
index 75d5cf4..e325fa1 100644
--- a/src/cl_mem.h
+++ b/src/cl_mem.h
@@ -66,6 +66,7 @@ typedef struct _cl_mem_dstr_cb {
 /* Used for buffers and images */
 enum cl_mem_type {
   CL_MEM_BUFFER_TYPE,
+  CL_MEM_SUBBUFFER_TYPE, /* Region of a CL_MEM_BUFFER_TYPE object that shares the parent's bo */
   CL_MEM_IMAGE_TYPE,
   CL_MEM_GL_IMAGE_TYPE,
 };
@@ -137,7 +138,11 @@ cl_mem_image_init(struct _cl_mem_image *image, size_t w, size_t h,
 
 struct _cl_mem_buffer {
   _cl_mem base;
-  size_t offset;
+  struct _cl_mem_buffer* subs;         /* Head of the list of sub-buffers created from this buffer. */
+  size_t sub_offset;                   /* Byte offset of a sub-buffer from the start of the shared bo. */
+  struct _cl_mem_buffer* sub_prev, *sub_next;/* Links chaining the sub-buffers of one parent together. */
+  pthread_mutex_t sub_lock;            /* Held while the subs list is modified. */
+  struct _cl_mem_buffer* parent;       /* Parent buffer when this object is a sub-buffer. */
 };
 
 inline static struct _cl_mem_image *
@@ -170,6 +175,9 @@ extern cl_int cl_get_image_info(cl_mem, cl_image_info, size_t, void *, size_t *)
 /* Create a new memory object and initialize it with possible user data */
 extern cl_mem cl_mem_new_buffer(cl_context, cl_mem_flags, size_t, void*, cl_int*);
 
+/* Create a new sub memory object */
+extern cl_mem cl_mem_new_sub_buffer(cl_mem, cl_mem_flags, cl_buffer_create_type, const void *, cl_int *);
+
 /* Idem but this is an image */
 extern cl_mem
 cl_mem_new_image(cl_context context,
-- 
1.7.9.5



More information about the Beignet mailing list