[Beignet] [PATCH V2 1/3] Implement api clEnqueueTask and clEnqueueNativeKernel.

Yang Rong rong.r.yang at intel.com
Thu Sep 12 23:06:59 PDT 2013


Also refine the whole memcpy's condition in function
cl_enqueue_read_buffer_rect and cl_enqueue_write_buffer_rect.

V2: Add a mem_list to enqueue_data to fix utest error.

Signed-off-by: Yang Rong <rong.r.yang at intel.com>
---
 src/cl_api.c       | 65 ++++++++++++++++++++++++++++++++++++++++++++++++------
 src/cl_enqueue.c   | 39 +++++++++++++++++++++++++++-----
 src/cl_enqueue.h   |  6 +++--
 src/cl_gt_device.h |  2 +-
 4 files changed, 97 insertions(+), 15 deletions(-)

diff --git a/src/cl_api.c b/src/cl_api.c
index aeca782..64e11d6 100644
--- a/src/cl_api.c
+++ b/src/cl_api.c
@@ -1861,7 +1861,7 @@ error:
 
 cl_int
 clEnqueueCopyImageToBuffer(cl_command_queue  command_queue,
-                           cl_mem            src_image,
+                           cl_mem            src_mem,
                            cl_mem            dst_buffer,
                            const size_t *    src_origin,
                            const size_t *    region,
@@ -2005,7 +2005,6 @@ clEnqueueMapBuffer(cl_command_queue  command_queue,
   data->mem_obj     = buffer;
   data->offset      = offset;
   data->size        = size;
-  data->map_flags   = map_flags;
   data->ptr         = ptr;
 
   if(handle_events(command_queue, num_events_in_wait_list, event_wait_list,
@@ -2094,7 +2093,6 @@ clEnqueueMapImage(cl_command_queue   command_queue,
   data->region[0]   = region[0];  data->region[1] = region[1];  data->region[2] = region[2];
   data->row_pitch   = *image_row_pitch;
   data->slice_pitch = *image_slice_pitch;
-  data->map_flags   = map_flags;
   data->ptr         = ptr;
   data->offset      = offset;
 
@@ -2253,8 +2251,11 @@ clEnqueueTask(cl_command_queue   command_queue,
               const cl_event *   event_wait_list,
               cl_event *         event)
 {
-  NOT_IMPLEMENTED;
-  return 0;
+  const size_t global_size[3] = {1, 0, 0};
+  const size_t local_size[3]  = {1, 0, 0};
+
+  return clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, global_size, local_size,
+                                num_events_in_wait_list, event_wait_list, event);
 }
 
 cl_int
@@ -2269,8 +2270,58 @@ clEnqueueNativeKernel(cl_command_queue   command_queue,
                       const cl_event *   event_wait_list,
                       cl_event *         event)
 {
-  NOT_IMPLEMENTED;
-  return 0;
+  cl_int err = CL_SUCCESS;
+  void *new_args = NULL;
+  enqueue_data *data, no_wait_data = { 0 };
+  cl_int i;
+
+  if(user_func == NULL ||
+    (args == NULL && cb_args > 0) ||
+    (args == NULL && num_mem_objects ==0) ||
+    (args != NULL && cb_args == 0) ||
+    (num_mem_objects > 0 && (mem_list == NULL || args_mem_loc == NULL)) ||
+    (num_mem_objects == 0 && (mem_list != NULL || args_mem_loc != NULL))) {
+    err = CL_INVALID_VALUE;
+    goto error;
+  }
+
+  //Per spec, need copy args
+  if (cb_args)
+  {
+    new_args = malloc(cb_args);
+    if (!new_args)
+    {
+      err = CL_OUT_OF_HOST_MEMORY;
+      goto error;
+    }
+    memcpy(new_args, args, cb_args);
+
+    for (i=0; i<num_mem_objects; ++i)
+    {
+      CHECK_MEM(mem_list[i]);
+      args_mem_loc[i] = new_args + (args_mem_loc[i] - args);  //change to new args
+    }
+  }
+
+  TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, command_queue->ctx);
+
+  data = &no_wait_data;
+  data->type        = EnqueueNativeKernel;
+  data->mem_list    = mem_list;
+  data->ptr         = new_args;
+  data->size        = cb_args;
+  data->offset      = (size_t)num_mem_objects;
+  data->const_ptr   = args_mem_loc;
+  data->user_func   = user_func;
+
+  if(handle_events(command_queue, num_events_in_wait_list, event_wait_list,
+                   event, data, CL_COMMAND_NATIVE_KERNEL) == CL_ENQUEUE_EXECUTE_IMM) {
+    err = cl_enqueue_handle(data);
+    if(event) cl_event_set_status(*event, CL_COMPLETE);
+  }
+
+error:
+  return err;
 }
 
 cl_int
diff --git a/src/cl_enqueue.c b/src/cl_enqueue.c
index 989b044..660e1d8 100644
--- a/src/cl_enqueue.c
+++ b/src/cl_enqueue.c
@@ -66,8 +66,8 @@ cl_int cl_enqueue_read_buffer_rect(enqueue_data* data)
    offset = host_origin[0] + data->host_row_pitch*host_origin[1] + data->host_slice_pitch*host_origin[2];
    dst_ptr = (char *)data->ptr + offset;
 
-   if (!origin[0] && !host_origin[0] && data->row_pitch == data->host_row_pitch &&
-       (region[2] == 1 || (!origin[1] && !host_origin[1] && data->slice_pitch == data->host_slice_pitch)))
+   if (data->row_pitch == region[0] && data->row_pitch == data->host_row_pitch &&
+       (region[2] == 1 || (data->slice_pitch == region[0]*region[1] && data->slice_pitch == data->host_slice_pitch)))
    {
      memcpy(dst_ptr, src_ptr, region[2] == 1 ? data->row_pitch*region[1] : data->slice_pitch*region[2]);
    }
@@ -131,8 +131,8 @@ cl_int cl_enqueue_write_buffer_rect(enqueue_data *data)
   offset = host_origin[0] + data->host_row_pitch*host_origin[1] + data->host_slice_pitch*host_origin[2];
   src_ptr = (char*)data->const_ptr + offset;
 
-  if (!origin[0] && !host_origin[0] && data->row_pitch == data->host_row_pitch &&
-      (region[2] == 1 || (!origin[1] && !host_origin[1] && data->slice_pitch == data->host_slice_pitch)))
+  if (data->row_pitch == region[0] && data->row_pitch == data->host_row_pitch &&
+      (region[2] == 1 || (data->slice_pitch == region[0]*region[1] && data->slice_pitch == data->host_slice_pitch)))
   {
     memcpy(dst_ptr, src_ptr, region[2] == 1 ? data->row_pitch*region[1] : data->slice_pitch*region[2]);
   }
@@ -351,6 +351,32 @@ error:
   return err;
 }
 
+cl_int cl_enqueue_native_kernel(enqueue_data *data)
+{
+  cl_int err = CL_SUCCESS;
+  cl_uint num_mem_objects = (cl_uint)data->offset;
+  const cl_mem *mem_list = data->mem_list;
+  const void **args_mem_loc = (const void **)data->const_ptr;
+  cl_uint i;
+
+  for (i=0; i<num_mem_objects; ++i)
+  {
+      const cl_mem buffer = mem_list[i];
+      CHECK_MEM(buffer);
+
+      *((void **)args_mem_loc[i]) = cl_mem_map_auto(buffer);
+  }
+  data->user_func(data->ptr);
+
+  for (i=0; i<num_mem_objects; ++i)
+  {
+      cl_mem_unmap_auto(mem_list[i]);
+  }
+
+  free(data->ptr);
+error:
+  return err;
+}
 cl_int cl_enqueue_handle(enqueue_data* data)
 {
   switch(data->type) {
@@ -375,7 +401,10 @@ cl_int cl_enqueue_handle(enqueue_data* data)
     case EnqueueCopyBufferRect:
     case EnqueueCopyImage:
     case EnqueueNDRangeKernel:
-      cl_gpgpu_event_resume((cl_gpgpu_event)data->ptr);   //goto default
+      cl_gpgpu_event_resume((cl_gpgpu_event)data->ptr);
+      return CL_SUCCESS;
+    case EnqueueNativeKernel:
+      return cl_enqueue_native_kernel(data);
     default:
       return CL_SUCCESS;
   }
diff --git a/src/cl_enqueue.h b/src/cl_enqueue.h
index 848c7c4..f90f921 100644
--- a/src/cl_enqueue.h
+++ b/src/cl_enqueue.h
@@ -40,6 +40,7 @@ typedef enum {
   EnqueueMapImage,
   EnqueueUnmapMemObject,
   EnqueueNDRangeKernel,
+  EnqueueNativeKernel,
   EnqueueInvalid
 } enqueue_type;
 
@@ -56,9 +57,10 @@ typedef struct _enqueue_data {
   size_t            slice_pitch;      /* Slice pitch */
   size_t            host_row_pitch;   /* Host row pitch, used in read/write buffer rect */
   size_t            host_slice_pitch; /* Host slice pitch, used in read/write buffer rect */
-  cl_map_flags      map_flags;        /* Map flags */
   const void *      const_ptr;        /* Const ptr for memory read */
-  void *            ptr;              /* ptr for write and return value */
+  void *            ptr;              /* Ptr for write and return value */
+  const cl_mem*     mem_list;         /* mem_list of clEnqueueNativeKernel */
+  void (*user_func)(void *);          /* pointer to a host-callable user function */
 } enqueue_data;
 
 /* Do real enqueue commands */
diff --git a/src/cl_gt_device.h b/src/cl_gt_device.h
index feb4ab3..1eb790f 100644
--- a/src/cl_gt_device.h
+++ b/src/cl_gt_device.h
@@ -59,7 +59,7 @@
 .endian_little = CL_TRUE,
 .available = CL_TRUE,
 .compiler_available = CL_FALSE, /* XXX */
-.execution_capabilities = CL_EXEC_KERNEL,
+.execution_capabilities = CL_EXEC_KERNEL | CL_EXEC_NATIVE_KERNEL,
 .queue_properties = CL_QUEUE_PROFILING_ENABLE,
 .platform = NULL, /* == intel_platform (set when requested) */
 /* IEEE 754, XXX does IVB support CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT? */
-- 
1.8.1.2



More information about the Beignet mailing list