[Beignet] [PATCH 1/3] Implement api clEnqueueTask and clEnqueueNativeKernel.

Zhigang Gong zhigang.gong at linux.intel.com
Thu Sep 12 22:15:22 PDT 2013


LGTM, will push it latter. Thanks.

On Mon, Sep 09, 2013 at 04:10:21PM +0800, Yang Rong wrote:
> Also refine the whole memcpy's condition in function
> cl_enqueue_read_buffer_rect and cl_enqueue_write_buffer_rect.
> 
> Signed-off-by: Yang Rong <rong.r.yang at intel.com>
> ---
>  src/cl_api.c       | 83 +++++++++++++++++++++++++++++++++++++++++++-----------
>  src/cl_enqueue.c   | 65 ++++++++++++++++++++++++++++++------------
>  src/cl_enqueue.h   |  7 +++--
>  src/cl_gt_device.h |  2 +-
>  4 files changed, 119 insertions(+), 38 deletions(-)
> 
> diff --git a/src/cl_api.c b/src/cl_api.c
> index 168bcfb..f014b41 100644
> --- a/src/cl_api.c
> +++ b/src/cl_api.c
> @@ -1272,7 +1272,7 @@ clEnqueueReadBuffer(cl_command_queue command_queue,
>  
>    data = &defer_enqueue_data;
>    data->type    = EnqueueReadBuffer;
> -  data->mem_obj = buffer;
> +  data->mem_obj = &buffer;
>    data->ptr     = ptr;
>    data->offset  = offset;
>    data->size    = size;
> @@ -1353,7 +1353,7 @@ clEnqueueReadBufferRect(cl_command_queue command_queue,
>  
>    data = &no_wait_data;
>    data->type        = EnqueueReadBufferRect;
> -  data->mem_obj     = buffer;
> +  data->mem_obj     = &buffer;
>    data->ptr         = ptr;
>    data->origin[0]   = buffer_origin[0]; data->origin[1] = buffer_origin[1]; data->origin[2] = buffer_origin[2];
>    data->host_origin[0]  = host_origin[0]; data->host_origin[1] = host_origin[1]; data->host_origin[2] = host_origin[2];
> @@ -1411,7 +1411,7 @@ clEnqueueWriteBuffer(cl_command_queue    command_queue,
>  
>    data = &no_wait_data;
>    data->type      = EnqueueWriteBuffer;
> -  data->mem_obj   = buffer;
> +  data->mem_obj   = &buffer;
>    data->const_ptr = ptr;
>    data->offset    = offset;
>    data->size      = size;
> @@ -1493,7 +1493,7 @@ clEnqueueWriteBufferRect(cl_command_queue     command_queue,
>  
>    data = &no_wait_data;
>    data->type        = EnqueueWriteBufferRect;
> -  data->mem_obj     = buffer;
> +  data->mem_obj     = &buffer;
>    data->const_ptr   = ptr;
>    data->origin[0]   = buffer_origin[0]; data->origin[1] = buffer_origin[1]; data->origin[2] = buffer_origin[2];
>    data->host_origin[0]  = host_origin[0]; data->host_origin[1] = host_origin[1]; data->host_origin[2] = host_origin[2];
> @@ -1683,7 +1683,7 @@ clEnqueueReadImage(cl_command_queue      command_queue,
>  
>    data = &no_wait_data;
>    data->type        = EnqueueReadImage;
> -  data->mem_obj     = mem;
> +  data->mem_obj     = &mem;
>    data->ptr         = ptr;
>    data->origin[0]   = origin[0];  data->origin[1] = origin[1];  data->origin[2] = origin[2];
>    data->region[0]   = region[0];  data->region[1] = region[1];  data->region[2] = region[2];
> @@ -1765,7 +1765,7 @@ clEnqueueWriteImage(cl_command_queue     command_queue,
>  
>    data = &no_wait_data;
>    data->type        = EnqueueWriteImage;
> -  data->mem_obj     = mem;
> +  data->mem_obj     = &mem;
>    data->const_ptr   = ptr;
>    data->origin[0]   = origin[0];  data->origin[1] = origin[1];  data->origin[2] = origin[2];
>    data->region[0]   = region[0];  data->region[1] = region[1];  data->region[2] = region[2];
> @@ -1860,7 +1860,7 @@ error:
>  
>  cl_int
>  clEnqueueCopyImageToBuffer(cl_command_queue  command_queue,
> -                           cl_mem            src_image,
> +                           cl_mem            src_mem,
>                             cl_mem            dst_buffer,
>                             const size_t *    src_origin,
>                             const size_t *    region,
> @@ -2001,10 +2001,9 @@ clEnqueueMapBuffer(cl_command_queue  command_queue,
>  
>    data = &no_wait_data;
>    data->type        = EnqueueMapBuffer;
> -  data->mem_obj     = buffer;
> +  data->mem_obj     = &buffer;
>    data->offset      = offset;
>    data->size        = size;
> -  data->map_flags   = map_flags;
>    data->ptr         = ptr;
>  
>    if(handle_events(command_queue, num_events_in_wait_list, event_wait_list,
> @@ -2088,12 +2087,11 @@ clEnqueueMapImage(cl_command_queue   command_queue,
>  
>    data = &no_wait_data;
>    data->type        = EnqueueMapImage;
> -  data->mem_obj     = mem;
> +  data->mem_obj     = &mem;
>    data->origin[0]   = origin[0];  data->origin[1] = origin[1];  data->origin[2] = origin[2];
>    data->region[0]   = region[0];  data->region[1] = region[1];  data->region[2] = region[2];
>    data->row_pitch   = *image_row_pitch;
>    data->slice_pitch = *image_slice_pitch;
> -  data->map_flags   = map_flags;
>    data->ptr         = ptr;
>    data->offset      = offset;
>  
> @@ -2131,7 +2129,7 @@ clEnqueueUnmapMemObject(cl_command_queue  command_queue,
>  
>    data = &no_wait_data;
>    data->type        = EnqueueUnmapMemObject;
> -  data->mem_obj     = memobj;
> +  data->mem_obj     = &memobj;
>    data->ptr         = mapped_ptr;
>  
>    if(handle_events(command_queue, num_events_in_wait_list, event_wait_list,
> @@ -2252,8 +2250,11 @@ clEnqueueTask(cl_command_queue   command_queue,
>                const cl_event *   event_wait_list,
>                cl_event *         event)
>  {
> -  NOT_IMPLEMENTED;
> -  return 0;
> +  const size_t global_size[3] = {1, 0, 0};
> +  const size_t local_size[3]  = {1, 0, 0};
> +
> +  return clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, global_size, local_size,
> +                                num_events_in_wait_list, event_wait_list, event);
>  }
>  
>  cl_int
> @@ -2268,8 +2269,58 @@ clEnqueueNativeKernel(cl_command_queue   command_queue,
>                        const cl_event *   event_wait_list,
>                        cl_event *         event)
>  {
> -  NOT_IMPLEMENTED;
> -  return 0;
> +  cl_int err = CL_SUCCESS;
> +  void *new_args = NULL;
> +  enqueue_data *data, no_wait_data = { 0 };
> +  cl_int i;
> +
> +  if(user_func == NULL ||
> +    (args == NULL && cb_args > 0) ||
> +    (args == NULL && num_mem_objects ==0) ||
> +    (args != NULL && cb_args == 0) ||
> +    (num_mem_objects > 0 && (mem_list == NULL || args_mem_loc == NULL)) ||
> +    (num_mem_objects == 0 && (mem_list != NULL || args_mem_loc != NULL))) {
> +    err = CL_INVALID_VALUE;
> +    goto error;
> +  }
> +
> +  //Per spec, need copy args
> +  if (cb_args)
> +  {
> +    new_args = malloc(cb_args);
> +    if (!new_args)
> +    {
> +      err = CL_OUT_OF_HOST_MEMORY;
> +      goto error;
> +    }
> +    memcpy(new_args, args, cb_args);
> +
> +    for (i=0; i<num_mem_objects; ++i)
> +    {
> +      CHECK_MEM(mem_list[i]);
> +      args_mem_loc[i] = new_args + (args_mem_loc[i] - args);  //change to new args
> +    }
> +  }
> +
> +  TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, command_queue->ctx);
> +
> +  data = &no_wait_data;
> +  data->type        = EnqueueNativeKernel;
> +  data->mem_obj     = mem_list;
> +  data->ptr         = new_args;
> +  data->size        = cb_args;
> +  data->offset      = (size_t)num_mem_objects;
> +  data->const_ptr   = args_mem_loc;
> +  data->user_func   = user_func;
> +
> +  if(handle_events(command_queue, num_events_in_wait_list, event_wait_list,
> +                   event, data, CL_COMMAND_NATIVE_KERNEL) == CL_ENQUEUE_EXECUTE_IMM) {
> +    err = cl_enqueue_handle(data);
> +    if(event) cl_event_set_status(*event, CL_COMPLETE);
> +  }
> +
> +error:
> +  return err;
>  }
>  
>  cl_int
> diff --git a/src/cl_enqueue.c b/src/cl_enqueue.c
> index 989b044..3c069fe 100644
> --- a/src/cl_enqueue.c
> +++ b/src/cl_enqueue.c
> @@ -32,14 +32,14 @@ cl_int cl_enqueue_read_buffer(enqueue_data* data)
>    cl_int err = CL_SUCCESS;
>    void* src_ptr;
>  
> -  if (!(src_ptr = cl_mem_map_auto(data->mem_obj))) {
> +  if (!(src_ptr = cl_mem_map_auto(*data->mem_obj))) {
>      err = CL_MAP_FAILURE;
>      goto error;
>    }
>  
>    memcpy(data->ptr, (char*)src_ptr + data->offset, data->size);
>  
> -  err = cl_mem_unmap_auto(data->mem_obj);
> +  err = cl_mem_unmap_auto(*data->mem_obj);
>  
>  error:
>    return err;
> @@ -55,7 +55,7 @@ cl_int cl_enqueue_read_buffer_rect(enqueue_data* data)
>    const size_t* host_origin = data->host_origin;
>    const size_t* region = data->region;
>  
> -  if (!(src_ptr = cl_mem_map_auto(data->mem_obj))) {
> +  if (!(src_ptr = cl_mem_map_auto(*data->mem_obj))) {
>      err = CL_MAP_FAILURE;
>      goto error;
>    }
> @@ -66,8 +66,8 @@ cl_int cl_enqueue_read_buffer_rect(enqueue_data* data)
>     offset = host_origin[0] + data->host_row_pitch*host_origin[1] + data->host_slice_pitch*host_origin[2];
>     dst_ptr = (char *)data->ptr + offset;
>  
> -   if (!origin[0] && !host_origin[0] && data->row_pitch == data->host_row_pitch &&
> -       (region[2] == 1 || (!origin[1] && !host_origin[1] && data->slice_pitch == data->host_slice_pitch)))
> +   if (data->row_pitch == region[0] && data->row_pitch == data->host_row_pitch &&
> +       (region[2] == 1 || (data->slice_pitch == region[0]*region[1] && data->slice_pitch == data->host_slice_pitch)))
>     {
>       memcpy(dst_ptr, src_ptr, region[2] == 1 ? data->row_pitch*region[1] : data->slice_pitch*region[2]);
>     }
> @@ -86,7 +86,7 @@ cl_int cl_enqueue_read_buffer_rect(enqueue_data* data)
>       }
>     }
>  
> -  err = cl_mem_unmap_auto(data->mem_obj);
> +  err = cl_mem_unmap_auto(*data->mem_obj);
>  
>  error:
>    return err;
> @@ -97,14 +97,14 @@ cl_int cl_enqueue_write_buffer(enqueue_data *data)
>    cl_int err = CL_SUCCESS;
>    void* dst_ptr;
>  
> -  if (!(dst_ptr = cl_mem_map_auto(data->mem_obj))) {
> +  if (!(dst_ptr = cl_mem_map_auto(*data->mem_obj))) {
>      err = CL_MAP_FAILURE;
>      goto error;
>    }
>  
>    memcpy((char*)dst_ptr + data->offset, data->const_ptr, data->size);
>  
> -  err = cl_mem_unmap_auto(data->mem_obj);
> +  err = cl_mem_unmap_auto(*data->mem_obj);
>  
>  error:
>    return err;
> @@ -120,7 +120,7 @@ cl_int cl_enqueue_write_buffer_rect(enqueue_data *data)
>    const size_t* host_origin = data->host_origin;
>    const size_t* region = data->region;
>  
> -  if (!(dst_ptr = cl_mem_map_auto(data->mem_obj))) {
> +  if (!(dst_ptr = cl_mem_map_auto(*data->mem_obj))) {
>      err = CL_MAP_FAILURE;
>      goto error;
>    }
> @@ -131,8 +131,8 @@ cl_int cl_enqueue_write_buffer_rect(enqueue_data *data)
>    offset = host_origin[0] + data->host_row_pitch*host_origin[1] + data->host_slice_pitch*host_origin[2];
>    src_ptr = (char*)data->const_ptr + offset;
>  
> -  if (!origin[0] && !host_origin[0] && data->row_pitch == data->host_row_pitch &&
> -      (region[2] == 1 || (!origin[1] && !host_origin[1] && data->slice_pitch == data->host_slice_pitch)))
> +  if (data->row_pitch == region[0] && data->row_pitch == data->host_row_pitch &&
> +      (region[2] == 1 || (data->slice_pitch == region[0]*region[1] && data->slice_pitch == data->host_slice_pitch)))
>    {
>      memcpy(dst_ptr, src_ptr, region[2] == 1 ? data->row_pitch*region[1] : data->slice_pitch*region[2]);
>    }
> @@ -151,7 +151,7 @@ cl_int cl_enqueue_write_buffer_rect(enqueue_data *data)
>      }
>    }
>  
> -  err = cl_mem_unmap_auto(data->mem_obj);
> +  err = cl_mem_unmap_auto(*data->mem_obj);
>  
>  error:
>    return err;
> @@ -163,7 +163,7 @@ cl_int cl_enqueue_read_image(enqueue_data *data)
>    cl_int err = CL_SUCCESS;
>    void* src_ptr;
>  
> -  cl_mem mem = data->mem_obj;
> +  cl_mem mem = *data->mem_obj;
>    CHECK_IMAGE(mem, image);
>    const size_t* origin = data->origin;
>    const size_t* region = data->region;
> @@ -208,7 +208,7 @@ cl_int cl_enqueue_write_image(enqueue_data *data)
>    cl_int err = CL_SUCCESS;
>    void* dst_ptr;
>  
> -  cl_mem mem = data->mem_obj;
> +  cl_mem mem = *data->mem_obj;
>    CHECK_IMAGE(mem, image);
>    const size_t *origin = data->origin;
>    const size_t *region = data->region;
> @@ -252,7 +252,7 @@ cl_int cl_enqueue_map_buffer(enqueue_data *data)
>  {
>    void *ptr = NULL;
>    cl_int err = CL_SUCCESS;
> -  cl_mem buffer = data->mem_obj;
> +  cl_mem buffer = *data->mem_obj;
>    //because using unsync map in clEnqueueMapBuffer, so force use map_gtt here
>    if (!(ptr = cl_mem_map_gtt(buffer))) {
>      err = CL_MAP_FAILURE;
> @@ -274,7 +274,7 @@ error:
>  cl_int cl_enqueue_map_image(enqueue_data *data)
>  {
>    cl_int err = CL_SUCCESS;
> -  cl_mem mem = data->mem_obj;
> +  cl_mem mem = *data->mem_obj;
>    void *ptr = NULL;
>  
>    if (!(ptr = cl_mem_map_gtt(mem))) {
> @@ -295,7 +295,7 @@ cl_int cl_enqueue_unmap_mem_object(enqueue_data *data)
>    size_t mapped_size = 0;
>    void * v_ptr = NULL;
>    void * mapped_ptr = data->ptr;
> -  cl_mem memobj = data->mem_obj;
> +  cl_mem memobj = *data->mem_obj;
>  
>    assert(memobj->mapped_ptr_sz >= memobj->map_ref);
>    INVALID_VALUE_IF(!mapped_ptr);
> @@ -351,6 +351,32 @@ error:
>    return err;
>  }
>  
> +cl_int cl_enqueue_native_kernel(enqueue_data *data)
> +{
> +  cl_int err = CL_SUCCESS;
> +  cl_uint num_mem_objects = (cl_uint)data->offset;
> +  const cl_mem *mem_list = data->mem_obj;
> +  const void **args_mem_loc = (const void **)data->const_ptr;
> +  cl_uint i;
> +
> +  for (i=0; i<num_mem_objects; ++i)
> +  {
> +      const cl_mem buffer = mem_list[i];
> +      CHECK_MEM(buffer);
> +
> +      *((void **)args_mem_loc[i]) = cl_mem_map_auto(buffer);
> +  }
> +  data->user_func(data->ptr);
> +
> +  for (i=0; i<num_mem_objects; ++i)
> +  {
> +      cl_mem_unmap_auto(mem_list[i]);
> +  }
> +
> +  free(data->ptr);
> +error:
> +  return err;
> +}
>  cl_int cl_enqueue_handle(enqueue_data* data)
>  {
>    switch(data->type) {
> @@ -375,7 +401,10 @@ cl_int cl_enqueue_handle(enqueue_data* data)
>      case EnqueueCopyBufferRect:
>      case EnqueueCopyImage:
>      case EnqueueNDRangeKernel:
> -      cl_gpgpu_event_resume((cl_gpgpu_event)data->ptr);   //goto default
> +      cl_gpgpu_event_resume((cl_gpgpu_event)data->ptr);
> +      return CL_SUCCESS;
> +    case EnqueueNativeKernel:
> +      return cl_enqueue_native_kernel(data);
>      default:
>        return CL_SUCCESS;
>    }
> diff --git a/src/cl_enqueue.h b/src/cl_enqueue.h
> index 848c7c4..236cc2d 100644
> --- a/src/cl_enqueue.h
> +++ b/src/cl_enqueue.h
> @@ -40,12 +40,13 @@ typedef enum {
>    EnqueueMapImage,
>    EnqueueUnmapMemObject,
>    EnqueueNDRangeKernel,
> +  EnqueueNativeKernel,
>    EnqueueInvalid
>  } enqueue_type;
>  
>  typedef struct _enqueue_data {
>    enqueue_type      type;             /* Command type */
> -  cl_mem            mem_obj;          /* Enqueue's cl_mem */
> +  const cl_mem      *mem_obj;         /* Enqueue's cl_mem */
>    cl_command_queue  queue;            /* Command queue */
>    size_t            offset;           /* Mem object's offset */
>    size_t            size;             /* Size */
> @@ -56,9 +57,9 @@ typedef struct _enqueue_data {
>    size_t            slice_pitch;      /* Slice pitch */
>    size_t            host_row_pitch;   /* Host row pitch, used in read/write buffer rect */
>    size_t            host_slice_pitch; /* Host slice pitch, used in read/write buffer rect */
> -  cl_map_flags      map_flags;        /* Map flags */
>    const void *      const_ptr;        /* Const ptr for memory read */
> -  void *            ptr;              /* ptr for write and return value */
> +  void *            ptr;              /* Ptr for write and return value */
> +  void (*user_func)(void *);          /* pointer to a host-callable user function */
>  } enqueue_data;
>  
>  /* Do real enqueue commands */
> diff --git a/src/cl_gt_device.h b/src/cl_gt_device.h
> index feb4ab3..1eb790f 100644
> --- a/src/cl_gt_device.h
> +++ b/src/cl_gt_device.h
> @@ -59,7 +59,7 @@
>  .endian_little = CL_TRUE,
>  .available = CL_TRUE,
>  .compiler_available = CL_FALSE, /* XXX */
> -.execution_capabilities = CL_EXEC_KERNEL,
> +.execution_capabilities = CL_EXEC_KERNEL | CL_EXEC_NATIVE_KERNEL,
>  .queue_properties = CL_QUEUE_PROFILING_ENABLE,
>  .platform = NULL, /* == intel_platform (set when requested) */
>  /* IEEE 754, XXX does IVB support CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT? */
> -- 
> 1.8.1.2
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list