[Beignet] [PATCH 1/3] Implement api clEnqueueTask and clEnqueueNativeKernel.
Yang Rong
rong.r.yang at intel.com
Mon Sep 9 01:10:21 PDT 2013
Also refine the whole memcpy's condition in function
cl_enqueue_read_buffer_rect and cl_enqueue_write_buffer_rect.
Signed-off-by: Yang Rong <rong.r.yang at intel.com>
---
src/cl_api.c | 83 +++++++++++++++++++++++++++++++++++++++++++-----------
src/cl_enqueue.c | 65 ++++++++++++++++++++++++++++++------------
src/cl_enqueue.h | 7 +++--
src/cl_gt_device.h | 2 +-
4 files changed, 119 insertions(+), 38 deletions(-)
diff --git a/src/cl_api.c b/src/cl_api.c
index 168bcfb..f014b41 100644
--- a/src/cl_api.c
+++ b/src/cl_api.c
@@ -1272,7 +1272,7 @@ clEnqueueReadBuffer(cl_command_queue command_queue,
data = &defer_enqueue_data;
data->type = EnqueueReadBuffer;
- data->mem_obj = buffer;
+ data->mem_obj = &buffer;
data->ptr = ptr;
data->offset = offset;
data->size = size;
@@ -1353,7 +1353,7 @@ clEnqueueReadBufferRect(cl_command_queue command_queue,
data = &no_wait_data;
data->type = EnqueueReadBufferRect;
- data->mem_obj = buffer;
+ data->mem_obj = &buffer;
data->ptr = ptr;
data->origin[0] = buffer_origin[0]; data->origin[1] = buffer_origin[1]; data->origin[2] = buffer_origin[2];
data->host_origin[0] = host_origin[0]; data->host_origin[1] = host_origin[1]; data->host_origin[2] = host_origin[2];
@@ -1411,7 +1411,7 @@ clEnqueueWriteBuffer(cl_command_queue command_queue,
data = &no_wait_data;
data->type = EnqueueWriteBuffer;
- data->mem_obj = buffer;
+ data->mem_obj = &buffer;
data->const_ptr = ptr;
data->offset = offset;
data->size = size;
@@ -1493,7 +1493,7 @@ clEnqueueWriteBufferRect(cl_command_queue command_queue,
data = &no_wait_data;
data->type = EnqueueWriteBufferRect;
- data->mem_obj = buffer;
+ data->mem_obj = &buffer;
data->const_ptr = ptr;
data->origin[0] = buffer_origin[0]; data->origin[1] = buffer_origin[1]; data->origin[2] = buffer_origin[2];
data->host_origin[0] = host_origin[0]; data->host_origin[1] = host_origin[1]; data->host_origin[2] = host_origin[2];
@@ -1683,7 +1683,7 @@ clEnqueueReadImage(cl_command_queue command_queue,
data = &no_wait_data;
data->type = EnqueueReadImage;
- data->mem_obj = mem;
+ data->mem_obj = &mem;
data->ptr = ptr;
data->origin[0] = origin[0]; data->origin[1] = origin[1]; data->origin[2] = origin[2];
data->region[0] = region[0]; data->region[1] = region[1]; data->region[2] = region[2];
@@ -1765,7 +1765,7 @@ clEnqueueWriteImage(cl_command_queue command_queue,
data = &no_wait_data;
data->type = EnqueueWriteImage;
- data->mem_obj = mem;
+ data->mem_obj = &mem;
data->const_ptr = ptr;
data->origin[0] = origin[0]; data->origin[1] = origin[1]; data->origin[2] = origin[2];
data->region[0] = region[0]; data->region[1] = region[1]; data->region[2] = region[2];
@@ -1860,7 +1860,7 @@ error:
cl_int
clEnqueueCopyImageToBuffer(cl_command_queue command_queue,
- cl_mem src_image,
+ cl_mem src_mem,
cl_mem dst_buffer,
const size_t * src_origin,
const size_t * region,
@@ -2001,10 +2001,9 @@ clEnqueueMapBuffer(cl_command_queue command_queue,
data = &no_wait_data;
data->type = EnqueueMapBuffer;
- data->mem_obj = buffer;
+ data->mem_obj = &buffer;
data->offset = offset;
data->size = size;
- data->map_flags = map_flags;
data->ptr = ptr;
if(handle_events(command_queue, num_events_in_wait_list, event_wait_list,
@@ -2088,12 +2087,11 @@ clEnqueueMapImage(cl_command_queue command_queue,
data = &no_wait_data;
data->type = EnqueueMapImage;
- data->mem_obj = mem;
+ data->mem_obj = &mem;
data->origin[0] = origin[0]; data->origin[1] = origin[1]; data->origin[2] = origin[2];
data->region[0] = region[0]; data->region[1] = region[1]; data->region[2] = region[2];
data->row_pitch = *image_row_pitch;
data->slice_pitch = *image_slice_pitch;
- data->map_flags = map_flags;
data->ptr = ptr;
data->offset = offset;
@@ -2131,7 +2129,7 @@ clEnqueueUnmapMemObject(cl_command_queue command_queue,
data = &no_wait_data;
data->type = EnqueueUnmapMemObject;
- data->mem_obj = memobj;
+ data->mem_obj = &memobj;
data->ptr = mapped_ptr;
if(handle_events(command_queue, num_events_in_wait_list, event_wait_list,
@@ -2252,8 +2250,11 @@ clEnqueueTask(cl_command_queue command_queue,
const cl_event * event_wait_list,
cl_event * event)
{
- NOT_IMPLEMENTED;
- return 0;
+ const size_t global_size[3] = {1, 0, 0};
+ const size_t local_size[3] = {1, 0, 0};
+
+ return clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, global_size, local_size,
+ num_events_in_wait_list, event_wait_list, event);
}
cl_int
@@ -2268,8 +2269,58 @@ clEnqueueNativeKernel(cl_command_queue command_queue,
const cl_event * event_wait_list,
cl_event * event)
{
- NOT_IMPLEMENTED;
- return 0;
+ cl_int err = CL_SUCCESS;
+ void *new_args = NULL;
+ enqueue_data *data, no_wait_data = { 0 };
+ cl_int i;
+
+ if(user_func == NULL ||
+ (args == NULL && cb_args > 0) ||
+ (args == NULL && num_mem_objects ==0) ||
+ (args != NULL && cb_args == 0) ||
+ (num_mem_objects > 0 && (mem_list == NULL || args_mem_loc == NULL)) ||
+ (num_mem_objects == 0 && (mem_list != NULL || args_mem_loc != NULL))) {
+ err = CL_INVALID_VALUE;
+ goto error;
+ }
+
+ //Per spec, need copy args
+ if (cb_args)
+ {
+ new_args = malloc(cb_args);
+ if (!new_args)
+ {
+ err = CL_OUT_OF_HOST_MEMORY;
+ goto error;
+ }
+ memcpy(new_args, args, cb_args);
+
+ for (i=0; i<num_mem_objects; ++i)
+ {
+ CHECK_MEM(mem_list[i]);
+ args_mem_loc[i] = new_args + (args_mem_loc[i] - args); //change to new args
+ }
+ }
+
+ TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, command_queue->ctx);
+
+ data = &no_wait_data;
+ data->type = EnqueueNativeKernel;
+ data->mem_obj = mem_list;
+ data->ptr = new_args;
+ data->size = cb_args;
+ data->offset = (size_t)num_mem_objects;
+ data->const_ptr = args_mem_loc;
+ data->user_func = user_func;
+
+ if(handle_events(command_queue, num_events_in_wait_list, event_wait_list,
+ event, data, CL_COMMAND_NATIVE_KERNEL) == CL_ENQUEUE_EXECUTE_IMM) {
+ err = cl_enqueue_handle(data);
+ if(event) cl_event_set_status(*event, CL_COMPLETE);
+ }
+
+error:
+ return err;
}
cl_int
diff --git a/src/cl_enqueue.c b/src/cl_enqueue.c
index 989b044..3c069fe 100644
--- a/src/cl_enqueue.c
+++ b/src/cl_enqueue.c
@@ -32,14 +32,14 @@ cl_int cl_enqueue_read_buffer(enqueue_data* data)
cl_int err = CL_SUCCESS;
void* src_ptr;
- if (!(src_ptr = cl_mem_map_auto(data->mem_obj))) {
+ if (!(src_ptr = cl_mem_map_auto(*data->mem_obj))) {
err = CL_MAP_FAILURE;
goto error;
}
memcpy(data->ptr, (char*)src_ptr + data->offset, data->size);
- err = cl_mem_unmap_auto(data->mem_obj);
+ err = cl_mem_unmap_auto(*data->mem_obj);
error:
return err;
@@ -55,7 +55,7 @@ cl_int cl_enqueue_read_buffer_rect(enqueue_data* data)
const size_t* host_origin = data->host_origin;
const size_t* region = data->region;
- if (!(src_ptr = cl_mem_map_auto(data->mem_obj))) {
+ if (!(src_ptr = cl_mem_map_auto(*data->mem_obj))) {
err = CL_MAP_FAILURE;
goto error;
}
@@ -66,8 +66,8 @@ cl_int cl_enqueue_read_buffer_rect(enqueue_data* data)
offset = host_origin[0] + data->host_row_pitch*host_origin[1] + data->host_slice_pitch*host_origin[2];
dst_ptr = (char *)data->ptr + offset;
- if (!origin[0] && !host_origin[0] && data->row_pitch == data->host_row_pitch &&
- (region[2] == 1 || (!origin[1] && !host_origin[1] && data->slice_pitch == data->host_slice_pitch)))
+ if (data->row_pitch == region[0] && data->row_pitch == data->host_row_pitch &&
+ (region[2] == 1 || (data->slice_pitch == region[0]*region[1] && data->slice_pitch == data->host_slice_pitch)))
{
memcpy(dst_ptr, src_ptr, region[2] == 1 ? data->row_pitch*region[1] : data->slice_pitch*region[2]);
}
@@ -86,7 +86,7 @@ cl_int cl_enqueue_read_buffer_rect(enqueue_data* data)
}
}
- err = cl_mem_unmap_auto(data->mem_obj);
+ err = cl_mem_unmap_auto(*data->mem_obj);
error:
return err;
@@ -97,14 +97,14 @@ cl_int cl_enqueue_write_buffer(enqueue_data *data)
cl_int err = CL_SUCCESS;
void* dst_ptr;
- if (!(dst_ptr = cl_mem_map_auto(data->mem_obj))) {
+ if (!(dst_ptr = cl_mem_map_auto(*data->mem_obj))) {
err = CL_MAP_FAILURE;
goto error;
}
memcpy((char*)dst_ptr + data->offset, data->const_ptr, data->size);
- err = cl_mem_unmap_auto(data->mem_obj);
+ err = cl_mem_unmap_auto(*data->mem_obj);
error:
return err;
@@ -120,7 +120,7 @@ cl_int cl_enqueue_write_buffer_rect(enqueue_data *data)
const size_t* host_origin = data->host_origin;
const size_t* region = data->region;
- if (!(dst_ptr = cl_mem_map_auto(data->mem_obj))) {
+ if (!(dst_ptr = cl_mem_map_auto(*data->mem_obj))) {
err = CL_MAP_FAILURE;
goto error;
}
@@ -131,8 +131,8 @@ cl_int cl_enqueue_write_buffer_rect(enqueue_data *data)
offset = host_origin[0] + data->host_row_pitch*host_origin[1] + data->host_slice_pitch*host_origin[2];
src_ptr = (char*)data->const_ptr + offset;
- if (!origin[0] && !host_origin[0] && data->row_pitch == data->host_row_pitch &&
- (region[2] == 1 || (!origin[1] && !host_origin[1] && data->slice_pitch == data->host_slice_pitch)))
+ if (data->row_pitch == region[0] && data->row_pitch == data->host_row_pitch &&
+ (region[2] == 1 || (data->slice_pitch == region[0]*region[1] && data->slice_pitch == data->host_slice_pitch)))
{
memcpy(dst_ptr, src_ptr, region[2] == 1 ? data->row_pitch*region[1] : data->slice_pitch*region[2]);
}
@@ -151,7 +151,7 @@ cl_int cl_enqueue_write_buffer_rect(enqueue_data *data)
}
}
- err = cl_mem_unmap_auto(data->mem_obj);
+ err = cl_mem_unmap_auto(*data->mem_obj);
error:
return err;
@@ -163,7 +163,7 @@ cl_int cl_enqueue_read_image(enqueue_data *data)
cl_int err = CL_SUCCESS;
void* src_ptr;
- cl_mem mem = data->mem_obj;
+ cl_mem mem = *data->mem_obj;
CHECK_IMAGE(mem, image);
const size_t* origin = data->origin;
const size_t* region = data->region;
@@ -208,7 +208,7 @@ cl_int cl_enqueue_write_image(enqueue_data *data)
cl_int err = CL_SUCCESS;
void* dst_ptr;
- cl_mem mem = data->mem_obj;
+ cl_mem mem = *data->mem_obj;
CHECK_IMAGE(mem, image);
const size_t *origin = data->origin;
const size_t *region = data->region;
@@ -252,7 +252,7 @@ cl_int cl_enqueue_map_buffer(enqueue_data *data)
{
void *ptr = NULL;
cl_int err = CL_SUCCESS;
- cl_mem buffer = data->mem_obj;
+ cl_mem buffer = *data->mem_obj;
//because using unsync map in clEnqueueMapBuffer, so force use map_gtt here
if (!(ptr = cl_mem_map_gtt(buffer))) {
err = CL_MAP_FAILURE;
@@ -274,7 +274,7 @@ error:
cl_int cl_enqueue_map_image(enqueue_data *data)
{
cl_int err = CL_SUCCESS;
- cl_mem mem = data->mem_obj;
+ cl_mem mem = *data->mem_obj;
void *ptr = NULL;
if (!(ptr = cl_mem_map_gtt(mem))) {
@@ -295,7 +295,7 @@ cl_int cl_enqueue_unmap_mem_object(enqueue_data *data)
size_t mapped_size = 0;
void * v_ptr = NULL;
void * mapped_ptr = data->ptr;
- cl_mem memobj = data->mem_obj;
+ cl_mem memobj = *data->mem_obj;
assert(memobj->mapped_ptr_sz >= memobj->map_ref);
INVALID_VALUE_IF(!mapped_ptr);
@@ -351,6 +351,32 @@ error:
return err;
}
+cl_int cl_enqueue_native_kernel(enqueue_data *data)
+{
+ cl_int err = CL_SUCCESS;
+ cl_uint num_mem_objects = (cl_uint)data->offset;
+ const cl_mem *mem_list = data->mem_obj;
+ const void **args_mem_loc = (const void **)data->const_ptr;
+ cl_uint i;
+
+ for (i=0; i<num_mem_objects; ++i)
+ {
+ const cl_mem buffer = mem_list[i];
+ CHECK_MEM(buffer);
+
+ *((void **)args_mem_loc[i]) = cl_mem_map_auto(buffer);
+ }
+ data->user_func(data->ptr);
+
+ for (i=0; i<num_mem_objects; ++i)
+ {
+ cl_mem_unmap_auto(mem_list[i]);
+ }
+
+ free(data->ptr);
+error:
+ return err;
+}
cl_int cl_enqueue_handle(enqueue_data* data)
{
switch(data->type) {
@@ -375,7 +401,10 @@ cl_int cl_enqueue_handle(enqueue_data* data)
case EnqueueCopyBufferRect:
case EnqueueCopyImage:
case EnqueueNDRangeKernel:
- cl_gpgpu_event_resume((cl_gpgpu_event)data->ptr); //goto default
+ cl_gpgpu_event_resume((cl_gpgpu_event)data->ptr);
+ return CL_SUCCESS;
+ case EnqueueNativeKernel:
+ return cl_enqueue_native_kernel(data);
default:
return CL_SUCCESS;
}
diff --git a/src/cl_enqueue.h b/src/cl_enqueue.h
index 848c7c4..236cc2d 100644
--- a/src/cl_enqueue.h
+++ b/src/cl_enqueue.h
@@ -40,12 +40,13 @@ typedef enum {
EnqueueMapImage,
EnqueueUnmapMemObject,
EnqueueNDRangeKernel,
+ EnqueueNativeKernel,
EnqueueInvalid
} enqueue_type;
typedef struct _enqueue_data {
enqueue_type type; /* Command type */
- cl_mem mem_obj; /* Enqueue's cl_mem */
+ const cl_mem *mem_obj; /* Enqueue's cl_mem */
cl_command_queue queue; /* Command queue */
size_t offset; /* Mem object's offset */
size_t size; /* Size */
@@ -56,9 +57,9 @@ typedef struct _enqueue_data {
size_t slice_pitch; /* Slice pitch */
size_t host_row_pitch; /* Host row pitch, used in read/write buffer rect */
size_t host_slice_pitch; /* Host slice pitch, used in read/write buffer rect */
- cl_map_flags map_flags; /* Map flags */
const void * const_ptr; /* Const ptr for memory read */
- void * ptr; /* ptr for write and return value */
+ void * ptr; /* Ptr for write and return value */
+ void (*user_func)(void *); /* pointer to a host-callable user function */
} enqueue_data;
/* Do real enqueue commands */
diff --git a/src/cl_gt_device.h b/src/cl_gt_device.h
index feb4ab3..1eb790f 100644
--- a/src/cl_gt_device.h
+++ b/src/cl_gt_device.h
@@ -59,7 +59,7 @@
.endian_little = CL_TRUE,
.available = CL_TRUE,
.compiler_available = CL_FALSE, /* XXX */
-.execution_capabilities = CL_EXEC_KERNEL,
+.execution_capabilities = CL_EXEC_KERNEL | CL_EXEC_NATIVE_KERNEL,
.queue_properties = CL_QUEUE_PROFILING_ENABLE,
.platform = NULL, /* == intel_platform (set when requested) */
/* IEEE 754, XXX does IVB support CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT? */
--
1.8.1.2
More information about the Beignet
mailing list