[Beignet] [PATCH 4/5] Add openCL event support.

Yang Rong rong.r.yang at intel.com
Mon Aug 5 22:37:28 PDT 2013


Now use the defer execute to wait events.
If there is no user event waited, then using wait rendering to wait
GPU event complete and call the enqueue api immediately.
If there is the user events waited, then should prepare the the enqueue
data, and resume the enqueue when all user events that waited complete.
The achieve these, add the enqueue callback to user event, and add the all
user event and other wait event list to enqueue callback. When set user event
to complete, check all enqueue callbacks wait this event.

Now, clEnqueueMark/clEnqueueBarrier still not impletement, and clEnqueueMapBuffer
/clEnqueueMapImage is not consistency with spec.

Signed-off-by: Yang Rong <rong.r.yang at intel.com>
---
 src/cl_api.c                |  444 ++++++++++++++++++++++---------------------
 src/cl_command_queue_gen7.c |    2 -
 src/cl_context.h            |    2 +
 src/cl_event.c              |  375 +++++++++++++++++++++++++++++++++++-
 src/cl_event.h              |   66 ++++++-
 src/cl_internals.h          |    1 +
 src/cl_utils.h              |   14 +-
 7 files changed, 681 insertions(+), 223 deletions(-)

diff --git a/src/cl_api.c b/src/cl_api.c
index 146c010..034102b 100644
--- a/src/cl_api.c
+++ b/src/cl_api.c
@@ -1,4 +1,4 @@
-/* 
+/*
  * Copyright © 2012 Intel Corporation
  *
  * This library is free software; you can redistribute it and/or
@@ -18,9 +18,11 @@
  */
 
 #include "cl_platform_id.h"
-#include "cl_device_id.h" 
+#include "cl_device_id.h"
 #include "cl_context.h"
 #include "cl_command_queue.h"
+#include "cl_enqueue.h"
+#include "cl_event.h"
 #include "cl_program.h"
 #include "cl_kernel.h"
 #include "cl_mem.h"
@@ -36,6 +38,7 @@
 #include <stdio.h>
 #include <string.h>
 #include <assert.h>
+#include <unistd.h>
 
 #ifndef CL_VERSION_1_2
 #define CL_MAP_WRITE_INVALIDATE_REGION              (1 << 2)
@@ -59,6 +62,21 @@ typedef intptr_t cl_device_partition_property;
 	  return RET; \
 	} while(0)
 
+#define HANDLE_EVENTS(NUM, WAIT, QUEUE, EVENT, DATA, TYPE) \
+  do { \
+    cl_int status = cl_event_wait_events(NUM, WAIT); \
+    cl_event e; \
+    if(EVENT != NULL || status == CL_ENQUEUE_EXECUTE_DEFER) { \
+      e = cl_event_new(QUEUE->ctx, QUEUE, TYPE, EVENT!=NULL); \
+      if(EVENT != NULL) \
+        *EVENT = e; \
+      if(status == CL_ENQUEUE_EXECUTE_DEFER) { \
+        cl_event_new_enqueue_callback(e, DATA, NUM, WAIT); \
+        goto error; \
+      } \
+    } \
+  } while(0)
+
 static cl_int
 cl_check_device_type(cl_device_type device_type)
 {
@@ -987,8 +1005,20 @@ cl_int
 clWaitForEvents(cl_uint          num_events,
                 const cl_event * event_list)
 {
-  NOT_IMPLEMENTED;
-  return 0;
+  cl_int err = CL_SUCCESS;
+  cl_context ctx = NULL;
+
+  if(num_events > 0 && event_list)
+    ctx = event_list[0]->ctx;
+
+  TRY(cl_event_check_waitlist, num_events, event_list, NULL, ctx);
+
+  while(cl_event_wait_events(num_events, event_list) == CL_ENQUEUE_EXECUTE_DEFER) {
+    usleep(8000);       //sleep 8ms to wait other thread
+  }
+
+error:
+  return err;
 }
 
 cl_int
@@ -998,38 +1028,94 @@ clGetEventInfo(cl_event      event,
                void *        param_value,
                size_t *      param_value_size_ret)
 {
-  NOT_IMPLEMENTED;
-  return 0;
+  cl_int err = CL_SUCCESS;
+  CHECK_EVENT(event);
+
+  if (param_name == CL_EVENT_COMMAND_QUEUE) {
+    if(event->queue == NULL) {
+      param_value_size_ret = 0;
+      param_value = NULL;
+      return err;
+    }
+    FILL_GETINFO_RET (cl_command_queue, 1, &event->queue, CL_SUCCESS);
+  } else if (param_name == CL_EVENT_CONTEXT) {
+    FILL_GETINFO_RET (cl_context, 1, &event->ctx, CL_SUCCESS);
+  } else if (param_name == CL_EVENT_COMMAND_TYPE) {
+    FILL_GETINFO_RET (cl_command_type, 1, &event->type, CL_SUCCESS);
+  } else if (param_name == CL_EVENT_COMMAND_EXECUTION_STATUS) {
+    cl_event_update_status(event);
+    FILL_GETINFO_RET (cl_int, 1, &event->status, CL_SUCCESS);
+  } else if (param_name == CL_EVENT_REFERENCE_COUNT) {
+    cl_uint ref = event->ref_n;
+    FILL_GETINFO_RET (cl_int, 1, &ref, CL_SUCCESS);
+  } else {
+    return CL_INVALID_VALUE;
+  }
+
+error:
+  return err;
+
 }
 
 cl_event
 clCreateUserEvent(cl_context context,
                   cl_int *   errcode_ret)
 {
-  NOT_IMPLEMENTED;
-  return NULL;
+  cl_int err = CL_SUCCESS;
+  cl_event event = NULL;
+  CHECK_CONTEXT(context);
+
+  TRY_ALLOC(event, cl_event_new(context, NULL, CL_COMMAND_USER, CL_TRUE));
+
+error:
+  if(errcode_ret)
+    *errcode_ret = err;
+  return event;
 }
 
 cl_int
 clRetainEvent(cl_event  event)
 {
-  NOT_IMPLEMENTED;
-  return 0;
+  cl_int err = CL_SUCCESS;
+
+  CHECK_EVENT(event);
+  cl_event_add_ref(event);
+
+error:
+  return err;
 }
 
 cl_int
 clReleaseEvent(cl_event  event)
 {
-  NOT_IMPLEMENTED;
-  return 0;
+  cl_int err = CL_SUCCESS;
+
+  CHECK_EVENT(event);
+  cl_event_delete(event);
+
+error:
+  return err;
 }
 
 cl_int
 clSetUserEventStatus(cl_event    event,
                      cl_int      execution_status)
 {
-  NOT_IMPLEMENTED;
-  return 0;
+  cl_int err = CL_SUCCESS;
+
+  CHECK_EVENT(event);
+  if(execution_status > CL_COMPLETE) {
+    err = CL_INVALID_VALUE;
+    goto error;
+  }
+  if(event->status != CL_SUBMITTED) {
+    err = CL_INVALID_OPERATION;
+    goto error;
+  }
+
+  cl_event_set_status(event, execution_status);
+error:
+  return err;
 }
 
 cl_int
@@ -1038,8 +1124,20 @@ clSetEventCallback(cl_event     event,
                    void (CL_CALLBACK * pfn_notify) (cl_event, cl_int, void *),
                    void *       user_data)
 {
-  NOT_IMPLEMENTED;
-  return 0;
+  cl_int err = CL_SUCCESS;
+
+  CHECK_EVENT(event);
+  if((pfn_notify == NULL) ||
+    (command_exec_callback_type > CL_SUBMITTED) ||
+    (command_exec_callback_type < CL_COMPLETE)) {
+    err = CL_INVALID_VALUE;
+    goto error;
+  }
+  err = cl_event_set_callback(event, command_exec_callback_type, pfn_notify, user_data);
+
+error:
+  return err;
+
 }
 
 cl_int
@@ -1087,8 +1185,7 @@ clEnqueueReadBuffer(cl_command_queue command_queue,
                     cl_event *       event)
 {
   cl_int err = CL_SUCCESS;
-  void* src_ptr;
-
+  enqueue_data *data, defer_enqueue_data = { 0 };
   CHECK_QUEUE(command_queue);
   CHECK_MEM(buffer);
   if (command_queue->ctx != buffer->ctx) {
@@ -1109,15 +1206,20 @@ clEnqueueReadBuffer(cl_command_queue command_queue,
      goto error;
   }
 
-  if (!(src_ptr = cl_mem_map_auto(buffer))) {
-    err = CL_MAP_FAILURE;
-    goto error;
-  }
+  TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, buffer->ctx);
 
-  memcpy(ptr, (char*)src_ptr + offset, size);
+  data = &defer_enqueue_data;
+  data->type    = EnqueueReadBuffer;
+  data->mem_obj = buffer;
+  data->ptr     = ptr;
+  data->offset  = offset;
+  data->size    = size;
 
-  err = cl_mem_unmap_auto(buffer);
+  HANDLE_EVENTS(num_events_in_wait_list, event_wait_list,
+                command_queue, event, data, CL_COMMAND_READ_BUFFER);
 
+  err = cl_enqueue_handle(data);
+  if(event) cl_event_set_status(*event, CL_COMPLETE);
 error:
   return err;
 }
@@ -1154,7 +1256,7 @@ clEnqueueWriteBuffer(cl_command_queue    command_queue,
                      cl_event *          event)
 {
   cl_int err = CL_SUCCESS;
-  void* dst_ptr;
+  enqueue_data *data, no_wait_data = { 0 };
 
   CHECK_QUEUE(command_queue);
   CHECK_MEM(buffer);
@@ -1176,15 +1278,20 @@ clEnqueueWriteBuffer(cl_command_queue    command_queue,
     goto error;
   }
 
-  if (!(dst_ptr = cl_mem_map_auto(buffer))) {
-    err = CL_MAP_FAILURE;
-    goto error;
-  }
+  TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, buffer->ctx);
 
-  memcpy((char*)dst_ptr + offset, ptr, size);
+  data = &no_wait_data;
+  data->type      = EnqueueWriteBuffer;
+  data->mem_obj   = buffer;
+  data->const_ptr = ptr;
+  data->offset    = offset;
+  data->size      = size;
 
-  err = cl_mem_unmap_auto(buffer);
+  HANDLE_EVENTS(num_events_in_wait_list, event_wait_list,
+                command_queue, event, data, CL_COMMAND_WRITE_BUFFER);
 
+  err = cl_enqueue_handle(data);
+  if(event) cl_event_set_status(*event, CL_COMPLETE);
 error:
   return err;
 }
@@ -1257,7 +1364,7 @@ clEnqueueReadImage(cl_command_queue      command_queue,
                    cl_event *            event)
 {
   cl_int err = CL_SUCCESS;
-  void* src_ptr;
+  enqueue_data *data, no_wait_data = { 0 };
 
   CHECK_QUEUE(command_queue);
   CHECK_IMAGE(image);
@@ -1304,36 +1411,22 @@ clEnqueueReadImage(cl_command_queue      command_queue,
      goto error;
   }
 
-  if (!(src_ptr = cl_mem_map_auto(image))) {
-    err = CL_MAP_FAILURE;
-    goto error;
-  }
+  TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, image->ctx);
 
-  size_t offset = image->bpp*origin[0] + image->row_pitch*origin[1] + image->slice_pitch*origin[2];
-  src_ptr = (char*)src_ptr + offset;
+  data = &no_wait_data;
+  data->type        = EnqueueReadImage;
+  data->mem_obj     = image;
+  data->ptr         = ptr;
+  data->origin[0]   = origin[0];  data->origin[1] = origin[1];  data->origin[2] = origin[2];
+  data->region[0]   = region[0];  data->region[1] = region[1];  data->region[2] = region[2];
+  data->row_pitch   = row_pitch;
+  data->slice_pitch = slice_pitch;
 
-  if (!origin[0] && region[0] == image->w && row_pitch == image->row_pitch &&
-      (region[2] == 1 || (!origin[1] && region[1] == image->h && slice_pitch == image->slice_pitch)))
-  {
-    memcpy(ptr, src_ptr, region[2] == 1 ? row_pitch*region[1] : slice_pitch*region[2]);
-  }
-  else {
-    cl_uint y, z;
-    for (z = 0; z < region[2]; z++) {
-      const char* src = src_ptr;
-      char* dst = ptr;
-      for (y = 0; y < region[1]; y++) {
-	memcpy(dst, src, image->bpp*region[0]);
-	src += image->row_pitch;
-	dst += row_pitch;
-      }
-      src_ptr = (char*)src_ptr + image->slice_pitch;
-      ptr = (char*)ptr + slice_pitch;
-    }
-  }
-
-  err = cl_mem_unmap_auto(image);
+  HANDLE_EVENTS(num_events_in_wait_list, event_wait_list,
+                command_queue, event, data, CL_COMMAND_READ_IMAGE);
 
+  err = cl_enqueue_handle(data);
+  if(event) cl_event_set_status(*event, CL_COMPLETE);
 error:
   return err;
 }
@@ -1352,7 +1445,7 @@ clEnqueueWriteImage(cl_command_queue     command_queue,
                     cl_event *           event)
 {
   cl_int err = CL_SUCCESS;
-  void* dst_ptr;
+  enqueue_data *data, no_wait_data = { 0 };
 
   CHECK_QUEUE(command_queue);
   CHECK_IMAGE(image);
@@ -1399,36 +1492,22 @@ clEnqueueWriteImage(cl_command_queue     command_queue,
     goto error;
   }
 
-  if (!(dst_ptr = cl_mem_map_auto(image))) {
-    err = CL_MAP_FAILURE;
-    goto error;
-  }
-
-  size_t offset = image->bpp*origin[0] + image->row_pitch*origin[1] + image->slice_pitch*origin[2];
-  dst_ptr = (char*)dst_ptr + offset;
+  TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, image->ctx);
 
-  if (!origin[0] && region[0] == image->w && row_pitch == image->row_pitch &&
-      (region[2] == 1 || (!origin[1] && region[1] == image->h && slice_pitch == image->slice_pitch)))
-  {
-    memcpy(dst_ptr, ptr, region[2] == 1 ? row_pitch*region[1] : slice_pitch*region[2]);
-  }
-  else {
-    cl_uint y, z;
-    for (z = 0; z < region[2]; z++) {
-      const char* src = ptr;
-      char* dst = dst_ptr;
-      for (y = 0; y < region[1]; y++) {
-	memcpy(dst, src, image->bpp*region[0]);
-	src += row_pitch;
-	dst += image->row_pitch;
-      }
-      ptr = (char*)ptr + slice_pitch;
-      dst_ptr = (char*)dst_ptr + image->slice_pitch;
-    }
-  }
+  data = &no_wait_data;
+  data->type        = EnqueueWriteImage;
+  data->mem_obj     = image;
+  data->const_ptr   = ptr;
+  data->origin[0]   = origin[0];  data->origin[1] = origin[1];  data->origin[2] = origin[2];
+  data->region[0]   = region[0];  data->region[1] = region[1];  data->region[2] = region[2];
+  data->row_pitch   = row_pitch;
+  data->slice_pitch = slice_pitch;
 
-  err = cl_mem_unmap_auto(image);
+  HANDLE_EVENTS(num_events_in_wait_list, event_wait_list,
+                command_queue, event, data, CL_COMMAND_WRITE_IMAGE);
 
+  err = cl_enqueue_handle(data);
+  if(event) cl_event_set_status(*event, CL_COMPLETE);
 error:
   return err;
 }
@@ -1490,10 +1569,8 @@ clEnqueueMapBuffer(cl_command_queue  command_queue,
                    cl_event *        event,
                    cl_int *          errcode_ret)
 {
-  void *ptr = NULL;
-  void *mem_ptr = NULL;
   cl_int err = CL_SUCCESS;
-  int slot = -1;
+  enqueue_data *data, no_wait_data = { 0 };
 
   CHECK_QUEUE(command_queue);
   CHECK_MEM(buffer);
@@ -1519,73 +1596,24 @@ clEnqueueMapBuffer(cl_command_queue  command_queue,
     goto error;
   }
 
-  if (!(ptr = cl_mem_map_auto(buffer))) {
-    err = CL_MAP_FAILURE;
-    goto error;
-  }
-
-  ptr = (char*)ptr + offset;
-
-  if(buffer->flags & CL_MEM_USE_HOST_PTR) {
-    assert(buffer->host_ptr);
-    memcpy(buffer->host_ptr + offset, ptr, size);
-    mem_ptr = buffer->host_ptr + offset;
-  } else {
-    mem_ptr = ptr;
-  }
-
-  /* Record the mapped address. */
-  if (!buffer->mapped_ptr_sz) {
-    buffer->mapped_ptr_sz = 16;
-    buffer->mapped_ptr = (cl_mapped_ptr *)malloc(
-          sizeof(cl_mapped_ptr) * buffer->mapped_ptr_sz);
-    if (!buffer->mapped_ptr) {
-      cl_mem_unmap_auto (buffer);
-      err = CL_OUT_OF_HOST_MEMORY;
-      ptr = NULL;
-      goto error;
-    }
-
-    memset(buffer->mapped_ptr, 0, buffer->mapped_ptr_sz * sizeof(cl_mapped_ptr));
-    slot = 0;
-  } else {
-    int i = 0;
-    for (; i < buffer->mapped_ptr_sz; i++) {
-      if (buffer->mapped_ptr[i].ptr == NULL) {
-        slot = i;
-        break;
-      }
-    }
+  TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, buffer->ctx);
 
-    if (i == buffer->mapped_ptr_sz) {
-      cl_mapped_ptr *new_ptr = (cl_mapped_ptr *)malloc(
-          sizeof(cl_mapped_ptr) * buffer->mapped_ptr_sz * 2);
-      if (!new_ptr) {
-        cl_mem_unmap_auto (buffer);
-        err = CL_OUT_OF_HOST_MEMORY;
-        ptr = NULL;
-        goto error;
-      }
-      memset(new_ptr, 0, 2 * buffer->mapped_ptr_sz * sizeof(cl_mapped_ptr));
-      memcpy(new_ptr, buffer->mapped_ptr,
-             buffer->mapped_ptr_sz * sizeof(cl_mapped_ptr));
-      slot = buffer->mapped_ptr_sz;
-      buffer->mapped_ptr_sz *= 2;
-      free(buffer->mapped_ptr);
-      buffer->mapped_ptr = new_ptr;
-    }
-  }
+  data = &no_wait_data;
+  data->type        = EnqueueMapBuffer;
+  data->mem_obj     = buffer;
+  data->offset      = offset;
+  data->size        = size;
+  data->map_flags   = map_flags;
 
-  assert(slot != -1);
-  buffer->mapped_ptr[slot].ptr = mem_ptr;
-  buffer->mapped_ptr[slot].v_ptr = ptr;
-  buffer->mapped_ptr[slot].size = size;
-  buffer->map_ref++;
+  HANDLE_EVENTS(num_events_in_wait_list, event_wait_list,
+                command_queue, event, data, CL_COMMAND_MAP_BUFFER);
 
+  err = cl_enqueue_handle(data);
+  if(event) cl_event_set_status(*event, CL_COMPLETE);
 error:
   if (errcode_ret)
     *errcode_ret = err;
-  return mem_ptr;
+  return data->ptr;
 }
 
 void *
@@ -1602,8 +1630,8 @@ clEnqueueMapImage(cl_command_queue   command_queue,
                   cl_event *         event,
                   cl_int *           errcode_ret)
 {
-  void *ptr = NULL;
   cl_int err = CL_SUCCESS;
+  enqueue_data *data, no_wait_data = { 0 };
 
   CHECK_QUEUE(command_queue);
   CHECK_IMAGE(image);
@@ -1638,18 +1666,26 @@ clEnqueueMapImage(cl_command_queue   command_queue,
     goto error;
   }
 
-  if (!(ptr = cl_mem_map_auto(image))) {
-    err = CL_MAP_FAILURE;
-    goto error;
-  }
+  TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, image->ctx);
 
-  size_t offset = image->bpp*origin[0] + image->row_pitch*origin[1] + image->slice_pitch*origin[2];
-  ptr = (char*)ptr + offset;
+  data = &no_wait_data;
+  data->type        = EnqueueMapImage;
+  data->mem_obj     = image;
+  data->origin[0]   = origin[0];  data->origin[1] = origin[1];  data->origin[2] = origin[2];
+  data->region[0]   = region[0];  data->region[1] = region[1];  data->region[2] = region[2];
+  data->row_pitch   = *image_row_pitch;
+  data->slice_pitch = *image_slice_pitch;
+  data->map_flags   = map_flags;
 
+  HANDLE_EVENTS(num_events_in_wait_list, event_wait_list,
+                command_queue, event, data, CL_COMMAND_MAP_IMAGE);
+
+  err = cl_enqueue_handle(data);
+  if(event) cl_event_set_status(*event, CL_COMPLETE);
 error:
   if (errcode_ret)
     *errcode_ret = err;
-  return ptr;
+  return data->ptr; //TODO: map and unmap first
 }
 
 cl_int
@@ -1661,9 +1697,7 @@ clEnqueueUnmapMemObject(cl_command_queue  command_queue,
                         cl_event *        event)
 {
   cl_int err = CL_SUCCESS;
-  int i;
-  size_t mapped_size = 0;
-  void * v_ptr = NULL;
+  enqueue_data *data, no_wait_data = { 0 };
 
   CHECK_QUEUE(command_queue);
   CHECK_MEM(memobj);
@@ -1672,56 +1706,18 @@ clEnqueueUnmapMemObject(cl_command_queue  command_queue,
     goto error;
   }
 
-  assert(memobj->mapped_ptr_sz >= memobj->map_ref);
-  INVALID_VALUE_IF(!mapped_ptr);
-  for (i = 0; i < memobj->mapped_ptr_sz; i++) {
-    if (memobj->mapped_ptr[i].ptr == mapped_ptr) {
-      memobj->mapped_ptr[i].ptr = NULL;
-      mapped_size = memobj->mapped_ptr[i].size;
-      v_ptr = memobj->mapped_ptr[i].v_ptr;
-      memobj->mapped_ptr[i].size = 0;
-      memobj->mapped_ptr[i].v_ptr = NULL;
-      memobj->map_ref--;
-      break;
-    }
-  }
-  /* can not find a mapped address? */
-  INVALID_VALUE_IF(i == memobj->mapped_ptr_sz);
-
-  if (memobj->flags & CL_MEM_USE_HOST_PTR) {
-    assert(mapped_ptr >= memobj->host_ptr &&
-      mapped_ptr + mapped_size <= memobj->host_ptr + memobj->size);
-    /* Sync the data. */
-    memcpy(v_ptr, mapped_ptr, mapped_size);
-  } else {
-    assert(v_ptr == mapped_ptr);
-  }
-
-  cl_mem_unmap_auto(memobj);
+  TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, memobj->ctx);
 
-  /* shrink the mapped slot. */
-  if (memobj->mapped_ptr_sz/2 > memobj->map_ref) {
-    int j = 0;
-    cl_mapped_ptr *new_ptr = (cl_mapped_ptr *)malloc(
-	sizeof(cl_mapped_ptr) * (memobj->mapped_ptr_sz/2));
-    if (!new_ptr) {
-      /* Just do nothing. */
-      goto error;
-    }
-    memset(new_ptr, 0, (memobj->mapped_ptr_sz/2) * sizeof(cl_mapped_ptr));
+  data = &no_wait_data;
+  data->type        = EnqueueUnmapMemObject;
+  data->mem_obj     = memobj;
+  data->ptr         = mapped_ptr;
 
-    for (i = 0; i < memobj->mapped_ptr_sz; i++) {
-      if (memobj->mapped_ptr[i].ptr) {
-        new_ptr[j] = memobj->mapped_ptr[i];
-        j++;
-        assert(j < memobj->mapped_ptr_sz/2);
-      }
-    }
-    memobj->mapped_ptr_sz = memobj->mapped_ptr_sz/2;
-    free(memobj->mapped_ptr);
-    memobj->mapped_ptr = new_ptr;
-  }
+  HANDLE_EVENTS(num_events_in_wait_list, event_wait_list,
+                command_queue, event, data, CL_COMMAND_UNMAP_MEM_OBJECT);
 
+  err = cl_enqueue_handle(data);
+  if(event) cl_event_set_status(*event, CL_COMPLETE);
 error:
   return err;
 }
@@ -1742,6 +1738,7 @@ clEnqueueNDRangeKernel(cl_command_queue  command_queue,
   size_t fixed_local_sz[] = {1,1,1};
   cl_int err = CL_SUCCESS;
   cl_uint i;
+  enqueue_data *data, no_wait_data = { 0 };
 
   CHECK_QUEUE(command_queue);
   CHECK_KERNEL(kernel);
@@ -1774,8 +1771,8 @@ clEnqueueNDRangeKernel(cl_command_queue  command_queue,
     }
 
   /* Local sizes must be non-null and divide global sizes */
-  if (local_work_size != NULL) 
-    for (i = 0; i < work_dim; ++i) 
+  if (local_work_size != NULL)
+    for (i = 0; i < work_dim; ++i)
       if (UNLIKELY(local_work_size[i] == 0 || global_work_size[i] % local_work_size[i])) {
         err = CL_INVALID_WORK_GROUP_SIZE;
         goto error;
@@ -1789,9 +1786,9 @@ clEnqueueNDRangeKernel(cl_command_queue  command_queue,
   }
 
   /* XXX No event right now */
-  FATAL_IF(num_events_in_wait_list > 0, "Events are not supported");
-  FATAL_IF(event_wait_list != NULL, "Events are not supported");
-  FATAL_IF(event != NULL, "Events are not supported");
+  //FATAL_IF(num_events_in_wait_list > 0, "Events are not supported");
+  //FATAL_IF(event_wait_list != NULL, "Events are not supported");
+  //FATAL_IF(event != NULL, "Events are not supported");
 
   if (local_work_size != NULL)
     for (i = 0; i < work_dim; ++i)
@@ -1810,6 +1807,16 @@ clEnqueueNDRangeKernel(cl_command_queue  command_queue,
                                   fixed_global_off,
                                   fixed_global_sz,
                                   fixed_local_sz);
+  if(err != CL_SUCCESS)
+    goto error;
+
+  data = &no_wait_data;
+  data->type = EnqueueNDRangeKernel;
+  data->queue = command_queue;
+  HANDLE_EVENTS(num_events_in_wait_list, event_wait_list,
+                command_queue, event, data, CL_COMMAND_NDRANGE_KERNEL);
+
+  err = cl_command_queue_flush(command_queue);
 
 error:
   return err;
@@ -1855,8 +1862,12 @@ clEnqueueWaitForEvents(cl_command_queue  command_queue,
                        cl_uint           num_events,
                        const cl_event *  event_list)
 {
-  NOT_IMPLEMENTED;
-  return 0;
+  cl_int err = CL_SUCCESS;
+  CHECK_QUEUE(command_queue);
+  err = clWaitForEvents(num_events, event_list);
+
+error:
+  return err;
 }
 
 cl_int
@@ -1864,6 +1875,7 @@ clEnqueueBarrier(cl_command_queue  command_queue)
 {
   NOT_IMPLEMENTED;
   return 0;
+  //return clFinish(command_queue);
 }
 
 #define EXTFUNC(x)                      \
diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c
index 048595c..3cc01ba 100644
--- a/src/cl_command_queue_gen7.c
+++ b/src/cl_command_queue_gen7.c
@@ -258,8 +258,6 @@ cl_command_queue_ND_range_gen7(cl_command_queue queue,
 
   /* Close the batch buffer and submit it */
   cl_gpgpu_batch_end(gpgpu, 0);
-  cl_gpgpu_flush(gpgpu);
-
 error:
   return err;
 }
diff --git a/src/cl_context.h b/src/cl_context.h
index 80bf777..718d589 100644
--- a/src/cl_context.h
+++ b/src/cl_context.h
@@ -62,10 +62,12 @@ struct _cl_context {
   cl_program programs;              /* All programs currently allocated */
   cl_mem buffers;                   /* All memory object currently allocated */
   cl_sampler samplers;              /* All sampler object currently allocated */
+  cl_event   events;                /* All event object currently allocated */
   pthread_mutex_t queue_lock;       /* To allocate and deallocate queues */
   pthread_mutex_t program_lock;     /* To allocate and deallocate programs */
   pthread_mutex_t buffer_lock;      /* To allocate and deallocate buffers */
   pthread_mutex_t sampler_lock;     /* To allocate and deallocate samplers */
+  pthread_mutex_t event_lock;       /* To allocate and deallocate events */
   uint32_t ver;                     /* Gen version */
   struct _cl_context_prop props;
   cl_context_properties * prop_user; /* a copy of user passed context properties when create context */
diff --git a/src/cl_event.c b/src/cl_event.c
index 6539b05..5a7bd35 100644
--- a/src/cl_event.c
+++ b/src/cl_event.c
@@ -1,4 +1,4 @@
-/* 
+/*
  * Copyright © 2012 Intel Corporation
  *
  * This library is free software; you can redistribute it and/or
@@ -14,7 +14,376 @@
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library. If not, see <http://www.gnu.org/licenses/>.
  *
- * Author: Benjamin Segovia <benjamin.segovia at intel.com>
+ * Author: Rong Yang <rong.r.yang at intel.com>
  */
-struct empty {int dummy;};
 
+#include "cl_event.h"
+#include "cl_context.h"
+#include "cl_utils.h"
+#include "cl_alloc.h"
+#include "cl_khr_icd.h"
+#include "cl_kernel.h"
+
+#include <assert.h>
+#include <stdio.h>
+
+cl_event cl_event_new(cl_context ctx, cl_command_queue queue, cl_command_type type, cl_bool emplict)
+{
+  cl_event event = NULL;
+
+  /* Allocate and inialize the structure itself */
+  TRY_ALLOC_NO_ERR (event, CALLOC(struct _cl_event));
+  SET_ICD(event->dispatch)
+  event->magic = CL_MAGIC_EVENT_HEADER;
+  event->ref_n = 1;
+
+  /* Append the event in the context event list */
+  pthread_mutex_lock(&ctx->event_lock);
+    event->next = ctx->events;
+    if (ctx->events != NULL)
+      ctx->events->prev = event;
+    ctx->events = event;
+  pthread_mutex_unlock(&ctx->event_lock);
+  event->ctx   = ctx;
+  cl_context_add_ref(ctx);
+
+  /* Initialize all members and create GPGPU event object */
+  event->queue = queue;
+  event->type  = type;
+  event->gpgpu_event = NULL;
+  if(type == CL_COMMAND_USER) {
+    event->status = CL_SUBMITTED;
+  }
+  else {
+    event->status = CL_QUEUED;
+    event->gpgpu_event = cl_gpgpu_event_new(queue->gpgpu);
+  }
+  cl_event_add_ref(event);       //dec when complete
+  event->user_cb = NULL;
+  event->enqueue_cb = NULL;
+  event->waits_head = NULL;
+  event->emplict = emplict;
+
+exit:
+  return event;
+error:
+  cl_event_delete(event);
+  event = NULL;
+  goto exit;
+}
+
+void cl_event_delete(cl_event event)
+{
+  if (UNLIKELY(event == NULL))
+    return;
+
+  if (atomic_dec(&event->ref_n) > 1)
+    return;
+
+  /* Call all user's callback if haven't execute */
+  user_callback *cb = event->user_cb;
+  while(event->user_cb) {
+    cb = event->user_cb;
+    if(cb->executed == CL_FALSE) {
+      cb->pfn_notify(event, event->status, cb->user_data);
+    }
+    event->user_cb = cb->next;
+    cl_free(cb);
+  }
+
+  /* delete gpgpu event object */
+  if(event->gpgpu_event)
+    cl_gpgpu_event_delete(event->gpgpu_event);
+
+  /* Remove it from the list */
+  assert(event->ctx);
+  pthread_mutex_lock(&event->ctx->event_lock);
+    if (event->prev)
+      event->prev->next = event->next;
+    if (event->next)
+      event->next->prev = event->prev;
+    if (event->prev == NULL && event->next == NULL)
+      event->ctx->events = NULL;
+  pthread_mutex_unlock(&event->ctx->event_lock);
+  cl_context_delete(event->ctx);
+
+  cl_free(event);
+}
+
+void cl_event_add_ref(cl_event event)
+{
+  assert(event);
+  atomic_inc(&event->ref_n);
+}
+
+cl_int cl_event_set_callback(cl_event event ,
+                                  cl_int command_exec_callback_type,
+                                  EVENT_NOTIFY pfn_notify,
+                                  void* user_data)
+{
+  assert(event);
+  assert(pfn_notify);
+
+  cl_int err = CL_SUCCESS;
+  user_callback *cb;
+  TRY_ALLOC(cb, CALLOC(user_callback));
+
+  cb->pfn_notify  = pfn_notify;
+  cb->user_data   = user_data;
+  cb->status      = command_exec_callback_type;
+  cb->executed    = CL_FALSE;
+
+  cb->next        = event->user_cb;
+  event->user_cb  = cb;
+
+exit:
+  return err;
+error:
+  err = CL_OUT_OF_HOST_MEMORY;
+  cl_free(cb);
+  goto exit;
+};
+
+cl_int cl_event_check_waitlist(cl_uint num_events_in_wait_list,
+                                    const cl_event *event_wait_list,
+                                    cl_event *event,cl_context ctx)
+{
+  cl_int err = CL_SUCCESS;
+  cl_int i;
+  /* check the event_wait_list and num_events_in_wait_list */
+  if((event_wait_list == NULL) &&
+     (num_events_in_wait_list > 0))
+    goto exit;
+
+  if ((event_wait_list != NULL) &&
+      (num_events_in_wait_list == 0)){
+    goto error;
+  }
+
+  /* check the event and context */
+  for(i=0; i<num_events_in_wait_list; i++) {
+    CHECK_EVENT(event_wait_list[i]);
+    if(event_wait_list[i]->status < CL_COMPLETE) {
+      err = CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST;
+      goto exit;
+    }
+    if(event && *event == event_wait_list[i])
+      goto error;
+    if(event_wait_list[i]->ctx != ctx)
+      goto error;
+  }
+
+exit:
+  return err;
+error:
+  err = CL_INVALID_EVENT_WAIT_LIST;  //reset error
+  goto exit;
+}
+
+cl_int cl_event_wait_events(cl_uint num_events_in_wait_list,
+                          const cl_event *event_wait_list)
+{
+  cl_int i, j;
+  /* Check whether wait user events */
+  for(i=0; i<num_events_in_wait_list; i++) {
+    if(event_wait_list[i]->status <= CL_COMPLETE)
+      continue;
+
+    /* Need wait on user event, return and do enqueue defer */
+    if((event_wait_list[i]->type == CL_COMMAND_USER) ||
+       (event_wait_list[i]->enqueue_cb &&
+       (event_wait_list[i]->enqueue_cb->wait_user_events != NULL))){
+      for(j=0; j<num_events_in_wait_list; j++)
+        cl_event_add_ref(event_wait_list[j]);  //add defer enqueue's wait event reference
+      return CL_ENQUEUE_EXECUTE_DEFER;
+    }
+  }
+
+  /* Non user events or all user event finished, wait all enqueue events finish */
+  for(i=0; i<num_events_in_wait_list; i++) {
+    if(event_wait_list[i]->status <= CL_COMPLETE)
+      continue;
+
+    //enqueue callback haven't finish, in another thread, wait
+    if(event_wait_list[i]->enqueue_cb != NULL)
+      return CL_ENQUEUE_EXECUTE_DEFER;
+    cl_gpgpu_event_update_status(event_wait_list[i]->gpgpu_event, 1);
+    cl_event_set_status(event_wait_list[i], CL_COMPLETE);  //Execute user's callback
+  }
+  return CL_ENQUEUE_EXECUTE_IMM;
+}
+
+void cl_event_new_enqueue_callback(cl_event event,
+                                            enqueue_data *data,
+                                            cl_uint num_events_in_wait_list,
+                                            const cl_event *event_wait_list)
+{
+  enqueue_callback *cb, *node;
+  user_event *user_events, *u_ev;
+  cl_int i;
+
+  /* Allocate and inialize the structure itself */
+  TRY_ALLOC_NO_ERR (cb, CALLOC(enqueue_callback));
+  cb->num_events = num_events_in_wait_list;
+  cb->wait_list = event_wait_list;
+  cb->event = event;
+  cb->next = NULL;
+  cb->wait_user_events = NULL;
+
+  /* Find out all user events that events in event_wait_list wait */
+  for(i=0; i<num_events_in_wait_list; i++) {
+    if(event_wait_list[i]->status <= CL_COMPLETE)
+      continue;
+
+    if(event_wait_list[i]->type == CL_COMMAND_USER) {
+      /* Insert the enqueue_callback to user event list */
+      node = event_wait_list[i]->waits_head;
+      if(node == NULL)
+        event_wait_list[i]->waits_head = cb;
+      else {
+        while((node != cb) && node->next)
+          node = node->next;
+        if(node == cb)   //wait on dup user event
+          continue;
+        node->next = cb;
+      }
+      /* Insert the user event to enqueue_callback's wait_user_events */
+      TRY_ALLOC_NO_ERR (u_ev, CALLOC(user_event));
+      u_ev->event = event_wait_list[i];
+      u_ev->next = cb->wait_user_events;
+      cb->wait_user_events = u_ev;
+    } else if(event_wait_list[i]->enqueue_cb != NULL) {
+      user_events = event_wait_list[i]->enqueue_cb->wait_user_events;
+      while(user_events != NULL) {
+        /* Insert the enqueue_callback to user event's  waits_tail */
+        node = user_events->event->waits_head;
+        while((node != cb) && node->next)
+          node = node->next;
+        if(node == cb) {  //wait on dup user event
+          user_events = user_events->next;
+          continue;
+        }
+        node->next = cb;
+
+        /* Insert the user event to enqueue_callback's wait_user_events */
+        TRY_ALLOC_NO_ERR (u_ev, CALLOC(user_event));
+        u_ev->event = user_events->event;
+        u_ev->next = cb->wait_user_events;
+        cb->wait_user_events = u_ev;
+        user_events = user_events->next;
+      }
+    }
+  }
+  if(data->queue != NULL) {
+    assert(event->gpgpu_event);
+    cl_gpgpu_event_pending(data->queue->gpgpu, event->gpgpu_event);
+    data->ptr = (void *)event->gpgpu_event;
+  }
+  cb->data = *data;
+  event->enqueue_cb = cb;
+
+exit:
+  return;
+error:
+  if(cb) {
+    while(cb->wait_user_events) {
+      u_ev = cb->wait_user_events;
+      cb->wait_user_events = cb->wait_user_events->next;
+      cl_free(u_ev);
+    }
+    cl_free(cb);
+  }
+  goto exit;
+}
+
+void cl_event_set_status(cl_event event, cl_int status)
+{
+  user_callback *user_cb;
+  user_event    *u_ev, *u_ev_next;
+  cl_int ret, i;
+  cl_event evt;
+
+  pthread_mutex_lock(&event->ctx->event_lock);
+  if(status >= event->status) {
+   return;
+  }
+
+  if(status <= CL_COMPLETE) {
+    if(event->enqueue_cb) {
+      for(i=0; i<event->enqueue_cb->num_events; i++)
+        cl_event_delete(event->enqueue_cb->wait_list[i]);
+
+      cl_enqueue_handle(&event->enqueue_cb->data);
+      cl_free(event->enqueue_cb);
+      event->enqueue_cb = NULL;
+    }
+    cl_event_delete(event);
+  }
+  event->status = status;
+  pthread_mutex_unlock(&event->ctx->event_lock);
+
+  /* Call user callback */
+  user_cb = event->user_cb;
+  while(user_cb) {
+    if(user_cb->status >= status) {
+      user_cb->pfn_notify(event, event->status, user_cb->user_data);
+      user_cb->executed = CL_TRUE;
+    }
+    user_cb = user_cb->next;
+  }
+
+  if(event->type != CL_COMMAND_USER)
+    return;
+
+  /* Check all defer enqueue */
+  enqueue_callback *cb, *enqueue_cb = event->waits_head;
+  while(enqueue_cb) {
+    /* Remove this user event in enqueue_cb */
+    while(enqueue_cb->wait_user_events &&
+          enqueue_cb->wait_user_events->event == event) {
+      u_ev = enqueue_cb->wait_user_events;
+      enqueue_cb->wait_user_events = enqueue_cb->wait_user_events->next;
+      cl_free(u_ev);
+    }
+
+    u_ev = enqueue_cb->wait_user_events;
+    while(u_ev) {
+      u_ev_next = u_ev->next;
+      if(u_ev_next && u_ev_next->event == event) {
+        u_ev->next = u_ev_next->next;
+        cl_free(u_ev_next);
+      } else
+        u_ev->next = u_ev_next;
+    }
+
+    /* Still wait on other user events */
+    if(enqueue_cb->wait_user_events != NULL) {
+      enqueue_cb = enqueue_cb->next;
+      continue;
+    }
+
+    /* All user events complete, now wait enqueue events */
+    ret = cl_event_wait_events(enqueue_cb->num_events, enqueue_cb->wait_list);
+    assert(ret != CL_ENQUEUE_EXECUTE_DEFER);
+
+    cb = enqueue_cb;
+    enqueue_cb = enqueue_cb->next;
+
+    /* Call the pending operation */
+    evt = cb->event;
+    cl_event_set_status(cb->event, CL_COMPLETE);
+    if(cb->event->emplict == CL_FALSE) {
+      cl_event_delete(evt);
+    }
+  }
+  event->waits_head = NULL;
+}
+
+void cl_event_update_status(cl_event event)
+{
+  if(event->status <= CL_COMPLETE)
+    return;
+  if((event->gpgpu_event) &&
+     (cl_gpgpu_event_update_status(event->gpgpu_event, 0)))
+    cl_event_set_status(event, CL_COMPLETE);
+}
diff --git a/src/cl_event.h b/src/cl_event.h
index 23378e8..c921cb2 100644
--- a/src/cl_event.h
+++ b/src/cl_event.h
@@ -1,4 +1,4 @@
-/* 
+/*
  * Copyright © 2012 Intel Corporation
  *
  * This library is free software; you can redistribute it and/or
@@ -20,9 +20,73 @@
 #ifndef __CL_EVENT_H__
 #define __CL_EVENT_H__
 
+#include <semaphore.h>
+
+#include "cl_enqueue.h"
+#include "cl_internals.h"
+#include "cl_driver.h"
+#include "CL/cl.h"
+
+#define CL_ENQUEUE_EXECUTE_IMM   0
+#define CL_ENQUEUE_EXECUTE_DEFER 1
+
+typedef struct _user_event {
+  cl_event            event;   /* The user event */
+  struct _user_event* next;    /* Next user event in list */
+} user_event;
+
+typedef struct _enqueue_callback {
+  cl_event           event;            /* The event relative this enqueue callback */
+  enqueue_data       data;             /* Hold all enqueue callback's infomation */
+  cl_uint            num_events;       /* num events in wait list */
+  const cl_event*    wait_list;        /* All event wait list this callback wait on */
+  user_event*        wait_user_events; /* The head of user event list the callback wait on */
+  struct _enqueue_callback*  next;     /* The  next enqueue callback in wait list */
+} enqueue_callback;
+
+typedef void (CL_CALLBACK *EVENT_NOTIFY)(cl_event event, cl_int event_command_exec_status, void *user_data);
+
+typedef struct _user_callback {
+  cl_int            status;     /* The execution status */
+  cl_bool           executed;   /* Indicat the callback function been called or not */
+  EVENT_NOTIFY      pfn_notify; /* Callback function */
+  void*             user_data;  /* Callback user data */
+  struct _user_callback*    next;       /* Next event callback in list */
+} user_callback;
+
 struct _cl_event {
   DEFINE_ICD(dispatch)
+  uint64_t           magic;       /* To identify it as a sampler object */
+  volatile int       ref_n;       /* We reference count this object */
+  cl_context         ctx;         /* The context associated with event */
+  cl_event           prev, next;  /* We chain the memory buffers together */
+  cl_command_queue   queue;       /* The command queue associated with event */
+  cl_command_type    type;        /* The command type associated with event */
+  cl_int             status;      /* The execution status */
+  cl_gpgpu_event     gpgpu_event; /* The event object communicate with hardware */
+  user_callback*     user_cb;     /* The event callback functions */
+  enqueue_callback*  enqueue_cb;  /* This event's enqueue */
+  enqueue_callback*  waits_head;  /* The head of enqueues list wait on this event */
+  cl_bool            emplict;     /* Identify this event whether created by api emplict*/
 };
 
+/* Create a new event object */
+cl_event cl_event_new(cl_context, cl_command_queue, cl_command_type, cl_bool);
+/* Unref the object and delete it if no more reference on it */
+void cl_event_delete(cl_event);
+/* Add one more reference to this object */
+void cl_event_add_ref(cl_event);
+/* Rigister a user callback function for specific commond execution status */
+cl_int cl_event_set_callback(cl_event, cl_int, EVENT_NOTIFY, void *);
+/* Check events wait list for enqueue commonds */
+cl_int cl_event_check_waitlist(cl_uint, const cl_event *, cl_event *, cl_context);
+/* Wait the all events in wait list complete */
+cl_int cl_event_wait_events(cl_uint, const cl_event *);
+/* New a enqueue suspend task */
+void cl_event_new_enqueue_callback(cl_event, enqueue_data *, cl_uint, const cl_event *);
+/* Set the event status and call all callbacks */
+void cl_event_set_status(cl_event, cl_int);
+/* Check and update event status */
+void cl_event_update_status(cl_event);
 #endif /* __CL_EVENT_H__ */
 
diff --git a/src/cl_internals.h b/src/cl_internals.h
index b2b25b2..693de1d 100644
--- a/src/cl_internals.h
+++ b/src/cl_internals.h
@@ -28,6 +28,7 @@
 #define CL_MAGIC_PROGRAM_HEADER   0x34560ab12789cdefLL
 #define CL_MAGIC_QUEUE_HEADER     0x83650a12b79ce4dfLL
 #define CL_MAGIC_SAMPLER_HEADER   0x686a0ecba79ce33fLL
+#define CL_MAGIC_EVENT_HEADER     0x8324a9c810ebf90fLL
 #define CL_MAGIC_MEM_HEADER       0x381a27b9ce6504dfLL
 #define CL_MAGIC_DEAD_HEADER      0xdeaddeaddeaddeadLL
 
diff --git a/src/cl_utils.h b/src/cl_utils.h
index 59b7a2b..bfe418d 100644
--- a/src/cl_utils.h
+++ b/src/cl_utils.h
@@ -1,4 +1,4 @@
-/* 
+/*
  * Copyright © 2012 Intel Corporation
  *
  * This library is free software; you can redistribute it and/or
@@ -147,6 +147,18 @@ do {                                                        \
   }                                                         \
 } while (0)
 
+#define CHECK_EVENT(EVENT)                                    \
+  do {                                                        \
+    if (UNLIKELY(EVENT == NULL)) {                            \
+      err = CL_INVALID_EVENT;                            \
+      goto error;                                             \
+    }                                                         \
+    if (UNLIKELY(EVENT->magic != CL_MAGIC_EVENT_HEADER)) {    \
+      err = CL_INVALID_EVENT;                                 \
+      goto error;                                             \
+    }                                                         \
+  } while (0)
+
 #define CHECK_SAMPLER(SAMPLER)                              \
 do {                                                        \
   if (UNLIKELY(SAMPLER == NULL)) {                          \
-- 
1.7.10.4



More information about the Beignet mailing list