[Beignet] [PATCH V2 3/6] Add some functions to support event in intel gpgpu.
Yang Rong
rong.r.yang at intel.com
Mon Aug 12 01:07:18 PDT 2013
Now the runtime prepares the command batch first. If the command cannot be
flushed immediately, it calls cl_gpgpu_event_pending to attach the command
batch to the event; when the command batch's wait events have completed, it
then calls cl_gpgpu_event_resume to flush.
Signed-off-by: Yang Rong <rong.r.yang at intel.com>
---
src/cl_driver.h | 31 ++++++++++++++
src/cl_driver_defs.c | 5 +++
src/cl_enqueue.c | 2 +-
src/intel/intel_gpgpu.c | 106 +++++++++++++++++++++++++++++++++++++++++++----
4 files changed, 134 insertions(+), 10 deletions(-)
diff --git a/src/cl_driver.h b/src/cl_driver.h
index 673985d..1a0ec38 100644
--- a/src/cl_driver.h
+++ b/src/cl_driver.h
@@ -46,6 +46,9 @@ typedef struct _cl_driver *cl_driver;
/* Encapsulates the gpgpu stream of commands */
typedef struct _cl_gpgpu *cl_gpgpu;
+/* Encapsulates the event of a command stream */
+typedef struct _cl_gpgpu_event *cl_gpgpu_event;
+
typedef struct _cl_context_prop *cl_context_prop;
typedef struct _cl_sampler *cl_sampler;
@@ -86,6 +89,13 @@ typedef enum cl_cache_control {
cc_llc_l3 = 0x3
} cl_cache_control;
+typedef enum gpu_command_status {
+ command_queued = 3,
+ command_submitted = 2,
+ command_running = 1,
+ command_complete = 0
+} gpu_command_status;
+
/* Use this structure to bind kernels in the gpgpu state */
typedef struct cl_gpgpu_kernel {
const char *name; /* kernel name and bo name */
@@ -179,6 +189,27 @@ extern cl_gpgpu_batch_end_cb *cl_gpgpu_batch_end;
typedef void (cl_gpgpu_flush_cb)(cl_gpgpu);
extern cl_gpgpu_flush_cb *cl_gpgpu_flush;
+/* Create a new event for a batch buffer */
+typedef cl_gpgpu_event (cl_gpgpu_event_new_cb)(cl_gpgpu);
+extern cl_gpgpu_event_new_cb *cl_gpgpu_event_new;
+
+/* Update the status of an event; block until complete if the wait flag is non-zero */
+typedef int (cl_gpgpu_event_update_status_cb)(cl_gpgpu_event, int);
+extern cl_gpgpu_event_update_status_cb *cl_gpgpu_event_update_status;
+
+/* Attach the current command batch to the event as pending */
+typedef void (cl_gpgpu_event_pending_cb)(cl_gpgpu, cl_gpgpu_event);
+extern cl_gpgpu_event_pending_cb *cl_gpgpu_event_pending;
+
+/* Flush the pending command batch attached to the event */
+typedef void (cl_gpgpu_event_resume_cb)(cl_gpgpu_event);
+extern cl_gpgpu_event_resume_cb *cl_gpgpu_event_resume;
+
+/* Delete the event and release its resources */
+typedef void (cl_gpgpu_event_delete_cb)(cl_gpgpu_event);
+extern cl_gpgpu_event_delete_cb *cl_gpgpu_event_delete;
+
+
/* Will spawn all threads */
typedef void (cl_gpgpu_walker_cb)(cl_gpgpu,
uint32_t simd_sz,
diff --git a/src/cl_driver_defs.c b/src/cl_driver_defs.c
index 9aa926e..e7412de 100644
--- a/src/cl_driver_defs.c
+++ b/src/cl_driver_defs.c
@@ -63,4 +63,9 @@ LOCAL cl_gpgpu_batch_end_cb *cl_gpgpu_batch_end = NULL;
LOCAL cl_gpgpu_flush_cb *cl_gpgpu_flush = NULL;
LOCAL cl_gpgpu_walker_cb *cl_gpgpu_walker = NULL;
LOCAL cl_gpgpu_bind_sampler_cb *cl_gpgpu_bind_sampler = NULL;
+LOCAL cl_gpgpu_event_new_cb *cl_gpgpu_event_new = NULL;
+LOCAL cl_gpgpu_event_update_status_cb *cl_gpgpu_event_update_status = NULL;
+LOCAL cl_gpgpu_event_pending_cb *cl_gpgpu_event_pending = NULL;
+LOCAL cl_gpgpu_event_resume_cb *cl_gpgpu_event_resume = NULL;
+LOCAL cl_gpgpu_event_delete_cb *cl_gpgpu_event_delete = NULL;
diff --git a/src/cl_enqueue.c b/src/cl_enqueue.c
index cfc7355..a112cc4 100644
--- a/src/cl_enqueue.c
+++ b/src/cl_enqueue.c
@@ -329,7 +329,7 @@ cl_int cl_enqueue_handle(enqueue_data* data)
case EnqueueUnmapMemObject:
return cl_enqueue_unmap_mem_object(data);
case EnqueueNDRangeKernel:
- //cl_gpgpu_event_resume((cl_gpgpu_event)data->ptr); //goto default
+ cl_gpgpu_event_resume((cl_gpgpu_event)data->ptr); //goto default
default:
return CL_SUCCESS;
}
diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c
index e553a55..c45d471 100644
--- a/src/intel/intel_gpgpu.c
+++ b/src/intel/intel_gpgpu.c
@@ -1,4 +1,4 @@
-/*
+/*
* Copyright © 2012 Intel Corporation
*
* This library is free software; you can redistribute it and/or
@@ -57,6 +57,12 @@ typedef struct surface_heap {
char surface[256][sizeof(gen6_surface_state_t)];
} surface_heap_t;
+typedef struct intel_event {
+ intel_batchbuffer_t *batch;
+ drm_intel_bo* buffer;
+ int status;
+} intel_event_t;
+
#define MAX_IF_DESC 32
/* We can bind only a limited number of buffers */
@@ -106,8 +112,8 @@ typedef struct intel_gpgpu intel_gpgpu_t;
static void
intel_gpgpu_sync(intel_gpgpu_t *gpgpu)
{
- if (gpgpu->batch->last_bo)
- drm_intel_bo_wait_rendering(gpgpu->batch->last_bo);
+ if (gpgpu->batch->last_bo)
+ drm_intel_bo_wait_rendering(gpgpu->batch->last_bo);
}
static void
@@ -225,7 +231,7 @@ intel_gpgpu_load_vfe_state(intel_gpgpu_t *gpgpu)
}
static void
-intel_gpgpu_load_constant_buffer(intel_gpgpu_t *gpgpu)
+intel_gpgpu_load_constant_buffer(intel_gpgpu_t *gpgpu)
{
BEGIN_BATCH(gpgpu->batch, 4);
OUT_BATCH(gpgpu->batch, CMD(2,0,1) | (4 - 2)); /* length-2 */
@@ -243,7 +249,7 @@ intel_gpgpu_load_constant_buffer(intel_gpgpu_t *gpgpu)
}
static void
-intel_gpgpu_load_idrt(intel_gpgpu_t *gpgpu)
+intel_gpgpu_load_idrt(intel_gpgpu_t *gpgpu)
{
BEGIN_BATCH(gpgpu->batch, 4);
OUT_BATCH(gpgpu->batch, CMD(2,0,2) | (4 - 2)); /* length-2 */
@@ -256,7 +262,7 @@ intel_gpgpu_load_idrt(intel_gpgpu_t *gpgpu)
static const uint32_t gpgpu_l3_config_reg1[] = {
0x00080040, 0x02040040, 0x00800040, 0x01000038,
0x02000030, 0x01000038, 0x00000038, 0x00000040,
- 0x0A140091, 0x09100091, 0x08900091, 0x08900091
+ 0x0A140091, 0x09100091, 0x08900091, 0x08900091
};
static const uint32_t gpgpu_l3_config_reg2[] = {
@@ -404,7 +410,7 @@ intel_gpgpu_state_init(intel_gpgpu_t *gpgpu,
/* surface state */
if(gpgpu->surface_heap_b.bo)
dri_bo_unreference(gpgpu->surface_heap_b.bo);
- bo = dri_bo_alloc(bufmgr,
+ bo = dri_bo_alloc(bufmgr,
"SURFACE_HEAP",
sizeof(surface_heap_t),
32);
@@ -416,7 +422,7 @@ intel_gpgpu_state_init(intel_gpgpu_t *gpgpu,
/* Interface descriptor remap table */
if(gpgpu->idrt_b.bo)
dri_bo_unreference(gpgpu->idrt_b.bo);
- bo = dri_bo_alloc(bufmgr,
+ bo = dri_bo_alloc(bufmgr,
"IDRT",
MAX_IF_DESC * sizeof(struct gen6_interface_descriptor),
32);
@@ -431,7 +437,7 @@ intel_gpgpu_state_init(intel_gpgpu_t *gpgpu,
/* sampler state */
if (gpgpu->sampler_state_b.bo)
dri_bo_unreference(gpgpu->sampler_state_b.bo);
- bo = dri_bo_alloc(gpgpu->drv->bufmgr,
+ bo = dri_bo_alloc(gpgpu->drv->bufmgr,
"SAMPLER_STATE",
GEN_MAX_SAMPLERS * sizeof(gen6_sampler_state_t),
32);
@@ -830,6 +836,83 @@ intel_gpgpu_walker(intel_gpgpu_t *gpgpu,
ADVANCE_BATCH(gpgpu->batch);
}
+static intel_event_t*
+intel_gpgpu_event_new(intel_gpgpu_t *gpgpu)
+{
+ intel_event_t *event = NULL;
+ TRY_ALLOC_NO_ERR (event, CALLOC(intel_event_t));
+
+ event->status = command_queued;
+ event->batch = NULL;
+ event->buffer = gpgpu->batch->buffer;
+ if(event->buffer != NULL)
+ drm_intel_bo_reference(event->buffer);
+
+exit:
+ return event;
+error:
+ cl_free(event);
+ event = NULL;
+ goto exit;
+}
+
+static int
+intel_gpgpu_event_update_status(intel_event_t *event, int wait)
+{
+ if(event->status == command_complete)
+ return event->status;
+
+ if (event->buffer &&
+ event->batch == NULL && //have flushed
+ !drm_intel_bo_busy(event->buffer)) {
+ event->status = command_complete;
+ drm_intel_bo_unreference(event->buffer);
+ event->buffer = NULL;
+ return event->status;
+ }
+
+ if(wait == 0)
+ return event->status;
+
+ if (event->buffer) {
+ drm_intel_bo_wait_rendering(event->buffer);
+ event->status = command_complete;
+ drm_intel_bo_unreference(event->buffer);
+ event->buffer = NULL;
+ }
+ return event->status;
+}
+
+static void
+intel_gpgpu_event_pending(intel_gpgpu_t *gpgpu, intel_event_t *event)
+{
+ assert(event->buffer); //This is gpu enqueue command
+ assert(event->batch == NULL); //This command hasn't been marked pending yet.
+ event->batch = intel_batchbuffer_new(gpgpu->drv);
+ assert(event->batch);
+ *event->batch = *gpgpu->batch;
+ if(event->batch->buffer)
+ drm_intel_bo_reference(event->batch->buffer);
+}
+
+static void
+intel_gpgpu_event_resume(intel_event_t *event)
+{
+ assert(event->batch); //This command has been marked pending.
+ intel_batchbuffer_flush(event->batch);
+ intel_batchbuffer_delete(event->batch);
+ event->batch = NULL;
+}
+
+static void
+intel_gpgpu_event_delete(intel_event_t *event)
+{
+ assert(event->batch == NULL); //This command must have been flushed.
+ if(event->buffer)
+ drm_intel_bo_unreference(event->buffer);
+ cl_free(event);
+}
+
LOCAL void
intel_set_gpgpu_callbacks(void)
{
@@ -851,5 +934,10 @@ intel_set_gpgpu_callbacks(void)
cl_gpgpu_walker = (cl_gpgpu_walker_cb *) intel_gpgpu_walker;
cl_gpgpu_bind_sampler = (cl_gpgpu_bind_sampler_cb *) intel_gpgpu_bind_sampler;
cl_gpgpu_set_scratch = (cl_gpgpu_set_scratch_cb *) intel_gpgpu_set_scratch;
+ cl_gpgpu_event_new = (cl_gpgpu_event_new_cb *)intel_gpgpu_event_new;
+ cl_gpgpu_event_update_status = (cl_gpgpu_event_update_status_cb *)intel_gpgpu_event_update_status;
+ cl_gpgpu_event_pending = (cl_gpgpu_event_pending_cb *)intel_gpgpu_event_pending;
+ cl_gpgpu_event_resume = (cl_gpgpu_event_resume_cb *)intel_gpgpu_event_resume;
+ cl_gpgpu_event_delete = (cl_gpgpu_event_delete_cb *)intel_gpgpu_event_delete;
}
--
1.7.10.4
More information about the Beignet
mailing list