[Beignet] [PATCH 5/6 OpenCL-1.2] Add the printf logic into the run time.

junyan.he at inbox.com junyan.he at inbox.com
Tue Jun 3 23:03:43 PDT 2014


From: Junyan He <junyan.he at linux.intel.com>

Signed-off-by: Junyan He <junyan.he at linux.intel.com>
---
 backend/src/gbe_bin_interpreter.cpp |   6 ++
 src/cl_command_queue.c              |  14 +++++
 src/cl_command_queue_gen7.c         |  24 ++++++++
 src/cl_driver.h                     |  28 ++++++++++
 src/cl_driver_defs.c                |   7 +++
 src/intel/intel_gpgpu.c             | 106 ++++++++++++++++++++++++++++++++++++
 6 files changed, 185 insertions(+)

diff --git a/backend/src/gbe_bin_interpreter.cpp b/backend/src/gbe_bin_interpreter.cpp
index bd160c3..19c98ab 100644
--- a/backend/src/gbe_bin_interpreter.cpp
+++ b/backend/src/gbe_bin_interpreter.cpp
@@ -21,6 +21,7 @@
 #include "sys/assert.cpp"
 #include "sys/platform.cpp"
 #include "ir/constant.cpp"
+#include "ir/printf.cpp"
 
 #pragma GCC diagnostic ignored "-Wunused-function"
 #pragma GCC diagnostic ignored "-Wunused-variable"
@@ -62,6 +63,11 @@ struct BinInterpCallBackInitializer
     gbe_kernel_get_image_data = gbe::kernelGetImageData;
     gbe_get_image_base_index = gbe::getImageBaseIndex;
     gbe_set_image_base_index_interp = gbe::setImageBaseIndex;
+    gbe_get_printf_num = gbe::kernelGetPrintfNum;
+    gbe_dup_printfset = gbe::kernelDupPrintfSet;
+    gbe_get_printf_sizeof_size = gbe::kernelGetPrintfSizeOfSize;
+    gbe_release_printf_info = gbe::kernelReleasePrintfSet;
+    gbe_output_printf = gbe::kernelOutputPrintf;
   }
 
   ~BinInterpCallBackInitializer() {
diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c
index c754ad5..aa9e489 100644
--- a/src/cl_command_queue.c
+++ b/src/cl_command_queue.c
@@ -420,9 +420,23 @@ LOCAL cl_int
 cl_command_queue_flush(cl_command_queue queue)
 {
   GET_QUEUE_THREAD_GPGPU(queue);
+  size_t global_wk_sz[3];
+  void* printf_info = cl_gpgpu_get_printf_info(gpgpu, global_wk_sz);
 
   cl_gpgpu_flush(gpgpu);
 
+  if (printf_info && gbe_get_printf_num(printf_info)) {
+    void *index_addr = cl_gpgpu_map_printf_buffer(gpgpu, 0);
+    void *buf_addr = cl_gpgpu_map_printf_buffer(gpgpu, 1);
+    gbe_output_printf(printf_info, index_addr, buf_addr, global_wk_sz[0],
+                      global_wk_sz[1], global_wk_sz[2]);
+    cl_gpgpu_unmap_printf_buffer(gpgpu, 0);
+    cl_gpgpu_unmap_printf_buffer(gpgpu, 1);
+    gbe_release_printf_info(printf_info);
+    global_wk_sz[0] = global_wk_sz[1] = global_wk_sz[2] = 0;
+    cl_gpgpu_set_printf_info(gpgpu, NULL, global_wk_sz);
+  }
+
   cl_invalid_thread_gpgpu(queue);
   return CL_SUCCESS;
 }
diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c
index 891d6f1..ebe056f 100644
--- a/src/cl_command_queue_gen7.c
+++ b/src/cl_command_queue_gen7.c
@@ -246,6 +246,19 @@ cl_bind_stack(cl_gpgpu gpgpu, cl_kernel ker)
   cl_gpgpu_set_stack(gpgpu, offset, stack_sz, cc_llc_l3);
 }
 
+static void /* Size and bind the two printf buffers (0 = index, 1 = output) for a kernel launch. */
+cl_bind_printf(cl_gpgpu gpgpu, cl_kernel ker, void* printf_info, int printf_num, size_t global_sz) {
+  int32_t value = GBE_CURBE_PRINTF_INDEX_POINTER;
+  int32_t offset = gbe_kernel_get_curbe_offset(ker->opaque, value, 0);
+  size_t buf_size = global_sz * sizeof(int) * printf_num; /* sizeof(int) bytes for each of global_sz items, times printf_num */
+  cl_gpgpu_set_printf_buffer(gpgpu, 0, buf_size, offset); /* NOTE(review): the callback type takes uint32_t size, so this size_t is narrowed - confirm it cannot overflow */
+
+  value = GBE_CURBE_PRINTF_BUF_POINTER;
+  offset = gbe_kernel_get_curbe_offset(ker->opaque, value, 0);
+  buf_size = gbe_get_printf_sizeof_size(printf_info) * global_sz; /* presumably the per-item output size times the item count - verify against the backend */
+  cl_gpgpu_set_printf_buffer(gpgpu, 1, buf_size, offset);
+}
+
 LOCAL cl_int
 cl_command_queue_ND_range_gen7(cl_command_queue queue,
                                cl_kernel ker,
@@ -263,7 +276,10 @@ cl_command_queue_ND_range_gen7(cl_command_queue queue,
   size_t cst_sz = ker->curbe_sz= gbe_kernel_get_curbe_size(ker->opaque);
   int32_t scratch_sz = gbe_kernel_get_scratch_size(ker->opaque);
   size_t thread_n = 0u;
+  int printf_num = 0;
   cl_int err = CL_SUCCESS;
+  size_t global_size = global_wk_sz[0] * global_wk_sz[1] * global_wk_sz[2];
+  void* printf_info = NULL;
 
   /* Setup kernel */
   kernel.name = "KERNEL";
@@ -291,12 +307,20 @@ cl_command_queue_ND_range_gen7(cl_command_queue queue,
     }
   }
 
+  printf_info = gbe_dup_printfset(ker->opaque);
+  cl_gpgpu_set_printf_info(gpgpu, printf_info, (size_t *)global_wk_sz);
+
   /* Setup the kernel */
   if (queue->props & CL_QUEUE_PROFILING_ENABLE)
     cl_gpgpu_state_init(gpgpu, ctx->device->max_compute_unit, cst_sz / 32, 1);
   else
     cl_gpgpu_state_init(gpgpu, ctx->device->max_compute_unit, cst_sz / 32, 0);
 
+  printf_num = gbe_get_printf_num(printf_info);
+  if (printf_num) {
+    cl_bind_printf(gpgpu, ker, printf_info, printf_num, global_size);
+  }
+
   /* Bind user buffers */
   cl_command_queue_bind_surface(queue, ker);
   /* Bind user images */
diff --git a/src/cl_driver.h b/src/cl_driver.h
index 9dc2330..75a8a09 100644
--- a/src/cl_driver.h
+++ b/src/cl_driver.h
@@ -208,6 +208,34 @@ extern cl_gpgpu_ref_batch_buf_cb *cl_gpgpu_ref_batch_buf;
 typedef void (cl_gpgpu_unref_batch_buf_cb)(void*);
 extern cl_gpgpu_unref_batch_buf_cb *cl_gpgpu_unref_batch_buf;
 
+/* Set the printf buffer */
+typedef void (cl_gpgpu_set_printf_buffer_cb)(cl_gpgpu, uint32_t, uint32_t, uint32_t);
+extern cl_gpgpu_set_printf_buffer_cb *cl_gpgpu_set_printf_buffer;
+
+/* get the printf buffer offset in the aperture */
+typedef unsigned long (cl_gpgpu_reloc_printf_buffer_cb)(cl_gpgpu, uint32_t, uint32_t);
+extern cl_gpgpu_reloc_printf_buffer_cb *cl_gpgpu_reloc_printf_buffer;
+
+/* map the printf buffer */
+typedef void* (cl_gpgpu_map_printf_buffer_cb)(cl_gpgpu, uint32_t);
+extern cl_gpgpu_map_printf_buffer_cb *cl_gpgpu_map_printf_buffer;
+
+/* unmap the printf buffer */
+typedef void (cl_gpgpu_unmap_printf_buffer_cb)(cl_gpgpu, uint32_t);
+extern cl_gpgpu_unmap_printf_buffer_cb *cl_gpgpu_unmap_printf_buffer;
+
+/* release the printf buffer */
+typedef unsigned long (cl_gpgpu_release_printf_buffer_cb)(cl_gpgpu, uint32_t);
+extern cl_gpgpu_release_printf_buffer_cb *cl_gpgpu_release_printf_buffer;
+
+/* Set the last printfset pointer */
+typedef void (cl_gpgpu_set_printf_info_cb)(cl_gpgpu, void *, size_t*);
+extern cl_gpgpu_set_printf_info_cb *cl_gpgpu_set_printf_info;
+
+/* Get the last printfset pointer */
+typedef void* (cl_gpgpu_get_printf_info_cb)(cl_gpgpu, size_t*);
+extern cl_gpgpu_get_printf_info_cb *cl_gpgpu_get_printf_info;
+
 /* Will spawn all threads */
 typedef void (cl_gpgpu_walker_cb)(cl_gpgpu,
                                   uint32_t simd_sz,
diff --git a/src/cl_driver_defs.c b/src/cl_driver_defs.c
index 95a1a03..0781dbf 100644
--- a/src/cl_driver_defs.c
+++ b/src/cl_driver_defs.c
@@ -85,4 +85,11 @@ LOCAL cl_gpgpu_event_get_exec_timestamp_cb *cl_gpgpu_event_get_exec_timestamp =
 LOCAL cl_gpgpu_event_get_gpu_cur_timestamp_cb *cl_gpgpu_event_get_gpu_cur_timestamp = NULL;
 LOCAL cl_gpgpu_ref_batch_buf_cb *cl_gpgpu_ref_batch_buf = NULL;
 LOCAL cl_gpgpu_unref_batch_buf_cb *cl_gpgpu_unref_batch_buf = NULL;
+LOCAL cl_gpgpu_set_printf_buffer_cb *cl_gpgpu_set_printf_buffer = NULL;
+LOCAL cl_gpgpu_reloc_printf_buffer_cb *cl_gpgpu_reloc_printf_buffer = NULL;
+LOCAL cl_gpgpu_map_printf_buffer_cb *cl_gpgpu_map_printf_buffer = NULL;
+LOCAL cl_gpgpu_unmap_printf_buffer_cb *cl_gpgpu_unmap_printf_buffer = NULL;
+LOCAL cl_gpgpu_set_printf_info_cb *cl_gpgpu_set_printf_info = NULL;
+LOCAL cl_gpgpu_get_printf_info_cb *cl_gpgpu_get_printf_info = NULL;
+LOCAL cl_gpgpu_release_printf_buffer_cb *cl_gpgpu_release_printf_buffer = NULL;
 
diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c
index 2c2541f..36187e7 100644
--- a/src/intel/intel_gpgpu.c
+++ b/src/intel/intel_gpgpu.c
@@ -71,6 +71,9 @@ enum {max_sampler_n = 16 };
 /* Handle GPGPU state */
 struct intel_gpgpu
 {
+  void* ker_opaque;
+  size_t global_wk_sz[3];
+  void* printf_info;
   intel_driver_t *drv;
   intel_batchbuffer_t *batch;
   cl_gpgpu_kernel *ker;
@@ -90,6 +93,8 @@ struct intel_gpgpu
   struct { drm_intel_bo *bo; } scratch_b;
   struct { drm_intel_bo *bo; } constant_b;
   struct { drm_intel_bo *bo; } time_stamp_b;  /* time stamp buffer */
+  struct { drm_intel_bo *bo;
+           drm_intel_bo *ibo;} printf_b;      /* the printf buf and index buf*/
 
   struct { drm_intel_bo *bo; } aux_buf;
   struct {
@@ -145,6 +150,10 @@ intel_gpgpu_delete(intel_gpgpu_t *gpgpu)
     return;
   if(gpgpu->time_stamp_b.bo)
     drm_intel_bo_unreference(gpgpu->time_stamp_b.bo);
+  if(gpgpu->printf_b.bo)
+    drm_intel_bo_unreference(gpgpu->printf_b.bo);
+  if(gpgpu->printf_b.ibo)
+    drm_intel_bo_unreference(gpgpu->printf_b.ibo);
   if (gpgpu->aux_buf.bo)
     drm_intel_bo_unreference(gpgpu->aux_buf.bo);
   if (gpgpu->perf_b.bo)
@@ -525,6 +534,13 @@ intel_gpgpu_state_init(intel_gpgpu_t *gpgpu,
   gpgpu->urb.size_cs_entry = size_cs_entry;
   gpgpu->max_threads = max_threads;
 
+  if (gpgpu->printf_b.ibo)
+    dri_bo_unreference(gpgpu->printf_b.ibo);
+  gpgpu->printf_b.ibo = NULL;
+  if (gpgpu->printf_b.bo)
+    dri_bo_unreference(gpgpu->printf_b.bo);
+  gpgpu->printf_b.bo = NULL;
+
   /* Set the profile buffer*/
   if(gpgpu->time_stamp_b.bo)
     dri_bo_unreference(gpgpu->time_stamp_b.bo);
@@ -1160,6 +1176,90 @@ intel_gpgpu_event_get_exec_timestamp(intel_event_t *event,
   drm_intel_gem_bo_unmap_gtt(event->ts_buf);
 }
 
+static void /* (Re)allocate printf buffer i (0 = index, 1 = output), zero it, and bind it at offset. */
+intel_gpgpu_set_printf_buf(intel_gpgpu_t *gpgpu, uint32_t i, uint32_t size, uint32_t offset)
+{
+  drm_intel_bo *bo = NULL;
+  if (i == 0) { // the index buffer.
+    if (gpgpu->printf_b.ibo)
+      dri_bo_unreference(gpgpu->printf_b.ibo); /* drop the previous index buffer before replacing it */
+    gpgpu->printf_b.ibo = dri_bo_alloc(gpgpu->drv->bufmgr, "Printf index buffer", size, 4096);
+    bo = gpgpu->printf_b.ibo;
+  } else if (i == 1) { // the output buffer.
+    if (gpgpu->printf_b.bo)
+      dri_bo_unreference(gpgpu->printf_b.bo); /* drop the previous output buffer before replacing it */
+    gpgpu->printf_b.bo = dri_bo_alloc(gpgpu->drv->bufmgr, "Printf output buffer", size, 4096);
+    bo = gpgpu->printf_b.bo;
+  } else
+    assert(0); /* only indices 0 and 1 are handled */
+
+  drm_intel_bo_map(bo, 1); /* NOTE(review): bo is still NULL here if the assert is compiled out; the alloc and map results are not checked */
+  memset(bo->virtual, 0, size); /* clear the whole buffer through the CPU mapping */
+  drm_intel_bo_unmap(bo);
+
+  intel_gpgpu_bind_buf(gpgpu, bo, offset, 0, 0);
+}
+
+static void* /* Map printf buffer i (0 = index, 1 = output) and return its CPU address. */
+intel_gpgpu_map_printf_buf(intel_gpgpu_t *gpgpu, uint32_t i)
+{
+  drm_intel_bo *bo = NULL;
+  if (i == 0) {
+    bo = gpgpu->printf_b.ibo;
+  } else if (i == 1) {
+    bo = gpgpu->printf_b.bo;
+  } else
+    assert(0); /* only indices 0 and 1 are handled */
+
+  drm_intel_bo_map(bo, 1); /* NOTE(review): bo may be NULL (assert compiled out, or buffer never set); map result is not checked */
+  return bo->virtual; /* the mapping is left in place on return */
+}
+
+static void /* Unmap printf buffer i (0 = index, 1 = output). */
+intel_gpgpu_unmap_printf_buf_addr(intel_gpgpu_t *gpgpu, uint32_t i)
+{
+  drm_intel_bo *bo = NULL;
+  if (i == 0) {
+    bo = gpgpu->printf_b.ibo;
+  } else if (i == 1) {
+    bo = gpgpu->printf_b.bo;
+  } else
+  assert(0); /* only indices 0 and 1 are handled; this statement is the body of the else above */
+
+  drm_intel_bo_unmap(bo); /* NOTE(review): bo is still NULL here if the assert is compiled out */
+}
+
+static void /* Drop the reference on printf buffer i (0 = index, 1 = output) and clear the stored pointer. */
+intel_gpgpu_release_printf_buf(intel_gpgpu_t *gpgpu, uint32_t i) /* NOTE(review): cl_gpgpu_release_printf_buffer_cb is declared as returning unsigned long, but this returns void - confirm the intended signature */
+{
+  if (i == 0) {
+    drm_intel_bo_unreference(gpgpu->printf_b.ibo);
+    gpgpu->printf_b.ibo = NULL; /* avoid leaving a pointer to a buffer we no longer reference */
+  } else if (i == 1) {
+    drm_intel_bo_unreference(gpgpu->printf_b.bo);
+    gpgpu->printf_b.bo = NULL; /* avoid leaving a pointer to a buffer we no longer reference */
+  } else
+    assert(0); /* only indices 0 and 1 are handled */
+}
+
+static void /* Store the printf info pointer and the three global work sizes on the gpgpu state. */
+intel_gpgpu_set_printf_info(intel_gpgpu_t *gpgpu, void* printf_info, size_t * global_sz)
+{
+  gpgpu->printf_info = printf_info; /* pointer is stored as-is; no copy is made here */
+  gpgpu->global_wk_sz[0] = global_sz[0]; /* global_sz must point to at least three elements */
+  gpgpu->global_wk_sz[1] = global_sz[1];
+  gpgpu->global_wk_sz[2] = global_sz[2];
+}
+
+static void* /* Copy the stored global work sizes into global_sz and return the stored printf info pointer. */
+intel_gpgpu_get_printf_info(intel_gpgpu_t *gpgpu, size_t * global_sz)
+{
+  global_sz[0] = gpgpu->global_wk_sz[0]; /* global_sz must have room for three elements */
+  global_sz[1] = gpgpu->global_wk_sz[1];
+  global_sz[2] = gpgpu->global_wk_sz[2];
+  return gpgpu->printf_info; /* whatever was last stored by intel_gpgpu_set_printf_info */
+}
+
 LOCAL void
 intel_set_gpgpu_callbacks(int device_id)
 {
@@ -1190,6 +1290,12 @@ intel_set_gpgpu_callbacks(int device_id)
   cl_gpgpu_event_get_gpu_cur_timestamp = (cl_gpgpu_event_get_gpu_cur_timestamp_cb *)intel_gpgpu_event_get_gpu_cur_timestamp;
   cl_gpgpu_ref_batch_buf = (cl_gpgpu_ref_batch_buf_cb *)intel_gpgpu_ref_batch_buf;
   cl_gpgpu_unref_batch_buf = (cl_gpgpu_unref_batch_buf_cb *)intel_gpgpu_unref_batch_buf;
+  cl_gpgpu_set_printf_buffer = (cl_gpgpu_set_printf_buffer_cb *)intel_gpgpu_set_printf_buf;
+  cl_gpgpu_map_printf_buffer = (cl_gpgpu_map_printf_buffer_cb *)intel_gpgpu_map_printf_buf;
+  cl_gpgpu_unmap_printf_buffer = (cl_gpgpu_unmap_printf_buffer_cb *)intel_gpgpu_unmap_printf_buf_addr;
+  cl_gpgpu_release_printf_buffer = (cl_gpgpu_release_printf_buffer_cb *)intel_gpgpu_release_printf_buf;
+  cl_gpgpu_set_printf_info = (cl_gpgpu_set_printf_info_cb *)intel_gpgpu_set_printf_info;
+  cl_gpgpu_get_printf_info = (cl_gpgpu_get_printf_info_cb *)intel_gpgpu_get_printf_info;
 
   if (IS_HASWELL(device_id)) {
     cl_gpgpu_bind_image = (cl_gpgpu_bind_image_cb *) intel_gpgpu_bind_image_gen75;
-- 
1.8.3.2



More information about the Beignet mailing list