[Beignet] [PATCH 14/21 V3] Runtime: Bind the profiling buffer when profiling enabled.

junyan.he at inbox.com junyan.he at inbox.com
Mon Nov 16 15:40:16 PST 2015


From: Junyan He <junyan.he at linux.intel.com>

Signed-off-by: Junyan He <junyan.he at linux.intel.com>
---
 src/cl_command_queue.c      |    8 ++++++
 src/cl_command_queue_gen7.c |   37 +++++++++++++++++++++++++++
 src/cl_driver.h             |   16 ++++++++++++
 src/cl_driver_defs.c        |    5 ++++
 src/intel/intel_gpgpu.c     |   58 +++++++++++++++++++++++++++++++++++++++++++
 src/intel/intel_gpgpu.h     |    3 ++-
 6 files changed, 126 insertions(+), 1 deletion(-)

diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c
index 033e7df..884c8a8 100644
--- a/src/cl_command_queue.c
+++ b/src/cl_command_queue.c
@@ -229,6 +229,7 @@ cl_command_queue_flush_gpgpu(cl_command_queue queue, cl_gpgpu gpgpu)
   size_t global_wk_sz[3];
   size_t outbuf_sz = 0;
   void* printf_info = cl_gpgpu_get_printf_info(gpgpu, global_wk_sz, &outbuf_sz);
+  void* profiling_info;
 
   if (cl_gpgpu_flush(gpgpu) < 0)
     return CL_OUT_OF_RESOURCES;
@@ -252,6 +253,13 @@ cl_command_queue_flush_gpgpu(cl_command_queue queue, cl_gpgpu gpgpu)
     global_wk_sz[0] = global_wk_sz[1] = global_wk_sz[2] = 0;
     cl_gpgpu_set_printf_info(gpgpu, NULL, global_wk_sz);
   }
+
+  /* If profiling info is present, output it. */
+  profiling_info = cl_gpgpu_get_profiling_info(gpgpu);
+  if (profiling_info) {
+    interp_output_profiling(profiling_info, cl_gpgpu_map_profiling_buffer(gpgpu));
+    cl_gpgpu_unmap_profiling_buffer(gpgpu);
+  }
   return CL_SUCCESS;
 }
 
diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c
index 2a49ec2..e5198cd 100644
--- a/src/cl_command_queue_gen7.c
+++ b/src/cl_command_queue_gen7.c
@@ -270,6 +270,36 @@ cl_bind_stack(cl_gpgpu gpgpu, cl_kernel ker)
 }
 
 static int
+cl_bind_profiling(cl_gpgpu gpgpu, uint32_t simd_sz, cl_kernel ker, size_t global_sz, size_t local_sz, uint32_t bti) {
+  int32_t offset;
+  int i = 0;
+  int thread_num;
+  if (simd_sz == 16) {
+    for(i = 0; i < 3; i++) {
+      offset = interp_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_PROFILING_TIMESTAMP0 + i, 0);
+      assert(offset >= 0);
+      memset(ker->curbe + offset, 0x0, sizeof(uint32_t)*8*2);
+      thread_num = (local_sz + 15)/16;
+    }
+  } else {
+    assert(simd_sz == 8);
+    for(i = 0; i < 5; i++) {
+      offset = interp_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_PROFILING_TIMESTAMP0 + i, 0);
+      assert(offset >= 0);
+      memset(ker->curbe + offset, 0x0, sizeof(uint32_t)*8);
+      thread_num = (local_sz + 7)/8;
+    }
+  }
+
+  offset = interp_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_PROFILING_BUF_POINTER, 0);
+  thread_num = thread_num*(global_sz/local_sz);
+  if (cl_gpgpu_set_profiling_buffer(gpgpu, thread_num*128 + 4, offset, bti))
+    return -1;
+
+  return 0;
+}
+
+static int
 cl_bind_printf(cl_gpgpu gpgpu, cl_kernel ker, void* printf_info, int printf_num, size_t global_sz) {
   int32_t value = GBE_CURBE_PRINTF_INDEX_POINTER;
   int32_t offset = interp_kernel_get_curbe_offset(ker->opaque, value, 0);
@@ -363,6 +393,13 @@ cl_command_queue_ND_range_gen7(cl_command_queue queue,
     if (cl_bind_printf(gpgpu, ker, printf_info, printf_num, global_size) != 0)
       goto error;
   }
+  if (interp_get_profiling_bti(ker->opaque) != 0) {
+    if (cl_bind_profiling(gpgpu, simd_sz, ker, global_size, local_sz, interp_get_profiling_bti(ker->opaque)))
+      goto error;
+    cl_gpgpu_set_profiling_info(gpgpu, interp_dup_profiling(ker->opaque));
+  } else {
+	cl_gpgpu_set_profiling_info(gpgpu, NULL);
+  }
 
   /* Bind user buffers */
   cl_command_queue_bind_surface(queue, ker);
diff --git a/src/cl_driver.h b/src/cl_driver.h
index 9d986b1..7081bea 100644
--- a/src/cl_driver.h
+++ b/src/cl_driver.h
@@ -273,6 +273,22 @@ extern cl_gpgpu_ref_batch_buf_cb *cl_gpgpu_ref_batch_buf;
 typedef void (cl_gpgpu_unref_batch_buf_cb)(void*);
 extern cl_gpgpu_unref_batch_buf_cb *cl_gpgpu_unref_batch_buf;
 
+/* Set the profiling buffer: the Intel implementation allocates, zeroes and binds it */
+typedef int (cl_gpgpu_set_profiling_buffer_cb)(cl_gpgpu, uint32_t, uint32_t, uint8_t); /* gpgpu, size, curbe offset, bti */
+extern cl_gpgpu_set_profiling_buffer_cb *cl_gpgpu_set_profiling_buffer;
+
+typedef int (cl_gpgpu_set_profiling_info_cb)(cl_gpgpu, void *); /* attach an opaque profiling info pointer (or NULL) to the gpgpu */
+extern cl_gpgpu_set_profiling_info_cb *cl_gpgpu_set_profiling_info;
+
+typedef void* (cl_gpgpu_get_profiling_info_cb)(cl_gpgpu); /* fetch the profiling info pointer attached to the gpgpu */
+extern cl_gpgpu_get_profiling_info_cb *cl_gpgpu_get_profiling_info;
+
+typedef void* (cl_gpgpu_map_profiling_buffer_cb)(cl_gpgpu); /* map the profiling buffer and return its mapped address */
+extern cl_gpgpu_map_profiling_buffer_cb *cl_gpgpu_map_profiling_buffer;
+
+typedef void (cl_gpgpu_unmap_profiling_buffer_cb)(cl_gpgpu); /* unmap the profiling buffer again */
+extern cl_gpgpu_unmap_profiling_buffer_cb *cl_gpgpu_unmap_profiling_buffer;
+
 /* Set the printf buffer */
 typedef int (cl_gpgpu_set_printf_buffer_cb)(cl_gpgpu, uint32_t, uint32_t, uint32_t, uint8_t);
 extern cl_gpgpu_set_printf_buffer_cb *cl_gpgpu_set_printf_buffer;
diff --git a/src/cl_driver_defs.c b/src/cl_driver_defs.c
index 58c4f8f..31176a4 100644
--- a/src/cl_driver_defs.c
+++ b/src/cl_driver_defs.c
@@ -94,6 +94,11 @@ LOCAL cl_gpgpu_event_get_exec_timestamp_cb *cl_gpgpu_event_get_exec_timestamp =
 LOCAL cl_gpgpu_event_get_gpu_cur_timestamp_cb *cl_gpgpu_event_get_gpu_cur_timestamp = NULL;
 LOCAL cl_gpgpu_ref_batch_buf_cb *cl_gpgpu_ref_batch_buf = NULL;
 LOCAL cl_gpgpu_unref_batch_buf_cb *cl_gpgpu_unref_batch_buf = NULL;
+LOCAL cl_gpgpu_set_profiling_buffer_cb *cl_gpgpu_set_profiling_buffer = NULL;
+LOCAL cl_gpgpu_set_profiling_info_cb *cl_gpgpu_set_profiling_info = NULL;
+LOCAL cl_gpgpu_get_profiling_info_cb *cl_gpgpu_get_profiling_info = NULL;
+LOCAL cl_gpgpu_map_profiling_buffer_cb *cl_gpgpu_map_profiling_buffer = NULL;
+LOCAL cl_gpgpu_unmap_profiling_buffer_cb *cl_gpgpu_unmap_profiling_buffer = NULL;
 LOCAL cl_gpgpu_set_printf_buffer_cb *cl_gpgpu_set_printf_buffer = NULL;
 LOCAL cl_gpgpu_reloc_printf_buffer_cb *cl_gpgpu_reloc_printf_buffer = NULL;
 LOCAL cl_gpgpu_map_printf_buffer_cb *cl_gpgpu_map_printf_buffer = NULL;
diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c
index 0c34ca9..7f212e2 100644
--- a/src/intel/intel_gpgpu.c
+++ b/src/intel/intel_gpgpu.c
@@ -151,6 +151,8 @@ intel_gpgpu_delete_finished(intel_gpgpu_t *gpgpu)
     drm_intel_bo_unreference(gpgpu->stack_b.bo);
   if (gpgpu->scratch_b.bo)
     drm_intel_bo_unreference(gpgpu->scratch_b.bo);
+  if (gpgpu->profiling_b.bo)
+    drm_intel_bo_unreference(gpgpu->profiling_b.bo);
 
   if(gpgpu->constant_b.bo)
     drm_intel_bo_unreference(gpgpu->constant_b.bo);
@@ -905,6 +907,10 @@ intel_gpgpu_state_init(intel_gpgpu_t *gpgpu,
     dri_bo_unreference(gpgpu->printf_b.bo);
   gpgpu->printf_b.bo = NULL;
 
+  if (gpgpu->profiling_b.bo)
+    dri_bo_unreference(gpgpu->profiling_b.bo);
+  gpgpu->profiling_b.bo = NULL;
+
   /* Set the profile buffer*/
   if(gpgpu->time_stamp_b.bo)
     dri_bo_unreference(gpgpu->time_stamp_b.bo);
@@ -2281,6 +2287,35 @@ intel_gpgpu_event_get_exec_timestamp(intel_gpgpu_t* gpgpu, intel_event_t *event,
 }
 
 static int
+intel_gpgpu_set_profiling_buf(intel_gpgpu_t *gpgpu, uint32_t size, uint32_t offset, uint8_t bti)
+{
+  /* Allocate a `size`-byte bo, keep it in gpgpu->profiling_b, zero it through a
+   * temporary mapping and bind it with `offset`/`bti`. Returns 0, or -1 on failure. */
+  drm_intel_bo *buf = drm_intel_bo_alloc(gpgpu->drv->bufmgr, "Profiling buffer", size, 64);
+  gpgpu->profiling_b.bo = buf;
+  if (buf == NULL || drm_intel_bo_map(buf, 1) != 0) {
+    fprintf(stderr, "%s:%d: %s.\n", __FILE__, __LINE__, strerror(errno));
+    return -1;
+  }
+  memset(buf->virtual, 0, size);
+  drm_intel_bo_unmap(buf);
+  cl_gpgpu_bind_buf((cl_gpgpu)gpgpu, (cl_buffer)buf, offset, 0, size, bti);
+  return 0;
+}
+
+static int
+intel_gpgpu_set_profiling_info(intel_gpgpu_t *gpgpu, void* profiling_info)
+{
+  gpgpu->profiling_info = profiling_info; /* record the pointer as-is; nothing is copied or freed here */
+  return 0; /* int (always 0) because this is installed via a cast to cl_gpgpu_set_profiling_info_cb, which returns int */
+}
+static void*
+intel_gpgpu_get_profiling_info(intel_gpgpu_t *gpgpu)
+{
+  return gpgpu->profiling_info; /* hand back the pointer held in the gpgpu's profiling_info field, unchanged */
+}
+
+static int
 intel_gpgpu_set_printf_buf(intel_gpgpu_t *gpgpu, uint32_t i, uint32_t size, uint32_t offset, uint8_t bti)
 {
   drm_intel_bo *bo = NULL;
@@ -2311,6 +2346,24 @@ intel_gpgpu_set_printf_buf(intel_gpgpu_t *gpgpu, uint32_t i, uint32_t size, uint
 }
 
 static void*
+intel_gpgpu_map_profiling_buf(intel_gpgpu_t *gpgpu)
+{
+  drm_intel_bo *bo = NULL;
+  bo = gpgpu->profiling_b.bo;
+  drm_intel_bo_map(bo, 1);
+  return bo->virtual;
+}
+
+/* Unmap the profiling bo; the counterpart of intel_gpgpu_map_profiling_buf. */
+static void
+intel_gpgpu_unmap_profiling_buf_addr(intel_gpgpu_t *gpgpu)
+{
+  drm_intel_bo *profiling_bo = gpgpu->profiling_b.bo;
+  drm_intel_bo_unmap(profiling_bo);
+}
+
+
+static void*
 intel_gpgpu_map_printf_buf(intel_gpgpu_t *gpgpu, uint32_t i)
 {
   drm_intel_bo *bo = NULL;
@@ -2402,6 +2455,11 @@ intel_set_gpgpu_callbacks(int device_id)
   cl_gpgpu_event_get_gpu_cur_timestamp = (cl_gpgpu_event_get_gpu_cur_timestamp_cb *)intel_gpgpu_event_get_gpu_cur_timestamp;
   cl_gpgpu_ref_batch_buf = (cl_gpgpu_ref_batch_buf_cb *)intel_gpgpu_ref_batch_buf;
   cl_gpgpu_unref_batch_buf = (cl_gpgpu_unref_batch_buf_cb *)intel_gpgpu_unref_batch_buf;
+  cl_gpgpu_set_profiling_buffer = (cl_gpgpu_set_profiling_buffer_cb *)intel_gpgpu_set_profiling_buf;
+  cl_gpgpu_set_profiling_info = (cl_gpgpu_set_profiling_info_cb *)intel_gpgpu_set_profiling_info;
+  cl_gpgpu_get_profiling_info = (cl_gpgpu_get_profiling_info_cb *)intel_gpgpu_get_profiling_info;
+  cl_gpgpu_map_profiling_buffer = (cl_gpgpu_map_profiling_buffer_cb *)intel_gpgpu_map_profiling_buf;
+  cl_gpgpu_unmap_profiling_buffer = (cl_gpgpu_unmap_profiling_buffer_cb *)intel_gpgpu_unmap_profiling_buf_addr;
   cl_gpgpu_set_printf_buffer = (cl_gpgpu_set_printf_buffer_cb *)intel_gpgpu_set_printf_buf;
   cl_gpgpu_map_printf_buffer = (cl_gpgpu_map_printf_buffer_cb *)intel_gpgpu_map_printf_buf;
   cl_gpgpu_unmap_printf_buffer = (cl_gpgpu_unmap_printf_buffer_cb *)intel_gpgpu_unmap_printf_buf_addr;
diff --git a/src/intel/intel_gpgpu.h b/src/intel/intel_gpgpu.h
index ad7290e..ccbf2fa 100644
--- a/src/intel/intel_gpgpu.h
+++ b/src/intel/intel_gpgpu.h
@@ -46,6 +46,7 @@ struct intel_gpgpu
   void* ker_opaque;
   size_t global_wk_sz[3];
   void* printf_info;
+  void* profiling_info;
   struct intel_driver *drv;
   struct intel_batchbuffer *batch;
   cl_gpgpu_kernel *ker;
@@ -66,7 +67,7 @@ struct intel_gpgpu
   struct { drm_intel_bo *bo; } time_stamp_b;  /* time stamp buffer */
   struct { drm_intel_bo *bo;
            drm_intel_bo *ibo;} printf_b;      /* the printf buf and index buf*/
-
+  struct { drm_intel_bo *bo; } profiling_b;   /* the buf for profiling*/
   struct { drm_intel_bo *bo; } aux_buf;
   struct {
     uint32_t surface_heap_offset;
-- 
1.7.9.5





More information about the Beignet mailing list