[Beignet] [PATCH 14/21 V3] Runtime: Bind the profiling buffer when profiling enabled.
junyan.he at inbox.com
junyan.he at inbox.com
Mon Nov 16 15:40:16 PST 2015
From: Junyan He <junyan.he at linux.intel.com>
Signed-off-by: Junyan He <junyan.he at linux.intel.com>
---
src/cl_command_queue.c | 8 ++++++
src/cl_command_queue_gen7.c | 37 +++++++++++++++++++++++++++
src/cl_driver.h | 16 ++++++++++++
src/cl_driver_defs.c | 5 ++++
src/intel/intel_gpgpu.c | 58 +++++++++++++++++++++++++++++++++++++++++++
src/intel/intel_gpgpu.h | 3 ++-
6 files changed, 126 insertions(+), 1 deletion(-)
diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c
index 033e7df..884c8a8 100644
--- a/src/cl_command_queue.c
+++ b/src/cl_command_queue.c
@@ -229,6 +229,7 @@ cl_command_queue_flush_gpgpu(cl_command_queue queue, cl_gpgpu gpgpu)
size_t global_wk_sz[3];
size_t outbuf_sz = 0;
void* printf_info = cl_gpgpu_get_printf_info(gpgpu, global_wk_sz, &outbuf_sz);
+ void* profiling_info;
if (cl_gpgpu_flush(gpgpu) < 0)
return CL_OUT_OF_RESOURCES;
@@ -252,6 +253,13 @@ cl_command_queue_flush_gpgpu(cl_command_queue queue, cl_gpgpu gpgpu)
global_wk_sz[0] = global_wk_sz[1] = global_wk_sz[2] = 0;
cl_gpgpu_set_printf_info(gpgpu, NULL, global_wk_sz);
}
+
+ /* If profiling info is available, output it. */
+ profiling_info = cl_gpgpu_get_profiling_info(gpgpu);
+ if (profiling_info) {
+ interp_output_profiling(profiling_info, cl_gpgpu_map_profiling_buffer(gpgpu));
+ cl_gpgpu_unmap_profiling_buffer(gpgpu);
+ }
return CL_SUCCESS;
}
diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c
index 2a49ec2..e5198cd 100644
--- a/src/cl_command_queue_gen7.c
+++ b/src/cl_command_queue_gen7.c
@@ -270,6 +270,36 @@ cl_bind_stack(cl_gpgpu gpgpu, cl_kernel ker)
}
+/* Clear the kernel's profiling timestamp slots in the curbe and set up the
+ * profiling buffer for this enqueue.
+ *
+ * For SIMD16, 3 slots of 16 uint32_t each are zeroed; for SIMD8, 5 slots of
+ * 8 uint32_t each. The slot offsets come from interp_kernel_get_curbe_offset
+ * starting at GBE_CURBE_PROFILING_TIMESTAMP0.
+ *
+ * The buffer is sized as thread_num*128 + 4 bytes, where thread_num is
+ * ceil(local_sz / simd_sz) multiplied by the number of work groups
+ * (global_sz / local_sz). NOTE(review): the meaning of the 128-byte record
+ * and the 4 extra bytes is defined by the profiling code elsewhere -- confirm.
+ *
+ * Returns 0 on success and -1 when local_sz is 0 or the profiling buffer
+ * cannot be set up.
+ */
static int
+cl_bind_profiling(cl_gpgpu gpgpu, uint32_t simd_sz, cl_kernel ker, size_t global_sz, size_t local_sz, uint32_t bti) {
+  int32_t offset;
+  int i;
+  int slot_num;
+  size_t slot_sz;
+  int thread_num;
+
+  /* A zero local size would make the work group count below divide by zero. */
+  if (local_sz == 0)
+    return -1;
+
+  /* Pick the per-SIMD-width parameters once instead of inside the loop. */
+  if (simd_sz == 16) {
+    slot_num = 3;
+    slot_sz = sizeof(uint32_t)*8*2;
+    thread_num = (local_sz + 15)/16;
+  } else {
+    assert(simd_sz == 8);
+    slot_num = 5;
+    slot_sz = sizeof(uint32_t)*8;
+    thread_num = (local_sz + 7)/8;
+  }
+
+  /* Zero every timestamp slot in the kernel's curbe. */
+  for (i = 0; i < slot_num; i++) {
+    offset = interp_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_PROFILING_TIMESTAMP0 + i, 0);
+    assert(offset >= 0);
+    memset(ker->curbe + offset, 0x0, slot_sz);
+  }
+
+  offset = interp_kernel_get_curbe_offset(ker->opaque, GBE_CURBE_PROFILING_BUF_POINTER, 0);
+  thread_num = thread_num*(global_sz/local_sz);
+  if (cl_gpgpu_set_profiling_buffer(gpgpu, thread_num*128 + 4, offset, bti))
+    return -1;
+
+  return 0;
+}
+
+static int
cl_bind_printf(cl_gpgpu gpgpu, cl_kernel ker, void* printf_info, int printf_num, size_t global_sz) {
int32_t value = GBE_CURBE_PRINTF_INDEX_POINTER;
int32_t offset = interp_kernel_get_curbe_offset(ker->opaque, value, 0);
@@ -363,6 +393,13 @@ cl_command_queue_ND_range_gen7(cl_command_queue queue,
if (cl_bind_printf(gpgpu, ker, printf_info, printf_num, global_size) != 0)
goto error;
}
+ if (interp_get_profiling_bti(ker->opaque) != 0) {
+ if (cl_bind_profiling(gpgpu, simd_sz, ker, global_size, local_sz, interp_get_profiling_bti(ker->opaque)))
+ goto error;
+ cl_gpgpu_set_profiling_info(gpgpu, interp_dup_profiling(ker->opaque));
+ } else {
+ cl_gpgpu_set_profiling_info(gpgpu, NULL);
+ }
/* Bind user buffers */
cl_command_queue_bind_surface(queue, ker);
diff --git a/src/cl_driver.h b/src/cl_driver.h
index 9d986b1..7081bea 100644
--- a/src/cl_driver.h
+++ b/src/cl_driver.h
@@ -273,6 +273,22 @@ extern cl_gpgpu_ref_batch_buf_cb *cl_gpgpu_ref_batch_buf;
typedef void (cl_gpgpu_unref_batch_buf_cb)(void*);
extern cl_gpgpu_unref_batch_buf_cb *cl_gpgpu_unref_batch_buf;
+/* Set the profiling buffer.
+ *
+ * The five profiling callbacks declared below are installed by
+ * intel_set_gpgpu_callbacks; the notes describe the Intel implementation in
+ * src/intel/intel_gpgpu.c:
+ *  - cl_gpgpu_set_profiling_buffer(gpgpu, size, offset, bti): allocate a
+ *    zero-filled buffer of `size` bytes and bind it with the given offset and
+ *    bti; returns 0 on success, -1 on failure.
+ *  - cl_gpgpu_set_profiling_info / cl_gpgpu_get_profiling_info: store / fetch
+ *    an opaque profiling info pointer on the gpgpu object.
+ *  - cl_gpgpu_map_profiling_buffer / cl_gpgpu_unmap_profiling_buffer: map the
+ *    profiling buffer and return its CPU address / release that mapping.
+ */
+typedef int (cl_gpgpu_set_profiling_buffer_cb)(cl_gpgpu, uint32_t, uint32_t, uint8_t);
+extern cl_gpgpu_set_profiling_buffer_cb *cl_gpgpu_set_profiling_buffer;
+
+typedef int (cl_gpgpu_set_profiling_info_cb)(cl_gpgpu, void *);
+extern cl_gpgpu_set_profiling_info_cb *cl_gpgpu_set_profiling_info;
+
+typedef void* (cl_gpgpu_get_profiling_info_cb)(cl_gpgpu);
+extern cl_gpgpu_get_profiling_info_cb *cl_gpgpu_get_profiling_info;
+
+typedef void* (cl_gpgpu_map_profiling_buffer_cb)(cl_gpgpu);
+extern cl_gpgpu_map_profiling_buffer_cb *cl_gpgpu_map_profiling_buffer;
+
+typedef void (cl_gpgpu_unmap_profiling_buffer_cb)(cl_gpgpu);
+extern cl_gpgpu_unmap_profiling_buffer_cb *cl_gpgpu_unmap_profiling_buffer;
+
/* Set the printf buffer */
typedef int (cl_gpgpu_set_printf_buffer_cb)(cl_gpgpu, uint32_t, uint32_t, uint32_t, uint8_t);
extern cl_gpgpu_set_printf_buffer_cb *cl_gpgpu_set_printf_buffer;
diff --git a/src/cl_driver_defs.c b/src/cl_driver_defs.c
index 58c4f8f..31176a4 100644
--- a/src/cl_driver_defs.c
+++ b/src/cl_driver_defs.c
@@ -94,6 +94,11 @@ LOCAL cl_gpgpu_event_get_exec_timestamp_cb *cl_gpgpu_event_get_exec_timestamp =
LOCAL cl_gpgpu_event_get_gpu_cur_timestamp_cb *cl_gpgpu_event_get_gpu_cur_timestamp = NULL;
LOCAL cl_gpgpu_ref_batch_buf_cb *cl_gpgpu_ref_batch_buf = NULL;
LOCAL cl_gpgpu_unref_batch_buf_cb *cl_gpgpu_unref_batch_buf = NULL;
+LOCAL cl_gpgpu_set_profiling_buffer_cb *cl_gpgpu_set_profiling_buffer = NULL;
+LOCAL cl_gpgpu_set_profiling_info_cb *cl_gpgpu_set_profiling_info = NULL;
+LOCAL cl_gpgpu_get_profiling_info_cb *cl_gpgpu_get_profiling_info = NULL;
+LOCAL cl_gpgpu_map_profiling_buffer_cb *cl_gpgpu_map_profiling_buffer = NULL;
+LOCAL cl_gpgpu_unmap_profiling_buffer_cb *cl_gpgpu_unmap_profiling_buffer = NULL;
LOCAL cl_gpgpu_set_printf_buffer_cb *cl_gpgpu_set_printf_buffer = NULL;
LOCAL cl_gpgpu_reloc_printf_buffer_cb *cl_gpgpu_reloc_printf_buffer = NULL;
LOCAL cl_gpgpu_map_printf_buffer_cb *cl_gpgpu_map_printf_buffer = NULL;
diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c
index 0c34ca9..7f212e2 100644
--- a/src/intel/intel_gpgpu.c
+++ b/src/intel/intel_gpgpu.c
@@ -151,6 +151,8 @@ intel_gpgpu_delete_finished(intel_gpgpu_t *gpgpu)
drm_intel_bo_unreference(gpgpu->stack_b.bo);
if (gpgpu->scratch_b.bo)
drm_intel_bo_unreference(gpgpu->scratch_b.bo);
+ if (gpgpu->profiling_b.bo)
+ drm_intel_bo_unreference(gpgpu->profiling_b.bo);
if(gpgpu->constant_b.bo)
drm_intel_bo_unreference(gpgpu->constant_b.bo);
@@ -905,6 +907,10 @@ intel_gpgpu_state_init(intel_gpgpu_t *gpgpu,
dri_bo_unreference(gpgpu->printf_b.bo);
gpgpu->printf_b.bo = NULL;
+ if (gpgpu->profiling_b.bo)
+ dri_bo_unreference(gpgpu->profiling_b.bo);
+ gpgpu->profiling_b.bo = NULL;
+
/* Set the profile buffer*/
if(gpgpu->time_stamp_b.bo)
dri_bo_unreference(gpgpu->time_stamp_b.bo);
@@ -2281,6 +2287,35 @@ intel_gpgpu_event_get_exec_timestamp(intel_gpgpu_t* gpgpu, intel_event_t *event,
}
+/* Allocate a profiling buffer of `size` bytes, zero it through a CPU mapping
+ * and hand it to cl_gpgpu_bind_buf with the given `offset` and `bti`.
+ * Returns 0 on success; logs strerror(errno) and returns -1 when the buffer
+ * cannot be allocated or mapped.
+ */
static int
+intel_gpgpu_set_profiling_buf(intel_gpgpu_t *gpgpu, uint32_t size, uint32_t offset, uint8_t bti)
+{
+  drm_intel_bo *prof_bo = drm_intel_bo_alloc(gpgpu->drv->bufmgr, "Profiling buffer", size, 64);
+
+  /* The bo is recorded on the gpgpu object before it is validated, so it
+   * stays recorded even when the map below fails. */
+  gpgpu->profiling_b.bo = prof_bo;
+  if (prof_bo == NULL || drm_intel_bo_map(prof_bo, 1) != 0) {
+    fprintf(stderr, "%s:%d: %s.\n", __FILE__, __LINE__, strerror(errno));
+    return -1;
+  }
+
+  /* Clear the whole buffer, then drop the CPU mapping again. */
+  memset(prof_bo->virtual, 0, size);
+  drm_intel_bo_unmap(prof_bo);
+
+  cl_gpgpu_bind_buf((cl_gpgpu)gpgpu, (cl_buffer)prof_bo, offset, 0, size, bti);
+  return 0;
+}
+
+/* Store the opaque profiling info pointer on the gpgpu object.
+ *
+ * The function is installed and called through cl_gpgpu_set_profiling_info_cb,
+ * which is declared to return int; calling a void function through an
+ * int-returning function pointer is undefined behaviour, so the return type
+ * here agrees with the callback type. Always returns 0.
+ */
+static int
+intel_gpgpu_set_profiling_info(intel_gpgpu_t *gpgpu, void* profiling_info)
+{
+  gpgpu->profiling_info = profiling_info;
+  return 0;
+}
+
+/* Return the profiling_info pointer held by the gpgpu object. */
+static void*
+intel_gpgpu_get_profiling_info(intel_gpgpu_t *gpgpu)
+{
+  void *info = gpgpu->profiling_info;
+  return info;
+}
+
+static int
intel_gpgpu_set_printf_buf(intel_gpgpu_t *gpgpu, uint32_t i, uint32_t size, uint32_t offset, uint8_t bti)
{
drm_intel_bo *bo = NULL;
@@ -2311,6 +2346,24 @@ intel_gpgpu_set_printf_buf(intel_gpgpu_t *gpgpu, uint32_t i, uint32_t size, uint
}
+/* Map the profiling buffer (write-enabled) and return its CPU address.
+ * Returns NULL when no profiling buffer is set on the gpgpu object or when
+ * drm_intel_bo_map reports a failure, instead of returning whatever
+ * bo->virtual happens to contain.
+ */
static void*
+intel_gpgpu_map_profiling_buf(intel_gpgpu_t *gpgpu)
+{
+  drm_intel_bo *bo = gpgpu->profiling_b.bo;
+
+  /* Only trust bo->virtual after the map call has succeeded. */
+  if (bo == NULL || drm_intel_bo_map(bo, 1) != 0)
+    return NULL;
+  return bo->virtual;
+}
+
+/* Unmap the profiling buffer held by the gpgpu object. */
+static void
+intel_gpgpu_unmap_profiling_buf_addr(intel_gpgpu_t *gpgpu)
+{
+  drm_intel_bo_unmap(gpgpu->profiling_b.bo);
+}
+
+
+static void*
intel_gpgpu_map_printf_buf(intel_gpgpu_t *gpgpu, uint32_t i)
{
drm_intel_bo *bo = NULL;
@@ -2402,6 +2455,11 @@ intel_set_gpgpu_callbacks(int device_id)
cl_gpgpu_event_get_gpu_cur_timestamp = (cl_gpgpu_event_get_gpu_cur_timestamp_cb *)intel_gpgpu_event_get_gpu_cur_timestamp;
cl_gpgpu_ref_batch_buf = (cl_gpgpu_ref_batch_buf_cb *)intel_gpgpu_ref_batch_buf;
cl_gpgpu_unref_batch_buf = (cl_gpgpu_unref_batch_buf_cb *)intel_gpgpu_unref_batch_buf;
+ cl_gpgpu_set_profiling_buffer = (cl_gpgpu_set_profiling_buffer_cb *)intel_gpgpu_set_profiling_buf;
+ cl_gpgpu_set_profiling_info = (cl_gpgpu_set_profiling_info_cb *)intel_gpgpu_set_profiling_info;
+ cl_gpgpu_get_profiling_info = (cl_gpgpu_get_profiling_info_cb *)intel_gpgpu_get_profiling_info;
+ cl_gpgpu_map_profiling_buffer = (cl_gpgpu_map_profiling_buffer_cb *)intel_gpgpu_map_profiling_buf;
+ cl_gpgpu_unmap_profiling_buffer = (cl_gpgpu_unmap_profiling_buffer_cb *)intel_gpgpu_unmap_profiling_buf_addr;
cl_gpgpu_set_printf_buffer = (cl_gpgpu_set_printf_buffer_cb *)intel_gpgpu_set_printf_buf;
cl_gpgpu_map_printf_buffer = (cl_gpgpu_map_printf_buffer_cb *)intel_gpgpu_map_printf_buf;
cl_gpgpu_unmap_printf_buffer = (cl_gpgpu_unmap_printf_buffer_cb *)intel_gpgpu_unmap_printf_buf_addr;
diff --git a/src/intel/intel_gpgpu.h b/src/intel/intel_gpgpu.h
index ad7290e..ccbf2fa 100644
--- a/src/intel/intel_gpgpu.h
+++ b/src/intel/intel_gpgpu.h
@@ -46,6 +46,7 @@ struct intel_gpgpu
void* ker_opaque;
size_t global_wk_sz[3];
void* printf_info;
+ void* profiling_info;
struct intel_driver *drv;
struct intel_batchbuffer *batch;
cl_gpgpu_kernel *ker;
@@ -66,7 +67,7 @@ struct intel_gpgpu
struct { drm_intel_bo *bo; } time_stamp_b; /* time stamp buffer */
struct { drm_intel_bo *bo;
drm_intel_bo *ibo;} printf_b; /* the printf buf and index buf*/
-
+ struct { drm_intel_bo *bo; } profiling_b; /* the buf for profiling*/
struct { drm_intel_bo *bo; } aux_buf;
struct {
uint32_t surface_heap_offset;
--
1.7.9.5
More information about the Beignet
mailing list