[Beignet] [PATCH] Runtime: Fix broken OCL_OUTPUT_KERNEL_PERF
Xiuli Pan
xiuli.pan at intel.com
Wed Apr 5 06:54:10 UTC 2017
From: Pan Xiuli <xiuli.pan at intel.com>
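After the runtime refinement, OCL_OUTPUT_KERNEL_PERF is broken. Fix it
so that it can be used for performance tuning again: initialize the
environment variables in clCreateContext, record the kernel name in the
enqueue data, and call time_end() after the batch buffer sync in
cl_enqueue_ndrange instead of calling clFinish() inside time_end().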
Signed-off-by: Pan Xiuli <xiuli.pan at intel.com>
---
src/cl_api_context.c | 2 ++
src/cl_command_queue_gen7.c | 1 +
src/cl_enqueue.c | 3 +++
src/cl_enqueue.h | 1 +
src/performance.c | 1 -
5 files changed, 7 insertions(+), 1 deletion(-)
diff --git a/src/cl_api_context.c b/src/cl_api_context.c
index e8184b1..7028f8d 100644
--- a/src/cl_api_context.c
+++ b/src/cl_api_context.c
@@ -19,6 +19,7 @@
#include "cl_context.h"
#include "cl_device_id.h"
#include "cl_alloc.h"
+#include "performance.h"
cl_context
clCreateContext(const cl_context_properties *properties,
@@ -55,6 +56,7 @@ clCreateContext(const cl_context_properties *properties,
context = cl_create_context(properties, num_devices, devices, pfn_notify, user_data, &err);
} while (0);
+ initialize_env_var();
if (errcode_ret)
*errcode_ret = err;
return context;
diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c
index dd82a44..6f85148 100644
--- a/src/cl_command_queue_gen7.c
+++ b/src/cl_command_queue_gen7.c
@@ -492,6 +492,7 @@ cl_command_queue_ND_range_gen7(cl_command_queue queue,
event->exec_data.queue = queue;
event->exec_data.gpgpu = gpgpu;
event->exec_data.type = EnqueueNDRangeKernel;
+ event->exec_data.name = kernel.name;
return CL_SUCCESS;
diff --git a/src/cl_enqueue.c b/src/cl_enqueue.c
index 8350089..166dc55 100644
--- a/src/cl_enqueue.c
+++ b/src/cl_enqueue.c
@@ -25,6 +25,7 @@
#include "cl_utils.h"
#include "cl_alloc.h"
#include "cl_device_enqueue.h"
+#include "performance.h"
#include <stdio.h>
#include <string.h>
#include <assert.h>
@@ -579,6 +580,8 @@ cl_enqueue_ndrange(enqueue_data *data, cl_int status)
void *batch_buf = cl_gpgpu_ref_batch_buf(data->gpgpu);
cl_gpgpu_sync(batch_buf);
cl_gpgpu_unref_batch_buf(batch_buf);
+ if(b_output_kernel_perf)
+ time_end(data->queue->ctx, data->name, "", data->queue);
}
return err;
diff --git a/src/cl_enqueue.h b/src/cl_enqueue.h
index 50a54fc..1532a59 100644
--- a/src/cl_enqueue.h
+++ b/src/cl_enqueue.h
@@ -81,6 +81,7 @@ typedef struct _enqueue_data {
cl_bool mid_event_of_enq; /* For non-uniform ndrange, one enqueue have a sequence event, the
last event need to parse device enqueue information.
0 : last event; 1: non-last event */
+ const char* name; /* enqueue name */
} enqueue_data;
/* Do real enqueue commands */
diff --git a/src/performance.c b/src/performance.c
index 1e676c3..b8cdcc6 100644
--- a/src/performance.c
+++ b/src/performance.c
@@ -325,7 +325,6 @@ void time_start(cl_context context, const char * kernel_name, cl_command_queue c
void time_end(cl_context context, const char * kernel_name, const char * build_opt, cl_command_queue cq)
{
- clFinish(cq);
gettimeofday(&end, NULL);
float t = (end.tv_sec - start.tv_sec)*1000 + (end.tv_usec - start.tv_usec)/1000.0f;
insert(context, kernel_name, build_opt, t);
--
2.7.4
More information about the Beignet mailing list