[Beignet] [PATCH V2 5/6] Add set constant buffer and recompile in run time.
Yang Rong
rong.r.yang at intel.com
Thu Apr 18 00:18:06 PDT 2013
Signed-off-by: Yang Rong <rong.r.yang at intel.com>
---
src/cl_command_queue.c | 21 ++++++++++++++++++++-
src/cl_command_queue.h | 2 ++
src/cl_command_queue_gen7.c | 11 ++++++++++-
src/cl_kernel.c | 11 +++++++++--
src/cl_mem.c | 1 +
src/cl_mem.h | 1 +
6 files changed, 43 insertions(+), 4 deletions(-)
diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c
index 37e78b4..280c928 100644
--- a/src/cl_command_queue.c
+++ b/src/cl_command_queue.c
@@ -1,4 +1,4 @@
-/*
+/*
* Copyright © 2012 Intel Corporation
*
* This library is free software; you can redistribute it and/or
@@ -127,6 +127,25 @@ cl_command_queue_bind_surface(cl_command_queue queue, cl_kernel k)
return CL_SUCCESS;
}
+LOCAL cl_int cl_command_queue_upload_constant_buffer(cl_kernel k,
+ char * dst) /* dst: destination buffer; data is written at dst + per-argument offset */
+{ /* Copy the contents of every constant-pointer argument's buffer into dst. */
+ int i;
+ for(i = 0; i < k->arg_n; i++) { /* walk all kernel argument indices */
+ enum gbe_arg_type arg_type = gbe_kernel_get_arg_type(k->opaque, i);
+
+ if(arg_type == GBE_ARG_CONSTANT_PTR) { /* all other argument types are skipped */
+ uint32_t offset = gbe_kernel_get_curbe_offset(k->opaque, GBE_CURBE_EXTRA_ARGUMENT, i+GBE_CONSTANT_BUFFER); /* offset of this argument's constant data within dst */
+ cl_mem mem = k->args[i].mem; /* NOTE(review): dereferenced below without a NULL check - presumably the argument must already be set; confirm */
+ cl_buffer_map(mem->bo, 1); /* return value is ignored; NOTE(review): meaning of the second argument (1) is not visible here - confirm */
+ void * addr = cl_buffer_get_virtual(mem->bo);
+ memcpy(dst + offset, addr, mem->size); /* copies mem->size bytes; NOTE(review): no bound against the size of dst is checked here */
+ cl_buffer_unmap(mem->bo);
+ }
+ }
+ return CL_SUCCESS; /* always reports success; no failure path exists in this function */
+}
+
#if USE_FULSIM
extern void drm_intel_bufmgr_gem_stop_aubfile(cl_buffer_mgr);
extern void drm_intel_bufmgr_gem_set_aubfile(cl_buffer_mgr, FILE*);
diff --git a/src/cl_command_queue.h b/src/cl_command_queue.h
index 1e2bcc1..7c571da 100644
--- a/src/cl_command_queue.h
+++ b/src/cl_command_queue.h
@@ -69,5 +69,7 @@ extern cl_int cl_command_queue_finish(cl_command_queue);
/* Bind all the surfaces in the GPGPU state */
extern cl_int cl_command_queue_bind_surface(cl_command_queue, cl_kernel);
+/* Upload the constant buffers into the final curbe */
+extern cl_int cl_command_queue_upload_constant_buffer(cl_kernel k, char * dst);
#endif /* __CL_COMMAND_QUEUE_H__ */
diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c
index 3a590bc..0b7b9a9 100644
--- a/src/cl_command_queue_gen7.c
+++ b/src/cl_command_queue_gen7.c
@@ -190,6 +190,13 @@ cl_command_queue_ND_range_gen7(cl_command_queue queue,
size_t thread_n = 0u;
cl_int err = CL_SUCCESS;
+ gbe_kernel_recompile(ker->opaque);
+ ker->curbe_sz= gbe_kernel_get_curbe_size(ker->opaque);
+ if(cst_sz != ker->curbe_sz) {
+ cl_kernel_setup(ker, ker->opaque);
+ cst_sz = ker->curbe_sz;
+ }
+
/* Setup kernel */
kernel.name = "KERNEL";
kernel.grf_blocks = 128;
@@ -224,8 +231,10 @@ cl_command_queue_ND_range_gen7(cl_command_queue queue,
if (ker->curbe) {
assert(cst_sz > 0);
TRY_ALLOC (final_curbe, (char*) alloca(thread_n * cst_sz));
- for (i = 0; i < thread_n; ++i)
+ for (i = 0; i < thread_n; ++i) {
memcpy(final_curbe + cst_sz * i, ker->curbe, cst_sz);
+ cl_command_queue_upload_constant_buffer(ker, final_curbe + cst_sz * i);
+ }
TRY (cl_set_varying_payload, ker, final_curbe, local_wk_sz, simd_sz, cst_sz, thread_n);
cl_gpgpu_upload_constants(gpgpu, final_curbe, thread_n*cst_sz);
}
diff --git a/src/cl_kernel.c b/src/cl_kernel.c
index 356a8a7..7777f4d 100644
--- a/src/cl_kernel.c
+++ b/src/cl_kernel.c
@@ -1,4 +1,4 @@
-/*
+/*
* Copyright © 2012 Intel Corporation
*
* This library is free software; you can redistribute it and/or
@@ -109,7 +109,7 @@ cl_kernel_set_arg(cl_kernel k, cl_uint index, size_t sz, const void *value)
if (UNLIKELY(value == NULL))
return CL_INVALID_KERNEL_ARGS;
offset = gbe_kernel_get_curbe_offset(k->opaque, GBE_CURBE_KERNEL_ARGUMENT, index);
- assert(offset + sz <= k->curbe_sz);
+ //assert(offset + sz <= k->curbe_sz);
memcpy(k->curbe + offset, value, sz);
k->args[index].local_sz = 0;
k->args[index].is_set = 1;
@@ -152,6 +152,10 @@ cl_kernel_set_arg(cl_kernel k, cl_uint index, size_t sz, const void *value)
if (UNLIKELY((arg_type == GBE_ARG_IMAGE && !mem->is_image)
|| (arg_type != GBE_ARG_IMAGE && mem->is_image)))
return CL_INVALID_ARG_VALUE;
+
+ if(arg_type == GBE_ARG_CONSTANT_PTR) {
+ gbe_kernel_set_const_buffer_size(k->opaque, index, mem->size);
+ }
cl_mem_add_ref(mem);
if (k->args[index].mem)
cl_mem_delete(k->args[index].mem);
@@ -175,6 +179,9 @@ cl_kernel_setup(cl_kernel k, gbe_kernel opaque)
cl_context ctx = k->program->ctx;
cl_buffer_mgr bufmgr = cl_context_get_bufmgr(ctx);
+ if(k->bo != NULL)
+ cl_buffer_unreference(k->bo);
+
/* Allocate the gen code here */
const uint32_t code_sz = gbe_kernel_get_code_size(opaque);
const char *code = gbe_kernel_get_code(opaque);
diff --git a/src/cl_mem.c b/src/cl_mem.c
index 3a8cfdd..e6961d1 100644
--- a/src/cl_mem.c
+++ b/src/cl_mem.c
@@ -79,6 +79,7 @@ cl_mem_allocate(cl_context ctx,
err = CL_MEM_OBJECT_ALLOCATION_FAILURE;
goto error;
}
+ mem->size = sz;
/* Append the buffer in the context buffer list */
pthread_mutex_lock(&ctx->buffer_lock);
diff --git a/src/cl_mem.h b/src/cl_mem.h
index 6992454..db391ee 100644
--- a/src/cl_mem.h
+++ b/src/cl_mem.h
@@ -35,6 +35,7 @@ struct _cl_mem {
uint64_t magic; /* To identify it as a memory object */
volatile int ref_n; /* This object is reference counted */
cl_buffer bo; /* Data in GPU memory */
+ size_t size; /* original request size, not alignment size, used in constant buffer */
cl_mem prev, next; /* We chain the memory buffers together */
cl_context ctx; /* Context it belongs to */
cl_mem_flags flags; /* Flags specified at the creation time */
--
1.7.9.5
More information about the Beignet
mailing list