[Beignet] [PATCH v3 3/4] Add constant pointer as argument support in runtime.
Yang Rong
rong.r.yang at intel.com
Sun Apr 21 22:11:51 PDT 2013
Signed-off-by: Yang Rong <rong.r.yang at intel.com>
---
src/cl_command_queue.c | 22 ++++++++++++++++++++--
src/cl_command_queue.h | 2 ++
src/cl_command_queue_gen7.c | 7 +++++--
src/cl_kernel.c | 16 +++++++++++++++-
src/cl_mem.c | 1 +
src/cl_mem.h | 1 +
6 files changed, 44 insertions(+), 5 deletions(-)
diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c
index a22884f..7d604c3 100644
--- a/src/cl_command_queue.c
+++ b/src/cl_command_queue.c
@@ -1,4 +1,4 @@
-/*
+/*
* Copyright © 2012 Intel Corporation
*
* This library is free software; you can redistribute it and/or
@@ -108,7 +108,6 @@ cl_command_queue_bind_surface(cl_command_queue queue, cl_kernel k)
uint32_t offset; // location of the address in the curbe
arg_type = gbe_kernel_get_arg_type(k->opaque, i);
if (arg_type != GBE_ARG_GLOBAL_PTR &&
- arg_type != GBE_ARG_CONSTANT_PTR &&
arg_type != GBE_ARG_IMAGE &&
arg_type != GBE_ARG_SAMPLER)
continue;
@@ -129,6 +128,25 @@ cl_command_queue_bind_surface(cl_command_queue queue, cl_kernel k)
return CL_SUCCESS;
}
+LOCAL cl_int cl_command_queue_upload_constant_buffer(cl_kernel k,
+ char * dst)
+{
+ int i;
+ for(i = 0; i < k->arg_n; i++) {
+ enum gbe_arg_type arg_type = gbe_kernel_get_arg_type(k->opaque, i);
+
+ if(arg_type == GBE_ARG_CONSTANT_PTR) {
+ uint32_t offset = gbe_kernel_get_curbe_offset(k->opaque, GBE_CURBE_EXTRA_ARGUMENT, i+GBE_CONSTANT_BUFFER);
+ cl_mem mem = k->args[i].mem;
+ cl_buffer_map(mem->bo, 1);
+ void * addr = cl_buffer_get_virtual(mem->bo);
+ memcpy(dst + offset, addr, mem->size);
+ cl_buffer_unmap(mem->bo);
+ }
+ }
+ return CL_SUCCESS;
+}
+
#if USE_FULSIM
extern void drm_intel_bufmgr_gem_stop_aubfile(cl_buffer_mgr);
extern void drm_intel_bufmgr_gem_set_aubfile(cl_buffer_mgr, FILE*);
diff --git a/src/cl_command_queue.h b/src/cl_command_queue.h
index 6387ae1..dcfc8c4 100644
--- a/src/cl_command_queue.h
+++ b/src/cl_command_queue.h
@@ -70,5 +70,7 @@ extern cl_int cl_command_queue_finish(cl_command_queue);
/* Bind all the surfaces in the GPGPU state */
extern cl_int cl_command_queue_bind_surface(cl_command_queue, cl_kernel);
+/* Upload the constant buffers into the final curbe */
+extern cl_int cl_command_queue_upload_constant_buffer(cl_kernel k, char * dst);
#endif /* __CL_COMMAND_QUEUE_H__ */
diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c
index 3a590bc..9402549 100644
--- a/src/cl_command_queue_gen7.c
+++ b/src/cl_command_queue_gen7.c
@@ -186,7 +186,8 @@ cl_command_queue_ND_range_gen7(cl_command_queue queue,
char *final_curbe = NULL; /* Includes them and one sub-buffer per group */
cl_gpgpu_kernel kernel;
const uint32_t simd_sz = cl_kernel_get_simd_width(ker);
- size_t i, batch_sz = 0u, local_sz = 0u, cst_sz = ker->curbe_sz;
+ size_t i, batch_sz = 0u, local_sz = 0u;
+ size_t cst_sz = ker->curbe_sz= gbe_kernel_get_curbe_size(ker->opaque);
size_t thread_n = 0u;
cl_int err = CL_SUCCESS;
@@ -224,8 +225,10 @@ cl_command_queue_ND_range_gen7(cl_command_queue queue,
if (ker->curbe) {
assert(cst_sz > 0);
TRY_ALLOC (final_curbe, (char*) alloca(thread_n * cst_sz));
- for (i = 0; i < thread_n; ++i)
+ for (i = 0; i < thread_n; ++i) {
memcpy(final_curbe + cst_sz * i, ker->curbe, cst_sz);
+ cl_command_queue_upload_constant_buffer(ker, final_curbe + cst_sz * i);
+ }
TRY (cl_set_varying_payload, ker, final_curbe, local_wk_sz, simd_sz, cst_sz, thread_n);
cl_gpgpu_upload_constants(gpgpu, final_curbe, thread_n*cst_sz);
}
diff --git a/src/cl_kernel.c b/src/cl_kernel.c
index bbd4438..ec0e2e8 100644
--- a/src/cl_kernel.c
+++ b/src/cl_kernel.c
@@ -1,4 +1,4 @@
-/*
+/*
* Copyright © 2012 Intel Corporation
*
* This library is free software; you can redistribute it and/or
@@ -154,6 +154,17 @@ cl_kernel_set_arg(cl_kernel k, cl_uint index, size_t sz, const void *value)
if (UNLIKELY((arg_type == GBE_ARG_IMAGE && !mem->is_image)
|| (arg_type != GBE_ARG_IMAGE && mem->is_image)))
return CL_INVALID_ARG_VALUE;
+
+ if(arg_type == GBE_ARG_CONSTANT_PTR) {
+ int32_t cbOffset;
+ cbOffset = gbe_kernel_set_const_buffer_size(k->opaque, index, mem->size);
+ //constant ptr's curbe offset changed, update it
+ if(cbOffset >= 0) {
+ offset = gbe_kernel_get_curbe_offset(k->opaque, GBE_CURBE_KERNEL_ARGUMENT, index);
+ *((uint32_t *)(k->curbe + offset)) = cbOffset; //cb offset in curbe
+ }
+ }
+
cl_mem_add_ref(mem);
if (k->args[index].mem)
cl_mem_delete(k->args[index].mem);
@@ -177,6 +188,9 @@ cl_kernel_setup(cl_kernel k, gbe_kernel opaque)
cl_context ctx = k->program->ctx;
cl_buffer_mgr bufmgr = cl_context_get_bufmgr(ctx);
+ if(k->bo != NULL)
+ cl_buffer_unreference(k->bo);
+
/* Allocate the gen code here */
const uint32_t code_sz = gbe_kernel_get_code_size(opaque);
const char *code = gbe_kernel_get_code(opaque);
diff --git a/src/cl_mem.c b/src/cl_mem.c
index e89aafa..7d070d4 100644
--- a/src/cl_mem.c
+++ b/src/cl_mem.c
@@ -81,6 +81,7 @@ cl_mem_allocate(cl_context ctx,
err = CL_MEM_OBJECT_ALLOCATION_FAILURE;
goto error;
}
+ mem->size = sz;
/* Append the buffer in the context buffer list */
pthread_mutex_lock(&ctx->buffer_lock);
diff --git a/src/cl_mem.h b/src/cl_mem.h
index 8e7a2dd..a0b6164 100644
--- a/src/cl_mem.h
+++ b/src/cl_mem.h
@@ -36,6 +36,7 @@ struct _cl_mem {
uint64_t magic; /* To identify it as a memory object */
volatile int ref_n; /* This object is reference counted */
cl_buffer bo; /* Data in GPU memory */
+ size_t size; /* originally requested size (not the aligned size); used for constant buffers */
cl_mem prev, next; /* We chain the memory buffers together */
cl_context ctx; /* Context it belongs to */
cl_mem_flags flags; /* Flags specified at the creation time */
--
1.7.9.5
More information about the Beignet
mailing list