[Beignet] [PATCH V2 5/6] Add setting of the constant buffer and recompilation at run time.

Yang Rong rong.r.yang at intel.com
Thu Apr 18 00:18:06 PDT 2013


Signed-off-by: Yang Rong <rong.r.yang at intel.com>
---
 src/cl_command_queue.c      |   21 ++++++++++++++++++++-
 src/cl_command_queue.h      |    2 ++
 src/cl_command_queue_gen7.c |   11 ++++++++++-
 src/cl_kernel.c             |   11 +++++++++--
 src/cl_mem.c                |    1 +
 src/cl_mem.h                |    1 +
 6 files changed, 43 insertions(+), 4 deletions(-)

diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c
index 37e78b4..280c928 100644
--- a/src/cl_command_queue.c
+++ b/src/cl_command_queue.c
@@ -1,4 +1,4 @@
-/* 
+/*
  * Copyright © 2012 Intel Corporation
  *
  * This library is free software; you can redistribute it and/or
@@ -127,6 +127,25 @@ cl_command_queue_bind_surface(cl_command_queue queue, cl_kernel k)
   return CL_SUCCESS;
 }
 
+LOCAL cl_int cl_command_queue_upload_constant_buffer(cl_kernel k,
+                                                       char * dst)
+{ /* For every GBE_ARG_CONSTANT_PTR argument of kernel k, copy that argument's buffer contents into dst. */
+  int i;
+  for(i = 0; i < k->arg_n; i++) { /* walk all k->arg_n kernel arguments */
+    enum gbe_arg_type arg_type = gbe_kernel_get_arg_type(k->opaque, i);
+
+    if(arg_type == GBE_ARG_CONSTANT_PTR) { /* every other argument type is skipped */
+      uint32_t offset = gbe_kernel_get_curbe_offset(k->opaque, GBE_CURBE_EXTRA_ARGUMENT, i+GBE_CONSTANT_BUFFER); /* used below as a byte offset into dst */
+      cl_mem mem = k->args[i].mem; /* NOTE(review): not checked for NULL here; presumably the argument was set beforehand - confirm */
+      cl_buffer_map(mem->bo, 1); /* NOTE(review): return value is ignored; a failed map would go unnoticed */
+      void * addr = cl_buffer_get_virtual(mem->bo); /* CPU-visible address of the mapped buffer */
+      memcpy(dst + offset, addr, mem->size); /* copies mem->size bytes; NOTE(review): nothing here checks that offset + mem->size fits in dst */
+      cl_buffer_unmap(mem->bo);
+    }
+  }
+  return CL_SUCCESS; /* always CL_SUCCESS: this function reports no failure path */
+}
+
 #if USE_FULSIM
 extern void drm_intel_bufmgr_gem_stop_aubfile(cl_buffer_mgr);
 extern void drm_intel_bufmgr_gem_set_aubfile(cl_buffer_mgr, FILE*);
diff --git a/src/cl_command_queue.h b/src/cl_command_queue.h
index 1e2bcc1..7c571da 100644
--- a/src/cl_command_queue.h
+++ b/src/cl_command_queue.h
@@ -69,5 +69,7 @@ extern cl_int cl_command_queue_finish(cl_command_queue);
 /* Bind all the surfaces in the GPGPU state */
 extern cl_int cl_command_queue_bind_surface(cl_command_queue, cl_kernel);
 
+/* Copy the contents of the kernel's constant-buffer arguments into the final curbe at dst */
+extern cl_int cl_command_queue_upload_constant_buffer(cl_kernel k, char * dst);
 #endif /* __CL_COMMAND_QUEUE_H__ */
 
diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c
index 3a590bc..0b7b9a9 100644
--- a/src/cl_command_queue_gen7.c
+++ b/src/cl_command_queue_gen7.c
@@ -190,6 +190,13 @@ cl_command_queue_ND_range_gen7(cl_command_queue queue,
   size_t thread_n = 0u;
   cl_int err = CL_SUCCESS;
 
+  gbe_kernel_recompile(ker->opaque);
+  ker->curbe_sz= gbe_kernel_get_curbe_size(ker->opaque);
+  if(cst_sz != ker->curbe_sz) {
+    cl_kernel_setup(ker, ker->opaque);
+    cst_sz = ker->curbe_sz;
+  }
+
   /* Setup kernel */
   kernel.name = "KERNEL";
   kernel.grf_blocks = 128;
@@ -224,8 +231,10 @@ cl_command_queue_ND_range_gen7(cl_command_queue queue,
   if (ker->curbe) {
     assert(cst_sz > 0);
     TRY_ALLOC (final_curbe, (char*) alloca(thread_n * cst_sz));
-      for (i = 0; i < thread_n; ++i)
+    for (i = 0; i < thread_n; ++i) {
         memcpy(final_curbe + cst_sz * i, ker->curbe, cst_sz);
+        cl_command_queue_upload_constant_buffer(ker, final_curbe + cst_sz * i);
+    }
     TRY (cl_set_varying_payload, ker, final_curbe, local_wk_sz, simd_sz, cst_sz, thread_n);
     cl_gpgpu_upload_constants(gpgpu, final_curbe, thread_n*cst_sz);
   }
diff --git a/src/cl_kernel.c b/src/cl_kernel.c
index 356a8a7..7777f4d 100644
--- a/src/cl_kernel.c
+++ b/src/cl_kernel.c
@@ -1,4 +1,4 @@
-/* 
+/*
  * Copyright © 2012 Intel Corporation
  *
  * This library is free software; you can redistribute it and/or
@@ -109,7 +109,7 @@ cl_kernel_set_arg(cl_kernel k, cl_uint index, size_t sz, const void *value)
     if (UNLIKELY(value == NULL))
       return CL_INVALID_KERNEL_ARGS;
     offset = gbe_kernel_get_curbe_offset(k->opaque, GBE_CURBE_KERNEL_ARGUMENT, index);
-    assert(offset + sz <= k->curbe_sz);
+    //assert(offset + sz <= k->curbe_sz);
     memcpy(k->curbe + offset, value, sz);
     k->args[index].local_sz = 0;
     k->args[index].is_set = 1;
@@ -152,6 +152,10 @@ cl_kernel_set_arg(cl_kernel k, cl_uint index, size_t sz, const void *value)
   if (UNLIKELY((arg_type == GBE_ARG_IMAGE && !mem->is_image)
      || (arg_type != GBE_ARG_IMAGE && mem->is_image)))
       return CL_INVALID_ARG_VALUE;
+
+  if(arg_type == GBE_ARG_CONSTANT_PTR) {
+    gbe_kernel_set_const_buffer_size(k->opaque, index, mem->size);
+  }
   cl_mem_add_ref(mem);
   if (k->args[index].mem)
     cl_mem_delete(k->args[index].mem);
@@ -175,6 +179,9 @@ cl_kernel_setup(cl_kernel k, gbe_kernel opaque)
   cl_context ctx = k->program->ctx;
   cl_buffer_mgr bufmgr = cl_context_get_bufmgr(ctx);
 
+  if(k->bo != NULL)
+    cl_buffer_unreference(k->bo);
+
   /* Allocate the gen code here */
   const uint32_t code_sz = gbe_kernel_get_code_size(opaque);
   const char *code = gbe_kernel_get_code(opaque);
diff --git a/src/cl_mem.c b/src/cl_mem.c
index 3a8cfdd..e6961d1 100644
--- a/src/cl_mem.c
+++ b/src/cl_mem.c
@@ -79,6 +79,7 @@ cl_mem_allocate(cl_context ctx,
     err = CL_MEM_OBJECT_ALLOCATION_FAILURE;
     goto error;
   }
+  mem->size = sz;
 
   /* Append the buffer in the context buffer list */
   pthread_mutex_lock(&ctx->buffer_lock);
diff --git a/src/cl_mem.h b/src/cl_mem.h
index 6992454..db391ee 100644
--- a/src/cl_mem.h
+++ b/src/cl_mem.h
@@ -35,6 +35,7 @@ struct _cl_mem {
   uint64_t magic;           /* To identify it as a memory object */
   volatile int ref_n;       /* This object is reference counted */
   cl_buffer bo;             /* Data in GPU memory */
+  size_t size;              /* original request size, not alignment size, used in constant buffer */
   cl_mem prev, next;        /* We chain the memory buffers together */
   cl_context ctx;           /* Context it belongs to */
   cl_mem_flags flags;       /* Flags specified at the creation time */
-- 
1.7.9.5



More information about the Beignet mailing list