[Beignet] [PATCH v3 3/4] Add constant pointer as argument support in runtime.

Yang Rong rong.r.yang at intel.com
Sun Apr 21 22:11:51 PDT 2013


Signed-off-by: Yang Rong <rong.r.yang at intel.com>
---
 src/cl_command_queue.c      |   22 ++++++++++++++++++++--
 src/cl_command_queue.h      |    2 ++
 src/cl_command_queue_gen7.c |    7 +++++--
 src/cl_kernel.c             |   16 +++++++++++++++-
 src/cl_mem.c                |    1 +
 src/cl_mem.h                |    1 +
 6 files changed, 44 insertions(+), 5 deletions(-)

diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c
index a22884f..7d604c3 100644
--- a/src/cl_command_queue.c
+++ b/src/cl_command_queue.c
@@ -1,4 +1,4 @@
-/* 
+/*
  * Copyright © 2012 Intel Corporation
  *
  * This library is free software; you can redistribute it and/or
@@ -108,7 +108,6 @@ cl_command_queue_bind_surface(cl_command_queue queue, cl_kernel k)
     uint32_t offset; // location of the address in the curbe
     arg_type = gbe_kernel_get_arg_type(k->opaque, i);
     if (arg_type != GBE_ARG_GLOBAL_PTR &&
-        arg_type != GBE_ARG_CONSTANT_PTR &&
         arg_type != GBE_ARG_IMAGE &&
         arg_type != GBE_ARG_SAMPLER)
       continue;
@@ -129,6 +128,25 @@ cl_command_queue_bind_surface(cl_command_queue queue, cl_kernel k)
   return CL_SUCCESS;
 }
 
+LOCAL cl_int cl_command_queue_upload_constant_buffer(cl_kernel k,  /* kernel whose arguments are scanned */
+                                                       char * dst)  /* destination; each constant buffer is copied to dst + its curbe offset */
+{
+  int i;
+  for(i = 0; i < k->arg_n; i++) {  /* visit every kernel argument */
+    enum gbe_arg_type arg_type = gbe_kernel_get_arg_type(k->opaque, i);
+
+    if(arg_type == GBE_ARG_CONSTANT_PTR) {  /* only constant-pointer arguments are copied */
+      uint32_t offset = gbe_kernel_get_curbe_offset(k->opaque, GBE_CURBE_EXTRA_ARGUMENT, i+GBE_CONSTANT_BUFFER);  /* byte offset of this argument's data within dst */
+      cl_mem mem = k->args[i].mem;  /* NOTE(review): dereferenced below without a NULL check - confirm the argument is always set by this point */
+      cl_buffer_map(mem->bo, 1);  /* NOTE(review): return value is ignored; the meaning of the flag 1 is not visible here - confirm */
+      void * addr = cl_buffer_get_virtual(mem->bo);
+      memcpy(dst + offset, addr, mem->size);  /* copies mem->size bytes of the buffer's contents */
+      cl_buffer_unmap(mem->bo);
+    }
+  }
+  return CL_SUCCESS;  /* returned unconditionally; no failure is reported from this function */
+}
+
 #if USE_FULSIM
 extern void drm_intel_bufmgr_gem_stop_aubfile(cl_buffer_mgr);
 extern void drm_intel_bufmgr_gem_set_aubfile(cl_buffer_mgr, FILE*);
diff --git a/src/cl_command_queue.h b/src/cl_command_queue.h
index 6387ae1..dcfc8c4 100644
--- a/src/cl_command_queue.h
+++ b/src/cl_command_queue.h
@@ -70,5 +70,7 @@ extern cl_int cl_command_queue_finish(cl_command_queue);
 /* Bind all the surfaces in the GPGPU state */
 extern cl_int cl_command_queue_bind_surface(cl_command_queue, cl_kernel);
 
+/* Copy the contents of each constant-pointer argument's buffer into the curbe at dst */
+extern cl_int cl_command_queue_upload_constant_buffer(cl_kernel k, char * dst);
 #endif /* __CL_COMMAND_QUEUE_H__ */
 
diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c
index 3a590bc..9402549 100644
--- a/src/cl_command_queue_gen7.c
+++ b/src/cl_command_queue_gen7.c
@@ -186,7 +186,8 @@ cl_command_queue_ND_range_gen7(cl_command_queue queue,
   char *final_curbe = NULL;  /* Includes them and one sub-buffer per group */
   cl_gpgpu_kernel kernel;
   const uint32_t simd_sz = cl_kernel_get_simd_width(ker);
-  size_t i, batch_sz = 0u, local_sz = 0u, cst_sz = ker->curbe_sz;
+  size_t i, batch_sz = 0u, local_sz = 0u;
+  size_t cst_sz = ker->curbe_sz= gbe_kernel_get_curbe_size(ker->opaque);
   size_t thread_n = 0u;
   cl_int err = CL_SUCCESS;
 
@@ -224,8 +225,10 @@ cl_command_queue_ND_range_gen7(cl_command_queue queue,
   if (ker->curbe) {
     assert(cst_sz > 0);
     TRY_ALLOC (final_curbe, (char*) alloca(thread_n * cst_sz));
-      for (i = 0; i < thread_n; ++i)
+    for (i = 0; i < thread_n; ++i) {
         memcpy(final_curbe + cst_sz * i, ker->curbe, cst_sz);
+        cl_command_queue_upload_constant_buffer(ker, final_curbe + cst_sz * i);
+    }
     TRY (cl_set_varying_payload, ker, final_curbe, local_wk_sz, simd_sz, cst_sz, thread_n);
     cl_gpgpu_upload_constants(gpgpu, final_curbe, thread_n*cst_sz);
   }
diff --git a/src/cl_kernel.c b/src/cl_kernel.c
index bbd4438..ec0e2e8 100644
--- a/src/cl_kernel.c
+++ b/src/cl_kernel.c
@@ -1,4 +1,4 @@
-/* 
+/*
  * Copyright © 2012 Intel Corporation
  *
  * This library is free software; you can redistribute it and/or
@@ -154,6 +154,17 @@ cl_kernel_set_arg(cl_kernel k, cl_uint index, size_t sz, const void *value)
   if (UNLIKELY((arg_type == GBE_ARG_IMAGE && !mem->is_image)
      || (arg_type != GBE_ARG_IMAGE && mem->is_image)))
       return CL_INVALID_ARG_VALUE;
+
+  if(arg_type == GBE_ARG_CONSTANT_PTR) {
+    int32_t cbOffset;
+    cbOffset = gbe_kernel_set_const_buffer_size(k->opaque, index, mem->size);
+    //constant ptr's curbe offset changed, update it
+    if(cbOffset >= 0) {
+      offset = gbe_kernel_get_curbe_offset(k->opaque, GBE_CURBE_KERNEL_ARGUMENT, index);
+      *((uint32_t *)(k->curbe + offset)) = cbOffset;  //cb offset in curbe
+    }
+  }
+
   cl_mem_add_ref(mem);
   if (k->args[index].mem)
     cl_mem_delete(k->args[index].mem);
@@ -177,6 +188,9 @@ cl_kernel_setup(cl_kernel k, gbe_kernel opaque)
   cl_context ctx = k->program->ctx;
   cl_buffer_mgr bufmgr = cl_context_get_bufmgr(ctx);
 
+  if(k->bo != NULL)
+    cl_buffer_unreference(k->bo);
+
   /* Allocate the gen code here */
   const uint32_t code_sz = gbe_kernel_get_code_size(opaque);
   const char *code = gbe_kernel_get_code(opaque);
diff --git a/src/cl_mem.c b/src/cl_mem.c
index e89aafa..7d070d4 100644
--- a/src/cl_mem.c
+++ b/src/cl_mem.c
@@ -81,6 +81,7 @@ cl_mem_allocate(cl_context ctx,
     err = CL_MEM_OBJECT_ALLOCATION_FAILURE;
     goto error;
   }
+  mem->size = sz;
 
   /* Append the buffer in the context buffer list */
   pthread_mutex_lock(&ctx->buffer_lock);
diff --git a/src/cl_mem.h b/src/cl_mem.h
index 8e7a2dd..a0b6164 100644
--- a/src/cl_mem.h
+++ b/src/cl_mem.h
@@ -36,6 +36,7 @@ struct _cl_mem {
   uint64_t magic;           /* To identify it as a memory object */
   volatile int ref_n;       /* This object is reference counted */
   cl_buffer bo;             /* Data in GPU memory */
+  size_t size;              /* Originally requested size, not the aligned size; used for constant buffers */
   cl_mem prev, next;        /* We chain the memory buffers together */
   cl_context ctx;           /* Context it belongs to */
   cl_mem_flags flags;       /* Flags specified at the creation time */
-- 
1.7.9.5



More information about the Beignet mailing list