[Beignet] [Printf][PATCH 09/11] Implement printf buffer management.

Yan Wang yan.wang at linux.intel.com
Wed Jan 20 19:31:04 PST 2016


Contributor: Junyan He <junyan.he at linux.intel.com>
Signed-off-by: Yan Wang <yan.wang at linux.intel.com>
---
 backend/src/backend/program.cpp | 10 +----
 backend/src/backend/program.h   | 12 +-----
 backend/src/backend/program.hpp |  7 ----
 backend/src/ir/printf.cpp       |  3 +-
 backend/src/ir/printf.hpp       |  3 +-
 backend/src/ir/profile.cpp      |  3 --
 backend/src/ir/profile.hpp      | 20 +++++-----
 src/cl_command_queue.c          | 21 +++--------
 src/cl_command_queue_gen7.c     | 34 +++++------------
 src/cl_driver.h                 | 12 +++---
 src/cl_gbe_loader.cpp           | 10 -----
 src/cl_gbe_loader.h             |  2 -
 src/intel/intel_gpgpu.c         | 82 ++++++++++-------------------------------
 src/intel/intel_gpgpu.h         |  4 +-
 14 files changed, 56 insertions(+), 167 deletions(-)

diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp
index 4eca9f1..b3c3229 100644
--- a/backend/src/backend/program.cpp
+++ b/backend/src/backend/program.cpp
@@ -1271,15 +1271,11 @@ namespace gbe {
     delete ps;
   }
 
-  static void kernelOutputPrintf(void * printf_info, void* index_addr,
-                                 void* buf_addr, size_t global_wk_sz0,
-                                 size_t global_wk_sz1, size_t global_wk_sz2,
-                                 size_t output_sz)
+  static void kernelOutputPrintf(void * printf_info, void* buf_addr)
   {
     if (printf_info == NULL) return;
     ir::PrintfSet *ps = (ir::PrintfSet *)printf_info;
-    ps->outputPrintf(index_addr, buf_addr, global_wk_sz0,
-                         global_wk_sz1, global_wk_sz2, output_sz);
+    ps->outputPrintf(buf_addr);
   }
 
   static void kernelGetCompileWorkGroupSize(gbe_kernel gbeKernel, size_t wg_size[3]) {
@@ -1363,9 +1359,7 @@ GBE_EXPORT_SYMBOL gbe_get_profiling_bti_cb *gbe_get_profiling_bti = NULL;
 GBE_EXPORT_SYMBOL gbe_get_printf_num_cb *gbe_get_printf_num = NULL;
 GBE_EXPORT_SYMBOL gbe_dup_printfset_cb *gbe_dup_printfset = NULL;
 GBE_EXPORT_SYMBOL gbe_get_printf_buf_bti_cb *gbe_get_printf_buf_bti = NULL;
-GBE_EXPORT_SYMBOL gbe_get_printf_indexbuf_bti_cb *gbe_get_printf_indexbuf_bti = NULL;
 GBE_EXPORT_SYMBOL gbe_release_printf_info_cb *gbe_release_printf_info = NULL;
-GBE_EXPORT_SYMBOL gbe_get_printf_sizeof_size_cb *gbe_get_printf_sizeof_size = NULL;
 GBE_EXPORT_SYMBOL gbe_output_printf_cb *gbe_output_printf = NULL;
 
 #ifdef GBE_COMPILER_AVAILABLE
diff --git a/backend/src/backend/program.h b/backend/src/backend/program.h
index 45805f9..db770a6 100644
--- a/backend/src/backend/program.h
+++ b/backend/src/backend/program.h
@@ -97,8 +97,6 @@ enum gbe_curbe_type {
   GBE_CURBE_GROUP_NUM_Z,
   GBE_CURBE_WORK_DIM,
   GBE_CURBE_IMAGE_INFO,
-  GBE_CURBE_PRINTF_BUF_POINTER,
-  GBE_CURBE_PRINTF_INDEX_POINTER,
   GBE_CURBE_KERNEL_ARGUMENT,
   GBE_CURBE_EXTRA_ARGUMENT,
   GBE_CURBE_BLOCK_IP,
@@ -163,9 +161,6 @@ extern gbe_get_printf_num_cb *gbe_get_printf_num;
 typedef uint8_t (gbe_get_printf_buf_bti_cb)(void* printf_info);
 extern gbe_get_printf_buf_bti_cb *gbe_get_printf_buf_bti;
 
-typedef uint8_t (gbe_get_printf_indexbuf_bti_cb)(void* printf_info);
-extern gbe_get_printf_indexbuf_bti_cb *gbe_get_printf_indexbuf_bti;
-
 /*! Release the printfset */
 typedef void (gbe_release_printf_info_cb)(void* printf_info);
 extern gbe_release_printf_info_cb *gbe_release_printf_info;
@@ -174,12 +169,7 @@ extern gbe_release_printf_info_cb *gbe_release_printf_info;
 typedef void* (gbe_dup_printfset_cb)(gbe_kernel gbeKernel);
 extern gbe_dup_printfset_cb *gbe_dup_printfset;
 
-/*! Get the printf buffer const offset */
-typedef uint32_t (gbe_get_printf_sizeof_size_cb)(void* printf_info);
-extern gbe_get_printf_sizeof_size_cb *gbe_get_printf_sizeof_size;
-
-typedef void (gbe_output_printf_cb) (void* printf_info, void* index_addr, void* buf_addr,
-              size_t global_wk_sz0, size_t global_wk_sz1, size_t global_wk_sz2, size_t outbuf_sz);
+typedef void (gbe_output_printf_cb) (void* printf_info, void* buf_addr);
 extern gbe_output_printf_cb* gbe_output_printf;
 
 /*! Create a new program from the given source code (zero terminated string) */
diff --git a/backend/src/backend/program.hpp b/backend/src/backend/program.hpp
index e5c4b95..9570806 100644
--- a/backend/src/backend/program.hpp
+++ b/backend/src/backend/program.hpp
@@ -166,13 +166,6 @@ namespace gbe {
       return printfSet->getBufBTI();
     }
 
-    void outputPrintf(void* index_addr, void* buf_addr, size_t global_wk_sz0,
-                      size_t global_wk_sz1, size_t global_wk_sz2, size_t output_sz) {
-      if(printfSet)
-        printfSet->outputPrintf(index_addr, buf_addr, global_wk_sz0,
-                                global_wk_sz1, global_wk_sz2, output_sz);
-    }
-
     uint32_t getProfilingBufBTI() const {
       GBE_ASSERT(profilingInfo);
       return profilingInfo->getBTI();
diff --git a/backend/src/ir/printf.cpp b/backend/src/ir/printf.cpp
index 3873ca9..19daa19 100644
--- a/backend/src/ir/printf.cpp
+++ b/backend/src/ir/printf.cpp
@@ -99,8 +99,7 @@ namespace gbe
     } while (0)
 
 
-    void PrintfSet::outputPrintf(void* index_addr, void* buf_addr, size_t global_wk_sz0,
-                                 size_t global_wk_sz1, size_t global_wk_sz2, size_t output_sz)
+    void PrintfSet::outputPrintf(void* index_addr)
     {
       LockOutput lock;
     }
diff --git a/backend/src/ir/printf.hpp b/backend/src/ir/printf.hpp
index 6b2b741..fc36283 100644
--- a/backend/src/ir/printf.hpp
+++ b/backend/src/ir/printf.hpp
@@ -243,8 +243,7 @@ namespace gbe
         return 0;
       }
 
-      void outputPrintf(void* index_addr, void* buf_addr, size_t global_wk_sz0,
-                        size_t global_wk_sz1, size_t global_wk_sz2, size_t output_sz);
+      void outputPrintf(void* index_addr);
 
     private:
       std::map<uint32_t, PrintfFmt> fmts;
diff --git a/backend/src/ir/profile.cpp b/backend/src/ir/profile.cpp
index 4e0fc08..b16319a 100644
--- a/backend/src/ir/profile.cpp
+++ b/backend/src/ir/profile.cpp
@@ -42,7 +42,6 @@ namespace ir {
         "barrier_id", "thread_number", "work_dimension",
         "zero", "one",
         "retVal",
-        "printf_buffer_pointer", "printf_index_buffer_pointer",
         "dwblockip",
         "profiling_buffer_pointer",
         "profiling_timestamps0", "profiling_timestamps1",
@@ -88,8 +87,6 @@ namespace ir {
       DECL_NEW_REG(FAMILY_DWORD, zero, 1);
       DECL_NEW_REG(FAMILY_DWORD, one, 1);
       DECL_NEW_REG(FAMILY_WORD, retVal, 1);
-      DECL_NEW_REG(FAMILY_QWORD, printfbptr, 1, GBE_CURBE_PRINTF_BUF_POINTER);
-      DECL_NEW_REG(FAMILY_QWORD, printfiptr, 1, GBE_CURBE_PRINTF_INDEX_POINTER);
       DECL_NEW_REG(FAMILY_DWORD, dwblockip, 0, GBE_CURBE_DW_BLOCK_IP);
       DECL_NEW_REG(FAMILY_QWORD, profilingbptr, 1, GBE_CURBE_PROFILING_BUF_POINTER);
       DECL_NEW_REG(FAMILY_DWORD, profilingts0, 0, GBE_CURBE_PROFILING_TIMESTAMP0);
diff --git a/backend/src/ir/profile.hpp b/backend/src/ir/profile.hpp
index cc34720..eab7892 100644
--- a/backend/src/ir/profile.hpp
+++ b/backend/src/ir/profile.hpp
@@ -68,17 +68,15 @@ namespace ir {
     static const Register zero = Register(24);     //  scalar register holds zero.
     static const Register one = Register(25);     //  scalar register holds one. 
     static const Register retVal = Register(26);   // helper register to do data flow analysis.
-    static const Register printfbptr = Register(27); // printf buffer address .
-    static const Register printfiptr = Register(28); // printf index buffer address.
-    static const Register dwblockip = Register(29);  // blockip
-    static const Register profilingbptr = Register(30); // buffer addr for profiling.
-    static const Register profilingts0 = Register(31); // timestamp for profiling.
-    static const Register profilingts1 = Register(32); // timestamp for profiling.
-    static const Register profilingts2 = Register(33); // timestamp for profiling.
-    static const Register profilingts3 = Register(34); // timestamp for profiling.
-    static const Register profilingts4 = Register(35); // timestamp for profiling.
-    static const Register threadid = Register(36); // the thread id of this thread.
-    static const uint32_t regNum = 37;             // number of special registers
+    static const Register dwblockip = Register(27);  // blockip
+    static const Register profilingbptr = Register(28); // buffer addr for profiling.
+    static const Register profilingts0 = Register(29); // timestamp for profiling.
+    static const Register profilingts1 = Register(30); // timestamp for profiling.
+    static const Register profilingts2 = Register(31); // timestamp for profiling.
+    static const Register profilingts3 = Register(32); // timestamp for profiling.
+    static const Register profilingts4 = Register(33); // timestamp for profiling.
+    static const Register threadid = Register(34); // the thread id of this thread.
+    static const uint32_t regNum = 35;             // number of special registers
     extern const char *specialRegMean[];           // special register name.
   } /* namespace ocl */
 
diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c
index d1acbbd..7a432a0 100644
--- a/src/cl_command_queue.c
+++ b/src/cl_command_queue.c
@@ -233,32 +233,21 @@ error:
 LOCAL int
 cl_command_queue_flush_gpgpu(cl_command_queue queue, cl_gpgpu gpgpu)
 {
-  size_t global_wk_sz[3];
-  size_t outbuf_sz = 0;
-  void* printf_info = cl_gpgpu_get_printf_info(gpgpu, global_wk_sz, &outbuf_sz);
+  void* printf_info = cl_gpgpu_get_printf_info(gpgpu);
   void* profiling_info;
 
   if (cl_gpgpu_flush(gpgpu) < 0)
     return CL_OUT_OF_RESOURCES;
 
   if (printf_info && interp_get_printf_num(printf_info)) {
-    void *index_addr = cl_gpgpu_map_printf_buffer(gpgpu, 0);
-    void *buf_addr = NULL;
-    if (interp_get_printf_sizeof_size(printf_info))
-      buf_addr = cl_gpgpu_map_printf_buffer(gpgpu, 1);
-
-    interp_output_printf(printf_info, index_addr, buf_addr, global_wk_sz[0],
-                      global_wk_sz[1], global_wk_sz[2], outbuf_sz);
-
-    cl_gpgpu_unmap_printf_buffer(gpgpu, 0);
-    if (interp_get_printf_sizeof_size(printf_info))
-      cl_gpgpu_unmap_printf_buffer(gpgpu, 1);
+    void *addr = cl_gpgpu_map_printf_buffer(gpgpu);
+    interp_output_printf(printf_info, addr);
+    cl_gpgpu_unmap_printf_buffer(gpgpu);
   }
 
   if (printf_info) {
     interp_release_printf_info(printf_info);
-    global_wk_sz[0] = global_wk_sz[1] = global_wk_sz[2] = 0;
-    cl_gpgpu_set_printf_info(gpgpu, NULL, global_wk_sz);
+    cl_gpgpu_set_printf_info(gpgpu, NULL);
   }
 
   /* If have profiling info, output it. */
diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c
index 791a7ca..6c0ae16 100644
--- a/src/cl_command_queue_gen7.c
+++ b/src/cl_command_queue_gen7.c
@@ -309,31 +309,17 @@ cl_bind_profiling(cl_gpgpu gpgpu, uint32_t simd_sz, cl_kernel ker, size_t global
   return 0;
 }
 
+
 static int
-cl_bind_printf(cl_gpgpu gpgpu, cl_kernel ker, void* printf_info, int printf_num, size_t global_sz) {
-  int32_t value = GBE_CURBE_PRINTF_INDEX_POINTER;
-  int32_t offset = interp_kernel_get_curbe_offset(ker->opaque, value, 0);
-  size_t buf_size = global_sz * sizeof(int) * printf_num;
-  if (offset > 0) {
-    if (cl_gpgpu_set_printf_buffer(gpgpu, 0, buf_size*2, offset, interp_get_printf_indexbuf_bti(printf_info)) != 0)
-      return -1;
-  }
+cl_alloc_printf(cl_gpgpu gpgpu, cl_kernel ker, void* printf_info, int printf_num, size_t global_sz) {
+  /* A guessed size. */
+  size_t buf_size = global_sz * sizeof(int) * 16 * printf_num;
+  if (buf_size > 16*1024*1024) //at most.
+    buf_size = 16*1024*1024;
 
-  value = GBE_CURBE_PRINTF_BUF_POINTER;
-  offset = interp_kernel_get_curbe_offset(ker->opaque, value, 0);
-  buf_size = interp_get_printf_sizeof_size(printf_info) * global_sz;
-  /* because of the printf may exist in a loop, which loop number can not be gotten by
-     static analysis. So we set the data buffer as big as we can. Out of bound printf
-     info will be discarded. */
-  if (buf_size < 1*1024)
-    buf_size = 1*1024*1024;
-  else
-    buf_size = 16*1024*1024; //at most.
+  if (cl_gpgpu_set_printf_buffer(gpgpu, buf_size, interp_get_printf_buf_bti(printf_info)) != 0)
+	return -1;
 
-  if (offset > 0) {
-    if (cl_gpgpu_set_printf_buffer(gpgpu, 1, buf_size, offset, interp_get_printf_buf_bti(printf_info)) != 0)
-      return -1;
-  }
   return 0;
 }
 
@@ -389,7 +375,7 @@ cl_command_queue_ND_range_gen7(cl_command_queue queue,
   }
 
   printf_info = interp_dup_printfset(ker->opaque);
-  cl_gpgpu_set_printf_info(gpgpu, printf_info, (size_t *)global_wk_sz);
+  cl_gpgpu_set_printf_info(gpgpu, printf_info);
 
   /* Setup the kernel */
   if (queue->props & CL_QUEUE_PROFILING_ENABLE)
@@ -400,7 +386,7 @@ cl_command_queue_ND_range_gen7(cl_command_queue queue,
     goto error;
   printf_num = interp_get_printf_num(printf_info);
   if (printf_num) {
-    if (cl_bind_printf(gpgpu, ker, printf_info, printf_num, global_size) != 0)
+    if (cl_alloc_printf(gpgpu, ker, printf_info, printf_num, global_size) != 0)
       goto error;
   }
   if (interp_get_profiling_bti(ker->opaque) != 0) {
diff --git a/src/cl_driver.h b/src/cl_driver.h
index 7081bea..16730db 100644
--- a/src/cl_driver.h
+++ b/src/cl_driver.h
@@ -290,7 +290,7 @@ typedef void (cl_gpgpu_unmap_profiling_buffer_cb)(cl_gpgpu);
 extern cl_gpgpu_unmap_profiling_buffer_cb *cl_gpgpu_unmap_profiling_buffer;
 
 /* Set the printf buffer */
-typedef int (cl_gpgpu_set_printf_buffer_cb)(cl_gpgpu, uint32_t, uint32_t, uint32_t, uint8_t);
+typedef int (cl_gpgpu_set_printf_buffer_cb)(cl_gpgpu, uint32_t, uint8_t);
 extern cl_gpgpu_set_printf_buffer_cb *cl_gpgpu_set_printf_buffer;
 
 /* get the printf buffer offset in the apeture*/
@@ -298,23 +298,23 @@ typedef unsigned long (cl_gpgpu_reloc_printf_buffer_cb)(cl_gpgpu, uint32_t, uint
 extern cl_gpgpu_reloc_printf_buffer_cb *cl_gpgpu_reloc_printf_buffer;
 
 /* map the printf buffer */
-typedef void* (cl_gpgpu_map_printf_buffer_cb)(cl_gpgpu, uint32_t);
+typedef void* (cl_gpgpu_map_printf_buffer_cb)(cl_gpgpu);
 extern cl_gpgpu_map_printf_buffer_cb *cl_gpgpu_map_printf_buffer;
 
 /* unmap the printf buffer */
-typedef void (cl_gpgpu_unmap_printf_buffer_cb)(cl_gpgpu, uint32_t);
+typedef void (cl_gpgpu_unmap_printf_buffer_cb)(cl_gpgpu);
 extern cl_gpgpu_unmap_printf_buffer_cb *cl_gpgpu_unmap_printf_buffer;
 
 /* release the printf buffer */
-typedef unsigned long (cl_gpgpu_release_printf_buffer_cb)(cl_gpgpu, uint32_t);
+typedef unsigned long (cl_gpgpu_release_printf_buffer_cb)(cl_gpgpu);
 extern cl_gpgpu_release_printf_buffer_cb *cl_gpgpu_release_printf_buffer;
 
 /* Set the last printfset pointer */
-typedef int (cl_gpgpu_set_printf_info_cb)(cl_gpgpu, void *, size_t*);
+typedef int (cl_gpgpu_set_printf_info_cb)(cl_gpgpu, void *);
 extern cl_gpgpu_set_printf_info_cb *cl_gpgpu_set_printf_info;
 
 /* Get the last printfset pointer */
-typedef void* (cl_gpgpu_get_printf_info_cb)(cl_gpgpu, size_t*, size_t*);
+typedef void* (cl_gpgpu_get_printf_info_cb)(cl_gpgpu);
 extern cl_gpgpu_get_printf_info_cb *cl_gpgpu_get_printf_info;
 
 /* Will spawn all threads */
diff --git a/src/cl_gbe_loader.cpp b/src/cl_gbe_loader.cpp
index 0ec6c96..7121c90 100644
--- a/src/cl_gbe_loader.cpp
+++ b/src/cl_gbe_loader.cpp
@@ -69,9 +69,7 @@ gbe_get_profiling_bti_cb* interp_get_profiling_bti = NULL;
 gbe_dup_profiling_cb* interp_dup_profiling = NULL;
 gbe_get_printf_num_cb* interp_get_printf_num = NULL;
 gbe_get_printf_buf_bti_cb* interp_get_printf_buf_bti = NULL;
-gbe_get_printf_indexbuf_bti_cb* interp_get_printf_indexbuf_bti = NULL;
 gbe_dup_printfset_cb* interp_dup_printfset = NULL;
-gbe_get_printf_sizeof_size_cb* interp_get_printf_sizeof_size = NULL;
 gbe_release_printf_info_cb* interp_release_printf_info = NULL;
 gbe_output_printf_cb* interp_output_printf = NULL;
 gbe_kernel_get_arg_info_cb *interp_kernel_get_arg_info = NULL;
@@ -236,18 +234,10 @@ struct GbeLoaderInitializer
     if (interp_get_printf_buf_bti == NULL)
       return false;
 
-    interp_get_printf_indexbuf_bti = *(gbe_get_printf_indexbuf_bti_cb**)dlsym(dlhInterp, "gbe_get_printf_indexbuf_bti");
-    if (interp_get_printf_indexbuf_bti == NULL)
-      return false;
-
     interp_dup_printfset = *(gbe_dup_printfset_cb**)dlsym(dlhInterp, "gbe_dup_printfset");
     if (interp_dup_printfset == NULL)
       return false;
 
-    interp_get_printf_sizeof_size = *(gbe_get_printf_sizeof_size_cb**)dlsym(dlhInterp, "gbe_get_printf_sizeof_size");
-    if (interp_get_printf_sizeof_size == NULL)
-      return false;
-
     interp_release_printf_info = *(gbe_release_printf_info_cb**)dlsym(dlhInterp, "gbe_release_printf_info");
     if (interp_release_printf_info == NULL)
       return false;
diff --git a/src/cl_gbe_loader.h b/src/cl_gbe_loader.h
index d72854c..df808a5 100644
--- a/src/cl_gbe_loader.h
+++ b/src/cl_gbe_loader.h
@@ -69,9 +69,7 @@ extern gbe_get_profiling_bti_cb* interp_get_profiling_bti;
 extern gbe_dup_profiling_cb* interp_dup_profiling;
 extern gbe_get_printf_num_cb* interp_get_printf_num;
 extern gbe_get_printf_buf_bti_cb* interp_get_printf_buf_bti;
-extern gbe_get_printf_indexbuf_bti_cb* interp_get_printf_indexbuf_bti;
 extern gbe_dup_printfset_cb* interp_dup_printfset;
-extern gbe_get_printf_sizeof_size_cb* interp_get_printf_sizeof_size;
 extern gbe_release_printf_info_cb* interp_release_printf_info;
 extern gbe_output_printf_cb* interp_output_printf;
 extern gbe_kernel_get_arg_info_cb *interp_kernel_get_arg_info;
diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c
index ac92ed3..f9ff4a0 100644
--- a/src/intel/intel_gpgpu.c
+++ b/src/intel/intel_gpgpu.c
@@ -142,8 +142,6 @@ intel_gpgpu_delete_finished(intel_gpgpu_t *gpgpu)
     drm_intel_bo_unreference(gpgpu->time_stamp_b.bo);
   if(gpgpu->printf_b.bo)
     drm_intel_bo_unreference(gpgpu->printf_b.bo);
-  if(gpgpu->printf_b.ibo)
-    drm_intel_bo_unreference(gpgpu->printf_b.ibo);
   if (gpgpu->aux_buf.bo)
     drm_intel_bo_unreference(gpgpu->aux_buf.bo);
   if (gpgpu->perf_b.bo)
@@ -914,9 +912,6 @@ intel_gpgpu_state_init(intel_gpgpu_t *gpgpu,
   gpgpu->curb.size_cs_entry = size_cs_entry;
   gpgpu->max_threads = max_threads;
 
-  if (gpgpu->printf_b.ibo)
-    dri_bo_unreference(gpgpu->printf_b.ibo);
-  gpgpu->printf_b.ibo = NULL;
   if (gpgpu->printf_b.bo)
     dri_bo_unreference(gpgpu->printf_b.bo);
   gpgpu->printf_b.bo = NULL;
@@ -2331,32 +2326,22 @@ intel_gpgpu_get_profiling_info(intel_gpgpu_t *gpgpu)
 }
 
 static int
-intel_gpgpu_set_printf_buf(intel_gpgpu_t *gpgpu, uint32_t i, uint32_t size, uint32_t offset, uint8_t bti)
+intel_gpgpu_set_printf_buf(intel_gpgpu_t *gpgpu, uint32_t size, uint8_t bti)
 {
-  drm_intel_bo *bo = NULL;
-  if (i == 0) { // the index buffer.
-    if (gpgpu->printf_b.ibo)
-      dri_bo_unreference(gpgpu->printf_b.ibo);
-    gpgpu->printf_b.ibo = dri_bo_alloc(gpgpu->drv->bufmgr, "Printf index buffer", size, 4096);
-    bo = gpgpu->printf_b.ibo;
-  } else if (i == 1) {
-    if (gpgpu->printf_b.bo)
-      dri_bo_unreference(gpgpu->printf_b.bo);
-    gpgpu->printf_b.bo = dri_bo_alloc(gpgpu->drv->bufmgr, "Printf output buffer", size, 4096);
-    bo = gpgpu->printf_b.bo;
-  } else
-    assert(0);
+  if (gpgpu->printf_b.bo)
+    dri_bo_unreference(gpgpu->printf_b.bo);
+  gpgpu->printf_b.bo = dri_bo_alloc(gpgpu->drv->bufmgr, "Printf buffer", size, 4096);
 
-  if (!bo || (drm_intel_bo_map(bo, 1) != 0)) {
-    if (gpgpu->printf_b.bo)
-      drm_intel_bo_unreference(gpgpu->printf_b.bo);
-    gpgpu->printf_b.bo = NULL;
+  if (!gpgpu->printf_b.bo || (drm_intel_bo_map(gpgpu->printf_b.bo, 1) != 0)) {
     fprintf(stderr, "%s:%d: %s.\n", __FILE__, __LINE__, strerror(errno));
     return -1;
   }
-  memset(bo->virtual, 0, size);
-  drm_intel_bo_unmap(bo);
-  cl_gpgpu_bind_buf((cl_gpgpu)gpgpu, (cl_buffer)bo, offset, 0, size, bti);
+
+  memset(gpgpu->printf_b.bo->virtual, 0, size);
+  *(uint32_t *)(gpgpu->printf_b.bo->virtual) = 4; // the first four bytes are for the length.
+  drm_intel_bo_unmap(gpgpu->printf_b.bo);
+  /* No need to bind, we do not need to emit reloc. */
+  intel_gpgpu_setup_bti(gpgpu, gpgpu->printf_b.bo, 0, size, bti, I965_SURFACEFORMAT_RAW);
   return 0;
 }
 
@@ -2379,65 +2364,38 @@ intel_gpgpu_unmap_profiling_buf_addr(intel_gpgpu_t *gpgpu)
 
 
 static void*
-intel_gpgpu_map_printf_buf(intel_gpgpu_t *gpgpu, uint32_t i)
+intel_gpgpu_map_printf_buf(intel_gpgpu_t *gpgpu)
 {
   drm_intel_bo *bo = NULL;
-  if (i == 0) {
-    bo = gpgpu->printf_b.ibo;
-  } else if (i == 1) {
-    bo = gpgpu->printf_b.bo;
-  } else
-    assert(0);
-
+  bo = gpgpu->printf_b.bo;
   drm_intel_bo_map(bo, 1);
   return bo->virtual;
 }
 
 static void
-intel_gpgpu_unmap_printf_buf_addr(intel_gpgpu_t *gpgpu, uint32_t i)
+intel_gpgpu_unmap_printf_buf_addr(intel_gpgpu_t *gpgpu)
 {
   drm_intel_bo *bo = NULL;
-  if (i == 0) {
-    bo = gpgpu->printf_b.ibo;
-  } else if (i == 1) {
-    bo = gpgpu->printf_b.bo;
-  } else
-  assert(0);
-
+  bo = gpgpu->printf_b.bo;
   drm_intel_bo_unmap(bo);
 }
 
 static void
-intel_gpgpu_release_printf_buf(intel_gpgpu_t *gpgpu, uint32_t i)
+intel_gpgpu_release_printf_buf(intel_gpgpu_t *gpgpu)
 {
-  if (i == 0) {
-    drm_intel_bo_unreference(gpgpu->printf_b.ibo);
-    gpgpu->printf_b.ibo = NULL;
-  } else if (i == 1) {
-    drm_intel_bo_unreference(gpgpu->printf_b.bo);
-    gpgpu->printf_b.bo = NULL;
-  } else
-    assert(0);
+  drm_intel_bo_unreference(gpgpu->printf_b.bo);
+  gpgpu->printf_b.bo = NULL;
 }
 
 static void
-intel_gpgpu_set_printf_info(intel_gpgpu_t *gpgpu, void* printf_info, size_t * global_sz)
+intel_gpgpu_set_printf_info(intel_gpgpu_t *gpgpu, void* printf_info)
 {
   gpgpu->printf_info = printf_info;
-  gpgpu->global_wk_sz[0] = global_sz[0];
-  gpgpu->global_wk_sz[1] = global_sz[1];
-  gpgpu->global_wk_sz[2] = global_sz[2];
 }
 
 static void*
-intel_gpgpu_get_printf_info(intel_gpgpu_t *gpgpu, size_t * global_sz, size_t *outbuf_sz)
+intel_gpgpu_get_printf_info(intel_gpgpu_t *gpgpu)
 {
-  global_sz[0] = gpgpu->global_wk_sz[0];
-  global_sz[1] = gpgpu->global_wk_sz[1];
-  global_sz[2] = gpgpu->global_wk_sz[2];
-
-  if (gpgpu->printf_b.bo)
-    *outbuf_sz = gpgpu->printf_b.bo->size;
   return gpgpu->printf_info;
 }
 
diff --git a/src/intel/intel_gpgpu.h b/src/intel/intel_gpgpu.h
index ccbf2fa..904f9e0 100644
--- a/src/intel/intel_gpgpu.h
+++ b/src/intel/intel_gpgpu.h
@@ -44,7 +44,6 @@ struct intel_batchbuffer;
 struct intel_gpgpu
 {
   void* ker_opaque;
-  size_t global_wk_sz[3];
   void* printf_info;
   void* profiling_info;
   struct intel_driver *drv;
@@ -65,8 +64,7 @@ struct intel_gpgpu
   struct { drm_intel_bo *bo; } scratch_b;
   struct { drm_intel_bo *bo; } constant_b;
   struct { drm_intel_bo *bo; } time_stamp_b;  /* time stamp buffer */
-  struct { drm_intel_bo *bo;
-           drm_intel_bo *ibo;} printf_b;      /* the printf buf and index buf*/
+  struct { drm_intel_bo *bo; } printf_b;      /* the printf buf */
   struct { drm_intel_bo *bo; } profiling_b;   /* the buf for profiling*/
   struct { drm_intel_bo *bo; } aux_buf;
   struct {
-- 
2.4.3



More information about the Beignet mailing list