[Beignet] [PATCH newRT] Wrap all memory allocation functions.

junyan.he at inbox.com junyan.he at inbox.com
Thu Mar 23 07:45:40 UTC 2017


From: Junyan He <junyan.he at intel.com>

We wrap all the memory allocation functions in the cl_alloc files, which
makes it easy to track down every memory leak point.

Signed-off-by: Junyan He <junyan.he at intel.com>
---
 src/cl_accelerator_intel.c     |   4 +-
 src/cl_alloc.c                 | 197 ++++++++++++++++++++++++++++++++++-------
 src/cl_alloc.h                 |  43 +++++++--
 src/cl_api.c                   |   3 +-
 src/cl_api_context.c           |   4 +-
 src/cl_api_kernel.c            |  12 +--
 src/cl_command_queue.c         |  12 +--
 src/cl_command_queue_enqueue.c |   6 +-
 src/cl_command_queue_gen7.c    |   2 +-
 src/cl_context.c               |  14 +--
 src/cl_device_enqueue.c        |   2 +-
 src/cl_enqueue.c               |   6 +-
 src/cl_event.c                 |  20 ++---
 src/cl_kernel.c                |  30 +++----
 src/cl_mem.c                   |  28 +++---
 src/cl_program.c               |  54 +++++------
 src/cl_sampler.c               |   4 +-
 src/cl_utils.h                 |   3 -
 src/gen/cl_command_queue_gen.c |  12 +--
 src/gen/cl_kernel_gen.c        |  28 +++---
 src/gen/cl_program_gen.c       |  12 +--
 src/intel/intel_batchbuffer.c  |   4 +-
 src/intel/intel_driver.c       |   8 +-
 src/intel/intel_gpgpu.c        |  18 ++--
 src/x11/dricommon.c            |   6 +-
 25 files changed, 342 insertions(+), 190 deletions(-)

diff --git a/src/cl_accelerator_intel.c b/src/cl_accelerator_intel.c
index ae08184..62700b2 100644
--- a/src/cl_accelerator_intel.c
+++ b/src/cl_accelerator_intel.c
@@ -18,7 +18,7 @@ cl_accelerator_intel_new(cl_context ctx,
   cl_int err = CL_SUCCESS;
 
   /* Allocate and inialize the structure itself */
-  TRY_ALLOC(accel, CALLOC(struct _cl_accelerator_intel));
+  TRY_ALLOC(accel, CL_CALLOC(1, sizeof(struct _cl_accelerator_intel)));
   CL_OBJECT_INIT_BASE(accel, CL_OBJECT_ACCELERATOR_INTEL_MAGIC);
 
   if (accel_type != CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL) {
@@ -81,5 +81,5 @@ cl_accelerator_intel_delete(cl_accelerator_intel accel)
 
   cl_context_delete(accel->ctx);
   CL_OBJECT_DESTROY_BASE(accel);
-  cl_free(accel);
+  CL_FREE(accel);
 }
diff --git a/src/cl_alloc.c b/src/cl_alloc.c
index e532569..b9ac853 100644
--- a/src/cl_alloc.c
+++ b/src/cl_alloc.c
@@ -1,4 +1,4 @@
-/* 
+/*
  * Copyright © 2012 Intel Corporation
  *
  * This library is free software; you can redistribute it and/or
@@ -14,75 +14,204 @@
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library. If not, see <http://www.gnu.org/licenses/>.
  *
- * Author: Benjamin Segovia <benjamin.segovia at intel.com>
  */
-
 #include "cl_alloc.h"
 #include "cl_utils.h"
-
+#include "cl_device_id.h"
 #include <stdlib.h>
 #include <assert.h>
 #include <malloc.h>
+#include <pthread.h>
+#include <string.h>
+
+#ifdef CL_ALLOC_DEBUG
+
+static pthread_mutex_t cl_alloc_log_lock;
+#define MAX_ALLOC_LOG_NUM 1024 * 1024
+static unsigned int cl_alloc_log_num;
+
+typedef struct _cl_alloc_log_item {
+  void *ptr;
+  size_t size;
+  char *file;
+  int line;
+} _cl_alloc_log_item;
+typedef struct _cl_alloc_log_item *cl_alloc_log_item;
+
+#define ALLOC_LOG_BUCKET_SZ 128
+static cl_alloc_log_item *cl_alloc_log_map[ALLOC_LOG_BUCKET_SZ];
+static int cl_alloc_log_map_size[ALLOC_LOG_BUCKET_SZ];
+
+LOCAL void cl_alloc_debug_init(void)
+{
+  static int inited = 0;
+  int i;
+  if (inited)
+    return;
+
+  pthread_mutex_init(&cl_alloc_log_lock, NULL);
+
+  for (i = 0; i < ALLOC_LOG_BUCKET_SZ; i++) {
+    cl_alloc_log_map_size[i] = 128;
+    cl_alloc_log_map[i] = malloc(cl_alloc_log_map_size[i] * sizeof(cl_alloc_log_item));
+    memset(cl_alloc_log_map[i], 0, cl_alloc_log_map_size[i] * sizeof(cl_alloc_log_item));
+  }
+  cl_alloc_log_num = 0;
 
-static volatile int32_t cl_alloc_n = 0;
+  atexit(cl_alloc_report_unfreed);
+  inited = 1;
+}
 
-LOCAL void*
-cl_malloc(size_t sz)
+static void insert_alloc_log_item(void *ptr, size_t sz, char *file, int line)
 {
-  void * p = NULL;
-  atomic_inc(&cl_alloc_n);
-  p = malloc(sz);
+  cl_long slot;
+  int i;
+
+  if (cl_alloc_log_num > MAX_ALLOC_LOG_NUM) {
+    // Too many allocations without a free. We assume a lot has already leaked.
+    cl_alloc_report_unfreed();
+    assert(0);
+  }
+
+  slot = (cl_long)ptr;
+  slot = (slot >> 5) & 0x07f;
+  assert(slot < ALLOC_LOG_BUCKET_SZ);
+
+  cl_alloc_log_item it = malloc(sizeof(_cl_alloc_log_item));
+  assert(it);
+  it->ptr = ptr;
+  it->size = sz;
+  it->file = file;
+  it->line = line;
+
+  pthread_mutex_lock(&cl_alloc_log_lock);
+  for (i = 0; i < cl_alloc_log_map_size[slot]; i++) {
+    if (cl_alloc_log_map[slot][i] == NULL) {
+      break;
+    }
+  }
+
+  if (i == cl_alloc_log_map_size[slot]) {
+    cl_alloc_log_map[slot] =
+      realloc(cl_alloc_log_map[slot], 2 * cl_alloc_log_map_size[slot] * sizeof(cl_alloc_log_item));
+    memset(cl_alloc_log_map[slot] + cl_alloc_log_map_size[slot], 0,
+           cl_alloc_log_map_size[slot] * sizeof(cl_alloc_log_item));
+    cl_alloc_log_map_size[slot] = cl_alloc_log_map_size[slot] * 2;
+  }
+
+  cl_alloc_log_map[slot][i] = it;
+  cl_alloc_log_num++;
+  pthread_mutex_unlock(&cl_alloc_log_lock);
+}
+
+static void delete_alloc_log_item(void *ptr, char *file, int line)
+{
+  cl_long slot;
+  int i;
+
+  slot = (cl_long)ptr;
+  slot = (slot >> 5) & 0x07f;
+  assert(slot < ALLOC_LOG_BUCKET_SZ);
+
+  pthread_mutex_lock(&cl_alloc_log_lock);
+  for (i = 0; i < cl_alloc_log_map_size[slot]; i++) {
+    if (cl_alloc_log_map[slot][i] && cl_alloc_log_map[slot][i]->ptr == ptr) {
+      break;
+    }
+  }
+
+  if (i == cl_alloc_log_map_size[slot]) {
+    printf("Free at file: %s, line: %d, We can not find the malloc log for this ptr:%p, fatal\n",
+           file, line, ptr);
+    assert(0);
+  }
+
+  free(cl_alloc_log_map[slot][i]);
+  cl_alloc_log_map[slot][i] = NULL;
+
+  cl_alloc_log_num--;
+  pthread_mutex_unlock(&cl_alloc_log_lock);
+}
+
+LOCAL void cl_register_alloc_ptr(void *ptr, size_t sz, char *file, int line)
+{
+  assert(ptr);
+  insert_alloc_log_item(ptr, sz, file, line);
+}
+
+LOCAL void *cl_malloc(size_t sz, char *file, int line)
+{
+  void *p = malloc(sz);
   assert(p);
+  insert_alloc_log_item(p, sz, file, line);
   return p;
 }
 
-LOCAL void*
-cl_aligned_malloc(size_t sz, size_t align)
+LOCAL void *cl_memalign(size_t align, size_t sz, char *file, int line)
 {
-  void * p = NULL;
-  atomic_inc(&cl_alloc_n);
+  void *p = NULL;
   p = memalign(align, sz);
   assert(p);
+  insert_alloc_log_item(p, ((sz + align - 1) / align) * align, file, line);
   return p;
 }
 
-LOCAL void*
-cl_calloc(size_t n, size_t elem_size)
+LOCAL void *cl_calloc(size_t n, size_t elem_size, char *file, int line)
 {
   void *p = NULL;
-  atomic_inc(&cl_alloc_n);
   p = calloc(n, elem_size);
   assert(p);
+  insert_alloc_log_item(p, n * elem_size, file, line);
   return p;
 }
 
-LOCAL void*
-cl_realloc(void *ptr, size_t sz)
+LOCAL void *cl_realloc(void *ptr, size_t sz, char *file, int line)
 {
-  if (ptr == NULL)
-    atomic_inc(&cl_alloc_n);
-  return realloc(ptr, sz);
+  void *p = NULL;
+
+  if (ptr != NULL) {
+    delete_alloc_log_item(ptr, file, line);
+  }
+
+  p = realloc(ptr, sz);
+  assert(p);
+  insert_alloc_log_item(p, sz, file, line);
+  return p;
 }
 
-LOCAL void
-cl_free(void *ptr)
+LOCAL void cl_free(void *ptr, char *file, int line)
 {
   if (ptr == NULL)
     return;
-  atomic_dec(&cl_alloc_n);
+
+  delete_alloc_log_item(ptr, file, line);
   free(ptr);
-  ptr = NULL;
 }
 
-LOCAL size_t
-cl_report_unfreed(void)
+void cl_alloc_report_unfreed(void)
 {
-  return cl_alloc_n;
-}
+  int i, slot, num;
+  pthread_mutex_lock(&cl_alloc_log_lock);
+  if (cl_alloc_log_num == 0) {
+    pthread_mutex_unlock(&cl_alloc_log_lock);
+    return;
+  }
 
-LOCAL void
-cl_report_set_all_freed(void)
-{
-  cl_alloc_n = 0;
+  printf("-------------------------------------------------------------------\n");
+  num = 0;
+  for (slot = 0; slot < ALLOC_LOG_BUCKET_SZ; slot++) {
+    for (i = 0; i < cl_alloc_log_map_size[slot]; i++) {
+      if (cl_alloc_log_map[slot][i]) {
+        printf("Leak point at file:%s, line: %d, ptr is %p, alloc size is %ld\n",
+               cl_alloc_log_map[slot][i]->file, cl_alloc_log_map[slot][i]->line,
+               cl_alloc_log_map[slot][i]->ptr, cl_alloc_log_map[slot][i]->size);
+        num++;
+      }
+    }
+  }
+  printf("-------------------------------------------------------------------\n");
+  assert(num == cl_alloc_log_num);
+  pthread_mutex_unlock(&cl_alloc_log_lock);
 }
 
+#endif
diff --git a/src/cl_alloc.h b/src/cl_alloc.h
index 433ffc6..61a904e 100644
--- a/src/cl_alloc.h
+++ b/src/cl_alloc.h
@@ -1,4 +1,4 @@
-/* 
+/*
  * Copyright © 2012 Intel Corporation
  *
  * This library is free software; you can redistribute it and/or
@@ -20,28 +20,53 @@
 #ifndef __CL_ALLOC_H__
 #define __CL_ALLOC_H__
 
-#include "cl_internals.h"
 #include <stdlib.h>
+#include <malloc.h>
+
+//#define CL_ALLOC_DEBUG 1
+#ifdef CL_ALLOC_DEBUG
+
+/* Register a pointer that was allocated by some other part of the code */
+extern void cl_register_alloc_ptr(void *ptr, size_t sz, char *file, int line);
+#define CL_REGISTER_ALLOC_PTR(PTR, SZ) cl_register_alloc_ptr(PTR, SZ, __FILE__, __LINE__)
 
 /* Return a valid pointer for the requested memory block size */
-extern void *cl_malloc(size_t sz);
+extern void *cl_malloc(size_t sz, char *file, int line);
+#define CL_MALLOC(SZ) cl_malloc(SZ, __FILE__, __LINE__)
 
 /* Aligned malloc */
-extern void* cl_aligned_malloc(size_t sz, size_t align);
+extern void *cl_memalign(size_t align, size_t sz, char *file, int line);
+#define CL_MEMALIGN(ALIGN, SZ) cl_memalign(ALIGN, SZ, __FILE__, __LINE__)
 
 /* malloc + memzero */
-extern void *cl_calloc(size_t n, size_t elem_size);
+extern void *cl_calloc(size_t n, size_t elem_size, char *file, int line);
+#define CL_CALLOC(N, ELEM_SIZE) cl_calloc(N, ELEM_SIZE, __FILE__, __LINE__)
 
 /* Regular realloc */
-extern void *cl_realloc(void *ptr, size_t sz);
+extern void *cl_realloc(void *ptr, size_t sz, char *file, int line);
+#define CL_REALLOC(PTR, SZ) cl_realloc(PTR, SZ, __FILE__, __LINE__)
 
 /* Free a pointer allocated with cl_*alloc */
-extern void  cl_free(void *ptr);
+extern void cl_free(void *ptr, char *file, int line);
+#define CL_FREE(PTR) cl_free(PTR, __FILE__, __LINE__)
 
 /* We count the number of allocation. This function report the number of
  * allocation still unfreed
  */
-extern size_t cl_report_unfreed(void);
+extern void cl_alloc_report_unfreed(void);
+#define CL_ALLOC_REPORT_UNFREED() cl_alloc_report_unfreed()
 
-#endif /* __CL_ALLOC_H__ */
+extern void cl_alloc_debug_init(void);
+#define CL_ALLOC_DEBUG_INIT() cl_alloc_debug_init()
 
+#else
+#define CL_REGISTER_ALLOC_PTR(PTR, SZ)
+#define CL_MALLOC(SZ) malloc(SZ)
+#define CL_MEMALIGN(ALIGN, SZ) memalign(ALIGN, SZ)
+#define CL_CALLOC(N, ELEM_SIZE) calloc(N, ELEM_SIZE)
+#define CL_REALLOC(PTR, SZ) realloc(PTR, SZ)
+#define CL_FREE(PTR) free(PTR)
+#define CL_ALLOC_REPORT_UNFREED()
+#define CL_ALLOC_DEBUG_INIT()
+#endif /* end of CL_ALLOC_DEBUG */
+#endif /* __CL_ALLOC_H__ */
diff --git a/src/cl_api.c b/src/cl_api.c
index 24b8b3d..f72533f 100644
--- a/src/cl_api.c
+++ b/src/cl_api.c
@@ -1280,7 +1280,8 @@ clGetExtensionFunctionAddressForPlatform(cl_platform_id platform,
 cl_int
 clReportUnfreedIntel(void)
 {
-  return cl_report_unfreed();
+  return CL_SUCCESS;
+  //return cl_report_unfreed();
 }
 
 void*
diff --git a/src/cl_api_context.c b/src/cl_api_context.c
index e8184b1..fa1be08 100644
--- a/src/cl_api_context.c
+++ b/src/cl_api_context.c
@@ -92,7 +92,7 @@ clCreateContextFromType(const cl_context_properties *properties,
       break;
 
     assert(num_devices > 0);
-    devices = cl_malloc(num_devices * sizeof(cl_device_id));
+    devices = CL_MALLOC(num_devices * sizeof(cl_device_id));
     err = cl_get_device_ids(NULL, device_type, num_devices, &devices[0], &num_devices);
     if (err != CL_SUCCESS)
       break;
@@ -101,7 +101,7 @@ clCreateContextFromType(const cl_context_properties *properties,
   } while (0);
 
   if (devices)
-    cl_free(devices);
+    CL_FREE(devices);
   if (errcode_ret)
     *errcode_ret = err;
   return context;
diff --git a/src/cl_api_kernel.c b/src/cl_api_kernel.c
index 13ea8c0..ce4d7b8 100644
--- a/src/cl_api_kernel.c
+++ b/src/cl_api_kernel.c
@@ -341,10 +341,10 @@ clEnqueueNativeKernel(cl_command_queue command_queue,
 
     //Per spec, need copy args
     if (cb_args) {
-      new_args = cl_malloc(cb_args);
+      new_args = CL_MALLOC(cb_args);
       if (num_mem_objects) {
-        new_args_mem_loc = cl_malloc(sizeof(void *) * num_mem_objects);
-        new_mem_list = cl_malloc(sizeof(cl_mem) * num_mem_objects);
+        new_args_mem_loc = CL_MALLOC(sizeof(void *) * num_mem_objects);
+        new_mem_list = CL_MALLOC(sizeof(cl_mem) * num_mem_objects);
         memcpy(new_mem_list, mem_list, sizeof(cl_mem) * num_mem_objects);
       }
 
@@ -405,11 +405,11 @@ clEnqueueNativeKernel(cl_command_queue command_queue,
 
   if (err != CL_SUCCESS) {
     if (new_args)
-      cl_free(new_args);
+      CL_FREE(new_args);
     if (new_mem_list)
-      cl_free(new_mem_list);
+      CL_FREE(new_mem_list);
     if (new_args_mem_loc)
-      cl_free(new_args_mem_loc);
+      CL_FREE(new_args_mem_loc);
   }
 
   if (err == CL_SUCCESS && event) {
diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c
index 55b1a23..1b21375 100644
--- a/src/cl_command_queue.c
+++ b/src/cl_command_queue.c
@@ -42,13 +42,13 @@ cl_command_queue_new(cl_context ctx)
   cl_command_queue queue = NULL;
 
   assert(ctx);
-  queue = cl_calloc(1, sizeof(_cl_command_queue));
+  queue = CL_CALLOC(1, sizeof(_cl_command_queue));
   if (queue == NULL)
     return NULL;
 
   CL_OBJECT_INIT_BASE(queue, CL_OBJECT_COMMAND_QUEUE_MAGIC);
   if (cl_command_queue_init_enqueue(queue) != CL_SUCCESS) {
-    cl_free(queue);
+    CL_FREE(queue);
     return NULL;
   }
 
@@ -91,10 +91,10 @@ cl_command_queue_delete(cl_command_queue queue)
 
   cl_mem_delete(queue->perf);
   if (queue->barrier_events) {
-    cl_free(queue->barrier_events);
+    CL_FREE(queue->barrier_events);
   }
   CL_OBJECT_DESTROY_BASE(queue);
-  cl_free(queue);
+  CL_FREE(queue);
 }
 
 LOCAL void
@@ -311,7 +311,7 @@ cl_command_queue_insert_barrier_event(cl_command_queue queue, cl_event event)
 
   if (queue->barrier_events == NULL) {
     queue->barrier_events_size = 4;
-    queue->barrier_events = cl_calloc(queue->barrier_events_size, sizeof(cl_event));
+    queue->barrier_events = CL_CALLOC(queue->barrier_events_size, sizeof(cl_event));
     assert(queue->barrier_events);
   }
 
@@ -327,7 +327,7 @@ cl_command_queue_insert_barrier_event(cl_command_queue queue, cl_event event)
 
   /* Array is full, double expand. */
   queue->barrier_events_size *= 2;
-  queue->barrier_events = cl_realloc(queue->barrier_events,
+  queue->barrier_events = CL_REALLOC(queue->barrier_events,
                                      queue->barrier_events_size * sizeof(cl_event));
   assert(queue->barrier_events);
 
diff --git a/src/cl_command_queue_enqueue.c b/src/cl_command_queue_enqueue.c
index 44a0761..94c56bd 100644
--- a/src/cl_command_queue_enqueue.c
+++ b/src/cl_command_queue_enqueue.c
@@ -212,7 +212,7 @@ cl_command_queue_record_in_queue_events(cl_command_queue queue, cl_uint *list_nu
   }
   assert(event_num > 0);
 
-  enqueued_list = cl_calloc(event_num, sizeof(cl_event));
+  enqueued_list = CL_CALLOC(event_num, sizeof(cl_event));
   assert(enqueued_list);
 
   i = 0;
@@ -274,7 +274,7 @@ cl_command_queue_wait_flush(cl_command_queue queue)
     cl_event_delete(enqueued_list[i]);
   }
   if (enqueued_list)
-    cl_free(enqueued_list);
+    CL_FREE(enqueued_list);
 
   return CL_SUCCESS;
 }
@@ -324,7 +324,7 @@ cl_command_queue_wait_finish(cl_command_queue queue)
     cl_event_delete(enqueued_list[i]);
   }
   if (enqueued_list)
-    cl_free(enqueued_list);
+    CL_FREE(enqueued_list);
 
   return CL_SUCCESS;
 }
diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c
index dd82a44..ea64abc 100644
--- a/src/cl_command_queue_gen7.c
+++ b/src/cl_command_queue_gen7.c
@@ -382,7 +382,7 @@ cl_command_queue_ND_range_gen7(cl_command_queue queue,
   if (ker->exec_info_n > 0) {
     cst_sz += ker->exec_info_n * sizeof(void *);
     cst_sz = (cst_sz + 31) / 32 * 32;   //align to register size, hard code here.
-    ker->curbe = cl_realloc(ker->curbe, cst_sz);
+    ker->curbe = CL_REALLOC(ker->curbe, cst_sz);
   }
   ker->curbe_sz = cst_sz;
 
diff --git a/src/cl_context.c b/src/cl_context.c
index 1ba2302..c54760f 100644
--- a/src/cl_context.c
+++ b/src/cl_context.c
@@ -273,7 +273,7 @@ cl_create_context(const cl_context_properties *  properties,
 
   /* Filter out repeated device. */
   assert(num_devices > 0);
-  all_dev = cl_calloc(num_devices, sizeof(cl_device_id));
+  all_dev = CL_CALLOC(num_devices, sizeof(cl_device_id));
   if (all_dev == NULL) {
     *errcode_ret = CL_OUT_OF_HOST_MEMORY;
     return NULL;
@@ -296,13 +296,13 @@ cl_create_context(const cl_context_properties *  properties,
 
   /* We are good */
   if (UNLIKELY((ctx = cl_context_new(&props, dev_num, all_dev)) == NULL)) {
-    cl_free(all_dev);
+    CL_FREE(all_dev);
     err = CL_OUT_OF_HOST_MEMORY;
     goto error;
   }
 
   if(properties != NULL && prop_len > 0) {
-    TRY_ALLOC (ctx->prop_user, CALLOC_ARRAY(cl_context_properties, prop_len));
+    TRY_ALLOC (ctx->prop_user, CL_CALLOC(prop_len, sizeof(cl_context_properties)));
     memcpy(ctx->prop_user, properties, sizeof(cl_context_properties)*prop_len);
   }
   ctx->prop_len = prop_len;
@@ -329,7 +329,7 @@ cl_context_new(struct _cl_context_prop *props, cl_uint dev_num, cl_device_id* al
 {
   cl_context ctx = NULL;
 
-  TRY_ALLOC_NO_ERR (ctx, CALLOC(struct _cl_context));
+  TRY_ALLOC_NO_ERR (ctx, CL_CALLOC(1, sizeof(struct _cl_context)));
   CL_OBJECT_INIT_BASE(ctx, CL_OBJECT_CONTEXT_MAGIC);
   ctx->devices = all_dev;
   ctx->device_num = dev_num;
@@ -382,11 +382,11 @@ cl_context_delete(cl_context ctx)
   cl_program_delete(ctx->built_in_prgs);
   ctx->built_in_prgs = NULL;
 
-  cl_free(ctx->prop_user);
-  cl_free(ctx->devices);
+  CL_FREE(ctx->prop_user);
+  CL_FREE(ctx->devices);
   cl_driver_delete(ctx->drv);
   CL_OBJECT_DESTROY_BASE(ctx);
-  cl_free(ctx);
+  CL_FREE(ctx);
 }
 
 LOCAL void
diff --git a/src/cl_device_enqueue.c b/src/cl_device_enqueue.c
index b6932df..5d55c22 100644
--- a/src/cl_device_enqueue.c
+++ b/src/cl_device_enqueue.c
@@ -67,7 +67,7 @@ cl_device_enqueue_bind_buffer(cl_gpgpu gpgpu, cl_kernel ker, uint32_t *max_bti,
       if(ker->device_enqueue_ptr == NULL)
         ker->device_enqueue_ptr = cl_mem_svm_allocate(ker->program->ctx, 0, buf_size, 0);
       if(ker->device_enqueue_infos == NULL)
-        ker->device_enqueue_infos = cl_calloc(ker->arg_n, sizeof(void *));
+        ker->device_enqueue_infos = CL_CALLOC(ker->arg_n, sizeof(void *));
       ker->device_enqueue_info_n = 0;
       ker->useDeviceEnqueue = CL_TRUE;
       cl_device_enqueue_fix_offset(ker);
diff --git a/src/cl_enqueue.c b/src/cl_enqueue.c
index 8350089..933b2b7 100644
--- a/src/cl_enqueue.c
+++ b/src/cl_enqueue.c
@@ -613,15 +613,15 @@ cl_enqueue_delete(enqueue_data *data)
 
   if (data->type == EnqueueNativeKernel) {
     if (data->mem_list) {
-      cl_free((void*)data->mem_list);
+      CL_FREE((void*)data->mem_list);
       data->mem_list = NULL;
     }
     if (data->ptr) {
-      cl_free((void*)data->ptr);
+      CL_FREE((void*)data->ptr);
       data->ptr = NULL;
     }
     if (data->const_ptr) {
-      cl_free((void*)data->const_ptr);
+      CL_FREE((void*)data->const_ptr);
       data->const_ptr = NULL;
     }
   }
diff --git a/src/cl_event.c b/src/cl_event.c
index a2b16be..6b018ee 100644
--- a/src/cl_event.c
+++ b/src/cl_event.c
@@ -149,7 +149,7 @@ cl_event_new(cl_context ctx, cl_command_queue queue, cl_command_type type,
              cl_uint num_events, cl_event *event_list)
 {
   int i;
-  cl_event e = cl_calloc(1, sizeof(_cl_event));
+  cl_event e = CL_CALLOC(1, sizeof(_cl_event));
   if (e == NULL)
     return NULL;
 
@@ -204,14 +204,14 @@ cl_event_delete(cl_event event)
     for (i = 0; i < event->depend_event_num; i++) {
       cl_event_delete(event->depend_events[i]);
     }
-    cl_free(event->depend_events);
+    CL_FREE(event->depend_events);
   }
 
   /* Free all the callbacks. Last ref, no need to lock. */
   while (!list_empty(&event->callbacks)) {
     cb = list_entry(event->callbacks.head_node.n, _cl_event_user_callback, node);
     list_node_del(&cb->node);
-    cl_free(cb);
+    CL_FREE(cb);
   }
 
   /* Remove it from the list */
@@ -219,7 +219,7 @@ cl_event_delete(cl_event event)
   cl_context_remove_event(event->ctx, event);
 
   CL_OBJECT_DESTROY_BASE(event);
-  cl_free(event);
+  CL_FREE(event);
 }
 
 LOCAL cl_event
@@ -253,7 +253,7 @@ cl_event_create(cl_context ctx, cl_command_queue queue, cl_uint num_events,
       total_events = queue->barrier_events_num + num_events;
 
       if (total_events) {
-        depend_events = cl_calloc(total_events, sizeof(cl_event));
+        depend_events = CL_CALLOC(total_events, sizeof(cl_event));
         if (depend_events == NULL) {
           CL_OBJECT_UNLOCK(queue);
           err = CL_OUT_OF_HOST_MEMORY;
@@ -295,7 +295,7 @@ cl_event_create(cl_context ctx, cl_command_queue queue, cl_uint num_events,
       for (i = 0; i < total_events; i++) {
         cl_event_delete(depend_events[i]);
       }
-      cl_free(depend_events);
+      CL_FREE(depend_events);
     }
 
     // if set depend_events, must succeed.
@@ -320,7 +320,7 @@ cl_event_set_callback(cl_event event, cl_int exec_type, cl_event_notify_cb pfn_n
   assert(pfn_notify);
 
   do {
-    cb = cl_calloc(1, sizeof(_cl_event_user_callback));
+    cb = CL_CALLOC(1, sizeof(_cl_event_user_callback));
     if (cb == NULL) {
       err = CL_OUT_OF_HOST_MEMORY;
       break;
@@ -349,7 +349,7 @@ cl_event_set_callback(cl_event event, cl_int exec_type, cl_event_notify_cb pfn_n
   } while (0);
 
   if (cb)
-    cl_free(cb);
+    CL_FREE(cb);
 
   return err;
 }
@@ -405,7 +405,7 @@ cl_event_set_status(cl_event event, cl_int status)
         list_node_del(&cb->node);
         cb->executed = CL_TRUE;
         cb->pfn_notify(event, status, cb->user_data);
-        cl_free(cb);
+        CL_FREE(cb);
       }
 
       CL_OBJECT_LOCK(event);
@@ -683,7 +683,7 @@ cl_event_create_marker_or_barrier(cl_command_queue queue, cl_uint num_events_in_
       cl_event_delete(depend_events[i]);
     }
     if (depend_events)
-      cl_free(depend_events);
+      CL_FREE(depend_events);
 
     if (err != CL_SUCCESS) {
       *error = err;
diff --git a/src/cl_kernel.c b/src/cl_kernel.c
index f687084..867231d 100644
--- a/src/cl_kernel.c
+++ b/src/cl_kernel.c
@@ -46,7 +46,7 @@ cl_kernel_delete(cl_kernel k)
   if (k->cmrt_kernel != NULL) {
     cmrt_destroy_kernel(k);
     CL_OBJECT_DESTROY_BASE(k);
-    cl_free(k);
+    CL_FREE(k);
     return;
   }
 #endif
@@ -60,35 +60,35 @@ cl_kernel_delete(cl_kernel k)
   /* This will be true for kernels created by clCreateKernel */
   if (k->ref_its_program) cl_program_delete(k->program);
   /* Release the curbe if allocated */
-  if (k->curbe) cl_free(k->curbe);
+  if (k->curbe) CL_FREE(k->curbe);
   /* Release the argument array if required */
   if (k->args) {
     for (i = 0; i < k->arg_n; ++i)
       if (k->args[i].mem != NULL)
         cl_mem_delete(k->args[i].mem);
-    cl_free(k->args);
+    CL_FREE(k->args);
   }
   if (k->image_sz)
-    cl_free(k->images);
+    CL_FREE(k->images);
 
   if (k->exec_info)
-    cl_free(k->exec_info);
+    CL_FREE(k->exec_info);
 
   if (k->device_enqueue_ptr)
     cl_mem_svm_delete(k->program->ctx, k->device_enqueue_ptr);
   if (k->device_enqueue_infos)
-    cl_free(k->device_enqueue_infos);
+    CL_FREE(k->device_enqueue_infos);
 
   CL_OBJECT_DESTROY_BASE(k);
 
-  cl_free(k);
+  CL_FREE(k);
 }
 
 LOCAL cl_kernel
 cl_kernel_new(cl_program p)
 {
   cl_kernel k = NULL;
-  TRY_ALLOC_NO_ERR (k, CALLOC(struct _cl_kernel));
+  TRY_ALLOC_NO_ERR (k, CL_CALLOC(1, sizeof(struct _cl_kernel)));
   CL_OBJECT_INIT_BASE(k, CL_OBJECT_KERNEL_MAGIC);
   k->program = p;
   k->cmrt_kernel = NULL;
@@ -317,7 +317,7 @@ cl_kernel_set_exec_info(cl_kernel k, size_t n, const void *value)
   assert(k != NULL);
 
   if (n == 0) return err;
-  TRY_ALLOC(k->exec_info, cl_calloc(n, 1));
+  TRY_ALLOC(k->exec_info, CL_CALLOC(n, 1));
   memcpy(k->exec_info, value, n);
   k->exec_info_n = n / sizeof(void *);
 
@@ -462,7 +462,7 @@ cl_kernel_setup(cl_kernel k, gbe_kernel opaque)
   assert(k->sampler_sz <= GEN_MAX_SURFACES);
   assert(k->image_sz <= ctx->devices[0]->max_read_image_args + ctx->devices[0]->max_write_image_args);
   if (k->image_sz > 0) {
-    TRY_ALLOC_NO_ERR(k->images, cl_calloc(k->image_sz, sizeof(k->images[0])));
+    TRY_ALLOC_NO_ERR(k->images, CL_CALLOC(k->image_sz, sizeof(k->images[0])));
     interp_kernel_get_image_data(k->opaque, k->images);
   } else
     k->images = NULL;
@@ -479,7 +479,7 @@ cl_kernel_dup(cl_kernel from)
 
   if (UNLIKELY(from == NULL))
     return NULL;
-  TRY_ALLOC_NO_ERR (to, CALLOC(struct _cl_kernel));
+  TRY_ALLOC_NO_ERR (to, CL_CALLOC(1, sizeof(struct _cl_kernel)));
   CL_OBJECT_INIT_BASE(to, CL_OBJECT_KERNEL_MAGIC);
   to->bo = from->bo;
   to->opaque = from->opaque;
@@ -495,16 +495,16 @@ cl_kernel_dup(cl_kernel from)
   if (to->sampler_sz)
     memcpy(to->samplers, from->samplers, to->sampler_sz * sizeof(uint32_t));
   if (to->image_sz) {
-    TRY_ALLOC_NO_ERR(to->images, cl_calloc(to->image_sz, sizeof(to->images[0])));
+    TRY_ALLOC_NO_ERR(to->images, CL_CALLOC(to->image_sz, sizeof(to->images[0])));
     memcpy(to->images, from->images, to->image_sz * sizeof(to->images[0]));
   } else
     to->images = NULL;
   if (to->exec_info_n) { /* Must always 0 here */
-    TRY_ALLOC_NO_ERR(to->exec_info, cl_calloc(to->exec_info_n, sizeof(void *)));
+    TRY_ALLOC_NO_ERR(to->exec_info, CL_CALLOC(to->exec_info_n, sizeof(void *)));
     memcpy(to->exec_info, from->exec_info, to->exec_info_n * sizeof(void *));
   }
-  TRY_ALLOC_NO_ERR(to->args, cl_calloc(to->arg_n, sizeof(cl_argument)));
-  if (to->curbe_sz) TRY_ALLOC_NO_ERR(to->curbe, cl_calloc(1, to->curbe_sz));
+  TRY_ALLOC_NO_ERR(to->args, CL_CALLOC(to->arg_n, sizeof(cl_argument)));
+  if (to->curbe_sz) TRY_ALLOC_NO_ERR(to->curbe, CL_CALLOC(1, to->curbe_sz));
 
   /* Retain the bos */
   if (from->bo)       cl_buffer_reference(from->bo);
diff --git a/src/cl_mem.c b/src/cl_mem.c
index 0278b7f..f0cccb8 100644
--- a/src/cl_mem.c
+++ b/src/cl_mem.c
@@ -122,23 +122,23 @@ cl_mem_allocate(enum cl_mem_type type,
   /* Allocate and inialize the structure itself */
   if (type == CL_MEM_IMAGE_TYPE) {
     struct _cl_mem_image *image = NULL;
-    TRY_ALLOC (image, CALLOC(struct _cl_mem_image));
+    TRY_ALLOC (image, CL_CALLOC(1, sizeof(struct _cl_mem_image)));
     mem = &image->base;
   } else if (type == CL_MEM_GL_IMAGE_TYPE ) {
     struct _cl_mem_gl_image *gl_image = NULL;
-    TRY_ALLOC (gl_image, CALLOC(struct _cl_mem_gl_image));
+    TRY_ALLOC (gl_image, CL_CALLOC(1, sizeof(struct _cl_mem_gl_image)));
     mem = &gl_image->base.base;
   } else if (type == CL_MEM_BUFFER1D_IMAGE_TYPE) {
     struct _cl_mem_buffer1d_image *buffer1d_image = NULL;
-    TRY_ALLOC(buffer1d_image, CALLOC(struct _cl_mem_buffer1d_image));
+    TRY_ALLOC(buffer1d_image, CL_CALLOC(1, sizeof(struct _cl_mem_buffer1d_image)));
     mem = &buffer1d_image->base.base;
   } else if (type == CL_MEM_PIPE_TYPE) {
     _cl_mem_pipe *pipe = NULL;
-    TRY_ALLOC(pipe, CALLOC(struct _cl_mem_pipe));
+    TRY_ALLOC(pipe, CL_CALLOC(1, sizeof(struct _cl_mem_pipe)));
     mem = &pipe->base;
   } else {
     struct _cl_mem_buffer *buffer = NULL;
-    TRY_ALLOC (buffer, CALLOC(struct _cl_mem_buffer));
+    TRY_ALLOC (buffer, CL_CALLOC(1, sizeof(struct _cl_mem_buffer)));
     mem = &buffer->base;
   }
 
@@ -197,7 +197,7 @@ cl_mem_allocate(enum cl_mem_type type,
         }
         else if (flags & CL_MEM_ALLOC_HOST_PTR) {
           const size_t alignedSZ = ALIGN(sz, page_size);
-          void* internal_host_ptr = cl_aligned_malloc(alignedSZ, page_size);
+          void* internal_host_ptr = CL_MEMALIGN(page_size, alignedSZ);
           mem->host_ptr = internal_host_ptr;
           mem->is_userptr = 1;
           mem->bo = cl_buffer_alloc_userptr(bufmgr, "CL userptr memory object", internal_host_ptr, alignedSZ, 0);
@@ -447,7 +447,7 @@ cl_mem_new_sub_buffer(cl_mem buffer,
   }
 
   /* Now create the sub buffer and link it to the buffer. */
-  TRY_ALLOC (sub_buf, CALLOC(struct _cl_mem_buffer));
+  TRY_ALLOC (sub_buf, CL_CALLOC(1, sizeof(struct _cl_mem_buffer)));
   mem = &sub_buf->base;
 
   CL_OBJECT_INIT_BASE(mem, CL_OBJECT_MEM_MAGIC);
@@ -498,7 +498,7 @@ cl_mem cl_mem_new_pipe(cl_context ctx,
   cl_mem mem = NULL;
   cl_int err;
   cl_uint sz;
-  if(UNLIKELY((pipe = CALLOC(_cl_mem_pipe)) == NULL)) {
+  if(UNLIKELY((pipe = CL_CALLOC(1, sizeof(_cl_mem_pipe))) == NULL)) {
     err = CL_OUT_OF_RESOURCES;
     goto error;
   }
@@ -602,7 +602,7 @@ void* cl_mem_svm_allocate(cl_context ctx, cl_svm_mem_flags flags,
   cl_buffer_mgr bufmgr = NULL;
   cl_mem mem;
   _cl_mem_svm* svm;
-  if(UNLIKELY((svm = CALLOC(_cl_mem_svm)) == NULL))
+  if(UNLIKELY((svm = CL_CALLOC(1, sizeof(_cl_mem_svm))) == NULL))
     return NULL;
   mem = &svm->base;
 
@@ -623,7 +623,7 @@ void* cl_mem_svm_allocate(cl_context ctx, cl_svm_mem_flags flags,
     alignment = page_size;
   else
     alignment = ALIGN(alignment, page_size);
-  ptr = cl_aligned_malloc(alignedSZ, alignment);
+  ptr = CL_MEMALIGN(alignment, alignedSZ);
   if(ptr == NULL) return NULL;
 
   mem->host_ptr = ptr;
@@ -1232,7 +1232,7 @@ cl_mem_delete(cl_mem mem)
     cb = list_entry(mem->dstr_cb_head.head_node.n, _cl_mem_dstr_cb, node);
     list_node_del(&cb->node);
     cb->pfn_notify(mem, cb->user_data);
-    cl_free(cb);
+    CL_FREE(cb);
   }
 
   /* iff we are a image, delete the 1d buffer if has. */
@@ -1293,10 +1293,10 @@ cl_mem_delete(cl_mem mem)
       (mem->flags & CL_MEM_ALLOC_HOST_PTR) &&
       (mem->type != CL_MEM_SUBBUFFER_TYPE)) ||
       (mem->is_svm && mem->type == CL_MEM_SVM_TYPE))
-    cl_free(mem->host_ptr);
+    CL_FREE(mem->host_ptr);
 
   CL_OBJECT_DESTROY_BASE(mem);
-  cl_free(mem);
+  CL_FREE(mem);
 }
 
 LOCAL void
@@ -2473,7 +2473,7 @@ LOCAL cl_int
 cl_mem_set_destructor_callback(cl_mem memobj,
                                void(CL_CALLBACK *pfn_notify)(cl_mem, void *), void *user_data)
 {
-  cl_mem_dstr_cb cb = cl_calloc(1, sizeof(_cl_mem_dstr_cb));
+  cl_mem_dstr_cb cb = CL_CALLOC(1, sizeof(_cl_mem_dstr_cb));
   if (cb == NULL) {
     return CL_OUT_OF_HOST_MEMORY;
   }
diff --git a/src/cl_program.c b/src/cl_program.c
index 46f9d1f..c090bba 100644
--- a/src/cl_program.c
+++ b/src/cl_program.c
@@ -43,7 +43,7 @@ static void
 cl_program_release_sources(cl_program p)
 {
   if (p->source) {
-    cl_free(p->source);
+    CL_FREE(p->source);
     p->source = NULL;
   }
 }
@@ -52,7 +52,7 @@ static void
 cl_program_release_binary(cl_program p)
 {
   if (p->binary) {
-    cl_free(p->binary);
+    CL_FREE(p->binary);
     p->binary = NULL;
   }
 }
@@ -74,7 +74,7 @@ cl_program_delete(cl_program p)
 
   /* Release the build options. */
   if (p->build_opts) {
-    cl_free(p->build_opts);
+    CL_FREE(p->build_opts);
     p->build_opts = NULL;
   }
 
@@ -89,15 +89,15 @@ cl_program_delete(cl_program p)
   else
 #endif
   {
-    cl_free(p->bin);               /* Free the blob */
+    CL_FREE(p->bin);               /* Free the blob */
     for (i = 0; i < p->ker_n; ++i) /* Free the kernels */
       cl_kernel_delete(p->ker[i]);
-    cl_free(p->ker);
+    CL_FREE(p->ker);
   }
 
   if (p->global_data_ptr)
     cl_buffer_unreference(p->global_data);
-  cl_free(p->global_data_ptr);
+  CL_FREE(p->global_data_ptr);
 
   /* Remove it from the list */
   cl_context_remove_program(p->ctx, p);
@@ -114,7 +114,7 @@ cl_program_delete(cl_program p)
   }
 
   CL_OBJECT_DESTROY_BASE(p);
-  cl_free(p);
+  CL_FREE(p);
 }
 
 LOCAL cl_program
@@ -123,7 +123,7 @@ cl_program_new(cl_context ctx)
   cl_program p = NULL;
 
   /* Allocate the structure */
-  TRY_ALLOC_NO_ERR (p, CALLOC(struct _cl_program));
+  TRY_ALLOC_NO_ERR (p, CL_CALLOC(1, sizeof(struct _cl_program)));
   CL_OBJECT_INIT_BASE(p, CL_OBJECT_PROGRAM_MAGIC);
   p->build_status = CL_BUILD_NONE;
   p->cmrt_program = NULL;
@@ -158,7 +158,7 @@ cl_program_load_gen_program(cl_program p)
   p->ker_n = interp_program_get_kernel_num(p->opaque);
 
   /* Allocate the kernel array */
-  TRY_ALLOC (p->ker, CALLOC_ARRAY(cl_kernel, p->ker_n));
+  TRY_ALLOC (p->ker, CL_CALLOC(p->ker_n, sizeof(cl_kernel)));
 
   for (i = 0; i < p->ker_n; ++i) {
     const gbe_kernel opaque = interp_program_get_kernel(p->opaque, i);
@@ -218,7 +218,7 @@ static cl_int get_program_global_data(cl_program prog) {
 
   int page_size = getpagesize();
   size_t alignedSz = ALIGN(const_size, page_size);
-  char * p = (char*)cl_aligned_malloc(alignedSz, page_size);
+  char * p = (char*)CL_MEMALIGN(page_size, alignedSz);
   prog->global_data_ptr = p;
   interp_program_get_global_constant_data(prog->opaque, (char*)p);
 
@@ -292,7 +292,7 @@ cl_program_create_from_binary(cl_context             ctx,
       goto error;
   }
 
-  TRY_ALLOC(program->binary, cl_calloc(lengths[0], sizeof(char)));
+  TRY_ALLOC(program->binary, CL_CALLOC(lengths[0], sizeof(char)));
   memcpy(program->binary, binaries[0], lengths[0]);
   program->binary_sz = lengths[0];
   program->source_type = FROM_BINARY;
@@ -301,11 +301,11 @@ cl_program_create_from_binary(cl_context             ctx,
     program->source_type = FROM_CMRT;
   }else if(isSPIR((unsigned char*)program->binary)) {
     char* typed_binary;
-    TRY_ALLOC(typed_binary, cl_calloc(lengths[0]+1, sizeof(char)));
+    TRY_ALLOC(typed_binary, CL_CALLOC(lengths[0]+1, sizeof(char)));
     memcpy(typed_binary+1, binaries[0], lengths[0]);
     *typed_binary = 1;
     program->opaque = compiler_program_new_from_llvm_binary(program->ctx->devices[0]->device_id, typed_binary, program->binary_sz+1);
-    cl_free(typed_binary);
+    CL_FREE(typed_binary);
     if (UNLIKELY(program->opaque == NULL)) {
       err = CL_INVALID_PROGRAM;
       goto error;
@@ -403,7 +403,7 @@ cl_program_create_with_built_in_kernles(cl_context     ctx,
   int i = 0;
 
   //copy the content to local_kernel_names to protect the kernel_names.
-  TRY_ALLOC(local_kernel_names, cl_calloc(strlen(kernel_names)+1, sizeof(char) ) );
+  TRY_ALLOC(local_kernel_names, CL_CALLOC(strlen(kernel_names)+1, sizeof(char) ) );
   memcpy(local_kernel_names, kernel_names, strlen(kernel_names)+1);
 
   kernel = strtok_r( local_kernel_names, delims , &saveptr);
@@ -423,7 +423,7 @@ cl_program_create_with_built_in_kernles(cl_context     ctx,
     kernel = strtok_r((char*)saveptr , delims, &saveptr );
   }
 
-  cl_free(local_kernel_names);
+  CL_FREE(local_kernel_names);
 
 exit:
   if (errcode_ret)
@@ -500,7 +500,7 @@ cl_program_create_from_source(cl_context ctx,
       goto error;
   }
 
-  TRY_ALLOC (lens, cl_calloc(count, sizeof(int32_t)));
+  TRY_ALLOC (lens, CL_CALLOC(count, sizeof(int32_t)));
   for (i = 0; i < (int) count; ++i) {
     size_t len;
     if (lengths == NULL || lengths[i] == 0)
@@ -510,7 +510,7 @@ cl_program_create_from_source(cl_context ctx,
     lens[i] = len;
     len_total += len;
   }
-  TRY_ALLOC(program->source, cl_calloc(len_total+1, sizeof(char)));
+  TRY_ALLOC(program->source, CL_CALLOC(len_total+1, sizeof(char)));
   p = program->source;
   for (i = 0; i < (int) count; ++i) {
     memcpy(p, strings[i], lens[i]);
@@ -522,7 +522,7 @@ cl_program_create_from_source(cl_context ctx,
   program->binary_type = CL_PROGRAM_BINARY_TYPE_NONE;
 
 exit:
-  cl_free(lens);
+  CL_FREE(lens);
   lens = NULL;
   if (errcode_ret)
     *errcode_ret = err;
@@ -603,16 +603,16 @@ cl_program_build(cl_program p, const char *options)
   if (options) {
     if(p->build_opts == NULL || strcmp(options, p->build_opts) != 0) {
       if(p->build_opts) {
-        cl_free(p->build_opts);
+        CL_FREE(p->build_opts);
         p->build_opts = NULL;
       }
-      TRY_ALLOC (p->build_opts, cl_calloc(strlen(options) + 1, sizeof(char)));
+      TRY_ALLOC (p->build_opts, CL_CALLOC(strlen(options) + 1, sizeof(char)));
       memcpy(p->build_opts, options, strlen(options));
     }
   }
 
   if (options == NULL && p->build_opts) {
-    cl_free(p->build_opts);
+    CL_FREE(p->build_opts);
     p->build_opts = NULL;
   }
 
@@ -666,7 +666,7 @@ cl_program_build(cl_program p, const char *options)
     p->bin_sz += interp_kernel_get_code_size(opaque);
   }
 
-  TRY_ALLOC (p->bin, cl_calloc(p->bin_sz, sizeof(char)));
+  TRY_ALLOC (p->bin, CL_CALLOC(p->bin_sz, sizeof(char)));
   for (i = 0; i < p->ker_n; i ++) {
     const gbe_kernel opaque = interp_program_get_kernel(p->opaque, i);
     size_t sz = interp_kernel_get_code_size(opaque);
@@ -736,7 +736,7 @@ cl_program_link(cl_context            context,
   }
 
   if(option_exist) {
-      TRY_ALLOC (p->build_opts, cl_calloc(strlen(kernel_arg_option) + 1, sizeof(char)));
+      TRY_ALLOC (p->build_opts, CL_CALLOC(strlen(kernel_arg_option) + 1, sizeof(char)));
       memcpy(p->build_opts, kernel_arg_option, strlen(kernel_arg_option));
   }
 
@@ -774,7 +774,7 @@ cl_program_link(cl_context            context,
     p->bin_sz += interp_kernel_get_code_size(opaque);
   }
 
-  TRY_ALLOC (p->bin, cl_calloc(p->bin_sz, sizeof(char)));
+  TRY_ALLOC (p->bin, CL_CALLOC(p->bin_sz, sizeof(char)));
   for (i = 0; i < p->ker_n; i ++) {
     const gbe_kernel opaque = interp_program_get_kernel(p->opaque, i);
     size_t sz = interp_kernel_get_code_size(opaque);
@@ -824,16 +824,16 @@ cl_program_compile(cl_program            p,
   if (options) {
     if(p->build_opts == NULL || strcmp(options, p->build_opts) != 0) {
       if(p->build_opts) {
-        cl_free(p->build_opts);
+        CL_FREE(p->build_opts);
         p->build_opts = NULL;
       }
-      TRY_ALLOC (p->build_opts, cl_calloc(strlen(options) + 1, sizeof(char)));
+      TRY_ALLOC (p->build_opts, CL_CALLOC(strlen(options) + 1, sizeof(char)));
       memcpy(p->build_opts, options, strlen(options));
     }
   }
 
   if (options == NULL && p->build_opts) {
-    cl_free(p->build_opts);
+    CL_FREE(p->build_opts);
     p->build_opts = NULL;
   }
 
diff --git a/src/cl_sampler.c b/src/cl_sampler.c
index d1e6dfe..69d90e6 100644
--- a/src/cl_sampler.c
+++ b/src/cl_sampler.c
@@ -77,7 +77,7 @@ cl_create_sampler(cl_context ctx, cl_bool normalized_coords, cl_addressing_mode
   cl_sampler sampler = NULL;
 
   /* Allocate and inialize the structure itself */
-  sampler = cl_calloc(1, sizeof(_cl_sampler));
+  sampler = CL_CALLOC(1, sizeof(_cl_sampler));
   if (sampler == NULL) {
     *errcode_ret = CL_OUT_OF_HOST_MEMORY;
     return NULL;
@@ -108,7 +108,7 @@ cl_sampler_delete(cl_sampler sampler)
 
   cl_context_remove_sampler(sampler->ctx, sampler);
   CL_OBJECT_DESTROY_BASE(sampler);
-  cl_free(sampler);
+  CL_FREE(sampler);
 }
 
 LOCAL void
diff --git a/src/cl_utils.h b/src/cl_utils.h
index 2d24207..1f5dbce 100644
--- a/src/cl_utils.h
+++ b/src/cl_utils.h
@@ -271,9 +271,6 @@ do {                                                        \
 } while (0)
 
 #define ELEMENTS(x) (sizeof(x)/sizeof(*(x)))
-#define CALLOC_STRUCT(T) (struct T*) cl_calloc(1, sizeof(struct T))
-#define CALLOC(T) (T*) cl_calloc(1, sizeof(T))
-#define CALLOC_ARRAY(T, N) (T*) cl_calloc(N, sizeof(T))
 #define MEMZERO(x) do { memset((x),0,sizeof(*(x))); } while (0)
 
 /* Run some code and catch errors */
diff --git a/src/gen/cl_command_queue_gen.c b/src/gen/cl_command_queue_gen.c
index d12ced8..c2f3888 100644
--- a/src/gen/cl_command_queue_gen.c
+++ b/src/gen/cl_command_queue_gen.c
@@ -161,7 +161,7 @@ gen_gpgpu_setup_curbe(cl_kernel kernel, cl_kernel_gen kernel_gen, gen_gpgpu *gpu
     return CL_SUCCESS;
   }
 
-  curbe = cl_malloc(curbe_size);
+  curbe = CL_MALLOC(curbe_size);
   if (curbe == NULL) {
     return CL_OUT_OF_HOST_MEMORY;
   }
@@ -469,7 +469,7 @@ gen_gpgpu_upload_final_curbe(cl_kernel kernel, cl_kernel_gen kernel_gen,
   }
 
   assert(gpu->thread.thread_num > 0);
-  final_curbe = cl_malloc(gpu->thread.thread_num * gpu->thread.curbe_size);
+  final_curbe = CL_MALLOC(gpu->thread.thread_num * gpu->thread.curbe_size);
   if (final_curbe == NULL)
     return CL_OUT_OF_HOST_MEMORY;
 
@@ -583,7 +583,7 @@ gen_gpgpu_upload_final_curbe(cl_kernel kernel, cl_kernel_gen kernel_gen,
 
   /* All settings are OK, upload it to GPU */
   gen_gpgpu_upload_curbes(gpu, final_curbe, gpu->thread.thread_num * gpu->thread.curbe_size);
-  cl_free(final_curbe);
+  CL_FREE(final_curbe);
   return CL_SUCCESS;
 }
 
@@ -668,7 +668,7 @@ cl_command_queue_ND_range_gen(cl_command_queue queue, cl_kernel kernel, cl_event
   if (kernel_gen->scratch_size > queue->device->scratch_mem_size)
     return CL_OUT_OF_RESOURCES;
 
-  gpu = cl_calloc(1, sizeof(gen_gpgpu));
+  gpu = CL_CALLOC(1, sizeof(gen_gpgpu));
   if (gpu == NULL)
     return CL_OUT_OF_HOST_MEMORY;
 
@@ -836,7 +836,7 @@ cl_command_queue_delete_gpgpu(void *gpgpu)
   }
 
   if (gpu->thread.curbe) {
-    cl_free(gpu->thread.curbe);
+    CL_FREE(gpu->thread.curbe);
     gpu->thread.curbe = NULL;
   }
 
@@ -871,6 +871,6 @@ cl_command_queue_delete_gpgpu(void *gpgpu)
     gpu->batch = NULL;
   }
 
-  cl_free(gpu);
+  CL_FREE(gpu);
   return;
 }
diff --git a/src/gen/cl_kernel_gen.c b/src/gen/cl_kernel_gen.c
index f555212..78ce6b8 100644
--- a/src/gen/cl_kernel_gen.c
+++ b/src/gen/cl_kernel_gen.c
@@ -87,7 +87,7 @@ cl_kernel_get_max_workgroup_size_gen(cl_kernel kernel, cl_device_id device)
 LOCAL void *
 cl_kernel_new_gen(cl_device_id device, cl_kernel kernel)
 {
-  cl_kernel_gen gen_kernel = cl_calloc(1, sizeof(_cl_kernel_gen));
+  cl_kernel_gen gen_kernel = CL_CALLOC(1, sizeof(_cl_kernel_gen));
   if (gen_kernel == NULL)
     return NULL;
 
@@ -102,23 +102,23 @@ cl_kernel_delete_gen(cl_device_id device, cl_kernel kernel)
   DEV_PRIVATE_DATA(kernel, device, kernel_gen);
 
   if (kernel_gen->samper_info) {
-    cl_free(kernel_gen->samper_info);
+    CL_FREE(kernel_gen->samper_info);
     kernel_gen->samper_info = NULL;
   }
   if (kernel_gen->arg_extra_info) {
-    cl_free(kernel_gen->arg_extra_info);
+    CL_FREE(kernel_gen->arg_extra_info);
     kernel_gen->arg_extra_info = NULL;
   }
   if (kernel_gen->virt_reg_phy_offset) {
-    cl_free(kernel_gen->virt_reg_phy_offset);
+    CL_FREE(kernel_gen->virt_reg_phy_offset);
     kernel_gen->virt_reg_phy_offset = NULL;
   }
   if (kernel_gen->image_info) {
-    cl_free(kernel_gen->image_info);
+    CL_FREE(kernel_gen->image_info);
     kernel_gen->image_info = NULL;
   }
 
-  cl_free(kernel_gen);
+  CL_FREE(kernel_gen);
 }
 
 LOCAL cl_int
@@ -228,7 +228,7 @@ cl_program_gen_get_kernel_func_cl_info(cl_device_id device, cl_kernel kernel)
       return CL_INVALID_KERNEL_DEFINITION;
 
     if (kernel->kernel_attr == NULL) {
-      kernel->kernel_attr = cl_malloc(strlen(ptr) + 1);
+      kernel->kernel_attr = CL_MALLOC(strlen(ptr) + 1);
       if (kernel->kernel_attr == NULL)
         return CL_OUT_OF_HOST_MEMORY;
       memcpy(kernel->kernel_attr, ptr, strlen(ptr) + 1);
@@ -258,7 +258,7 @@ cl_program_gen_get_kernel_func_cl_info(cl_device_id device, cl_kernel kernel)
         if (strcmp(kernel->args[i].arg_type_name, ptr) != 0)
           return CL_INVALID_KERNEL_DEFINITION;
       } else {
-        kernel->args[i].arg_type_name = cl_malloc(strlen(ptr) + 1);
+        kernel->args[i].arg_type_name = CL_MALLOC(strlen(ptr) + 1);
         if (kernel->args[i].arg_type_name == NULL)
           return CL_OUT_OF_HOST_MEMORY;
         memcpy(kernel->args[i].arg_type_name, ptr, strlen(ptr) + 1);
@@ -275,7 +275,7 @@ cl_program_gen_get_kernel_func_cl_info(cl_device_id device, cl_kernel kernel)
         if (strcmp(kernel->args[i].arg_name, ptr) != 0)
           return CL_INVALID_KERNEL_DEFINITION;
       } else {
-        kernel->args[i].arg_name = cl_malloc(strlen(ptr) + 1);
+        kernel->args[i].arg_name = CL_MALLOC(strlen(ptr) + 1);
         if (kernel->args[i].arg_name == NULL)
           return CL_OUT_OF_HOST_MEMORY;
         memcpy(kernel->args[i].arg_name, ptr, strlen(ptr) + 1);
@@ -381,12 +381,12 @@ cl_program_gen_get_one_kernel_func(cl_device_id device, cl_kernel kernel, GElf_S
       cmp_arg = 1;
     } else {
       kernel->arg_n = arg_num;
-      kernel->args = cl_calloc(arg_num, sizeof(cl_argument));
+      kernel->args = CL_CALLOC(arg_num, sizeof(cl_argument));
       if (kernel->args == NULL)
         return CL_OUT_OF_HOST_MEMORY;
     }
 
-    kernel_gen->arg_extra_info = cl_calloc(arg_num, sizeof(_cl_gen_arg_extra_info));
+    kernel_gen->arg_extra_info = CL_CALLOC(arg_num, sizeof(_cl_gen_arg_extra_info));
     if (kernel_gen->arg_extra_info == NULL)
       return CL_OUT_OF_HOST_MEMORY;
 
@@ -453,7 +453,7 @@ cl_program_gen_get_one_kernel_func(cl_device_id device, cl_kernel kernel, GElf_S
     kernel_gen->samper_info_num = *((cl_uint *)ptr);
     ptr += sizeof(cl_uint);
     if (kernel_gen->samper_info_num) {
-      kernel_gen->samper_info = cl_calloc(kernel_gen->samper_info_num, sizeof(cl_uint));
+      kernel_gen->samper_info = CL_CALLOC(kernel_gen->samper_info_num, sizeof(cl_uint));
       if (kernel_gen->samper_info == NULL)
         return CL_OUT_OF_HOST_MEMORY;
 
@@ -468,7 +468,7 @@ cl_program_gen_get_one_kernel_func(cl_device_id device, cl_kernel kernel, GElf_S
     ptr += sizeof(cl_uint);
     if (kernel_gen->image_info_num) {
       kernel_gen->image_info =
-        cl_calloc(kernel_gen->image_info_num, sizeof(_cl_gen_image_info_offset));
+        CL_CALLOC(kernel_gen->image_info_num, sizeof(_cl_gen_image_info_offset));
       if (kernel_gen->image_info == NULL)
         return CL_OUT_OF_HOST_MEMORY;
 
@@ -493,7 +493,7 @@ cl_program_gen_get_one_kernel_func(cl_device_id device, cl_kernel kernel, GElf_S
     ptr += sizeof(cl_uint);
     if (kernel_gen->virt_reg_phy_offset_num) {
       kernel_gen->virt_reg_phy_offset =
-        cl_calloc(kernel_gen->virt_reg_phy_offset_num, sizeof(_cl_gen_virt_phy_offset));
+        CL_CALLOC(kernel_gen->virt_reg_phy_offset_num, sizeof(_cl_gen_virt_phy_offset));
       if (kernel_gen->virt_reg_phy_offset == NULL)
         return CL_OUT_OF_HOST_MEMORY;
 
diff --git a/src/gen/cl_program_gen.c b/src/gen/cl_program_gen.c
index 58be603..561c7e0 100644
--- a/src/gen/cl_program_gen.c
+++ b/src/gen/cl_program_gen.c
@@ -64,7 +64,7 @@ cl_program_parse_gen_elf_stream(cl_char *bit_stream, size_t size)
 LOCAL void *
 cl_program_new_gen(cl_device_id device, cl_program p)
 {
-  cl_program_gen gen_elf = cl_calloc(1, sizeof(_cl_program_gen));
+  cl_program_gen gen_elf = CL_CALLOC(1, sizeof(_cl_program_gen));
   if (gen_elf == NULL)
     return NULL;
 
@@ -87,9 +87,9 @@ cl_program_delete_gen(cl_device_id device, cl_program p)
     assert(pd->kernel_num > 0);
     for (i = 0; i < pd->kernel_num; i++) {
       if (pd->kernel_names[i])
-        cl_free(pd->kernel_names[i]);
+        CL_FREE(pd->kernel_names[i]);
     }
-    cl_free(pd->kernel_names);
+    CL_FREE(pd->kernel_names);
   }
   pd->kernel_names = NULL;
 
@@ -97,7 +97,7 @@ cl_program_delete_gen(cl_device_id device, cl_program p)
     elf_end(gen_elf->elf);
   gen_elf->elf = NULL;
 
-  cl_free(gen_elf);
+  CL_FREE(gen_elf);
 }
 
 static cl_int
@@ -243,7 +243,7 @@ cl_program_load_binary_gen_elf(cl_device_id device, cl_program prog)
     return CL_INVALID_PROGRAM;
   }
 
-  pd->kernel_names = cl_calloc(pd->kernel_num, sizeof(char *));
+  pd->kernel_names = CL_CALLOC(pd->kernel_num, sizeof(char *));
   if (pd->kernel_names == NULL) {
     elf_end(elf_p);
     elf->elf = NULL;
@@ -259,7 +259,7 @@ cl_program_load_binary_gen_elf(cl_device_id device, cl_program prog)
       continue;
 
     pd->kernel_names[j] =
-      cl_calloc(1, strlen(p_sym_entry->st_name + elf->strtab_data->d_buf) + 1);
+      CL_CALLOC(1, strlen(p_sym_entry->st_name + elf->strtab_data->d_buf) + 1);
     if (pd->kernel_names[j] == NULL) {
       elf_end(elf_p);
       elf->elf = NULL;
diff --git a/src/intel/intel_batchbuffer.c b/src/intel/intel_batchbuffer.c
index be104bb..96453d4 100644
--- a/src/intel/intel_batchbuffer.c
+++ b/src/intel/intel_batchbuffer.c
@@ -166,7 +166,7 @@ intel_batchbuffer_new(intel_driver_t *intel)
 {
   intel_batchbuffer_t *batch = NULL;
   assert(intel);
-  TRY_ALLOC_NO_ERR (batch, CALLOC(intel_batchbuffer_t));
+  TRY_ALLOC_NO_ERR (batch, CL_CALLOC(1, sizeof(intel_batchbuffer_t)));
   intel_batchbuffer_init(batch, intel);
 
 exit:
@@ -185,5 +185,5 @@ intel_batchbuffer_delete(intel_batchbuffer_t *batch)
   if(batch->buffer)
     intel_batchbuffer_terminate(batch);
 
-  cl_free(batch);
+  CL_FREE(batch);
 }
diff --git a/src/intel/intel_driver.c b/src/intel/intel_driver.c
index b8a1b52..d7b9919 100644
--- a/src/intel/intel_driver.c
+++ b/src/intel/intel_driver.c
@@ -86,7 +86,7 @@ intel_driver_delete(intel_driver_t *driver)
   if (driver == NULL)
     return;
 
-  cl_free(driver);
+  CL_FREE(driver);
 }
 
 static intel_driver_t*
@@ -94,7 +94,7 @@ intel_driver_new(void)
 {
   intel_driver_t *driver = NULL;
 
-  TRY_ALLOC_NO_ERR (driver, CALLOC(intel_driver_t));
+  TRY_ALLOC_NO_ERR (driver, CL_CALLOC(1, sizeof(intel_driver_t)));
   driver->fd = -1;
 
 exit:
@@ -919,7 +919,7 @@ if (intel_driver_open(driver, NULL) != CL_SUCCESS) {
 const size_t sz = 4096;
 void *host_ptr;
 
-host_ptr = cl_aligned_malloc(sz, 4096);
+host_ptr = CL_MEMALIGN(4096, sz);
 if (host_ptr != NULL) {
   cl_buffer bo = intel_buffer_alloc_userptr((cl_buffer_mgr)driver->bufmgr,
     "CL memory object", host_ptr, sz, 0);
@@ -927,7 +927,7 @@ if (host_ptr != NULL) {
     device->host_unified_memory = CL_FALSE;
   else
     drm_intel_bo_unreference((drm_intel_bo*)bo);
-  cl_free(host_ptr);
+  CL_FREE(host_ptr);
 }
 else
   device->host_unified_memory = CL_FALSE;
diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c
index 283b07a..817cc00 100644
--- a/src/intel/intel_gpgpu.c
+++ b/src/intel/intel_gpgpu.c
@@ -157,7 +157,7 @@ intel_gpgpu_delete_finished(intel_gpgpu_t *gpgpu)
     drm_intel_bo_unreference(gpgpu->constant_b.bo);
 
   intel_batchbuffer_delete(gpgpu->batch);
-  cl_free(gpgpu);
+  CL_FREE(gpgpu);
 }
 
 /* Destroy the all intel_gpgpu, no matter finish or not, when driver destroy */
@@ -172,7 +172,7 @@ void intel_gpgpu_delete_all(intel_driver_t *drv)
     p = drv->gpgpu_list;
     drv->gpgpu_list = p->next;
     intel_gpgpu_delete_finished(p->gpgpu);
-    cl_free(p);
+    CL_FREE(p);
   }
   PPTHREAD_MUTEX_UNLOCK(drv);
 }
@@ -195,7 +195,7 @@ intel_gpgpu_delete(intel_gpgpu_t *gpgpu)
          !drm_intel_bo_busy(node->gpgpu->batch->buffer)) {
         p->next = node->next;
         intel_gpgpu_delete_finished(node->gpgpu);
-        cl_free(node);
+        CL_FREE(node);
         node = p->next;
       } else {
         p = node;
@@ -207,7 +207,7 @@ intel_gpgpu_delete(intel_gpgpu_t *gpgpu)
        !drm_intel_bo_busy(node->gpgpu->batch->buffer)) {
       drv->gpgpu_list = drv->gpgpu_list->next;
       intel_gpgpu_delete_finished(node->gpgpu);
-      cl_free(node);
+      CL_FREE(node);
     }
   }
   if (gpgpu == NULL)
@@ -215,7 +215,7 @@ intel_gpgpu_delete(intel_gpgpu_t *gpgpu)
 
   if(gpgpu->batch && gpgpu->batch->buffer &&
      drm_intel_bo_busy(gpgpu->batch->buffer)) {
-    TRY_ALLOC_NO_ERR (node, CALLOC(struct intel_gpgpu_node));
+    TRY_ALLOC_NO_ERR (node, CL_CALLOC(1, sizeof(struct intel_gpgpu_node)));
     node->gpgpu = gpgpu;
     node->next = NULL;
     p = drv->gpgpu_list;
@@ -238,7 +238,7 @@ intel_gpgpu_new(intel_driver_t *drv)
 {
   intel_gpgpu_t *state = NULL;
 
-  TRY_ALLOC_NO_ERR (state, CALLOC(intel_gpgpu_t));
+  TRY_ALLOC_NO_ERR (state, CL_CALLOC(1, sizeof(intel_gpgpu_t)));
   state->drv = drv;
   state->batch = intel_batchbuffer_new(state->drv);
   assert(state->batch);
@@ -2207,7 +2207,7 @@ static intel_event_t*
 intel_gpgpu_event_new(intel_gpgpu_t *gpgpu)
 {
   intel_event_t *event = NULL;
-  TRY_ALLOC_NO_ERR (event, CALLOC(intel_event_t));
+  TRY_ALLOC_NO_ERR (event, CL_CALLOC(1, sizeof(intel_event_t)));
 
   event->buffer = gpgpu->batch->buffer;
   if (event->buffer)
@@ -2222,7 +2222,7 @@ intel_gpgpu_event_new(intel_gpgpu_t *gpgpu)
 exit:
   return event;
 error:
-  cl_free(event);
+  CL_FREE(event);
   event = NULL;
   goto exit;
 }
@@ -2272,7 +2272,7 @@ intel_gpgpu_event_delete(intel_event_t *event)
     drm_intel_bo_unreference(event->buffer);
   if(event->ts_buf)
     drm_intel_bo_unreference(event->ts_buf);
-  cl_free(event);
+  CL_FREE(event);
 }
 
 /* IVB and HSW's result MUST shift in x86_64 system */
diff --git a/src/x11/dricommon.c b/src/x11/dricommon.c
index 98eb713..345bc47 100644
--- a/src/x11/dricommon.c
+++ b/src/x11/dricommon.c
@@ -115,14 +115,14 @@ dri_state_delete(dri_state_t *state)
   if (state == NULL)
     return;
   dri_state_close(state);
-  cl_free(state);
+  CL_FREE(state);
 }
 
 LOCAL dri_state_t*
 dri_state_new(void)
 {
   dri_state_t *state = NULL;
-  TRY_ALLOC_NO_ERR (state, CALLOC(dri_state_t));
+  TRY_ALLOC_NO_ERR (state, CL_CALLOC(1, sizeof(dri_state_t)));
   state->fd = -1;
   state->driConnectedFlag = NONE;
   dri_state_init_drawable_hash_table(state);
@@ -160,7 +160,7 @@ LOCAL dri_drawable_t*
 dri_state_create_drawable(dri_state_t *state, XID x_drawable)
 {
   dri2_drawable_t *dri2_drwble;
-  dri2_drwble = (dri2_drawable_t*)calloc(1, sizeof(*dri2_drwble));
+  dri2_drwble = (dri2_drawable_t*)CL_CALLOC(1, sizeof(*dri2_drwble));
 
   if (!dri2_drwble)
     return NULL;
-- 
2.7.4



More information about the Beignet mailing list