[Beignet] [PATCH] Delete useless cl_thread files.

Wed Sep 28 08:38:07 UTC 2016

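From: Junyan He <junyan.he at intel.com>

Remove src/cl_thread.c and src/cl_thread.h together with the code that
depended on them: the thread_data member of struct _cl_command_queue and
its creation/destruction in cl_command_queue_new()/cl_command_queue_delete(),
the GET_QUEUE_THREAD_GPGPU macro, and cl_command_queue_flush() and
cl_command_queue_finish(), which relied on the per-thread gpgpu, batch
buffer and event helpers. Also drop the leftover commented-out
GET_QUEUE_THREAD_GPGPU lines and the now-unneeded cl_thread.h includes.

Signed-off-by: Junyan He <junyan.he at intel.com>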
---
 src/CMakeLists.txt          |   1 -
 src/cl_command_queue.c      |  37 -----
 src/cl_command_queue.h      |  14 --
 src/cl_command_queue_gen7.c |   2 -
 src/cl_device_id.c          |   1 -
 src/cl_thread.c             | 329 --------------------------------------------
 src/cl_thread.h             |  52 -------
 7 files changed, 436 deletions(-)
 delete mode 100644 src/cl_thread.c
 delete mode 100644 src/cl_thread.h

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 626b43f..559380a 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -90,7 +90,6 @@ set(OPENCL_SRC
     cl_command_queue.h
     cl_command_queue_gen7.c
     cl_command_queue_enqueue.c
-    cl_thread.c
     cl_driver.h
     cl_driver.cpp
     cl_driver_defs.c
diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c
index 54a487c..a63efaf 100644
--- a/src/cl_command_queue.c
+++ b/src/cl_command_queue.c
@@ -25,7 +25,6 @@
 #include "cl_device_id.h"
 #include "cl_mem.h"
 #include "cl_utils.h"
-#include "cl_thread.h"
 #include "cl_alloc.h"
 #include "cl_driver.h"
 #include "cl_khr_icd.h"
@@ -47,10 +46,6 @@ cl_command_queue_new(cl_context ctx)
   CL_OBJECT_INIT_BASE(queue, CL_OBJECT_COMMAND_QUEUE_MAGIC);
   cl_command_queue_init_enqueue(queue);
 
-  if ((queue->thread_data = cl_thread_data_create()) == NULL) {
-    goto error;
-  }
-
   /* Append the command queue in the list */
   cl_context_add_queue(ctx, queue);
   queue->ctx = ctx;
@@ -82,8 +77,6 @@ cl_command_queue_delete(cl_command_queue queue)
   // a chance to call the call-back function.
   //cl_event_update_last_events(queue,1);
 
-  cl_thread_data_destroy(queue);
-  queue->thread_data = NULL;
   cl_mem_delete(queue->perf);
   cl_context_remove_queue(queue->ctx, queue);
   cl_free(queue->wait_events);
@@ -154,8 +147,6 @@ cl_command_queue_bind_image(cl_command_queue queue, cl_kernel k, cl_gpgpu gpgpu)
 LOCAL cl_int
 cl_command_queue_bind_surface(cl_command_queue queue, cl_kernel k, cl_gpgpu gpgpu)
 {
-  //GET_QUEUE_THREAD_GPGPU(queue);
-
   /* Bind all user buffers (given by clSetKernelArg) */
   uint32_t i;
   enum gbe_arg_type arg_type; /* kind of argument */
@@ -248,34 +239,6 @@ cl_command_queue_flush_gpgpu(cl_gpgpu gpgpu)
   return CL_SUCCESS;
 }
 
-LOCAL cl_int
-cl_command_queue_flush(cl_command_queue queue)
-{
-  int err;
-  GET_QUEUE_THREAD_GPGPU(queue);
-  err = cl_command_queue_flush_gpgpu(gpgpu);
-  // We now keep a list of uncompleted events and check if they compelte
-  // every flush. This can make sure all events created have chance to be
-  // update status, so the callback functions or reference can be handled.
-  //cl_event_update_last_events(queue,0);
-
-  cl_event current_event = get_current_event(queue);
-  if (current_event && err == CL_SUCCESS) {
-    //err = cl_event_flush(current_event);
-    set_current_event(queue, NULL);
-  }
-  cl_invalid_thread_gpgpu(queue);
-  return err;
-}
-
-LOCAL cl_int
-cl_command_queue_finish(cl_command_queue queue)
-{
-  cl_gpgpu_sync(cl_get_thread_batch_buf(queue));
-  //cl_event_update_last_events(queue,1);
-  return CL_SUCCESS;
-}
-
 #define DEFAULT_WAIT_EVENTS_SIZE  16
 LOCAL void
 cl_command_queue_insert_event(cl_command_queue queue, cl_event event)
diff --git a/src/cl_command_queue.h b/src/cl_command_queue.h
index 42ea7b2..7a8afbf 100644
--- a/src/cl_command_queue.h
+++ b/src/cl_command_queue.h
@@ -22,7 +22,6 @@
 
 #include "cl_internals.h"
 #include "cl_driver.h"
-#include "cl_thread.h"
 #include "cl_base_object.h"
 #include "CL/cl.h"
 #include <stdint.h>
@@ -52,7 +51,6 @@ struct _cl_command_queue {
   cl_int    wait_events_num;           /* Number of Non-complete user events */
   cl_int    wait_events_size;          /* The size of array that wait_events point to */
   cl_command_queue_properties  props;  /* Queue properties */
-  void *thread_data;                   /* Used to store thread context data */
   cl_mem perf;                         /* Where to put the perf counters */
 
   void* cmrt_event;                    /* the latest CmEvent* of the command queue */
@@ -63,12 +61,6 @@ struct _cl_command_queue {
          ((cl_base_object)obj)->magic == CL_OBJECT_COMMAND_QUEUE_MAGIC &&  \
          CL_OBJECT_GET_REF(obj) >= 1))
 
-/* The macro to get the thread specified gpgpu struct. */
-#define GET_QUEUE_THREAD_GPGPU(queue) \
-	cl_gpgpu gpgpu = queue ? cl_get_thread_gpgpu(queue) : NULL;  \
-	if (queue) \
-	  assert(gpgpu);
-
 /* Allocate and initialize a new command queue. Also insert it in the list of
  * command queue in the associated context
  */
@@ -92,15 +84,9 @@ extern cl_int cl_command_queue_ND_range(cl_command_queue queue,
 /* The memory object where to report the performance */
 extern cl_int cl_command_queue_set_report_buffer(cl_command_queue, cl_mem);
 
-/* Flush for the command queue */
-extern cl_int cl_command_queue_flush(cl_command_queue);
-
 /* Flush for the specified gpgpu */
 extern int cl_command_queue_flush_gpgpu(cl_gpgpu);
 
-/* Wait for the completion of the command queue */
-extern cl_int cl_command_queue_finish(cl_command_queue);
-
 /* Bind all the surfaces in the GPGPU state */
 extern cl_int cl_command_queue_bind_surface(cl_command_queue, cl_kernel, cl_gpgpu);
 
diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c
index 5ad3b8b..37082a6 100644
--- a/src/cl_command_queue_gen7.c
+++ b/src/cl_command_queue_gen7.c
@@ -129,7 +129,6 @@ cl_upload_constant_buffer(cl_command_queue queue, cl_kernel ker, cl_gpgpu gpgpu)
   /* calculate constant buffer size
    * we need raw_size & aligned_size
    */
-  //GET_QUEUE_THREAD_GPGPU(queue);
   int32_t arg;
   size_t offset = 0;
   uint32_t raw_size = 0, aligned_size =0;
@@ -338,7 +337,6 @@ cl_command_queue_ND_range_gen7(cl_command_queue queue,
                                const size_t *global_wk_sz,
                                const size_t *local_wk_sz)
 {
-  //GET_QUEUE_THREAD_GPGPU(queue);
   cl_gpgpu gpgpu = cl_gpgpu_new(queue->ctx->drv);
   cl_context ctx = queue->ctx;
   char *final_curbe = NULL;  /* Includes them and one sub-buffer per group */
diff --git a/src/cl_device_id.c b/src/cl_device_id.c
index df3355c..e1ceee1 100644
--- a/src/cl_device_id.c
+++ b/src/cl_device_id.c
@@ -24,7 +24,6 @@
 #include "cl_driver.h"
 #include "cl_device_data.h"
 #include "cl_khr_icd.h"
-#include "cl_thread.h"
 #include "CL/cl.h"
 #include "CL/cl_ext.h"
 #include "CL/cl_intel.h"
diff --git a/src/cl_thread.c b/src/cl_thread.c
deleted file mode 100644
index 0780513..0000000
--- a/src/cl_thread.c
+++ /dev/null
@@ -1,329 +0,0 @@
-/*
- * Copyright © 2012 Intel Corporation
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library. If not, see <http://www.gnu.org/licenses/>.
- *
- */
-#include <string.h>
-#include <stdio.h>
-
-#include "cl_thread.h"
-#include "cl_alloc.h"
-#include "cl_utils.h"
-
-/* Because the cl_command_queue can be used in several threads simultaneously but
-   without add ref to it, we now handle it like this:
-   Keep one threads_slot_array, every time the thread get gpgpu or batch buffer, if it
-   does not have a slot, assign it.
-   The resources are keeped in queue private, and resize it if needed.
-   When the thread exit, the slot will be set invalid.
-   When queue released, all the resources will be released. If user still enqueue, flush
-   or finish the queue after it has been released, the behavior is undefined.
-   TODO: Need to shrink the slot map.
-   */
-
-static int thread_array_num = 1;
-static int *thread_slot_map = NULL;
-static int thread_magic_num = 1;
-static pthread_mutex_t thread_queue_map_lock = PTHREAD_MUTEX_INITIALIZER;
-
-typedef struct _thread_spec_data {
-  cl_gpgpu gpgpu ;
-  int valid;
-  void* thread_batch_buf;
-  cl_event last_event;
-  cl_event current_event;
-  int thread_magic;
-} thread_spec_data;
-
-typedef struct _queue_thread_private {
-  thread_spec_data**  threads_data;
-  int threads_data_num;
-  pthread_mutex_t thread_data_lock;
-} queue_thread_private;
-
-static pthread_once_t key_once = PTHREAD_ONCE_INIT;
-static pthread_key_t thread_id_key;
-static pthread_key_t thread_magic_key;
-
-static void create_thread_key()
-{
-  pthread_key_create(&thread_id_key, NULL);
-  pthread_key_create(&thread_magic_key, NULL);
-}
-
-static thread_spec_data * __create_thread_spec_data(cl_command_queue queue, int create)
-{
-  queue_thread_private *thread_private = ((queue_thread_private *)(queue->thread_data));
-  thread_spec_data* spec = NULL;
-  int i = 0;
-  int *id = NULL, *magic = NULL;
-
-  pthread_once(&key_once, create_thread_key);
-  id = pthread_getspecific(thread_id_key);
-  if(id == NULL) {
-    id = (int *)malloc(sizeof(int));
-    *id = -1;
-    pthread_setspecific(thread_id_key, id);
-  }
-  magic = pthread_getspecific(thread_magic_key);
-  if(magic == NULL) {
-    magic = (int *)malloc(sizeof(int));
-    *magic = -1;
-    pthread_setspecific(thread_magic_key, magic);
-  }
-
-  if (*id == -1) {
-    pthread_mutex_lock(&thread_queue_map_lock);
-    for (i = 0; i < thread_array_num; i++) {
-      if (thread_slot_map[i] == 0) {
-        *id = i;
-        break;
-      }
-    }
-
-    if (i == thread_array_num) {
-      thread_array_num *= 2;
-      thread_slot_map = realloc(thread_slot_map, sizeof(int) * thread_array_num);
-
-      if(thread_slot_map == NULL) {
-        pthread_mutex_unlock(&thread_queue_map_lock);
-        return NULL;
-      }
-
-      memset(thread_slot_map + thread_array_num/2, 0, sizeof(int) * (thread_array_num/2));
-      *id = thread_array_num/2;
-    }
-
-    thread_slot_map[*id] = 1;
-
-    *magic = thread_magic_num++;
-    pthread_mutex_unlock(&thread_queue_map_lock);
-  }
-
-  pthread_mutex_lock(&thread_private->thread_data_lock);
-  if (thread_array_num > thread_private->threads_data_num) {// just enlarge
-    int old_num = thread_private->threads_data_num;
-    thread_private->threads_data_num = thread_array_num;
-    thread_private->threads_data = realloc(thread_private->threads_data,
-                thread_private->threads_data_num * sizeof(void *));
-
-    if(thread_private->threads_data == NULL) {
-      pthread_mutex_unlock(&thread_private->thread_data_lock);
-      return NULL;
-    }
-
-    memset(thread_private->threads_data + old_num, 0,
-           sizeof(void*) * (thread_private->threads_data_num - old_num));
-  }
-
-  assert(*id != -1 && *id < thread_array_num);
-  spec = thread_private->threads_data[*id];
-  if (!spec && create) {
-       spec = CALLOC(thread_spec_data);
-       spec->thread_magic = *magic;
-       thread_private->threads_data[*id] = spec;
-  }
-
-  pthread_mutex_unlock(&thread_private->thread_data_lock);
-
-  return spec;
-}
-
-cl_event get_current_event(cl_command_queue queue)
-{
-  thread_spec_data* spec = __create_thread_spec_data(queue, 1);
-  int *magic = pthread_getspecific(thread_magic_key);
-  assert(spec && magic && spec->thread_magic == *magic);
-  return spec->current_event;
-}
-
-cl_event get_last_event(cl_command_queue queue)
-{
-  thread_spec_data* spec = __create_thread_spec_data(queue, 1);
-  int *magic = pthread_getspecific(thread_magic_key);
-  assert(spec && magic && spec->thread_magic == *magic);
-  return spec->last_event;
-}
-
-void set_current_event(cl_command_queue queue, cl_event e)
-{
-  thread_spec_data* spec = __create_thread_spec_data(queue, 1);
-  int *magic = pthread_getspecific(thread_magic_key);
-  assert(spec && magic && spec->thread_magic == *magic);
-  spec->current_event = e;
-}
-
-void set_last_event(cl_command_queue queue, cl_event e)
-{
-  thread_spec_data* spec = __create_thread_spec_data(queue, 1);
-  int *magic = pthread_getspecific(thread_magic_key);
-  assert(spec && magic && spec->thread_magic == *magic);
-  spec->last_event = e;
-}
-
-void* cl_thread_data_create(void)
-{
-  queue_thread_private* thread_private = CALLOC(queue_thread_private);
-
-  if (thread_private == NULL)
-    return NULL;
-
-  if (thread_slot_map == NULL) {
-    pthread_mutex_lock(&thread_queue_map_lock);
-    thread_slot_map = calloc(thread_array_num, sizeof(int));
-    pthread_mutex_unlock(&thread_queue_map_lock);
-
-  }
-
-  pthread_mutex_init(&thread_private->thread_data_lock, NULL);
-
-  pthread_mutex_lock(&thread_private->thread_data_lock);
-  thread_private->threads_data = malloc(thread_array_num * sizeof(void *));
-  memset(thread_private->threads_data, 0, sizeof(void*) * thread_array_num);
-  thread_private->threads_data_num = thread_array_num;
-  pthread_mutex_unlock(&thread_private->thread_data_lock);
-
-  return thread_private;
-}
-
-cl_gpgpu cl_get_thread_gpgpu(cl_command_queue queue)
-{
-  thread_spec_data* spec = __create_thread_spec_data(queue, 1);
-  if(!spec)
-    return NULL;
-  int *magic = pthread_getspecific(thread_magic_key);
-  assert(magic);
-
-  if (!spec->thread_magic && spec->thread_magic != *magic) {
-    //We may get the slot from last thread. So free the resource.
-    spec->valid = 0;
-  }
-
-  if (!spec->valid) {
-    if (spec->thread_batch_buf) {
-      cl_gpgpu_unref_batch_buf(spec->thread_batch_buf);
-      spec->thread_batch_buf = NULL;
-    }
-    if (spec->gpgpu) {
-      cl_gpgpu_delete(spec->gpgpu);
-      spec->gpgpu = NULL;
-    }
-    TRY_ALLOC_NO_ERR(spec->gpgpu, cl_gpgpu_new(queue->ctx->drv));
-    spec->valid = 1;
-  }
-
- error:
-  return spec->gpgpu;
-}
-
-void cl_set_thread_batch_buf(cl_command_queue queue, void* buf)
-{
-  thread_spec_data* spec = __create_thread_spec_data(queue, 1);
-  int *magic = pthread_getspecific(thread_magic_key);
-
-  assert(spec && magic && spec->thread_magic == *magic);
-
-  if (spec->thread_batch_buf) {
-    cl_gpgpu_unref_batch_buf(spec->thread_batch_buf);
-  }
-  spec->thread_batch_buf = buf;
-}
-
-void* cl_get_thread_batch_buf(cl_command_queue queue) {
-  thread_spec_data* spec = __create_thread_spec_data(queue, 1);
-  int *magic = pthread_getspecific(thread_magic_key);
-
-  assert(spec && magic && spec->thread_magic == *magic);
-
-  return spec->thread_batch_buf;
-}
-
-void cl_invalid_thread_gpgpu(cl_command_queue queue)
-{
-  int *id = pthread_getspecific(thread_id_key);
-  queue_thread_private *thread_private = ((queue_thread_private *)(queue->thread_data));
-  thread_spec_data* spec = NULL;
-
-  pthread_mutex_lock(&thread_private->thread_data_lock);
-  assert(id);
-  spec = thread_private->threads_data[*id];
-  assert(spec);
-  pthread_mutex_unlock(&thread_private->thread_data_lock);
-
-  if (!spec->valid) {
-    return;
-  }
-
-  assert(spec->gpgpu);
-  cl_gpgpu_delete(spec->gpgpu);
-  spec->gpgpu = NULL;
-  spec->valid = 0;
-}
-
-cl_gpgpu cl_thread_gpgpu_take(cl_command_queue queue)
-{
-  int *id = pthread_getspecific(thread_id_key);
-  queue_thread_private *thread_private = ((queue_thread_private *)(queue->thread_data));
-  thread_spec_data* spec = NULL;
-
-  pthread_mutex_lock(&thread_private->thread_data_lock);
-  assert(id);
-  spec = thread_private->threads_data[*id];
-  assert(spec);
-  pthread_mutex_unlock(&thread_private->thread_data_lock);
-
-  if (!spec->valid)
-    return NULL;
-
-  assert(spec->gpgpu);
-  cl_gpgpu gpgpu = spec->gpgpu;
-  spec->gpgpu = NULL;
-  spec->valid = 0;
-  return gpgpu;
-}
-
-/* The destructor for clean the thread specific data. */
-void cl_thread_data_destroy(cl_command_queue queue)
-{
-  int i = 0;
-  queue_thread_private *thread_private = ((queue_thread_private *)(queue->thread_data));
-  int threads_data_num;
-  thread_spec_data** threads_data;
-
-  pthread_mutex_lock(&thread_private->thread_data_lock);
-  threads_data_num = thread_private->threads_data_num;
-  threads_data = thread_private->threads_data;
-  thread_private->threads_data_num = 0;
-  thread_private->threads_data = NULL;
-  pthread_mutex_unlock(&thread_private->thread_data_lock);
-  cl_free(thread_private);
-  queue->thread_data = NULL;
-
-  for (i = 0; i < threads_data_num; i++) {
-    if (threads_data[i] != NULL && threads_data[i]->thread_batch_buf) {
-      cl_gpgpu_unref_batch_buf(threads_data[i]->thread_batch_buf);
-      threads_data[i]->thread_batch_buf = NULL;
-    }
-
-    if (threads_data[i] != NULL && threads_data[i]->valid) {
-      cl_gpgpu_delete(threads_data[i]->gpgpu);
-      threads_data[i]->gpgpu = NULL;
-      threads_data[i]->valid = 0;
-    }
-    cl_free(threads_data[i]);
-  }
-
-  cl_free(threads_data);
-}
diff --git a/src/cl_thread.h b/src/cl_thread.h
deleted file mode 100644
index d77526b..0000000
--- a/src/cl_thread.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright © 2012 Intel Corporation
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library. If not, see <http://www.gnu.org/licenses/>.
- *
- */
-
-#ifndef __CL_THREAD_H__
-#define __CL_THREAD_H__
-
-#include <pthread.h>
-#include "cl_internals.h"
-#include "cl_command_queue.h"
-
-/* Create the thread specific data. */
-void* cl_thread_data_create(void);
-
-/* The destructor for clean the thread specific data. */
-void cl_thread_data_destroy(cl_command_queue queue);
-
-/* Used to get the gpgpu struct of each thread. */
-cl_gpgpu cl_get_thread_gpgpu(cl_command_queue queue);
-
-/* Used to release the gpgpu struct of each thread. */
-void cl_invalid_thread_gpgpu(cl_command_queue queue);
-
-/* Used to set the batch buffer of each thread. */
-void cl_set_thread_batch_buf(cl_command_queue queue, void* buf);
-
-/* Used to get the batch buffer of each thread. */
-void* cl_get_thread_batch_buf(cl_command_queue queue);
-
-/* take current gpgpu from the thread gpgpu pool. */
-cl_gpgpu cl_thread_gpgpu_take(cl_command_queue queue);
-
-cl_event get_current_event(cl_command_queue queue);
-cl_event get_last_event(cl_command_queue queue);
-void set_current_event(cl_command_queue queue, cl_event e);
-void set_last_event(cl_command_queue queue, cl_event e);
-
-#endif /* __CL_THREAD_H__ */
-- 
2.7.4