[Beignet] [PATCH 5/9] Implement event related functions.

Yang, Rong R rong.r.yang at intel.com
Mon Sep 26 06:20:42 UTC 2016


The patchset basically LGTM, but there is another issue that will cause segmentation fault when call clEnqueueFillBuffer with size=0, other similar apis may have the same issue, can you fix it and then I will push the patchset. 

> -----Original Message-----
> From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of
> junyan.he at inbox.com
> Sent: Wednesday, September 21, 2016 17:47
> To: beignet at lists.freedesktop.org
> Subject: [Beignet] [PATCH 5/9] Implement event related functions.
> 
> From: Junyan He <junyan.he at intel.com>
> 
> We want to implement the new event handle manner.
> We also move the API to different files to avoid
> a to big api.c file.
> 
> V2:
>   Fix a bug for readwrite_buffer_rect.
> 
> Signed-off-by: Junyan He <junyan.he at intel.com>
> ---
>  src/cl_api_command_queue.c |   55 ++
>  src/cl_api_event.c         |  243 +++++
>  src/cl_api_kernel.c        |  337 +++++++
>  src/cl_api_mem.c           | 2248
> ++++++++++++++++++++++++++++++++++++++++++++
>  4 files changed, 2883 insertions(+)
>  create mode 100644 src/cl_api_command_queue.c
>  create mode 100644 src/cl_api_event.c
>  create mode 100644 src/cl_api_kernel.c
>  create mode 100644 src/cl_api_mem.c
> 
> diff --git a/src/cl_api_command_queue.c b/src/cl_api_command_queue.c
> new file mode 100644
> index 0000000..9f06deb
> --- /dev/null
> +++ b/src/cl_api_command_queue.c
> @@ -0,0 +1,55 @@
> +/*
> + * Copyright © 2012 Intel Corporation
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * This library is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this library. If not, see
> <http://www.gnu.org/licenses/>.
> + *
> + */
> +#include "cl_command_queue.h"
> +#include "CL/cl.h"
> +#include <stdio.h>
> +
> +cl_int
> +clFlush(cl_command_queue command_queue)
> +{
> +  if (!CL_OBJECT_IS_COMMAND_QUEUE(command_queue)) {
> +    return CL_INVALID_COMMAND_QUEUE;
> +  }
> +
> +  return cl_command_queue_wait_flush(command_queue);
> +}
> +
> +cl_int
> +clFinish(cl_command_queue command_queue)
> +{
> +  if (!CL_OBJECT_IS_COMMAND_QUEUE(command_queue)) {
> +    return CL_INVALID_COMMAND_QUEUE;
> +  }
> +
> +  return cl_command_queue_wait_finish(command_queue);
> +}
> +
> +
> +cl_int
> +clReleaseCommandQueue(cl_command_queue command_queue)
> +{
> +  if (!CL_OBJECT_IS_COMMAND_QUEUE(command_queue)) {
> +    return CL_INVALID_COMMAND_QUEUE;
> +  }
> +
> +  cl_command_queue_wait_flush(command_queue);
> +
> +  cl_command_queue_delete(command_queue);
> +  return CL_SUCCESS;
> +}
> +
> diff --git a/src/cl_api_event.c b/src/cl_api_event.c
> new file mode 100644
> index 0000000..aec2cdf
> --- /dev/null
> +++ b/src/cl_api_event.c
> @@ -0,0 +1,243 @@
> +/*
> + * Copyright © 2012 Intel Corporation
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * This library is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this library. If not, see
> <http://www.gnu.org/licenses/>.
> + *
> + */
> +#include "cl_event.h"
> +#include "cl_context.h"
> +#include "cl_command_queue.h"
> +#include "CL/cl.h"
> +#include <stdio.h>
> +
> +cl_event
> +clCreateUserEvent(cl_context context,
> +                  cl_int *errcode_ret)
> +{
> +  cl_int err = CL_SUCCESS;
> +  cl_event event = NULL;
> +
> +  do {
> +    if (!CL_OBJECT_IS_CONTEXT(context)) {
> +      err = CL_INVALID_CONTEXT;
> +      break;
> +    }
> +
> +    event = cl_event_create(context, NULL, 0, NULL, CL_COMMAND_USER,
> &err);
> +  } while (0);
> +
> +  if (errcode_ret)
> +    *errcode_ret = err;
> +  return event;
> +}
> +
> +cl_int
> +clSetUserEventStatus(cl_event event,
> +                     cl_int execution_status)
> +{
> +  cl_int err = CL_SUCCESS;
> +
> +  if (!CL_OBJECT_IS_EVENT(event)) {
> +    return CL_INVALID_EVENT;
> +  }
> +
> +  if (execution_status > CL_COMPLETE) {
> +    return CL_INVALID_VALUE;
> +  }
> +
> +  err = cl_event_set_status(event, execution_status);
> +  return err;
> +}
> +
> +/* 1.1 API, depreciated */
> +cl_int
> +clEnqueueMarker(cl_command_queue command_queue,
> +                cl_event *event)
> +{
> +  return clEnqueueMarkerWithWaitList(command_queue, 0, NULL, event);
> +}
> +
> +cl_int
> +clEnqueueMarkerWithWaitList(cl_command_queue command_queue,
> +                            cl_uint num_events_in_wait_list,
> +                            const cl_event *event_wait_list,
> +                            cl_event *event)
> +{
> +  cl_int err = CL_SUCCESS;
> +  cl_event e = NULL;
> +  cl_int e_status;
> +
> +  do {
> +    if (!CL_OBJECT_IS_COMMAND_QUEUE(command_queue)) {
> +      err = CL_INVALID_COMMAND_QUEUE;
> +      break;
> +    }
> +
> +    err = cl_event_check_waitlist(num_events_in_wait_list, event_wait_list,
> +                                  event, command_queue->ctx);
> +    if (err != CL_SUCCESS) {
> +      break;
> +    }
> +
> +    if (event == NULL) { /* Create a anonymous event, it can not be waited on
> and useless. */
> +      return CL_SUCCESS;
> +    }
> +
> +    e = cl_event_create_marker_or_barrier(command_queue,
> num_events_in_wait_list,
> +                                          event_wait_list, CL_FALSE, &err);
> +    if (err != CL_SUCCESS) {
> +      return err;
> +    }
> +
> +    e_status = cl_event_is_ready(e);
> +    if (e_status < CL_COMPLETE) { // Error happend, cancel.
> +      err = CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST;
> +      break;
> +    } else if (e_status == CL_COMPLETE) {
> +      err = cl_enqueue_handle(&e->exec_data, CL_COMPLETE);
> +      if (err != CL_SUCCESS) {
> +        break;
> +      }
> +
> +      e->status = CL_COMPLETE;
> +    } else {
> +      cl_command_queue_enqueue_event(command_queue, e);
> +    }
> +  } while (0);
> +
> +  if (event) {
> +    *event = e;
> +  } else {
> +    cl_event_delete(e);
> +  }
> +  return err;
> +}
> +
> +/* 1.1 API, depreciated */
> +cl_int
> +clEnqueueBarrier(cl_command_queue command_queue)
> +{
> +  return clEnqueueBarrierWithWaitList(command_queue, 0, NULL, NULL);
> +}
> +
> +cl_int
> +clEnqueueBarrierWithWaitList(cl_command_queue command_queue,
> +                             cl_uint num_events_in_wait_list,
> +                             const cl_event *event_wait_list,
> +                             cl_event *event)
> +{
> +  cl_int err = CL_SUCCESS;
> +  cl_event e = NULL;
> +  cl_int e_status;
> +
> +  do {
> +    if (!CL_OBJECT_IS_COMMAND_QUEUE(command_queue)) {
> +      err = CL_INVALID_COMMAND_QUEUE;
> +      break;
> +    }
> +
> +    err = cl_event_check_waitlist(num_events_in_wait_list, event_wait_list,
> +                                  event, command_queue->ctx);
> +    if (err != CL_SUCCESS) {
> +      break;
> +    }
> +
> +    e = cl_event_create_marker_or_barrier(command_queue,
> num_events_in_wait_list,
> +                                          event_wait_list, CL_TRUE, &err);
> +    if (err != CL_SUCCESS) {
> +      break;
> +    }
> +
> +    e_status = cl_event_is_ready(e);
> +    if (e_status < CL_COMPLETE) { // Error happend, cancel.
> +      err = CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST;
> +      break;
> +    } else if (e_status == CL_COMPLETE) {
> +      err = cl_enqueue_handle(&e->exec_data, CL_COMPLETE);
> +      if (err != CL_SUCCESS) {
> +        break;
> +      }
> +
> +      e->status = CL_COMPLETE;
> +      /* Already a completed barrier, no need to insert to queue. */
> +    } else {
> +      cl_command_queue_insert_barrier_event(command_queue, e);
> +      cl_command_queue_enqueue_event(command_queue, e);
> +    }
> +  } while (0);
> +
> +  if (err == CL_SUCCESS && event) {
> +    *event = e;
> +  } else {
> +    cl_event_delete(e);
> +  }
> +  return err;
> +}
> +
> +cl_int
> +clWaitForEvents(cl_uint num_events,
> +                const cl_event *event_list)
> +{
> +  cl_int err = CL_SUCCESS;
> +
> +  if (num_events == 0 || event_list == NULL) {
> +    return CL_INVALID_VALUE;
> +  }
> +
> +  err = cl_event_check_waitlist(num_events, event_list, NULL, NULL);
> +  if (err != CL_SUCCESS) {
> +    return err;
> +  }
> +
> +  err = cl_event_wait_for_events_list(num_events, event_list);
> +  return err;
> +}
> +
> +/* 1.1 API, depreciated */
> +cl_int
> +clEnqueueWaitForEvents(cl_command_queue command_queue,
> +                       cl_uint num_events,
> +                       const cl_event *event_list)
> +{
> +  cl_int err = CL_SUCCESS;
> +
> +  if (!CL_OBJECT_IS_COMMAND_QUEUE(command_queue)) {
> +    return CL_INVALID_COMMAND_QUEUE;
> +  }
> +
> +  err = clWaitForEvents(num_events, event_list);
> +  return err;
> +}
> +
> +cl_int
> +clSetEventCallback(cl_event event,
> +                   cl_int command_exec_callback_type,
> +                   void(CL_CALLBACK *pfn_notify)(cl_event, cl_int, void *),
> +                   void *user_data)
> +{
> +  cl_int err = CL_SUCCESS;
> +
> +  if (!CL_OBJECT_IS_EVENT(event)) {
> +    return CL_INVALID_EVENT;
> +  }
> +
> +  if ((pfn_notify == NULL) ||
> +      (command_exec_callback_type > CL_SUBMITTED) ||
> +      (command_exec_callback_type < CL_COMPLETE)) {
> +    return CL_INVALID_VALUE;
> +  }
> +
> +  err = cl_event_set_callback(event, command_exec_callback_type,
> pfn_notify, user_data);
> +  return err;
> +}
> diff --git a/src/cl_api_kernel.c b/src/cl_api_kernel.c
> new file mode 100644
> index 0000000..a1075d7
> --- /dev/null
> +++ b/src/cl_api_kernel.c
> @@ -0,0 +1,337 @@
> +/*
> + * Copyright © 2012 Intel Corporation
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * This library is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this library. If not, see
> <http://www.gnu.org/licenses/>.
> + *
> + */
> +#include "cl_mem.h"
> +#include "cl_kernel.h"
> +#include "cl_enqueue.h"
> +#include "cl_command_queue.h"
> +#include "cl_event.h"
> +#include "cl_context.h"
> +#include "cl_program.h"
> +#include "cl_alloc.h"
> +#include "CL/cl.h"
> +#include <stdio.h>
> +#include <string.h>
> +
> +cl_int
> +clEnqueueNDRangeKernel(cl_command_queue command_queue,
> +                       cl_kernel kernel,
> +                       cl_uint work_dim,
> +                       const size_t *global_work_offset,
> +                       const size_t *global_work_size,
> +                       const size_t *local_work_size,
> +                       cl_uint num_events_in_wait_list,
> +                       const cl_event *event_wait_list,
> +                       cl_event *event)
> +{
> +  size_t fixed_global_off[] = {0, 0, 0};
> +  size_t fixed_global_sz[] = {1, 1, 1};
> +  size_t fixed_local_sz[] = {1, 1, 1};
> +  cl_int err = CL_SUCCESS;
> +  cl_uint i;
> +  cl_event e = NULL;
> +  cl_int event_status;
> +
> +  do {
> +    if (!CL_OBJECT_IS_COMMAND_QUEUE(command_queue)) {
> +      err = CL_INVALID_COMMAND_QUEUE;
> +      break;
> +    }
> +
> +    if (!CL_OBJECT_IS_KERNEL(kernel)) {
> +      err = CL_INVALID_KERNEL;
> +      break;
> +    }
> +
> +    /* Check number of dimensions we have */
> +    if (UNLIKELY(work_dim == 0 || work_dim > 3)) {
> +      err = CL_INVALID_WORK_DIMENSION;
> +      break;
> +    }
> +
> +    /* We need a work size per dimension */
> +    if (UNLIKELY(global_work_size == NULL)) {
> +      err = CL_INVALID_GLOBAL_WORK_SIZE;
> +      break;
> +    }
> +
> +    if (kernel->vme) {
> +      if (work_dim != 2) {
> +        err = CL_INVALID_WORK_DIMENSION;
> +        break;
> +      }
> +      if (local_work_size != NULL) {
> +        err = CL_INVALID_WORK_GROUP_SIZE;
> +        break;
> +      }
> +    }
> +
> +    if (global_work_offset != NULL) {
> +      for (i = 0; i < work_dim; ++i) {
> +        if (UNLIKELY(global_work_offset[i] + global_work_size[i] > (size_t)-1)) {
> +          err = CL_INVALID_GLOBAL_OFFSET;
> +          break;
> +        }
> +      }
> +    }
> +
> +    /* Local sizes must be non-null and divide global sizes */
> +    if (local_work_size != NULL) {
> +      for (i = 0; i < work_dim; ++i) {
> +        if (UNLIKELY(local_work_size[i] == 0 || global_work_size[i] %
> local_work_size[i])) {
> +          err = CL_INVALID_WORK_GROUP_SIZE;
> +          break;
> +        }
> +      }
> +      if (err != CL_SUCCESS) {
> +        break;
> +      }
> +    }
> +
> +    /* Queue and kernel must share the same context */
> +    assert(kernel->program);
> +    if (command_queue->ctx != kernel->program->ctx) {
> +      err = CL_INVALID_CONTEXT;
> +      break;
> +    }
> +
> +    if (local_work_size != NULL) {
> +      for (i = 0; i < work_dim; ++i)
> +        fixed_local_sz[i] = local_work_size[i];
> +    } else {
> +      if (kernel->vme) {
> +        fixed_local_sz[0] = 16;
> +        fixed_local_sz[1] = 1;
> +      } else {
> +        uint j, maxDimSize = 64 /* from 64? */, maxGroupSize = 256;
> //MAX_WORK_GROUP_SIZE may too large
> +        size_t realGroupSize = 1;
> +        for (i = 0; i < work_dim; i++) {
> +          for (j = maxDimSize; j > 1; j--) {
> +            if (global_work_size[i] % j == 0 && j <= maxGroupSize) {
> +              fixed_local_sz[i] = j;
> +              maxGroupSize = maxGroupSize / j;
> +              maxDimSize = maxGroupSize > maxDimSize ? maxDimSize :
> maxGroupSize;
> +              break; //choose next work_dim
> +            }
> +          }
> +          realGroupSize *= fixed_local_sz[i];
> +        }
> +        if (realGroupSize % 8 != 0)
> +          DEBUGP(DL_WARNING, "unable to find good values for
> local_work_size[i], please provide\n"
> +                             " local_work_size[] explicitly, you can find good values
> with\n"
> +                             " trial-and-error method.");
> +      }
> +    }
> +
> +    if (kernel->vme) {
> +      fixed_global_sz[0] = (global_work_size[0] + 15) / 16 * 16;
> +      fixed_global_sz[1] = (global_work_size[1] + 15) / 16;
> +    } else {
> +      for (i = 0; i < work_dim; ++i)
> +        fixed_global_sz[i] = global_work_size[i];
> +    }
> +
> +    if (global_work_offset != NULL)
> +      for (i = 0; i < work_dim; ++i)
> +        fixed_global_off[i] = global_work_offset[i];
> +
> +    if (kernel->compile_wg_sz[0] || kernel->compile_wg_sz[1] || kernel-
> >compile_wg_sz[2]) {
> +      if (fixed_local_sz[0] != kernel->compile_wg_sz[0] ||
> +          fixed_local_sz[1] != kernel->compile_wg_sz[1] ||
> +          fixed_local_sz[2] != kernel->compile_wg_sz[2]) {
> +        err = CL_INVALID_WORK_GROUP_SIZE;
> +        break;
> +      }
> +    }
> +
> +    err = cl_event_check_waitlist(num_events_in_wait_list, event_wait_list,
> +                                  event, command_queue->ctx);
> +    if (err != CL_SUCCESS) {
> +      break;
> +    }
> +
> +    e = cl_event_create(command_queue->ctx, command_queue,
> num_events_in_wait_list,
> +                        event_wait_list, CL_COMMAND_NDRANGE_KERNEL, &err);
> +    if (err != CL_SUCCESS) {
> +      break;
> +    }
> +
> +    /* Do device specific checks are enqueue the kernel */
> +    err = cl_command_queue_ND_range(command_queue, kernel, e,
> work_dim,
> +                                    fixed_global_off, fixed_global_sz, fixed_local_sz);
> +    if (err != CL_SUCCESS) {
> +      break;
> +    }
> +
> +    /* We will flush the ndrange if no event depend. Else we will add it to
> queue list.
> +       The finish or Complete status will always be done in queue list. */
> +    event_status = cl_event_is_ready(e);
> +    if (event_status < CL_COMPLETE) { // Error happend, cancel.
> +      err = CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST;
> +      break;
> +    } else if (event_status == CL_COMPLETE) {
> +      err = cl_enqueue_handle(&e->exec_data, CL_SUBMITTED);
> +      if (err != CL_SUCCESS) {
> +        break;
> +      }
> +
> +      e->status = CL_SUBMITTED;
> +    }
> +
> +    cl_command_queue_enqueue_event(command_queue, e);
> +  } while (0);
> +
> +  if (err == CL_SUCCESS && event) {
> +    *event = e;
> +  } else {
> +    cl_event_delete(e);
> +  }
> +
> +  return err;
> +}
> +
> +cl_int
> +clEnqueueTask(cl_command_queue command_queue,
> +              cl_kernel kernel,
> +              cl_uint num_events_in_wait_list,
> +              const cl_event *event_wait_list,
> +              cl_event *event)
> +{
> +  const size_t global_size[3] = {1, 0, 0};
> +  const size_t local_size[3] = {1, 0, 0};
> +
> +  return clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL,
> +                                global_size, local_size,
> +                                num_events_in_wait_list, event_wait_list, event);
> +}
> +
> +cl_int
> +clEnqueueNativeKernel(cl_command_queue command_queue,
> +                      void (*user_func)(void *),
> +                      void *args,
> +                      size_t cb_args,
> +                      cl_uint num_mem_objects,
> +                      const cl_mem *mem_list,
> +                      const void **args_mem_loc,
> +                      cl_uint num_events_in_wait_list,
> +                      const cl_event *event_wait_list,
> +                      cl_event *event)
> +{
> +  cl_int err = CL_SUCCESS;
> +  void *new_args = NULL;
> +  void **new_args_mem_loc = NULL;
> +  cl_mem *new_mem_list = NULL;
> +  cl_int i;
> +  cl_int e_status;
> +  cl_event e = NULL;
> +  enqueue_data *data = NULL;
> +
> +  do {
> +    if (user_func == NULL ||
> +        (args == NULL && cb_args > 0) ||
> +        (args == NULL && num_mem_objects > 0) ||
> +        (args != NULL && cb_args == 0) ||
> +        (num_mem_objects > 0 && (mem_list == NULL || args_mem_loc ==
> NULL)) ||
> +        (num_mem_objects == 0 && (mem_list != NULL || args_mem_loc !=
> NULL))) {
> +      err = CL_INVALID_VALUE;
> +      break;
> +    }
> +
> +    //Per spec, need copy args
> +    if (cb_args) {
> +      new_args = CL_MALLOC(cb_args);
> +      if (num_mem_objects) {
> +        new_args_mem_loc = CL_MALLOC(sizeof(void *) *
> num_mem_objects);
> +        new_mem_list = CL_MALLOC(sizeof(cl_mem) * num_mem_objects);
> +        memcpy(new_mem_list, mem_list, sizeof(cl_mem) *
> num_mem_objects);
> +      }
> +
> +      if (new_args == NULL || new_args_mem_loc == NULL) {
> +        err = CL_OUT_OF_HOST_MEMORY;
> +        break;
> +      }
> +      memcpy(new_args, args, cb_args);
> +
> +      for (i = 0; i < num_mem_objects; ++i) {
> +        if (!CL_OBJECT_IS_MEM(mem_list[i])) {
> +          err = CL_INVALID_MEM_OBJECT;
> +          break;
> +        }
> +
> +        new_args_mem_loc[i] = new_args + (args_mem_loc[i] - args); //change
> to new args
> +      }
> +    }
> +
> +    err = cl_event_check_waitlist(num_events_in_wait_list, event_wait_list,
> +                                  event, command_queue->ctx);
> +    if (err != CL_SUCCESS) {
> +      break;
> +    }
> +
> +    e = cl_event_create(command_queue->ctx, command_queue,
> num_events_in_wait_list,
> +                        event_wait_list, CL_COMMAND_NATIVE_KERNEL, &err);
> +    if (err != CL_SUCCESS) {
> +      break;
> +    }
> +
> +    e_status = cl_event_is_ready(e);
> +    if (e_status < CL_COMPLETE) {
> +      err = CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST;
> +      break;
> +    }
> +
> +    data = &e->exec_data;
> +    data->type = EnqueueNativeKernel;
> +    data->mem_list = new_mem_list;
> +    data->ptr = new_args;
> +    data->size = cb_args;
> +    data->offset = (size_t)num_mem_objects;
> +    data->const_ptr = new_args_mem_loc;
> +    data->user_func = user_func;
> +
> +    if (e_status == CL_COMPLETE) {
> +      // Sync mode, no need to queue event.
> +      err = cl_enqueue_handle(data, CL_COMPLETE);
> +      if (err != CL_SUCCESS) {
> +        assert(err < 0);
> +        e->status = err;
> +        break;
> +      }
> +
> +      e->status = CL_COMPLETE; // Just set the status, no notify. No one
> depend on us now.
> +    } else {
> +      cl_command_queue_enqueue_event(command_queue, e);
> +    }
> +  } while (0);
> +
> +  if (err != CL_SUCCESS) {
> +    if (new_args)
> +      CL_FREE(new_args);
> +    if (new_mem_list)
> +      CL_FREE(new_mem_list);
> +    if (new_args_mem_loc)
> +      CL_FREE(new_args_mem_loc);
> +  }
> +
> +  if (err == CL_SUCCESS && event) {
> +    *event = e;
> +  } else {
> +    cl_event_delete(e);
> +  }
> +
> +  return err;
> +}
> diff --git a/src/cl_api_mem.c b/src/cl_api_mem.c
> new file mode 100644
> index 0000000..054c37a
> --- /dev/null
> +++ b/src/cl_api_mem.c
> @@ -0,0 +1,2248 @@
> +/*
> + * Copyright © 2012 Intel Corporation
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * This library is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this library. If not, see
> <http://www.gnu.org/licenses/>.
> + *
> + */
> +
> +#include "cl_mem.h"
> +#include "cl_enqueue.h"
> +#include "cl_command_queue.h"
> +#include "cl_event.h"
> +#include "CL/cl.h"
> +
> +void *
> +clEnqueueMapBuffer(cl_command_queue command_queue,
> +                   cl_mem buffer,
> +                   cl_bool blocking_map,
> +                   cl_map_flags map_flags,
> +                   size_t offset,
> +                   size_t size,
> +                   cl_uint num_events_in_wait_list,
> +                   const cl_event *event_wait_list,
> +                   cl_event *event,
> +                   cl_int *errcode_ret)
> +{
> +  cl_int err = CL_SUCCESS;
> +  void *ptr = NULL;
> +  void *mem_ptr = NULL;
> +  cl_event e = NULL;
> +  cl_int e_status;
> +  enqueue_data *data = NULL;
> +
> +  do {
> +    if (!CL_OBJECT_IS_COMMAND_QUEUE(command_queue)) {
> +      err = CL_INVALID_COMMAND_QUEUE;
> +      break;
> +    }
> +
> +    if (!CL_OBJECT_IS_BUFFER(buffer)) {
> +      err = CL_INVALID_MEM_OBJECT;
> +      break;
> +    }
> +
> +    if (command_queue->ctx != buffer->ctx) {
> +      err = CL_INVALID_CONTEXT;
> +      break;
> +    }
> +
> +    if (!size || offset + size > buffer->size) {
> +      err = CL_INVALID_VALUE;
> +      break;
> +    }
> +
> +    if ((map_flags & CL_MAP_READ &&
> +         buffer->flags & (CL_MEM_HOST_WRITE_ONLY |
> CL_MEM_HOST_NO_ACCESS)) ||
> +        (map_flags & (CL_MAP_WRITE |
> CL_MAP_WRITE_INVALIDATE_REGION) &&
> +         buffer->flags & (CL_MEM_HOST_READ_ONLY |
> CL_MEM_HOST_NO_ACCESS))) {
> +      err = CL_INVALID_OPERATION;
> +      break;
> +    }
> +
> +    err = cl_event_check_waitlist(num_events_in_wait_list, event_wait_list,
> +                                  event, command_queue->ctx);
> +    if (err != CL_SUCCESS) {
> +      break;
> +    }
> +
> +    e = cl_event_create(command_queue->ctx, command_queue,
> num_events_in_wait_list,
> +                        event_wait_list, CL_COMMAND_MAP_BUFFER, &err);
> +    if (err != CL_SUCCESS) {
> +      break;
> +    }
> +
> +    if (blocking_map) {
> +      err = cl_event_wait_for_event_ready(e);
> +      if (err != CL_SUCCESS)
> +        break;
> +
> +      /* Blocking call API is a sync point of flush. */
> +      err = cl_command_queue_wait_flush(command_queue);
> +      if (err != CL_SUCCESS) {
> +        break;
> +      }
> +    }
> +
> +    e_status = cl_event_is_ready(e);
> +    if (e_status < CL_COMPLETE) {
> +      err = CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST;
> +      break;
> +    }
> +
> +    data = &e->exec_data;
> +    data->type = EnqueueMapBuffer;
> +    data->mem_obj = buffer;
> +    data->offset = offset;
> +    data->size = size;
> +    data->ptr = NULL;
> +    data->unsync_map = 0;
> +    if (map_flags & (CL_MAP_WRITE |
> CL_MAP_WRITE_INVALIDATE_REGION))
> +      data->write_map = 1;
> +
> +    if (e_status == CL_COMPLETE) {
> +      // Sync mode, no need to queue event.
> +      err = cl_enqueue_handle(data, CL_COMPLETE);
> +      if (err != CL_SUCCESS) {
> +        assert(err < 0);
> +        e->status = err;
> +        break;
> +      }
> +
> +      ptr = data->ptr;
> +      e->status = CL_COMPLETE; // Just set the status, no notify. No one
> depend on us now.
> +    } else {
> +      err = cl_enqueue_handle(data, CL_SUBMITTED); // Submit to get the
> address.
> +      if (err != CL_SUCCESS) {
> +        assert(err < 0);
> +        e->status = err;
> +        break;
> +      }
> +
> +      e->status = CL_SUBMITTED;
> +      ptr = data->ptr;
> +      assert(ptr);
> +
> +      cl_command_queue_enqueue_event(command_queue, e);
> +    }
> +
> +    err = cl_mem_record_map_mem(buffer, ptr, &mem_ptr, offset, size,
> NULL, NULL);
> +    assert(err == CL_SUCCESS);
> +  } while (0);
> +
> +  if (err == CL_SUCCESS && event) {
> +    *event = e;
> +  } else {
> +    cl_event_delete(e);
> +  }
> +
> +  if (errcode_ret)
> +    *errcode_ret = err;
> +
> +  return mem_ptr;
> +}
> +
> +cl_int
> +clEnqueueUnmapMemObject(cl_command_queue command_queue,
> +                        cl_mem memobj,
> +                        void *mapped_ptr,
> +                        cl_uint num_events_in_wait_list,
> +                        const cl_event *event_wait_list,
> +                        cl_event *event)
> +{
> +  cl_int err = CL_SUCCESS;
> +  cl_int e_status;
> +  enqueue_data *data = NULL;
> +  cl_event e = NULL;
> +
> +  do {
> +    if (!CL_OBJECT_IS_COMMAND_QUEUE(command_queue)) {
> +      err = CL_INVALID_COMMAND_QUEUE;
> +      break;
> +    }
> +
> +    if (!CL_OBJECT_IS_MEM(memobj)) {
> +      err = CL_INVALID_MEM_OBJECT;
> +      break;
> +    }
> +
> +    if (command_queue->ctx != memobj->ctx) {
> +      err = CL_INVALID_CONTEXT;
> +      break;
> +    }
> +
> +    err = cl_event_check_waitlist(num_events_in_wait_list, event_wait_list,
> +                                  event, command_queue->ctx);
> +    if (err != CL_SUCCESS) {
> +      break;
> +    }
> +
> +    e = cl_event_create(command_queue->ctx, command_queue,
> num_events_in_wait_list,
> +                        event_wait_list, CL_COMMAND_UNMAP_MEM_OBJECT, &err);
> +    if (err != CL_SUCCESS) {
> +      break;
> +    }
> +
> +    e_status = cl_event_is_ready(e);
> +    if (e_status < CL_COMPLETE) {
> +      err = CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST;
> +      break;
> +    }
> +
> +    data = &e->exec_data;
> +    data->type = EnqueueUnmapMemObject;
> +    data->mem_obj = memobj;
> +    data->ptr = mapped_ptr;
> +
> +    if (e_status == CL_COMPLETE) { // No need to wait
> +      err = cl_enqueue_handle(data, CL_COMPLETE);
> +      if (err != CL_SUCCESS) {
> +        assert(err < 0);
> +        e->status = err;
> +        break;
> +      }
> +
> +      e->status = CL_COMPLETE;
> +    } else { // May need to wait some event to complete.
> +      cl_command_queue_enqueue_event(command_queue, e);
> +    }
> +  } while (0);
> +
> +  if (err == CL_SUCCESS && event) {
> +    *event = e;
> +  } else {
> +    cl_event_delete(e);
> +  }
> +
> +  return err;
> +}
> +
> +cl_int
> +clEnqueueReadBuffer(cl_command_queue command_queue,
> +                    cl_mem buffer,
> +                    cl_bool blocking_read,
> +                    size_t offset,
> +                    size_t size,
> +                    void *ptr,
> +                    cl_uint num_events_in_wait_list,
> +                    const cl_event *event_wait_list,
> +                    cl_event *event)
> +{
> +  cl_int err = CL_SUCCESS;
> +  enqueue_data *data = NULL;
> +  cl_int e_status;
> +  cl_event e = NULL;
> +
> +  do {
> +    if (!CL_OBJECT_IS_COMMAND_QUEUE(command_queue)) {
> +      err = CL_INVALID_COMMAND_QUEUE;
> +      break;
> +    }
> +
> +    if (!CL_OBJECT_IS_BUFFER(buffer)) {
> +      err = CL_INVALID_MEM_OBJECT;
> +      break;
> +    }
> +
> +    if (command_queue->ctx != buffer->ctx) {
> +      err = CL_INVALID_CONTEXT;
> +      break;
> +    }
> +
> +    if (!ptr || !size || offset + size > buffer->size) {
> +      err = CL_INVALID_VALUE;
> +      break;
> +    }
> +
> +    if (buffer->flags & (CL_MEM_HOST_WRITE_ONLY |
> CL_MEM_HOST_NO_ACCESS)) {
> +      err = CL_INVALID_OPERATION;
> +      break;
> +    }
> +
> +    err = cl_event_check_waitlist(num_events_in_wait_list, event_wait_list,
> +                                  event, command_queue->ctx);
> +    if (err != CL_SUCCESS) {
> +      break;
> +    }
> +
> +    e = cl_event_create(command_queue->ctx, command_queue,
> num_events_in_wait_list,
> +                        event_wait_list, CL_COMMAND_READ_BUFFER, &err);
> +    if (err != CL_SUCCESS) {
> +      break;
> +    }
> +
> +    if (blocking_read) {
> +      err = cl_event_wait_for_event_ready(e);
> +      if (err != CL_SUCCESS)
> +        break;
> +
> +      /* Blocking call API is a sync point of flush. */
> +      err = cl_command_queue_wait_flush(command_queue);
> +      if (err != CL_SUCCESS) {
> +        break;
> +      }
> +    }
> +
> +    e_status = cl_event_is_ready(e);
> +    if (e_status < CL_COMPLETE) {
> +      err = CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST;
> +      break;
> +    }
> +
> +    data = &e->exec_data;
> +    data->type = EnqueueReadBuffer;
> +    data->mem_obj = buffer;
> +    data->ptr = ptr;
> +    data->offset = offset;
> +    data->size = size;
> +
> +    if (e_status == CL_COMPLETE) {
> +      // Sync mode, no need to queue event.
> +      err = cl_enqueue_handle(data, CL_COMPLETE);
> +      if (err != CL_SUCCESS) {
> +        assert(err < 0);
> +        e->status = err;
> +        break;
> +      }
> +
> +      e->status = CL_COMPLETE; // Just set the status, no notify. No one
> depend on us now.
> +    } else {
> +      cl_command_queue_enqueue_event(command_queue, e);
> +    }
> +  } while (0);
> +
> +  if (err == CL_SUCCESS && event) {
> +    *event = e;
> +  } else {
> +    cl_event_delete(e);
> +  }
> +
> +  return err;
> +}
> +
> +cl_int
> +clEnqueueWriteBuffer(cl_command_queue command_queue,
> +                     cl_mem buffer,
> +                     cl_bool blocking_write,
> +                     size_t offset,
> +                     size_t size,
> +                     const void *ptr,
> +                     cl_uint num_events_in_wait_list,
> +                     const cl_event *event_wait_list,
> +                     cl_event *event)
> +{
> +  cl_int err = CL_SUCCESS;
> +  enqueue_data *data = NULL;
> +  cl_int e_status;
> +  cl_event e = NULL;
> +
> +  do {
> +    if (!CL_OBJECT_IS_COMMAND_QUEUE(command_queue)) {
> +      err = CL_INVALID_COMMAND_QUEUE;
> +      break;
> +    }
> +
> +    if (!CL_OBJECT_IS_BUFFER(buffer)) {
> +      err = CL_INVALID_MEM_OBJECT;
> +      break;
> +    }
> +
> +    if (command_queue->ctx != buffer->ctx) {
> +      err = CL_INVALID_CONTEXT;
> +      break;
> +    }
> +
> +    if (!ptr || !size || offset + size > buffer->size) {
> +      err = CL_INVALID_VALUE;
> +      break;
> +    }
> +
> +    if (buffer->flags & (CL_MEM_HOST_READ_ONLY |
> CL_MEM_HOST_NO_ACCESS)) {
> +      err = CL_INVALID_OPERATION;
> +      break;
> +    }
> +
> +    err = cl_event_check_waitlist(num_events_in_wait_list, event_wait_list,
> +                                  event, command_queue->ctx);
> +    if (err != CL_SUCCESS) {
> +      break;
> +    }
> +
> +    e = cl_event_create(command_queue->ctx, command_queue,
> num_events_in_wait_list,
> +                        event_wait_list, CL_COMMAND_WRITE_BUFFER, &err);
> +    if (err != CL_SUCCESS) {
> +      break;
> +    }
> +
> +    if (blocking_write) {
> +      err = cl_event_wait_for_event_ready(e);
> +      if (err != CL_SUCCESS)
> +        break;
> +
> +      /* Blocking call API is a sync point of flush. */
> +      err = cl_command_queue_wait_flush(command_queue);
> +      if (err != CL_SUCCESS) {
> +        break;
> +      }
> +    }
> +
> +    e_status = cl_event_is_ready(e);
> +    if (e_status < CL_COMPLETE) {
> +      err = CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST;
> +      break;
> +    }
> +
> +    data = &e->exec_data;
> +    data->type = EnqueueWriteBuffer;
> +    data->mem_obj = buffer;
> +    data->const_ptr = ptr;
> +    data->offset = offset;
> +    data->size = size;
> +
> +    if (e_status == CL_COMPLETE) {
> +      // Sync mode, no need to queue event.
> +      err = cl_enqueue_handle(data, CL_COMPLETE);
> +      if (err != CL_SUCCESS) {
> +        assert(err < 0);
> +        e->status = err;
> +        break;
> +      }
> +
> +      e->status = CL_COMPLETE; // Just set the status, no notify. No one
> depend on us now.
> +    } else {
> +      cl_command_queue_enqueue_event(command_queue, e);
> +    }
> +  } while (0);
> +
> +  if (err == CL_SUCCESS && event) {
> +    *event = e;
> +  } else {
> +    cl_event_delete(e);
> +  }
> +
> +  return err;
> +}
> +
> +cl_int
> +clEnqueueReadBufferRect(cl_command_queue command_queue,
> +                        cl_mem buffer,
> +                        cl_bool blocking_read,
> +                        const size_t *buffer_origin,
> +                        const size_t *host_origin,
> +                        const size_t *region,
> +                        size_t buffer_row_pitch,
> +                        size_t buffer_slice_pitch,
> +                        size_t host_row_pitch,
> +                        size_t host_slice_pitch,
> +                        void *ptr,
> +                        cl_uint num_events_in_wait_list,
> +                        const cl_event *event_wait_list,
> +                        cl_event *event)
> +{
> +  cl_int err = CL_SUCCESS;
> +  size_t total_size = 0;
> +  enqueue_data *data = NULL;
> +  cl_int e_status;
> +  cl_event e = NULL;
> +
> +  do {
> +    if (!CL_OBJECT_IS_COMMAND_QUEUE(command_queue)) {
> +      err = CL_INVALID_COMMAND_QUEUE;
> +      break;
> +    }
> +
> +    if (!CL_OBJECT_IS_BUFFER(buffer)) {
> +      err = CL_INVALID_MEM_OBJECT;
> +      break;
> +    }
> +
> +    if (command_queue->ctx != buffer->ctx) {
> +      err = CL_INVALID_CONTEXT;
> +      break;
> +    }
> +
> +    if (buffer->flags & (CL_MEM_HOST_WRITE_ONLY |
> CL_MEM_HOST_NO_ACCESS)) {
> +      err = CL_INVALID_OPERATION;
> +      break;
> +    }
> +
> +    if (!ptr || !region || region[0] == 0 || region[1] == 0 || region[2] == 0) {
> +      err = CL_INVALID_VALUE;
> +      break;
> +    }
> +
> +    if (buffer_row_pitch == 0)
> +      buffer_row_pitch = region[0];
> +    if (buffer_slice_pitch == 0)
> +      buffer_slice_pitch = region[1] * buffer_row_pitch;
> +
> +    if (host_row_pitch == 0)
> +      host_row_pitch = region[0];
> +    if (host_slice_pitch == 0)
> +      host_slice_pitch = region[1] * host_row_pitch;
> +
> +    if (buffer_row_pitch < region[0] ||
> +        host_row_pitch < region[0]) {
> +      err = CL_INVALID_VALUE;
> +      break;
> +    }
> +
> +    if ((buffer_slice_pitch < region[1] * buffer_row_pitch ||
> buffer_slice_pitch % buffer_row_pitch != 0) ||
> +        (host_slice_pitch < region[1] * host_row_pitch || host_slice_pitch %
> host_row_pitch != 0)) {
> +      err = CL_INVALID_VALUE;
> +      break;
> +    }
> +
> +    total_size = (buffer_origin[2] + region[2] - 1) * buffer_slice_pitch +
> +                 (buffer_origin[1] + region[1] - 1) * buffer_row_pitch +
> buffer_origin[0] + region[0];
> +    if (total_size > buffer->size) {
> +      err = CL_INVALID_VALUE;
> +      break;
> +    }
> +
> +    err = cl_event_check_waitlist(num_events_in_wait_list, event_wait_list,
> +                                  event, command_queue->ctx);
> +    if (err != CL_SUCCESS) {
> +      break;
> +    }
> +
> +    e = cl_event_create(command_queue->ctx, command_queue,
> num_events_in_wait_list,
> +                        event_wait_list, CL_COMMAND_READ_BUFFER_RECT, &err);
> +    if (err != CL_SUCCESS) {
> +      break;
> +    }
> +
> +    if (blocking_read) {
> +      err = cl_event_wait_for_event_ready(e);
> +      if (err != CL_SUCCESS)
> +        break;
> +
> +      /* Blocking call API is a sync point of flush. */
> +      err = cl_command_queue_wait_flush(command_queue);
> +      if (err != CL_SUCCESS) {
> +        break;
> +      }
> +    }
> +
> +    e_status = cl_event_is_ready(e);
> +    if (e_status < CL_COMPLETE) {
> +      err = CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST;
> +      break;
> +    }
> +
> +    data = &e->exec_data;
> +    data->type = EnqueueReadBufferRect;
> +    data->mem_obj = buffer;
> +    data->ptr = ptr;
> +    data->origin[0] = buffer_origin[0];
> +    data->origin[1] = buffer_origin[1];
> +    data->origin[2] = buffer_origin[2];
> +    data->host_origin[0] = host_origin[0];
> +    data->host_origin[1] = host_origin[1];
> +    data->host_origin[2] = host_origin[2];
> +    data->region[0] = region[0];
> +    data->region[1] = region[1];
> +    data->region[2] = region[2];
> +    data->row_pitch = buffer_row_pitch;
> +    data->slice_pitch = buffer_slice_pitch;
> +    data->host_row_pitch = host_row_pitch;
> +    data->host_slice_pitch = host_slice_pitch;
> +
> +    if (e_status == CL_COMPLETE) {
> +      // Sync mode, no need to queue event.
> +      err = cl_enqueue_handle(data, CL_COMPLETE);
> +      if (err != CL_SUCCESS) {
> +        assert(err < 0);
> +        e->status = err;
> +        break;
> +      }
> +
> +      e->status = CL_COMPLETE; // Just set the status, no notify. No one
> depend on us now.
> +    } else {
> +      cl_command_queue_enqueue_event(command_queue, e);
> +    }
> +  } while (0);
> +
> +  if (err == CL_SUCCESS && event) {
> +    *event = e;
> +  } else {
> +    cl_event_delete(e);
> +  }
> +
> +  return err;
> +}
> +
> +cl_int
> +clEnqueueWriteBufferRect(cl_command_queue command_queue,
> +                         cl_mem buffer,
> +                         cl_bool blocking_write,
> +                         const size_t *buffer_origin,
> +                         const size_t *host_origin,
> +                         const size_t *region,
> +                         size_t buffer_row_pitch,
> +                         size_t buffer_slice_pitch,
> +                         size_t host_row_pitch,
> +                         size_t host_slice_pitch,
> +                         const void *ptr,
> +                         cl_uint num_events_in_wait_list,
> +                         const cl_event *event_wait_list,
> +                         cl_event *event)
> +{
> +  cl_int err = CL_SUCCESS;
> +  size_t total_size = 0;
> +  enqueue_data *data = NULL;
> +  cl_int e_status;
> +  cl_event e = NULL;
> +
> +  do {
> +    if (!CL_OBJECT_IS_COMMAND_QUEUE(command_queue)) {
> +      err = CL_INVALID_COMMAND_QUEUE;
> +      break;
> +    }
> +
> +    if (!CL_OBJECT_IS_BUFFER(buffer)) {
> +      err = CL_INVALID_MEM_OBJECT;
> +      break;
> +    }
> +
> +    if (command_queue->ctx != buffer->ctx) {
> +      err = CL_INVALID_CONTEXT;
> +      break;
> +    }
> +
> +    if (buffer->flags & (CL_MEM_HOST_READ_ONLY |
> CL_MEM_HOST_NO_ACCESS)) {
> +      err = CL_INVALID_OPERATION;
> +      break;
> +    }
> +
> +    if (!ptr || !region || region[0] == 0 || region[1] == 0 || region[2] == 0) {
> +      err = CL_INVALID_VALUE;
> +      break;
> +    }
> +
> +    if (buffer_row_pitch == 0)
> +      buffer_row_pitch = region[0];
> +    if (buffer_slice_pitch == 0)
> +      buffer_slice_pitch = region[1] * buffer_row_pitch;
> +
> +    if (host_row_pitch == 0)
> +      host_row_pitch = region[0];
> +    if (host_slice_pitch == 0)
> +      host_slice_pitch = region[1] * host_row_pitch;
> +
> +    if (buffer_row_pitch < region[0] ||
> +        host_row_pitch < region[0]) {
> +      err = CL_INVALID_VALUE;
> +      break;
> +    }
> +
> +    if ((buffer_slice_pitch < region[1] * buffer_row_pitch ||
> buffer_slice_pitch % buffer_row_pitch != 0) ||
> +        (host_slice_pitch < region[1] * host_row_pitch || host_slice_pitch %
> host_row_pitch != 0)) {
> +      err = CL_INVALID_VALUE;
> +      break;
> +    }
> +
> +    total_size = (buffer_origin[2] + region[2] - 1) * buffer_slice_pitch +
> +                 (buffer_origin[1] + region[1] - 1) * buffer_row_pitch +
> +                 buffer_origin[0] + region[0];
> +
> +    if (total_size > buffer->size) {
> +      err = CL_INVALID_VALUE;
> +      break;
> +    }
> +
> +    err = cl_event_check_waitlist(num_events_in_wait_list, event_wait_list,
> +                                  event, command_queue->ctx);
> +    if (err != CL_SUCCESS) {
> +      break;
> +    }
> +
> +    e = cl_event_create(command_queue->ctx, command_queue,
> num_events_in_wait_list,
> +                        event_wait_list, CL_COMMAND_WRITE_BUFFER_RECT, &err);
> +    if (err != CL_SUCCESS) {
> +      break;
> +    }
> +
> +    if (blocking_write) {
> +      err = cl_event_wait_for_event_ready(e);
> +      if (err != CL_SUCCESS)
> +        break;
> +
> +      /* Blocking call API is a sync point of flush. */
> +      err = cl_command_queue_wait_flush(command_queue);
> +      if (err != CL_SUCCESS) {
> +        break;
> +      }
> +    }
> +
> +    e_status = cl_event_is_ready(e);
> +    if (e_status < CL_COMPLETE) {
> +      err = CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST;
> +      break;
> +    }
> +
> +    data = &e->exec_data;
> +    data->type = EnqueueWriteBufferRect;
> +    data->mem_obj = buffer;
> +    data->const_ptr = ptr;
> +    data->origin[0] = buffer_origin[0];
> +    data->origin[1] = buffer_origin[1];
> +    data->origin[2] = buffer_origin[2];
> +    data->host_origin[0] = host_origin[0];
> +    data->host_origin[1] = host_origin[1];
> +    data->host_origin[2] = host_origin[2];
> +    data->region[0] = region[0];
> +    data->region[1] = region[1];
> +    data->region[2] = region[2];
> +    data->row_pitch = buffer_row_pitch;
> +    data->slice_pitch = buffer_slice_pitch;
> +    data->host_row_pitch = host_row_pitch;
> +    data->host_slice_pitch = host_slice_pitch;
> +
> +    if (e_status == CL_COMPLETE) {
> +      // Sync mode, no need to queue event.
> +      err = cl_enqueue_handle(data, CL_COMPLETE);
> +      if (err != CL_SUCCESS) {
> +        assert(err < 0);
> +        e->status = err;
> +        break;
> +      }
> +
> +      e->status = CL_COMPLETE; // Just set the status, no notify. No one
> depend on us now.
> +    } else {
> +      cl_command_queue_enqueue_event(command_queue, e);
> +    }
> +  } while (0);
> +
> +  if (err == CL_SUCCESS && event) {
> +    *event = e;
> +  } else {
> +    cl_event_delete(e);
> +  }
> +
> +  return err;
> +}
> +
> +cl_int
> +clEnqueueCopyBuffer(cl_command_queue command_queue,
> +                    cl_mem src_buffer,
> +                    cl_mem dst_buffer,
> +                    size_t src_offset,
> +                    size_t dst_offset,
> +                    size_t cb,
> +                    cl_uint num_events_in_wait_list,
> +                    const cl_event *event_wait_list,
> +                    cl_event *event)
> +{
> +  cl_int err = CL_SUCCESS;
> +  cl_event e = NULL;
> +  cl_int e_status;
> +
> +  do {
> +    if (!CL_OBJECT_IS_COMMAND_QUEUE(command_queue)) {
> +      err = CL_INVALID_COMMAND_QUEUE;
> +      break;
> +    }
> +
> +    if (!CL_OBJECT_IS_MEM(src_buffer)) {
> +      err = CL_INVALID_MEM_OBJECT;
> +      break;
> +    }
> +    if (!CL_OBJECT_IS_MEM(dst_buffer)) {
> +      err = CL_INVALID_MEM_OBJECT;
> +      break;
> +    }
> +
> +    if (command_queue->ctx != src_buffer->ctx) {
> +      err = CL_INVALID_CONTEXT;
> +      break;
> +    }
> +    if (command_queue->ctx != dst_buffer->ctx) {
> +      err = CL_INVALID_CONTEXT;
> +      break;
> +    }
> +
> +    if (src_offset + cb > src_buffer->size) {
> +      err = CL_INVALID_VALUE;
> +      break;
> +    }
> +    if (dst_offset + cb > dst_buffer->size) {
> +      err = CL_INVALID_VALUE;
> +      break;
> +    }
> +
> +    /* Check overlap */
> +    if (src_buffer == dst_buffer && (src_offset <= dst_offset && dst_offset
> <= src_offset + cb - 1) &&
> +        (dst_offset <= src_offset && src_offset <= dst_offset + cb - 1)) {
> +      err = CL_MEM_COPY_OVERLAP;
> +      break;
> +    }
> +
> +    /* Check sub overlap */
> +    if (src_buffer->type == CL_MEM_SUBBUFFER_TYPE && dst_buffer->type
> == CL_MEM_SUBBUFFER_TYPE) {
> +      struct _cl_mem_buffer *src_b = (struct _cl_mem_buffer *)src_buffer;
> +      struct _cl_mem_buffer *dst_b = (struct _cl_mem_buffer *)dst_buffer;
> +      size_t src_sub_offset = src_b->sub_offset;
> +      size_t dst_sub_offset = dst_b->sub_offset;
> +      if ((src_offset + src_sub_offset <= dst_offset + dst_sub_offset &&
> +           dst_offset + dst_sub_offset <= src_offset + src_sub_offset + cb - 1)
> &&
> +          (dst_offset + dst_sub_offset <= src_offset + src_sub_offset &&
> +           src_offset + src_sub_offset <= dst_offset + dst_sub_offset + cb - 1)) {
> +        err = CL_MEM_COPY_OVERLAP;
> +        break;
> +      }
> +    }
> +
> +    err = cl_event_check_waitlist(num_events_in_wait_list, event_wait_list,
> +                                  event, command_queue->ctx);
> +    if (err != CL_SUCCESS) {
> +      break;
> +    }
> +
> +    e = cl_event_create(command_queue->ctx, command_queue,
> num_events_in_wait_list,
> +                        event_wait_list, CL_COMMAND_COPY_BUFFER, &err);
> +    if (err != CL_SUCCESS) {
> +      break;
> +    }
> +
> +    err = cl_mem_copy(command_queue, e, src_buffer, dst_buffer,
> src_offset, dst_offset, cb);
> +    if (err != CL_SUCCESS) {
> +      break;
> +    }
> +
> +    /* We will flush the ndrange if no event depend. Else we will add it to
> queue list.
> +       The finish or Complete status will always be done in queue list. */
> +    e_status = cl_event_is_ready(e);
> +    if (e_status < CL_COMPLETE) { // Error happend, cancel.
> +      err = CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST;
> +      break;
> +    } else if (e_status == CL_COMPLETE) {
> +      err = cl_enqueue_handle(&e->exec_data, CL_SUBMITTED);
> +      if (err != CL_SUCCESS) {
> +        break;
> +      }
> +
> +      e->status = CL_SUBMITTED;
> +    }
> +
> +    cl_command_queue_enqueue_event(command_queue, e);
> +  } while (0);
> +
> +  if (err == CL_SUCCESS && event) {
> +    *event = e;
> +  } else {
> +    cl_event_delete(e);
> +  }
> +
> +  return err;
> +}
> +
> +/* The following code checking overlap is from Appendix of openCL spec 1.1
> */
> +static cl_bool
> +check_copy_overlap(const size_t src_offset[3],
> +                   const size_t dst_offset[3],
> +                   const size_t region[3],
> +                   size_t row_pitch, size_t slice_pitch)
> +{
> +  const size_t src_min[] = {src_offset[0], src_offset[1], src_offset[2]};
> +  const size_t src_max[] = {src_offset[0] + region[0],
> +                            src_offset[1] + region[1],
> +                            src_offset[2] + region[2]};
> +  const size_t dst_min[] = {dst_offset[0], dst_offset[1], dst_offset[2]};
> +  const size_t dst_max[] = {dst_offset[0] + region[0],
> +                            dst_offset[1] + region[1],
> +                            dst_offset[2] + region[2]};
> +  // Check for overlap
> +  cl_bool overlap = CL_TRUE;
> +  unsigned i;
> +  size_t dst_start = dst_offset[2] * slice_pitch +
> +                     dst_offset[1] * row_pitch + dst_offset[0];
> +  size_t dst_end = dst_start + (region[2] * slice_pitch +
> +                                region[1] * row_pitch + region[0]);
> +  size_t src_start = src_offset[2] * slice_pitch +
> +                     src_offset[1] * row_pitch + src_offset[0];
> +  size_t src_end = src_start + (region[2] * slice_pitch +
> +                                region[1] * row_pitch + region[0]);
> +
> +  for (i = 0; i != 3; ++i) {
> +    overlap = overlap && (src_min[i] < dst_max[i]) && (src_max[i] >
> dst_min[i]);
> +  }
> +
> +  if (!overlap) {
> +    size_t delta_src_x = (src_offset[0] + region[0] > row_pitch) ? src_offset[0]
> + region[0] - row_pitch : 0;
> +    size_t delta_dst_x = (dst_offset[0] + region[0] > row_pitch) ?
> dst_offset[0] + region[0] - row_pitch : 0;
> +    if ((delta_src_x > 0 && delta_src_x > dst_offset[0]) ||
> +        (delta_dst_x > 0 && delta_dst_x > src_offset[0])) {
> +      if ((src_start <= dst_start && dst_start < src_end) ||
> +          (dst_start <= src_start && src_start < dst_end))
> +        overlap = CL_TRUE;
> +    }
> +    if (region[2] > 1) {
> +      size_t src_height = slice_pitch / row_pitch;
> +      size_t dst_height = slice_pitch / row_pitch;
> +      size_t delta_src_y = (src_offset[1] + region[1] > src_height) ?
> src_offset[1] + region[1] - src_height : 0;
> +      size_t delta_dst_y = (dst_offset[1] + region[1] > dst_height) ?
> dst_offset[1] + region[1] - dst_height : 0;
> +      if ((delta_src_y > 0 && delta_src_y > dst_offset[1]) ||
> +          (delta_dst_y > 0 && delta_dst_y > src_offset[1])) {
> +        if ((src_start <= dst_start && dst_start < src_end) ||
> +            (dst_start <= src_start && src_start < dst_end))
> +          overlap = CL_TRUE;
> +      }
> +    }
> +  }
> +  return overlap;
> +}
> +
> +cl_int
> +clEnqueueCopyBufferRect(cl_command_queue command_queue,
> +                        cl_mem src_buffer,
> +                        cl_mem dst_buffer,
> +                        const size_t *src_origin,
> +                        const size_t *dst_origin,
> +                        const size_t *region,
> +                        size_t src_row_pitch,
> +                        size_t src_slice_pitch,
> +                        size_t dst_row_pitch,
> +                        size_t dst_slice_pitch,
> +                        cl_uint num_events_in_wait_list,
> +                        const cl_event *event_wait_list,
> +                        cl_event *event)
> +{
> +  cl_int err = CL_SUCCESS;
> +  cl_event e = NULL;
> +  size_t total_size = 0;
> +  cl_int e_status;
> +
> +  do {
> +    if (!CL_OBJECT_IS_COMMAND_QUEUE(command_queue)) {
> +      err = CL_INVALID_COMMAND_QUEUE;
> +      break;
> +    }
> +
> +    if (!CL_OBJECT_IS_MEM(src_buffer)) {
> +      err = CL_INVALID_MEM_OBJECT;
> +      break;
> +    }
> +    if (!CL_OBJECT_IS_MEM(dst_buffer)) {
> +      err = CL_INVALID_MEM_OBJECT;
> +      break;
> +    }
> +
> +    if ((command_queue->ctx != src_buffer->ctx) ||
> +        (command_queue->ctx != dst_buffer->ctx)) {
> +      err = CL_INVALID_CONTEXT;
> +      break;
> +    }
> +
> +    if (!region || region[0] == 0 || region[1] == 0 || region[2] == 0) {
> +      err = CL_INVALID_VALUE;
> +      break;
> +    }
> +
> +    if (src_row_pitch == 0)
> +      src_row_pitch = region[0];
> +    if (src_slice_pitch == 0)
> +      src_slice_pitch = region[1] * src_row_pitch;
> +
> +    if (dst_row_pitch == 0)
> +      dst_row_pitch = region[0];
> +    if (dst_slice_pitch == 0)
> +      dst_slice_pitch = region[1] * dst_row_pitch;
> +
> +    if (src_row_pitch < region[0] ||
> +        dst_row_pitch < region[0]) {
> +      err = CL_INVALID_VALUE;
> +      break;
> +    }
> +
> +    if ((src_slice_pitch < region[1] * src_row_pitch || src_slice_pitch %
> src_row_pitch != 0) ||
> +        (dst_slice_pitch < region[1] * dst_row_pitch || dst_slice_pitch %
> dst_row_pitch != 0)) {
> +      err = CL_INVALID_VALUE;
> +      break;
> +    }
> +
> +    total_size = (src_origin[2] + region[2] - 1) * src_slice_pitch +
> +                 (src_origin[1] + region[1] - 1) * src_row_pitch + src_origin[0] +
> region[0];
> +    if (total_size > src_buffer->size) {
> +      err = CL_INVALID_VALUE;
> +      break;
> +    }
> +    total_size = (dst_origin[2] + region[2] - 1) * dst_slice_pitch +
> +                 (dst_origin[1] + region[1] - 1) * dst_row_pitch + dst_origin[0] +
> region[0];
> +    if (total_size > dst_buffer->size) {
> +      err = CL_INVALID_VALUE;
> +      break;
> +    }
> +
> +    if (src_buffer == dst_buffer &&
> +        (src_row_pitch != dst_row_pitch || src_slice_pitch != dst_slice_pitch)) {
> +      err = CL_INVALID_VALUE;
> +      break;
> +    }
> +
> +    if (src_buffer == dst_buffer &&
> +        check_copy_overlap(src_origin, dst_origin, region, src_row_pitch,
> src_slice_pitch)) {
> +      err = CL_MEM_COPY_OVERLAP;
> +      break;
> +    }
> +
> +    err = cl_event_check_waitlist(num_events_in_wait_list, event_wait_list,
> +                                  event, command_queue->ctx);
> +    if (err != CL_SUCCESS) {
> +      break;
> +    }
> +
> +    e = cl_event_create(command_queue->ctx, command_queue,
> num_events_in_wait_list,
> +                        event_wait_list, CL_COMMAND_COPY_BUFFER_RECT, &err);
> +    if (err != CL_SUCCESS) {
> +      break;
> +    }
> +
> +    err = cl_mem_copy_buffer_rect(command_queue, e, src_buffer,
> dst_buffer, src_origin, dst_origin, region,
> +                                  src_row_pitch, src_slice_pitch, dst_row_pitch,
> dst_slice_pitch);
> +    if (err != CL_SUCCESS) {
> +      break;
> +    }
> +
> +    /* We will flush the ndrange if no event depend. Else we will add it to
> queue list.
> +       The finish or Complete status will always be done in queue list. */
> +    e_status = cl_event_is_ready(e);
> +    if (e_status < CL_COMPLETE) { // Error happend, cancel.
> +      err = CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST;
> +      break;
> +    } else if (e_status == CL_COMPLETE) {
> +      err = cl_enqueue_handle(&e->exec_data, CL_SUBMITTED);
> +      if (err != CL_SUCCESS) {
> +        break;
> +      }
> +
> +      e->status = CL_SUBMITTED;
> +    }
> +
> +    cl_command_queue_enqueue_event(command_queue, e);
> +  } while (0);
> +
> +  if (err == CL_SUCCESS && event) {
> +    *event = e;
> +  } else {
> +    cl_event_delete(e);
> +  }
> +
> +  return err;
> +}
> +
> +cl_int
> +clEnqueueFillBuffer(cl_command_queue command_queue,
> +                    cl_mem buffer,
> +                    const void *pattern,
> +                    size_t pattern_size,
> +                    size_t offset,
> +                    size_t size,
> +                    cl_uint num_events_in_wait_list,
> +                    const cl_event *event_wait_list,
> +                    cl_event *event)
> +{
> +  cl_int err = CL_SUCCESS;
> +  static size_t valid_sz[] = {1, 2, 4, 8, 16, 32, 64, 128};
> +  int i = 0;
> +  cl_event e = NULL;
> +  cl_int e_status;
> +
> +  do {
> +    if (!CL_OBJECT_IS_COMMAND_QUEUE(command_queue)) {
> +      err = CL_INVALID_COMMAND_QUEUE;
> +      break;
> +    }
> +
> +    if (!CL_OBJECT_IS_BUFFER(buffer)) {
> +      err = CL_INVALID_MEM_OBJECT;
> +      break;
> +    }
> +
> +    if (command_queue->ctx != buffer->ctx) {
> +      err = CL_INVALID_CONTEXT;
> +      break;
> +    }
> +
> +    if (offset + size > buffer->size) {
> +      err = CL_INVALID_VALUE;
> +      break;
> +    }
> +
> +    if (pattern == NULL) {
> +      err = CL_INVALID_VALUE;
> +      break;
> +    }
> +
> +    for (i = 0; i < sizeof(valid_sz) / sizeof(size_t); i++) {
> +      if (valid_sz[i] == pattern_size)
> +        break;
> +    }
> +    if (i == sizeof(valid_sz) / sizeof(size_t)) {
> +      err = CL_INVALID_VALUE;
> +      break;
> +    }
> +
> +    if (offset % pattern_size || size % pattern_size) {
> +      err = CL_INVALID_VALUE;
> +      break;
> +    }
> +
> +    err = cl_event_check_waitlist(num_events_in_wait_list, event_wait_list,
> +                                  event, command_queue->ctx);
> +    if (err != CL_SUCCESS) {
> +      break;
> +    }
> +
> +    e = cl_event_create(command_queue->ctx, command_queue,
> num_events_in_wait_list,
> +                        event_wait_list, CL_COMMAND_FILL_BUFFER, &err);
> +    if (err != CL_SUCCESS) {
> +      break;
> +    }
> +
> +    err = cl_mem_fill(command_queue, e, pattern, pattern_size, buffer,
> offset, size);
> +    if (err) {
> +      break;
> +    }
> +
> +    /* We will flush the ndrange if no event depend. Else we will add it to
> queue list.
> +       The finish or Complete status will always be done in queue list. */
> +    e_status = cl_event_is_ready(e);
> +    if (e_status < CL_COMPLETE) { // Error happend, cancel.
> +      err = CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST;
> +      break;
> +    } else if (e_status == CL_COMPLETE) {
> +      err = cl_enqueue_handle(&e->exec_data, CL_SUBMITTED);
> +      if (err != CL_SUCCESS) {
> +        break;
> +      }
> +
> +      e->status = CL_SUBMITTED;
> +    }
> +
> +    cl_command_queue_enqueue_event(command_queue, e);
> +  } while (0);
> +
> +  if (err == CL_SUCCESS && event) {
> +    *event = e;
> +  } else {
> +    cl_event_delete(e);
> +  }
> +
> +  return err;
> +}
> +
> +cl_int
> +clEnqueueMigrateMemObjects(cl_command_queue command_queue,
> +                           cl_uint num_mem_objects,
> +                           const cl_mem *mem_objects,
> +                           cl_mem_migration_flags flags,
> +                           cl_uint num_events_in_wait_list,
> +                           const cl_event *event_wait_list,
> +                           cl_event *event)
> +{
> +  /* So far, we just support 1 device and no subdevice. So all the command
> queues
> +     belong to the small context. There is no need to migrate the mem
> objects by now. */
> +  cl_int err = CL_SUCCESS;
> +  cl_event e = NULL;
> +  cl_int e_status;
> +  cl_uint i = 0;
> +
> +  do {
> +    if (!flags & CL_MIGRATE_MEM_OBJECT_HOST) {
> +      if (!CL_OBJECT_IS_COMMAND_QUEUE(command_queue)) {
> +        err = CL_INVALID_COMMAND_QUEUE;
> +        break;
> +      }
> +    }
> +
> +    if (num_mem_objects == 0 || mem_objects == NULL) {
> +      err = CL_INVALID_VALUE;
> +      break;
> +    }
> +
> +    if (flags && flags & ~(CL_MIGRATE_MEM_OBJECT_HOST |
> CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED)) {
> +      err = CL_INVALID_VALUE;
> +      break;
> +    }
> +
> +    for (i = 0; i < num_mem_objects; i++) {
> +      if (!CL_OBJECT_IS_BUFFER(mem_objects[i])) {
> +        err = CL_INVALID_MEM_OBJECT;
> +        break;
> +      }
> +      if (mem_objects[i]->ctx != command_queue->ctx) {
> +        err = CL_INVALID_CONTEXT;
> +        break;
> +      }
> +    }
> +    if (err != CL_SUCCESS) {
> +      break;
> +    }
> +
> +    err = cl_event_check_waitlist(num_events_in_wait_list, event_wait_list,
> +                                  event, command_queue->ctx);
> +    if (err != CL_SUCCESS) {
> +      break;
> +    }
> +
> +    e = cl_event_create(command_queue->ctx, command_queue,
> num_events_in_wait_list,
> +                        event_wait_list, CL_COMMAND_MIGRATE_MEM_OBJECTS,
> &err);
> +    if (err != CL_SUCCESS) {
> +      break;
> +    }
> +
> +    /* Noting to do now, just enqueue a event. */
> +    e->exec_data.type = EnqueueMigrateMemObj;
> +    /* We will flush the ndrange if no event depend. Else we will add it to
> queue list.
> +       The finish or Complete status will always be done in queue list. */
> +    e_status = cl_event_is_ready(e);
> +    if (e_status < CL_COMPLETE) { // Error happend, cancel.
> +      err = CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST;
> +      break;
> +    } else if (e_status == CL_COMPLETE) {
> +      err = cl_enqueue_handle(&e->exec_data, CL_SUBMITTED);
> +      if (err != CL_SUCCESS) {
> +        break;
> +      }
> +
> +      e->status = CL_SUBMITTED;
> +    }
> +
> +    cl_command_queue_enqueue_event(command_queue, e);
> +  } while (0);
> +
> +  if (err == CL_SUCCESS && event) {
> +    *event = e;
> +  } else {
> +    cl_event_delete(e);
> +  }
> +
> +  return err;
> +}
> +
> +/************************************ Images
> *********************************************/
> +static cl_int
> +check_image_region(struct _cl_mem_image *image, const size_t *pregion,
> size_t *region)
> +{
> +  if (pregion == NULL) {
> +    return CL_INVALID_VALUE;
> +  }
> +
> +  if (image->image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) {
> +    region[0] = pregion[0];
> +    region[1] = 1;
> +    region[2] = pregion[1];
> +  } else {
> +    region[0] = pregion[0];
> +    region[1] = pregion[1];
> +    region[2] = pregion[2];
> +  }
> +
> +  if ((region[0] == 0) || (region[1] == 0) || (region[2] == 0)) {
> +    return CL_INVALID_VALUE;
> +  }
> +
> +  return CL_SUCCESS;
> +}
> +
> +static cl_int
> +check_image_origin(struct _cl_mem_image *image, const size_t *porigin,
> size_t *origin)
> +{
> +  if (porigin == NULL) {
> +    return CL_INVALID_VALUE;
> +  }
> +
> +  if (image->image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) {
> +    origin[0] = porigin[0];
> +    origin[1] = 0;
> +    origin[2] = porigin[1];
> +  } else {
> +    origin[0] = porigin[0];
> +    origin[1] = porigin[1];
> +    origin[2] = porigin[2];
> +  }
> +
> +  return CL_SUCCESS;
> +}
> +
> +void *
> +clEnqueueMapImage(cl_command_queue command_queue,
> +                  cl_mem mem,
> +                  cl_bool blocking_map,
> +                  cl_map_flags map_flags,
> +                  const size_t *porigin,
> +                  const size_t *pregion,
> +                  size_t *image_row_pitch,
> +                  size_t *image_slice_pitch,
> +                  cl_uint num_events_in_wait_list,
> +                  const cl_event *event_wait_list,
> +                  cl_event *event,
> +                  cl_int *errcode_ret)
> +{
> +  cl_int err = CL_SUCCESS;
> +  void *ptr = NULL;
> +  void *mem_ptr = NULL;
> +  size_t offset = 0;
> +  struct _cl_mem_image *image = NULL;
> +  cl_int e_status;
> +  enqueue_data *data = NULL;
> +  size_t region[3];
> +  size_t origin[3];
> +  cl_event e = NULL;
> +
> +  do {
> +    if (!CL_OBJECT_IS_COMMAND_QUEUE(command_queue)) {
> +      err = CL_INVALID_COMMAND_QUEUE;
> +      break;
> +    }
> +
> +    if (!CL_OBJECT_IS_IMAGE(mem)) {
> +      err = CL_INVALID_MEM_OBJECT;
> +      break;
> +    }
> +
> +    image = cl_mem_image(mem);
> +
> +    err = check_image_region(image, pregion, region);
> +    if (err != CL_SUCCESS) {
> +      break;
> +    }
> +
> +    err = check_image_origin(image, porigin, origin);
> +    if (err != CL_SUCCESS) {
> +      break;
> +    }
> +
> +    if (command_queue->ctx != mem->ctx) {
> +      err = CL_INVALID_CONTEXT;
> +      break;
> +    }
> +
> +    if (origin[0] + region[0] > image->w ||
> +        origin[1] + region[1] > image->h ||
> +        origin[2] + region[2] > image->depth) {
> +      err = CL_INVALID_VALUE;
> +      break;
> +    }
> +
> +    if (!image_row_pitch || (image->slice_pitch && !image_slice_pitch)) {
> +      err = CL_INVALID_VALUE;
> +      break;
> +    }
> +
> +    if ((map_flags & CL_MAP_READ &&
> +         mem->flags & (CL_MEM_HOST_WRITE_ONLY |
> CL_MEM_HOST_NO_ACCESS)) ||
> +        (map_flags & (CL_MAP_WRITE |
> CL_MAP_WRITE_INVALIDATE_REGION) &&
> +         mem->flags & (CL_MEM_HOST_READ_ONLY |
> CL_MEM_HOST_NO_ACCESS))) {
> +      err = CL_INVALID_OPERATION;
> +      break;
> +    }
> +
> +    err = cl_event_check_waitlist(num_events_in_wait_list, event_wait_list,
> +                                  event, command_queue->ctx);
> +    if (err != CL_SUCCESS) {
> +      break;
> +    }
> +
> +    e = cl_event_create(command_queue->ctx, command_queue,
> num_events_in_wait_list,
> +                        event_wait_list, CL_COMMAND_MAP_IMAGE, &err);
> +    if (err != CL_SUCCESS) {
> +      break;
> +    }
> +
> +    if (blocking_map) {
> +      err = cl_event_wait_for_event_ready(e);
> +      if (err != CL_SUCCESS)
> +        break;
> +
> +      /* Blocking call API is a sync point of flush. */
> +      err = cl_command_queue_wait_flush(command_queue);
> +      if (err != CL_SUCCESS) {
> +        break;
> +      }
> +    }
> +
> +    e_status = cl_event_is_ready(e);
> +    if (e_status < CL_COMPLETE) {
> +      err = CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST;
> +      break;
> +    }
> +
> +    data = &e->exec_data;
> +    data->type = EnqueueMapImage;
> +    data->mem_obj = mem;
> +    data->origin[0] = origin[0];
> +    data->origin[1] = origin[1];
> +    data->origin[2] = origin[2];
> +    data->region[0] = region[0];
> +    data->region[1] = region[1];
> +    data->region[2] = region[2];
> +    data->ptr = ptr;
> +    data->unsync_map = 1;
> +    if (map_flags & (CL_MAP_WRITE |
> CL_MAP_WRITE_INVALIDATE_REGION))
> +      data->write_map = 1;
> +
> +    if (e_status == CL_COMPLETE) {
> +      // Sync mode, no need to queue event.
> +      err = cl_enqueue_handle(data, CL_COMPLETE);
> +      if (err != CL_SUCCESS) {
> +        assert(err < 0);
> +        e->status = err;
> +        break;
> +      }
> +
> +      ptr = data->ptr;
> +      e->status = CL_COMPLETE; // Just set the status, no notify. No one
> depend on us now.
> +    } else {
> +      err = cl_enqueue_handle(data, CL_SUBMITTED); // Submit to get the
> address.
> +      if (err != CL_SUCCESS) {
> +        assert(err < 0);
> +        e->status = err;
> +        break;
> +      }
> +
> +      e->status = CL_SUBMITTED;
> +      ptr = data->ptr;
> +      assert(ptr);
> +
> +      cl_command_queue_enqueue_event(command_queue, e);
> +    }
> +
> +    /* Store and write back map info. */
> +    if (mem->flags & CL_MEM_USE_HOST_PTR) {
> +      if (image_slice_pitch)
> +        *image_slice_pitch = image->host_slice_pitch;
> +      *image_row_pitch = image->host_row_pitch;
> +
> +      offset = image->bpp * origin[0] + image->host_row_pitch * origin[1] +
> +               image->host_slice_pitch * origin[2];
> +    } else {
> +      if (image_slice_pitch)
> +        *image_slice_pitch = image->slice_pitch;
> +      if (image->image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY)
> +        *image_row_pitch = image->slice_pitch;
> +      else
> +        *image_row_pitch = image->row_pitch;
> +
> +      offset = image->bpp * origin[0] + image->row_pitch * origin[1] + image-
> >slice_pitch * origin[2];
> +    }
> +
> +    err = cl_mem_record_map_mem(mem, ptr, &mem_ptr, offset, 0, origin,
> region);
> +    assert(err == CL_SUCCESS); // Easy way, do not use unmap to handle
> error.
> +  } while (0);
> +
> +  if (err != CL_SUCCESS) {
> +    if (e) {
> +      cl_event_delete(e);
> +      e = NULL;
> +    }
> +
> +    assert(ptr == NULL);
> +  }
> +
> +  if (err == CL_SUCCESS && event) {
> +    *event = e;
> +  } else {
> +    cl_event_delete(e);
> +  }
> +
> +  if (errcode_ret)
> +    *errcode_ret = err;
> +
> +  return mem_ptr;
> +}
> +
> +cl_int
> +clEnqueueReadImage(cl_command_queue command_queue,
> +                   cl_mem mem,
> +                   cl_bool blocking_read,
> +                   const size_t *porigin,
> +                   const size_t *pregion,
> +                   size_t row_pitch,
> +                   size_t slice_pitch,
> +                   void *ptr,
> +                   cl_uint num_events_in_wait_list,
> +                   const cl_event *event_wait_list,
> +                   cl_event *event)
> +{
> +  cl_int err = CL_SUCCESS;
> +  struct _cl_mem_image *image = NULL;
> +  enqueue_data *data = NULL;
> +  cl_int e_status;
> +  size_t region[3];
> +  size_t origin[3];
> +  cl_event e = NULL;
> +
> +  do {
> +    if (!CL_OBJECT_IS_COMMAND_QUEUE(command_queue)) {
> +      err = CL_INVALID_COMMAND_QUEUE;
> +      break;
> +    }
> +
> +    if (!CL_OBJECT_IS_IMAGE(mem)) {
> +      err = CL_INVALID_MEM_OBJECT;
> +      break;
> +    }
> +
> +    image = cl_mem_image(mem);
> +
> +    err = check_image_region(image, pregion, region);
> +    if (err != CL_SUCCESS) {
> +      break;
> +    }
> +
> +    err = check_image_origin(image, porigin, origin);
> +    if (err != CL_SUCCESS) {
> +      break;
> +    }
> +
> +    if (command_queue->ctx != mem->ctx) {
> +      err = CL_INVALID_CONTEXT;
> +      break;
> +    }
> +
> +    if (origin[0] + region[0] > image->w ||
> +        origin[1] + region[1] > image->h ||
> +        origin[2] + region[2] > image->depth) {
> +      err = CL_INVALID_VALUE;
> +      break;
> +    }
> +
> +    if (!row_pitch) {
> +      row_pitch = image->bpp * region[0];
> +    } else if (row_pitch < image->bpp * region[0]) {
> +      err = CL_INVALID_VALUE;
> +      break;
> +    }
> +
> +    if (image->slice_pitch) {
> +      if (!slice_pitch) {
> +        slice_pitch = row_pitch * region[1];
> +      } else if (slice_pitch < row_pitch * region[1]) {
> +        err = CL_INVALID_VALUE;
> +        break;
> +      }
> +    } else if (slice_pitch) {
> +      err = CL_INVALID_VALUE;
> +      break;
> +    }
> +
> +    if (!ptr) {
> +      err = CL_INVALID_VALUE;
> +      break;
> +    }
> +
> +    if (mem->flags & (CL_MEM_HOST_WRITE_ONLY |
> CL_MEM_HOST_NO_ACCESS)) {
> +      err = CL_INVALID_OPERATION;
> +      break;
> +    }
> +
> +    err = cl_event_check_waitlist(num_events_in_wait_list, event_wait_list,
> +                                  event, command_queue->ctx);
> +    if (err != CL_SUCCESS) {
> +      break;
> +    }
> +
> +    e = cl_event_create(command_queue->ctx, command_queue,
> num_events_in_wait_list,
> +                        event_wait_list, CL_COMMAND_READ_IMAGE, &err);
> +    if (err != CL_SUCCESS) {
> +      break;
> +    }
> +
> +    if (blocking_read) {
> +      err = cl_event_wait_for_event_ready(e);
> +      if (err != CL_SUCCESS)
> +        break;
> +
> +      /* Blocking call API is a sync point of flush. */
> +      err = cl_command_queue_wait_flush(command_queue);
> +      if (err != CL_SUCCESS) {
> +        break;
> +      }
> +    }
> +
> +    e_status = cl_event_is_ready(e);
> +    if (e_status < CL_COMPLETE) {
> +      err = CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST;
> +      break;
> +    }
> +
> +    data = &e->exec_data;
> +    data->type = EnqueueReadImage;
> +    data->mem_obj = mem;
> +    data->ptr = ptr;
> +    data->origin[0] = origin[0];
> +    data->origin[1] = origin[1];
> +    data->origin[2] = origin[2];
> +    data->region[0] = region[0];
> +    data->region[1] = region[1];
> +    data->region[2] = region[2];
> +    data->row_pitch = row_pitch;
> +    data->slice_pitch = slice_pitch;
> +
> +    if (e_status == CL_COMPLETE) {
> +      // Sync mode, no need to queue event.
> +      err = cl_enqueue_handle(data, CL_COMPLETE);
> +      if (err != CL_SUCCESS) {
> +        assert(err < 0);
> +        e->status = err;
> +        break;
> +      }
> +
> +      e->status = CL_COMPLETE; // Just set the status, no notify. No one
> depend on us now.
> +    } else {
> +      cl_command_queue_enqueue_event(command_queue, e);
> +    }
> +  } while (0);
> +
> +  if (err == CL_SUCCESS && event) {
> +    *event = e;
> +  } else {
> +    cl_event_delete(e);
> +  }
> +
> +  return err;
> +}
> +
> +cl_int
> +clEnqueueWriteImage(cl_command_queue command_queue,
> +                    cl_mem mem,
> +                    cl_bool blocking_write,
> +                    const size_t *porigin,
> +                    const size_t *pregion,
> +                    size_t row_pitch,
> +                    size_t slice_pitch,
> +                    const void *ptr,
> +                    cl_uint num_events_in_wait_list,
> +                    const cl_event *event_wait_list,
> +                    cl_event *event)
> +{
> +  cl_int err = CL_SUCCESS;
> +  struct _cl_mem_image *image = NULL;
> +  enqueue_data *data = NULL;
> +  cl_int e_status;
> +  size_t region[3];
> +  size_t origin[3];
> +  cl_event e = NULL;
> +
> +  do {
> +    if (!CL_OBJECT_IS_COMMAND_QUEUE(command_queue)) {
> +      err = CL_INVALID_COMMAND_QUEUE;
> +      break;
> +    }
> +
> +    if (!CL_OBJECT_IS_IMAGE(mem)) {
> +      err = CL_INVALID_MEM_OBJECT;
> +      break;
> +    }
> +
> +    image = cl_mem_image(mem);
> +
> +    err = check_image_region(image, pregion, region);
> +    if (err != CL_SUCCESS) {
> +      break;
> +    }
> +
> +    err = check_image_origin(image, porigin, origin);
> +    if (err != CL_SUCCESS) {
> +      break;
> +    }
> +
> +    if (command_queue->ctx != mem->ctx) {
> +      err = CL_INVALID_CONTEXT;
> +      break;
> +    }
> +
> +    if (origin[0] + region[0] > image->w ||
> +        origin[1] + region[1] > image->h ||
> +        origin[2] + region[2] > image->depth) {
> +      err = CL_INVALID_VALUE;
> +      break;
> +    }
> +
> +    if (!row_pitch) {
> +      row_pitch = image->bpp * region[0];
> +    } else if (row_pitch < image->bpp * region[0]) {
> +      err = CL_INVALID_VALUE;
> +      break;
> +    }
> +
> +    if (image->slice_pitch) {
> +      if (!slice_pitch) {
> +        slice_pitch = row_pitch * region[1];
> +      } else if (slice_pitch < row_pitch * region[1]) {
> +        err = CL_INVALID_VALUE;
> +        break;
> +      }
> +    } else if (slice_pitch) {
> +      err = CL_INVALID_VALUE;
> +      break;
> +    }
> +
> +    if (!ptr) {
> +      err = CL_INVALID_VALUE;
> +      break;
> +    }
> +
> +    if (mem->flags & (CL_MEM_HOST_READ_ONLY |
> CL_MEM_HOST_NO_ACCESS)) {
> +      err = CL_INVALID_OPERATION;
> +      break;
> +    }
> +
> +    err = cl_event_check_waitlist(num_events_in_wait_list, event_wait_list,
> +                                  event, command_queue->ctx);
> +    if (err != CL_SUCCESS) {
> +      break;
> +    }
> +
> +    e = cl_event_create(command_queue->ctx, command_queue,
> num_events_in_wait_list,
> +                        event_wait_list, CL_COMMAND_WRITE_IMAGE, &err);
> +    if (err != CL_SUCCESS) {
> +      break;
> +    }
> +
> +    if (blocking_write) {
> +      err = cl_event_wait_for_event_ready(e);
> +      if (err != CL_SUCCESS)
> +        break;
> +
> +      /* Blocking call API is a sync point of flush. */
> +      err = cl_command_queue_wait_flush(command_queue);
> +      if (err != CL_SUCCESS) {
> +        break;
> +      }
> +    }
> +
> +    e_status = cl_event_is_ready(e);
> +    if (e_status < CL_COMPLETE) {
> +      err = CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST;
> +      break;
> +    }
> +
> +    data = &e->exec_data;
> +    data->type = EnqueueWriteImage;
> +    data->mem_obj = mem;
> +    data->const_ptr = ptr;
> +    data->origin[0] = origin[0];
> +    data->origin[1] = origin[1];
> +    data->origin[2] = origin[2];
> +    data->region[0] = region[0];
> +    data->region[1] = region[1];
> +    data->region[2] = region[2];
> +    data->row_pitch = row_pitch;
> +    data->slice_pitch = slice_pitch;
> +
> +    if (e_status == CL_COMPLETE) {
> +      // Sync mode, no need to queue event.
> +      err = cl_enqueue_handle(data, CL_COMPLETE);
> +      if (err != CL_SUCCESS) {
> +        assert(err < 0);
> +        e->status = err;
> +        break;
> +      }
> +
> +      e->status = CL_COMPLETE; // Just set the status, no notify. No one
> depend on us now.
> +    } else {
> +      cl_command_queue_enqueue_event(command_queue, e);
> +    }
> +  } while (0);
> +
> +  if (err == CL_SUCCESS && event) {
> +    *event = e;
> +  } else {
> +    cl_event_delete(e);
> +  }
> +
> +  return err;
> +}
> +
> +cl_int
> +clEnqueueCopyImage(cl_command_queue command_queue,
> +                   cl_mem src_mem,
> +                   cl_mem dst_mem,
> +                   const size_t *psrc_origin,
> +                   const size_t *pdst_origin,
> +                   const size_t *pregion,
> +                   cl_uint num_events_in_wait_list,
> +                   const cl_event *event_wait_list,
> +                   cl_event *event)
> +{
> +  cl_int err = CL_SUCCESS;
> +  cl_bool overlap = CL_TRUE;
> +  cl_int i = 0;
> +  cl_event e = NULL;
> +  struct _cl_mem_image *src_image = NULL;
> +  struct _cl_mem_image *dst_image = NULL;
> +  size_t region[3];
> +  size_t src_origin[3];
> +  size_t dst_origin[3];
> +  cl_int e_status;
> +
> +  do {
> +    if (!CL_OBJECT_IS_COMMAND_QUEUE(command_queue)) {
> +      err = CL_INVALID_COMMAND_QUEUE;
> +      break;
> +    }
> +
> +    if (!CL_OBJECT_IS_IMAGE(src_mem)) {
> +      err = CL_INVALID_MEM_OBJECT;
> +      break;
> +    }
> +    if (!CL_OBJECT_IS_IMAGE(dst_mem)) {
> +      err = CL_INVALID_MEM_OBJECT;
> +      break;
> +    }
> +
> +    src_image = cl_mem_image(src_mem);
> +    dst_image = cl_mem_image(dst_mem);
> +
> +    err = check_image_region(src_image, pregion, region);
> +    if (err != CL_SUCCESS) {
> +      break;
> +    }
> +
> +    err = check_image_origin(src_image, psrc_origin, src_origin);
> +    if (err != CL_SUCCESS) {
> +      break;
> +    }
> +    err = check_image_origin(dst_image, pdst_origin, dst_origin);
> +    if (err != CL_SUCCESS) {
> +      break;
> +    }
> +
> +    if (command_queue->ctx != src_mem->ctx ||
> +        command_queue->ctx != dst_mem->ctx) {
> +      err = CL_INVALID_CONTEXT;
> +      break;
> +    }
> +
> +    if (src_image->fmt.image_channel_order != dst_image-
> >fmt.image_channel_order ||
> +        src_image->fmt.image_channel_data_type != dst_image-
> >fmt.image_channel_data_type) {
> +      err = CL_IMAGE_FORMAT_MISMATCH;
> +      break;
> +    }
> +
> +    if (src_origin[0] + region[0] > src_image->w ||
> +        src_origin[1] + region[1] > src_image->h ||
> +        src_origin[2] + region[2] > src_image->depth) {
> +      err = CL_INVALID_VALUE;
> +      break;
> +    }
> +
> +    if (dst_origin[0] + region[0] > dst_image->w ||
> +        dst_origin[1] + region[1] > dst_image->h ||
> +        dst_origin[2] + region[2] > dst_image->depth) {
> +      err = CL_INVALID_VALUE;
> +      break;
> +    }
> +
> +    if ((src_image->image_type == CL_MEM_OBJECT_IMAGE2D &&
> (src_origin[2] != 0 || region[2] != 1)) ||
> +        (dst_image->image_type == CL_MEM_OBJECT_IMAGE2D &&
> (dst_origin[2] != 0 || region[2] != 1))) {
> +      err = CL_INVALID_VALUE;
> +      break;
> +    }
> +
> +    if (src_image == dst_image) {
> +      for (i = 0; i < 3; i++) {
> +        overlap = overlap && (src_origin[i] < dst_origin[i] + region[i]) &&
> +                  (dst_origin[i] < src_origin[i] + region[i]);
> +      }
> +      if (overlap == CL_TRUE) {
> +        err = CL_MEM_COPY_OVERLAP;
> +        break;
> +      }
> +    }
> +
> +    err = cl_event_check_waitlist(num_events_in_wait_list, event_wait_list,
> +                                  event, command_queue->ctx);
> +    if (err != CL_SUCCESS) {
> +      break;
> +    }
> +
> +    e = cl_event_create(command_queue->ctx, command_queue,
> num_events_in_wait_list,
> +                        event_wait_list, CL_COMMAND_COPY_IMAGE, &err);
> +    if (err != CL_SUCCESS) {
> +      break;
> +    }
> +
> +    err = cl_mem_kernel_copy_image(command_queue, e, src_image,
> dst_image,
> +                                   src_origin, dst_origin, region);
> +    if (err != CL_SUCCESS) {
> +      break;
> +    }
> +    /* We will flush the ndrange if no event depend. Else we will add it to
> queue list.
> +       The finish or Complete status will always be done in queue list. */
> +    e_status = cl_event_is_ready(e);
> +    if (e_status < CL_COMPLETE) { // Error happend, cancel.
> +      err = CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST;
> +      break;
> +    } else if (e_status == CL_COMPLETE) {
> +      err = cl_enqueue_handle(&e->exec_data, CL_SUBMITTED);
> +      if (err != CL_SUCCESS) {
> +        break;
> +      }
> +
> +      e->status = CL_SUBMITTED;
> +    }
> +
> +    cl_command_queue_enqueue_event(command_queue, e);
> +  } while (0);
> +
> +  if (err == CL_SUCCESS && event) {
> +    *event = e;
> +  } else {
> +    cl_event_delete(e);
> +  }
> +
> +  return err;
> +}
> +
> +cl_int
> +clEnqueueCopyImageToBuffer(cl_command_queue command_queue,
> +                           cl_mem src_mem,
> +                           cl_mem dst_buffer,
> +                           const size_t *psrc_origin,
> +                           const size_t *pregion,
> +                           size_t dst_offset,
> +                           cl_uint num_events_in_wait_list,
> +                           const cl_event *event_wait_list,
> +                           cl_event *event)
> +{
> +  cl_int err = CL_SUCCESS;
> +  struct _cl_mem_image *src_image = NULL;
> +  size_t region[3];
> +  size_t src_origin[3];
> +  cl_event e = NULL;
> +  cl_int e_status;
> +
> +  do {
> +    if (!CL_OBJECT_IS_COMMAND_QUEUE(command_queue)) {
> +      err = CL_INVALID_COMMAND_QUEUE;
> +      break;
> +    }
> +
> +    if (!CL_OBJECT_IS_IMAGE(src_mem)) {
> +      err = CL_INVALID_MEM_OBJECT;
> +      break;
> +    }
> +    if (!CL_OBJECT_IS_BUFFER(dst_buffer)) {
> +      err = CL_INVALID_MEM_OBJECT;
> +      break;
> +    }
> +
> +    src_image = cl_mem_image(src_mem);
> +
> +    err = check_image_region(src_image, pregion, region);
> +    if (err != CL_SUCCESS) {
> +      break;
> +    }
> +
> +    err = check_image_origin(src_image, psrc_origin, src_origin);
> +    if (err != CL_SUCCESS) {
> +      break;
> +    }
> +
> +    if (command_queue->ctx != src_mem->ctx ||
> +        command_queue->ctx != dst_buffer->ctx) {
> +      err = CL_INVALID_CONTEXT;
> +      break;
> +    }
> +
> +    if (dst_offset + region[0] * region[1] * region[2] * src_image->bpp >
> dst_buffer->size) {
> +      err = CL_INVALID_VALUE;
> +      break;
> +    }
> +
> +    if (src_origin[0] + region[0] > src_image->w ||
> +        src_origin[1] + region[1] > src_image->h ||
> +        src_origin[2] + region[2] > src_image->depth) {
> +      err = CL_INVALID_VALUE;
> +      break;
> +    }
> +
> +    if (src_image->image_type == CL_MEM_OBJECT_IMAGE2D &&
> (src_origin[2] != 0 || region[2] != 1)) {
> +      err = CL_INVALID_VALUE;
> +      break;
> +    }
> +
> +    err = cl_event_check_waitlist(num_events_in_wait_list, event_wait_list,
> +                                  event, command_queue->ctx);
> +    if (err != CL_SUCCESS) {
> +      break;
> +    }
> +
> +    e = cl_event_create(command_queue->ctx, command_queue,
> num_events_in_wait_list,
> +                        event_wait_list, CL_COMMAND_COPY_IMAGE_TO_BUFFER,
> &err);
> +    if (err != CL_SUCCESS) {
> +      break;
> +    }
> +
> +    err = cl_mem_copy_image_to_buffer(command_queue, e, src_image,
> dst_buffer,
> +                                      src_origin, dst_offset, region);
> +    if (err != CL_SUCCESS) {
> +      break;
> +    }
> +
> +    /* We will flush the ndrange if no event depend. Else we will add it to
> queue list.
> +       The finish or Complete status will always be done in queue list. */
> +    e_status = cl_event_is_ready(e);
> +    if (e_status < CL_COMPLETE) { // Error happend, cancel.
> +      err = CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST;
> +      break;
> +    } else if (e_status == CL_COMPLETE) {
> +      err = cl_enqueue_handle(&e->exec_data, CL_SUBMITTED);
> +      if (err != CL_SUCCESS) {
> +        break;
> +      }
> +
> +      e->status = CL_SUBMITTED;
> +    }
> +
> +    cl_command_queue_enqueue_event(command_queue, e);
> +  } while (0);
> +
> +  if (err == CL_SUCCESS && event) {
> +    *event = e;
> +  } else {
> +    cl_event_delete(e);
> +  }
> +
> +  return err;
> +}
> +
> +cl_int
> +clEnqueueCopyBufferToImage(cl_command_queue command_queue,
> +                           cl_mem src_buffer,
> +                           cl_mem dst_mem,
> +                           size_t src_offset,
> +                           const size_t *pdst_origin,
> +                           const size_t *pregion,
> +                           cl_uint num_events_in_wait_list,
> +                           const cl_event *event_wait_list,
> +                           cl_event *event)
> +{
> +  cl_int err = CL_SUCCESS;
> +  struct _cl_mem_image *dst_image = NULL;
> +  size_t region[3];
> +  size_t dst_origin[3];
> +  cl_event e = NULL;
> +  cl_int e_status;
> +
> +  do {
> +    if (!CL_OBJECT_IS_COMMAND_QUEUE(command_queue)) {
> +      err = CL_INVALID_COMMAND_QUEUE;
> +      break;
> +    }
> +
> +    if (!CL_OBJECT_IS_BUFFER(src_buffer)) {
> +      err = CL_INVALID_MEM_OBJECT;
> +      break;
> +    }
> +    if (!CL_OBJECT_IS_IMAGE(dst_mem)) {
> +      err = CL_INVALID_MEM_OBJECT;
> +      break;
> +    }
> +
> +    dst_image = cl_mem_image(dst_mem);
> +
> +    err = check_image_region(dst_image, pregion, region);
> +    if (err != CL_SUCCESS) {
> +      break;
> +    }
> +
> +    err = check_image_origin(dst_image, pdst_origin, dst_origin);
> +    if (err != CL_SUCCESS) {
> +      break;
> +    }
> +
> +    if (command_queue->ctx != src_buffer->ctx ||
> +        command_queue->ctx != dst_mem->ctx) {
> +      err = CL_INVALID_CONTEXT;
> +      break;
> +    }
> +
> +    if (src_offset + region[0] * region[1] * region[2] * dst_image->bpp >
> src_buffer->size) {
> +      err = CL_INVALID_VALUE;
> +      break;
> +    }
> +
> +    if (dst_origin[0] + region[0] > dst_image->w ||
> +        dst_origin[1] + region[1] > dst_image->h ||
> +        dst_origin[2] + region[2] > dst_image->depth) {
> +      err = CL_INVALID_VALUE;
> +      break;
> +    }
> +
> +    if (dst_image->image_type == CL_MEM_OBJECT_IMAGE2D &&
> (dst_origin[2] != 0 || region[2] != 1)) {
> +      err = CL_INVALID_VALUE;
> +      break;
> +    }
> +
> +    err = cl_event_check_waitlist(num_events_in_wait_list, event_wait_list,
> +                                  event, command_queue->ctx);
> +    if (err != CL_SUCCESS) {
> +      break;
> +    }
> +
> +    e = cl_event_create(command_queue->ctx, command_queue,
> num_events_in_wait_list,
> +                        event_wait_list, CL_COMMAND_COPY_BUFFER_TO_IMAGE,
> &err);
> +    if (err != CL_SUCCESS) {
> +      break;
> +    }
> +
> +    err = cl_mem_copy_buffer_to_image(command_queue, e, src_buffer,
> dst_image,
> +                                      src_offset, dst_origin, region);
> +
> +    if (err != CL_SUCCESS) {
> +      break;
> +    }
> +
> +    /* We will flush the ndrange if no event depend. Else we will add it to
> queue list.
> +       The finish or Complete status will always be done in queue list. */
> +    e_status = cl_event_is_ready(e);
> +    if (e_status < CL_COMPLETE) { // Error happend, cancel.
> +      err = CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST;
> +      break;
> +    } else if (e_status == CL_COMPLETE) {
> +      err = cl_enqueue_handle(&e->exec_data, CL_SUBMITTED);
> +      if (err != CL_SUCCESS) {
> +        break;
> +      }
> +
> +      e->status = CL_SUBMITTED;
> +    }
> +
> +    cl_command_queue_enqueue_event(command_queue, e);
> +  } while (0);
> +
> +  if (err == CL_SUCCESS && event) {
> +    *event = e;
> +  } else {
> +    cl_event_delete(e);
> +  }
> +
> +  return err;
> +}
> +
> +cl_int
> +clEnqueueFillImage(cl_command_queue command_queue,
> +                   cl_mem mem,
> +                   const void *fill_color,
> +                   const size_t *porigin,
> +                   const size_t *pregion,
> +                   cl_uint num_events_in_wait_list,
> +                   const cl_event *event_wait_list,
> +                   cl_event *event)
> +{
> +  cl_int err = CL_SUCCESS;
> +  size_t region[3];
> +  size_t origin[3];
> +  cl_event e = NULL;
> +  struct _cl_mem_image *image = NULL;
> +  cl_int e_status;
> +
> +  do {
> +    if (!CL_OBJECT_IS_COMMAND_QUEUE(command_queue)) {
> +      err = CL_INVALID_COMMAND_QUEUE;
> +      break;
> +    }
> +
> +    if (!CL_OBJECT_IS_IMAGE(mem)) {
> +      err = CL_INVALID_MEM_OBJECT;
> +      break;
> +    }
> +
> +    image = cl_mem_image(mem);
> +
> +    err = check_image_region(image, pregion, region);
> +    if (err != CL_SUCCESS) {
> +      break;
> +    }
> +
> +    err = check_image_origin(image, porigin, origin);
> +    if (err != CL_SUCCESS) {
> +      break;
> +    }
> +
> +    if (command_queue->ctx != mem->ctx) {
> +      err = CL_INVALID_CONTEXT;
> +      break;
> +    }
> +
> +    if (fill_color == NULL) {
> +      err = CL_INVALID_VALUE;
> +      break;
> +    }
> +
> +    if (origin[0] + region[0] > image->w ||
> +        origin[1] + region[1] > image->h ||
> +        origin[2] + region[2] > image->depth) {
> +      err = CL_INVALID_VALUE;
> +      break;
> +    }
> +
> +    if (image->image_type == CL_MEM_OBJECT_IMAGE2D && (origin[2] != 0
> || region[2] != 1)) {
> +      err = CL_INVALID_VALUE;
> +      break;
> +    }
> +
> +    if (image->image_type == CL_MEM_OBJECT_IMAGE1D && (origin[2] != 0
> || origin[1] != 0 ||
> +                                                       region[2] != 1 || region[1] != 1)) {
> +      err = CL_INVALID_VALUE;
> +      break;
> +    }
> +
> +    err = cl_event_check_waitlist(num_events_in_wait_list, event_wait_list,
> +                                  event, command_queue->ctx);
> +    if (err != CL_SUCCESS) {
> +      break;
> +    }
> +
> +    e = cl_event_create(command_queue->ctx, command_queue,
> num_events_in_wait_list,
> +                        event_wait_list, CL_COMMAND_FILL_IMAGE, &err);
> +    if (err != CL_SUCCESS) {
> +      break;
> +    }
> +
> +    err = cl_image_fill(command_queue, fill_color, image, origin, region);
> +    if (err != CL_SUCCESS) {
> +      break;
> +    }
> +
> +    /* We will flush the ndrange if no event depend. Else we will add it to
> queue list.
> +       The finish or Complete status will always be done in queue list. */
> +    e_status = cl_event_is_ready(e);
> +    if (e_status < CL_COMPLETE) { // Error happend, cancel.
> +      err = CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST;
> +      break;
> +    } else if (e_status == CL_COMPLETE) {
> +      err = cl_enqueue_handle(&e->exec_data, CL_SUBMITTED);
> +      if (err != CL_SUCCESS) {
> +        break;
> +      }
> +
> +      e->status = CL_SUBMITTED;
> +    }
> +
> +    cl_command_queue_enqueue_event(command_queue, e);
> +  } while (0);
> +
> +  if (err == CL_SUCCESS && event) {
> +    *event = e;
> +  } else {
> +    cl_event_delete(e);
> +  }
> +
> +  return err;
> +}
> --
> 2.7.4
> 
> 



More information about the Beignet mailing list