[Beignet] [PATCH] Improve the clEnqueueMapBuffer and clCreateBuffer API

Zhigang Gong zhigang.gong at linux.intel.com
Thu Jul 11 18:17:25 PDT 2013


This patch causes one segfault at my machine. Could you check it again?

test_create_kernel:
  test_create_kernel()    [SUCCESS]

compiler_sub_char:
kernel name: compiler_sub_char
  compiler_sub_char()    [SUCCESS]

utest_run: /home/gongzg/git/fdo/beignet/src/cl_mem.c:496: cl_mem_delete: Assertion `mem->map_ref' failed.

Program received signal SIGABRT, Aborted.
0x00007ffff7062425 in raise () from /lib/x86_64-linux-gnu/libc.so.6
(gdb) bt
#0  0x00007ffff7062425 in raise () from /lib/x86_64-linux-gnu/libc.so.6
#1  0x00007ffff7065b8b in abort () from /lib/x86_64-linux-gnu/libc.so.6
#2  0x00007ffff705b0ee in ?? () from /lib/x86_64-linux-gnu/libc.so.6
#3  0x00007ffff705b192 in __assert_fail () from /lib/x86_64-linux-gnu/libc.so.6
#4  0x00007ffff6e1e1be in cl_mem_delete (mem=0x1038790) at /home/gongzg/git/fdo/beignet/src/cl_mem.c:496
#5  cl_mem_delete (mem=0x1038790) at /home/gongzg/git/fdo/beignet/src/cl_mem.c:469
#6  0x00007ffff6e1927d in clReleaseMemObject (memobj=<optimized out>) at /home/gongzg/git/fdo/beignet/src/cl_api.c:521
#7  0x00007ffff7bbcd8e in cl_buffer_destroy () at /home/gongzg/git/fdo/beignet/utests/utest_helper.cpp:448
#8  0x00007ffff7bb9bb3 in UTest::runAll () at /home/gongzg/git/fdo/beignet/utests/utest.cpp:75
#9  0x00000000004011a0 in main (argc=1, argv=0x7fffffffe108) at /home/gongzg/git/fdo/beignet/utests/utest_run.cpp:38


On Wed, Jul 10, 2013 at 03:50:44PM +0800, junyan.he at inbox.com wrote:
> From: Junyan He <junyan.he at linux.intel.com>
> 
> In clCreateBuffer API, add the CL_MEM_ALLOC_HOST_PTR and
> CL_MEM_USE_HOST_PTR flag support.
> CL_MEM_ALLOC_HOST_PTR flag seem nothings special to do.
> CL_MEM_USE_HOST_PTR flag will request clEnqueueMapBuffer API:
> 1> The host_ptr specified in clCreateBuffer is guaranteed to
> contain the latest bits in the region being mapped when the
> clEnqueueMapBuffer command has completed.
> 2> The pointer value returned by clEnqueueMapBuffer will be
> derived from the host_ptr specified when the buffer object is created.
> 
> We improve the clEnqueueMapBuffer to setup a map for the mapped
> address and do the data sync problem based on the address when
> mapped and unmapped.
> 
> Signed-off-by: Junyan He <junyan.he at linux.intel.com>
> ---
>  src/cl_api.c |  125 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
>  src/cl_mem.c |   43 ++++++++++++++++----
>  src/cl_mem.h |   10 +++++
>  3 files changed, 169 insertions(+), 9 deletions(-)
> 
> diff --git a/src/cl_api.c b/src/cl_api.c
> index dc52f0a..115c07a 100644
> --- a/src/cl_api.c
> +++ b/src/cl_api.c
> @@ -1467,7 +1467,9 @@ clEnqueueMapBuffer(cl_command_queue  command_queue,
>                     cl_int *          errcode_ret)
>  {
>    void *ptr = NULL;
> +  void *mem_ptr = NULL;
>    cl_int err = CL_SUCCESS;
> +  int slot = -1;
>  
>    CHECK_QUEUE(command_queue);
>    CHECK_MEM(buffer);
> @@ -1500,10 +1502,66 @@ clEnqueueMapBuffer(cl_command_queue  command_queue,
>  
>    ptr = (char*)ptr + offset;
>  
> +  if(buffer->flags & CL_MEM_USE_HOST_PTR) {
> +    assert(buffer->host_ptr);
> +    memcpy(buffer->host_ptr + offset, ptr, size);
> +    mem_ptr = buffer->host_ptr + offset;
> +  } else {
> +    mem_ptr = ptr;
> +  }
> +
> +  /* Record the mapped address. */
> +  if (!buffer->mapped_ptr_sz) {
> +    buffer->mapped_ptr_sz = 16;
> +    buffer->mapped_ptr = (cl_mapped_ptr *)malloc(
> +          sizeof(cl_mapped_ptr) * buffer->mapped_ptr_sz);
> +    if (!buffer->mapped_ptr) {
> +      cl_mem_unmap_auto (buffer);
> +      err = CL_OUT_OF_HOST_MEMORY;
> +      ptr = NULL;
> +      goto error;
> +    }
> +
> +    memset(buffer->mapped_ptr, 0, buffer->mapped_ptr_sz * sizeof(cl_mapped_ptr));
> +    slot = 0;
> +  } else {
> +    int i = 0;
> +    for (; i < buffer->mapped_ptr_sz; i++) {
> +      if (buffer->mapped_ptr[i].ptr == NULL) {
> +        slot = i;
> +        break;
> +      }
> +    }
> +
> +    if (i == buffer->mapped_ptr_sz) {
> +      cl_mapped_ptr *new_ptr = (cl_mapped_ptr *)malloc(
> +          sizeof(cl_mapped_ptr) * buffer->mapped_ptr_sz * 2);
> +      if (!new_ptr) {
> +        cl_mem_unmap_auto (buffer);
> +        err = CL_OUT_OF_HOST_MEMORY;
> +        ptr = NULL;
> +        goto error;
> +      }
> +      memset(new_ptr, 0, 2 * buffer->mapped_ptr_sz * sizeof(cl_mapped_ptr));
> +      memcpy(new_ptr, buffer->mapped_ptr,
> +             buffer->mapped_ptr_sz * sizeof(cl_mapped_ptr));
> +      slot = buffer->mapped_ptr_sz;
> +      buffer->mapped_ptr_sz *= 2;
> +      free(buffer->mapped_ptr);
> +      buffer->mapped_ptr = new_ptr;
> +    }
> +  }
> +
> +  assert(slot != -1);
> +  buffer->mapped_ptr[slot].ptr = mem_ptr;
> +  buffer->mapped_ptr[slot].v_ptr = ptr;
> +  buffer->mapped_ptr[slot].size = size;
> +  buffer->map_ref++;
> +
>  error:
>    if (errcode_ret)
>      *errcode_ret = err;
> -  return ptr;
> +  return mem_ptr;
>  }
>  
>  void *
> @@ -1578,7 +1636,70 @@ clEnqueueUnmapMemObject(cl_command_queue  command_queue,
>                          const cl_event *  event_wait_list,
>                          cl_event *        event)
>  {
> -  return cl_mem_unmap_auto(memobj);
> +  cl_int err = CL_SUCCESS;
> +  int i;
> +  size_t mapped_size = 0;
> +  void * v_ptr = NULL;
> +
> +  CHECK_QUEUE(command_queue);
> +  CHECK_MEM(memobj);
> +  if (command_queue->ctx != memobj->ctx) {
> +    err = CL_INVALID_CONTEXT;
> +    goto error;
> +  }
> +
> +  assert(memobj->mapped_ptr_sz >= memobj->map_ref);
> +  INVALID_VALUE_IF(!mapped_ptr);
> +  for (i = 0; i < memobj->mapped_ptr_sz; i++) {
> +    if (memobj->mapped_ptr[i].ptr == mapped_ptr) {
> +      memobj->mapped_ptr[i].ptr = NULL;
> +      mapped_size = memobj->mapped_ptr[i].size;
> +      v_ptr = memobj->mapped_ptr[i].v_ptr;
> +      memobj->mapped_ptr[i].size = 0;
> +      memobj->mapped_ptr[i].v_ptr = NULL;
> +      memobj->map_ref--;
> +      break;
> +    }
> +  }
> +  /* can not find a mapped address? */
> +  INVALID_VALUE_IF(i == memobj->mapped_ptr_sz);
> +
> +  if (memobj->flags & CL_MEM_USE_HOST_PTR) {
> +    assert(mapped_ptr >= memobj->host_ptr &&
> +      mapped_ptr + mapped_size <= memobj->host_ptr + memobj->size);
> +    /* Sync the data. */
> +    memcpy(v_ptr, mapped_ptr, mapped_size);
> +  } else {
> +    assert(v_ptr == mapped_ptr);
> +  }
> +
> +  cl_mem_unmap_auto(memobj);
> +
> +  /* shrink the mapped slot. */
> +  if (memobj->mapped_ptr_sz/2 > memobj->map_ref) {
> +    int j = 0;
> +    cl_mapped_ptr *new_ptr = (cl_mapped_ptr *)malloc(
> +	sizeof(cl_mapped_ptr) * (memobj->mapped_ptr_sz/2));
> +    if (!new_ptr) {
> +      /* Just do nothing. */
> +      goto error;
> +    }
> +    memset(new_ptr, 0, (memobj->mapped_ptr_sz/2) * sizeof(cl_mapped_ptr));
> +
> +    for (i = 0; i < memobj->mapped_ptr_sz; i++) {
> +      if (memobj->mapped_ptr[i].ptr) {
> +        new_ptr[j] = memobj->mapped_ptr[i];
> +        j++;
> +        assert(j < memobj->mapped_ptr_sz/2);
> +      }
> +    }
> +    memobj->mapped_ptr_sz = memobj->mapped_ptr_sz/2;
> +    free(memobj->mapped_ptr);
> +    memobj->mapped_ptr = new_ptr;
> +  }
> +
> +error:
> +  return err;
>  }
>  
>  cl_int
> diff --git a/src/cl_mem.c b/src/cl_mem.c
> index 5465aa9..ce70305 100644
> --- a/src/cl_mem.c
> +++ b/src/cl_mem.c
> @@ -106,10 +106,6 @@ cl_mem_allocate(cl_context ctx,
>    cl_ulong max_mem_size;
>  
>    assert(ctx);
> -  FATAL_IF (flags & CL_MEM_ALLOC_HOST_PTR,
> -            "CL_MEM_ALLOC_HOST_PTR unsupported"); /* XXX */
> -  FATAL_IF (flags & CL_MEM_USE_HOST_PTR,
> -            "CL_MEM_USE_HOST_PTR unsupported");   /* XXX */
>  
>    if ((err = cl_get_device_info(ctx->device,
>                                  CL_DEVICE_MAX_MEM_ALLOC_SIZE,
> @@ -172,11 +168,35 @@ cl_mem_new(cl_context ctx,
>             void *data,
>             cl_int *errcode_ret)
>  {
> +  /* Possible mem type combination:
> +       CL_MEM_ALLOC_HOST_PTR
> +       CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR
> +       CL_MEM_USE_HOST_PTR
> +       CL_MEM_COPY_HOST_PTR   */
> +
>    cl_int err = CL_SUCCESS;
>    cl_mem mem = NULL;
>  
> -  /* Check flags consistency */
> -  if (UNLIKELY(flags & CL_MEM_COPY_HOST_PTR && data == NULL)) {
> +  /* This flag is valid only if host_ptr is not NULL */
> +  if (UNLIKELY((flags & CL_MEM_COPY_HOST_PTR ||
> +                flags & CL_MEM_USE_HOST_PTR) &&
> +                data == NULL)) {
> +    err = CL_INVALID_HOST_PTR;
> +    goto error;
> +  }
> +
> +  /* CL_MEM_ALLOC_HOST_PTR and CL_MEM_USE_HOST_PTR
> +     are mutually exclusive. */
> +  if (UNLIKELY(flags & CL_MEM_ALLOC_HOST_PTR &&
> +               flags & CL_MEM_USE_HOST_PTR)) {
> +    err = CL_INVALID_HOST_PTR;
> +    goto error;
> +  }
> +
> +  /* CL_MEM_COPY_HOST_PTR and CL_MEM_USE_HOST_PTR
> +     are mutually exclusive. */
> +  if (UNLIKELY(flags & CL_MEM_COPY_HOST_PTR &&
> +               flags & CL_MEM_USE_HOST_PTR)) {
>      err = CL_INVALID_HOST_PTR;
>      goto error;
>    }
> @@ -187,9 +207,12 @@ cl_mem_new(cl_context ctx,
>      goto error;
>  
>    /* Copy the data if required */
> -  if (flags & CL_MEM_COPY_HOST_PTR) /* TODO check other flags too */
> +  if (flags & CL_MEM_COPY_HOST_PTR || flags & CL_MEM_USE_HOST_PTR)
>      cl_buffer_subdata(mem->bo, 0, sz, data);
>  
> +  if (flags & CL_MEM_USE_HOST_PTR)
> +    mem->host_ptr = data;
> +
>  exit:
>    if (errcode_ret)
>      *errcode_ret = err;
> @@ -418,6 +441,12 @@ cl_mem_delete(cl_mem mem)
>    pthread_mutex_unlock(&mem->ctx->buffer_lock);
>    cl_context_delete(mem->ctx);
>  
> +  /* Someone still mapped? */
> +  assert(mem->map_ref);
> +
> +  if (mem->mapped_ptr)
> +    free(mem->mapped_ptr);
> +
>    cl_free(mem);
>  }
>  
> diff --git a/src/cl_mem.h b/src/cl_mem.h
> index c204992..6d98698 100644
> --- a/src/cl_mem.h
> +++ b/src/cl_mem.h
> @@ -49,6 +49,12 @@ typedef enum cl_image_tiling {
>    CL_TILE_Y  = 2
>  } cl_image_tiling_t;
>  
> +typedef struct _cl_mapped_ptr {
> +  void * ptr;
> +  void * v_ptr;
> +  size_t size;
> +}cl_mapped_ptr;
> +
>  /* Used for buffers and images */
>  struct _cl_mem {
>    DEFINE_ICD(dispatch)
> @@ -68,6 +74,10 @@ struct _cl_mem {
>    uint32_t intel_fmt;       /* format to provide in the surface state */
>    uint32_t bpp;             /* number of bytes per pixel */
>    cl_image_tiling_t tiling; /* only IVB+ supports TILE_[X,Y] (image only) */
> +  void * host_ptr;          /* Pointer of the host mem specified by CL_MEM_ALLOC_HOST_PTR */
> +  cl_mapped_ptr* mapped_ptr;/* Store the mapped addresses and size by caller. */
> +  int mapped_ptr_sz;        /* The array size of mapped_ptr. */
> +  int map_ref;              /* The mapped count. */
>  };
>  
>  /* Query information about a memory object */
> -- 
> 1.7.9.5
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list