[Beignet] [PATCH 1/2] Add clEnqueueMapBuffer and clEnqueueMapImage non-blocking map support.

Lu, Guanqun guanqun.lu at intel.com
Thu Aug 22 01:35:02 PDT 2013


+  if(buffer->flags & CL_MEM_USE_HOST_PTR) {
+    assert(buffer->host_ptr);
+    //memcpy(buffer->host_ptr + offset, ptr, size);
+    mem_ptr = buffer->host_ptr + offset;

any reason to keep the commented line there? we should just remove it, otherwise it's confusing. (maybe add another comment...)


> -----Original Message-----
> From: beignet-bounces+guanqun.lu=intel.com at lists.freedesktop.org
> [mailto:beignet-bounces+guanqun.lu=intel.com at lists.freedesktop.org] On
> Behalf Of Yang Rong
> Sent: Thursday, August 22, 2013 4:05 PM
> To: beignet at lists.freedesktop.org
> Cc: Yang, Rong R
> Subject: [Beignet] [PATCH 1/2] Add clEnqueueMapBuffer and
> clEnqueueMapImage non-blocking map support.
> 
> There is a unsync map function drm_intel_gem_bo_map_unsynchronized in
> drm, that can
> be used to do non-blocking map. But this function only map gtt, so force to use
> map
> gtt for all clEnqueueMapBuffer and clEnqueueMapImage.
> 
> Signed-off-by: Yang Rong <rong.r.yang at intel.com>
> ---
>  src/cl_api.c             | 81
> +++++++++++++++++++++++++++++++++++++++++++++--
>  src/cl_driver.h          |  4 +++
>  src/cl_driver_defs.c     |  1 +
>  src/cl_enqueue.c         | 82 +++++++-----------------------------------------
>  src/cl_mem.c             | 14 +++++++--
>  src/cl_mem.h             |  5 ++-
>  src/intel/intel_driver.c |  1 +
>  7 files changed, 113 insertions(+), 75 deletions(-)
> 
> diff --git a/src/cl_api.c b/src/cl_api.c
> index 4f048ee..d4fdb7f 100644
> --- a/src/cl_api.c
> +++ b/src/cl_api.c
> @@ -1576,6 +1576,9 @@ clEnqueueMapBuffer(cl_command_queue
> command_queue,
>                     cl_int *          errcode_ret)
>  {
>    cl_int err = CL_SUCCESS;
> +  void *ptr = NULL;
> +  void *mem_ptr = NULL;
> +  cl_int slot = -1;
>    enqueue_data *data, no_wait_data = { 0 };
> 
>    CHECK_QUEUE(command_queue);
> @@ -1602,6 +1605,69 @@ clEnqueueMapBuffer(cl_command_queue
> command_queue,
>      goto error;
>    }
> 
> +  if (!(ptr = cl_mem_map_auto(buffer, CL_FALSE))) {
> +    err = CL_MAP_FAILURE;
> +    goto error;
> +  }
> +
> +  ptr = (char*)ptr + offset;
> +
> +  if(buffer->flags & CL_MEM_USE_HOST_PTR) {
> +    assert(buffer->host_ptr);
> +    //memcpy(buffer->host_ptr + offset, ptr, size);
> +    mem_ptr = buffer->host_ptr + offset;
> +  } else {
> +    mem_ptr = ptr;
> +  }
> +
> +  /* Record the mapped address. */
> +  if (!buffer->mapped_ptr_sz) {
> +    buffer->mapped_ptr_sz = 16;
> +    buffer->mapped_ptr = (cl_mapped_ptr *)malloc(
> +          sizeof(cl_mapped_ptr) * buffer->mapped_ptr_sz);
> +    if (!buffer->mapped_ptr) {
> +      cl_mem_unmap_auto (buffer);
> +      err = CL_OUT_OF_HOST_MEMORY;
> +      ptr = NULL;
> +      goto error;
> +    }
> +
> +    memset(buffer->mapped_ptr, 0, buffer->mapped_ptr_sz *
> sizeof(cl_mapped_ptr));
> +    slot = 0;
> +  } else {
> +   int i = 0;
> +    for (; i < buffer->mapped_ptr_sz; i++) {
> +      if (buffer->mapped_ptr[i].ptr == NULL) {
> +        slot = i;
> +        break;
> +      }
> +   }
> +
> +    if (i == buffer->mapped_ptr_sz) {
> +      cl_mapped_ptr *new_ptr = (cl_mapped_ptr *)malloc(
> +          sizeof(cl_mapped_ptr) * buffer->mapped_ptr_sz * 2);
> +      if (!new_ptr) {
> +       cl_mem_unmap_auto (buffer);
> +        err = CL_OUT_OF_HOST_MEMORY;
> +        ptr = NULL;
> +        goto error;
> +      }
> +      memset(new_ptr, 0, 2 * buffer->mapped_ptr_sz *
> sizeof(cl_mapped_ptr));
> +      memcpy(new_ptr, buffer->mapped_ptr,
> +             buffer->mapped_ptr_sz * sizeof(cl_mapped_ptr));
> +      slot = buffer->mapped_ptr_sz;
> +      buffer->mapped_ptr_sz *= 2;
> +      free(buffer->mapped_ptr);
> +      buffer->mapped_ptr = new_ptr;
> +    }
> +  }
> +
> +  assert(slot != -1);
> +  buffer->mapped_ptr[slot].ptr = mem_ptr;
> +  buffer->mapped_ptr[slot].v_ptr = ptr;
> +  buffer->mapped_ptr[slot].size = size;
> +  buffer->map_ref++;
> +
>    TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list,
> event, buffer->ctx);
> 
>    data = &no_wait_data;
> @@ -1610,6 +1676,7 @@ clEnqueueMapBuffer(cl_command_queue
> command_queue,
>    data->offset      = offset;
>    data->size        = size;
>    data->map_flags   = map_flags;
> +  data->ptr         = ptr;
> 
>    if(handle_events(command_queue, num_events_in_wait_list,
> event_wait_list,
>                     event, data, CL_COMMAND_READ_BUFFER) ==
> CL_ENQUEUE_EXECUTE_IMM) {
> @@ -1620,7 +1687,7 @@ clEnqueueMapBuffer(cl_command_queue
> command_queue,
>  error:
>    if (errcode_ret)
>      *errcode_ret = err;
> -  return data->ptr;
> +  return mem_ptr;
>  }
> 
>  void *
> @@ -1638,6 +1705,7 @@ clEnqueueMapImage(cl_command_queue
> command_queue,
>                    cl_int *           errcode_ret)
>  {
>    cl_int err = CL_SUCCESS;
> +  void *ptr  = NULL;
>    enqueue_data *data, no_wait_data = { 0 };
> 
>    CHECK_QUEUE(command_queue);
> @@ -1673,6 +1741,14 @@ clEnqueueMapImage(cl_command_queue
> command_queue,
>      goto error;
>    }
> 
> +  if (!(ptr = cl_mem_map_auto(image, CL_FALSE))) {
> +    err = CL_MAP_FAILURE;
> +    goto error;
> +  }
> +
> +  size_t offset = image->bpp*origin[0] + image->row_pitch*origin[1] +
> image->slice_pitch*origin[2];
> +  ptr = (char*)ptr + offset;
> +
>    TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list,
> event, image->ctx);
> 
>    data = &no_wait_data;
> @@ -1683,6 +1759,7 @@ clEnqueueMapImage(cl_command_queue
> command_queue,
>    data->row_pitch   = *image_row_pitch;
>    data->slice_pitch = *image_slice_pitch;
>    data->map_flags   = map_flags;
> +  data->ptr         = ptr;
> 
>    if(handle_events(command_queue, num_events_in_wait_list,
> event_wait_list,
>                     event, data, CL_COMMAND_READ_BUFFER) ==
> CL_ENQUEUE_EXECUTE_IMM) {
> @@ -1693,7 +1770,7 @@ clEnqueueMapImage(cl_command_queue
> command_queue,
>  error:
>    if (errcode_ret)
>      *errcode_ret = err;
> -  return data->ptr; //TODO: map and unmap first
> +  return ptr; //TODO: map and unmap first
>  }
> 
>  cl_int
> diff --git a/src/cl_driver.h b/src/cl_driver.h
> index 1a0ec38..0ce03fe 100644
> --- a/src/cl_driver.h
> +++ b/src/cl_driver.h
> @@ -257,6 +257,10 @@ extern cl_buffer_unmap_cb *cl_buffer_unmap;
>  typedef int (cl_buffer_map_gtt_cb)(cl_buffer);
>  extern cl_buffer_map_gtt_cb *cl_buffer_map_gtt;
> 
> +/* Map a buffer in the GTT domain, non waiting the GPU read or write*/
> +typedef int (cl_buffer_map_gtt_unsync_cb)(cl_buffer);
> +extern cl_buffer_map_gtt_unsync_cb *cl_buffer_map_gtt_unsync;
> +
>  /* Unmap a buffer in the GTT domain */
>  typedef int (cl_buffer_unmap_gtt_cb)(cl_buffer);
>  extern cl_buffer_unmap_gtt_cb *cl_buffer_unmap_gtt;
> diff --git a/src/cl_driver_defs.c b/src/cl_driver_defs.c
> index e7412de..7c4c866 100644
> --- a/src/cl_driver_defs.c
> +++ b/src/cl_driver_defs.c
> @@ -36,6 +36,7 @@ LOCAL cl_buffer_unreference_cb *cl_buffer_unreference
> = NULL;
>  LOCAL cl_buffer_map_cb *cl_buffer_map = NULL;
>  LOCAL cl_buffer_unmap_cb *cl_buffer_unmap = NULL;
>  LOCAL cl_buffer_map_gtt_cb *cl_buffer_map_gtt = NULL;
> +LOCAL cl_buffer_map_gtt_unsync_cb *cl_buffer_map_gtt_unsync = NULL;
>  LOCAL cl_buffer_unmap_gtt_cb *cl_buffer_unmap_gtt = NULL;
>  LOCAL cl_buffer_get_virtual_cb *cl_buffer_get_virtual = NULL;
>  LOCAL cl_buffer_get_size_cb *cl_buffer_get_size = NULL;
> diff --git a/src/cl_enqueue.c b/src/cl_enqueue.c
> index a112cc4..a1c2be9 100644
> --- a/src/cl_enqueue.c
> +++ b/src/cl_enqueue.c
> @@ -32,7 +32,7 @@ cl_int cl_enqueue_read_buffer(enqueue_data* data)
>    cl_int err = CL_SUCCESS;
>    void* src_ptr;
> 
> -  if (!(src_ptr = cl_mem_map_auto(data->mem_obj))) {
> +  if (!(src_ptr = cl_mem_map_auto(data->mem_obj, CL_TRUE))) {
>      err = CL_MAP_FAILURE;
>      goto error;
>    }
> @@ -50,7 +50,7 @@ cl_int cl_enqueue_write_buffer(enqueue_data *data)
>    cl_int err = CL_SUCCESS;
>    void* dst_ptr;
> 
> -  if (!(dst_ptr = cl_mem_map_auto(data->mem_obj))) {
> +  if (!(dst_ptr = cl_mem_map_auto(data->mem_obj, CL_TRUE))) {
>      err = CL_MAP_FAILURE;
>      goto error;
>    }
> @@ -72,7 +72,7 @@ cl_int cl_enqueue_read_image(enqueue_data *data)
>    const size_t* origin = data->origin;
>    const size_t* region = data->region;
> 
> -  if (!(src_ptr = cl_mem_map_auto(image))) {
> +  if (!(src_ptr = cl_mem_map_auto(image, CL_TRUE))) {
>      err = CL_MAP_FAILURE;
>      goto error;
>    }
> @@ -116,7 +116,7 @@ cl_int cl_enqueue_write_image(enqueue_data *data)
>    const size_t *origin = data->origin;
>    const size_t *region = data->region;
> 
> -  if (!(dst_ptr = cl_mem_map_auto(image))) {
> +  if (!(dst_ptr = cl_mem_map_auto(image, CL_TRUE))) {
>      err = CL_MAP_FAILURE;
>      goto error;
>    }
> @@ -156,93 +156,35 @@ cl_int cl_enqueue_map_buffer(enqueue_data
> *data)
> 
>    void *ptr = NULL;
>    cl_int err = CL_SUCCESS;
> -  void *mem_ptr = NULL;
> -  cl_int slot = -1;
>    cl_mem buffer = data->mem_obj;
> -
> -  if (!(ptr = cl_mem_map_auto(buffer))) {
> +  //because using unsync map in clEnqueueMapBuffer, so force use map_gtt
> here
> +  if (!(ptr = cl_mem_map_gtt(buffer))) {
>      err = CL_MAP_FAILURE;
> +    goto error;
>    }
> -
>    ptr = (char*)ptr + data->offset;
> +  assert(data->ptr == ptr);
> 
>    if(buffer->flags & CL_MEM_USE_HOST_PTR) {
>      assert(buffer->host_ptr);
>      memcpy(buffer->host_ptr + data->offset, ptr, data->size);
> -    mem_ptr = buffer->host_ptr + data->offset;
> -  } else {
> -    mem_ptr = ptr;
> -  }
> -
> -  /* Record the mapped address. */
> -  if (!buffer->mapped_ptr_sz) {
> -    buffer->mapped_ptr_sz = 16;
> -    buffer->mapped_ptr = (cl_mapped_ptr *)malloc(
> -          sizeof(cl_mapped_ptr) * buffer->mapped_ptr_sz);
> -    if (!buffer->mapped_ptr) {
> -      cl_mem_unmap_auto (buffer);
> -      err = CL_OUT_OF_HOST_MEMORY;
> -      ptr = NULL;
> -      goto error;
> -    }
> -
> -    memset(buffer->mapped_ptr, 0, buffer->mapped_ptr_sz *
> sizeof(cl_mapped_ptr));
> -    slot = 0;
> -  } else {
> -   int i = 0;
> -    for (; i < buffer->mapped_ptr_sz; i++) {
> -      if (buffer->mapped_ptr[i].ptr == NULL) {
> -        slot = i;
> -        break;
> -      }
> -   }
> -
> -    if (i == buffer->mapped_ptr_sz) {
> -      cl_mapped_ptr *new_ptr = (cl_mapped_ptr *)malloc(
> -          sizeof(cl_mapped_ptr) * buffer->mapped_ptr_sz * 2);
> -      if (!new_ptr) {
> -       cl_mem_unmap_auto (buffer);
> -        err = CL_OUT_OF_HOST_MEMORY;
> -        ptr = NULL;
> -        goto error;
> -      }
> -      memset(new_ptr, 0, 2 * buffer->mapped_ptr_sz *
> sizeof(cl_mapped_ptr));
> -      memcpy(new_ptr, buffer->mapped_ptr,
> -             buffer->mapped_ptr_sz * sizeof(cl_mapped_ptr));
> -      slot = buffer->mapped_ptr_sz;
> -      buffer->mapped_ptr_sz *= 2;
> -      free(buffer->mapped_ptr);
> -      buffer->mapped_ptr = new_ptr;
> -    }
>    }
> 
> -  assert(slot != -1);
> -  buffer->mapped_ptr[slot].ptr = mem_ptr;
> -  buffer->mapped_ptr[slot].v_ptr = ptr;
> -  buffer->mapped_ptr[slot].size = data->size;
> -  buffer->map_ref++;
> -
> -  data->ptr = mem_ptr;
> -
>  error:
>    return err;
>  }
> 
>  cl_int cl_enqueue_map_image(enqueue_data *data)
>  {
> -  void *ptr = NULL;
>    cl_int err = CL_SUCCESS;
> -
>    cl_mem image = data->mem_obj;
> -  const size_t *origin = data->origin;
> -
> -  if (!(ptr = cl_mem_map_auto(image))) {
> +  void *ptr = NULL;
> +  //because using unsync map in clEnqueueMapImage, so force use map_gtt
> here
> +  if (!(ptr = cl_mem_map_gtt(image))) {
>      err = CL_MAP_FAILURE;
>      goto error;
>    }
> -
> -  size_t offset = image->bpp*origin[0] + image->row_pitch*origin[1] +
> image->slice_pitch*origin[2];
> -  data->ptr = (char*)ptr + offset;
> +  assert(data->ptr == (char*)ptr + data->offset);
> 
>  error:
>    return err;
> diff --git a/src/cl_mem.c b/src/cl_mem.c
> index f794ce7..5ce25e4 100644
> --- a/src/cl_mem.c
> +++ b/src/cl_mem.c
> @@ -291,7 +291,7 @@ cl_mem_copy_image(cl_mem image,
>  		  size_t slice_pitch,
>  		  void* host_ptr)
>  {
> -  char* dst_ptr = cl_mem_map_auto(image);
> +  char* dst_ptr = cl_mem_map_auto(image, CL_TRUE);
> 
>    if (row_pitch == image->row_pitch &&
>        (image->depth == 1 || slice_pitch == image->slice_pitch))
> @@ -552,6 +552,14 @@ cl_mem_map_gtt(cl_mem mem)
>    return cl_buffer_get_virtual(mem->bo);
>  }
> 
> +LOCAL void *
> +cl_mem_map_gtt_unsync(cl_mem mem)
> +{
> +  cl_buffer_map_gtt_unsync(mem->bo);
> +  assert(cl_buffer_get_virtual(mem->bo));
> +  return cl_buffer_get_virtual(mem->bo);
> +}
> +
>  LOCAL cl_int
>  cl_mem_unmap_gtt(cl_mem mem)
>  {
> @@ -560,8 +568,10 @@ cl_mem_unmap_gtt(cl_mem mem)
>  }
> 
>  LOCAL void*
> -cl_mem_map_auto(cl_mem mem)
> +cl_mem_map_auto(cl_mem mem, cl_bool sync)
>  {
> +  if(sync == CL_FALSE)
> +    return cl_mem_map_gtt_unsync(mem);  //drm only support map gtt
> unsync map
>    if (mem->is_image && mem->tiling != CL_NO_TILE)
>      return cl_mem_map_gtt(mem);
>    else
> diff --git a/src/cl_mem.h b/src/cl_mem.h
> index 1b1709a..1826306 100644
> --- a/src/cl_mem.h
> +++ b/src/cl_mem.h
> @@ -123,11 +123,14 @@ extern cl_int cl_mem_unmap(cl_mem);
>  /* Directly map a memory object in GTT mode */
>  extern void *cl_mem_map_gtt(cl_mem);
> 
> +/* Directly map a memory object in GTT mode, with out waiting gpu idle */
> +extern void *cl_mem_map_gtt_unsync(cl_mem);
> +
>  /* Unmap a memory object in GTT mode */
>  extern cl_int cl_mem_unmap_gtt(cl_mem);
> 
>  /* Directly map a memory object - tiled images are mapped in GTT mode */
> -extern void *cl_mem_map_auto(cl_mem);
> +extern void *cl_mem_map_auto(cl_mem, cl_bool);
> 
>  /* Unmap a memory object - tiled images are unmapped in GTT mode */
>  extern cl_int cl_mem_unmap_auto(cl_mem);
> diff --git a/src/intel/intel_driver.c b/src/intel/intel_driver.c
> index 6c6b9fb..9959447 100644
> --- a/src/intel/intel_driver.c
> +++ b/src/intel/intel_driver.c
> @@ -519,6 +519,7 @@ intel_setup_callbacks(void)
>    cl_buffer_unmap = (cl_buffer_unmap_cb *) drm_intel_bo_unmap;
>    cl_buffer_map_gtt = (cl_buffer_map_gtt_cb *)
> drm_intel_gem_bo_map_gtt;
>    cl_buffer_unmap_gtt = (cl_buffer_unmap_gtt_cb *)
> drm_intel_gem_bo_unmap_gtt;
> +  cl_buffer_map_gtt_unsync = (cl_buffer_map_gtt_unsync_cb *)
> drm_intel_gem_bo_map_unsynchronized;
>    cl_buffer_get_virtual = (cl_buffer_get_virtual_cb *)
> drm_intel_bo_get_virtual;
>    cl_buffer_get_size = (cl_buffer_get_size_cb *) drm_intel_bo_get_size;
>    cl_buffer_pin = (cl_buffer_pin_cb *) drm_intel_bo_pin;
> --
> 1.8.1.2
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list