[Beignet] [PATCH V2 1/2] Add clEnqueueMapBuffer and clEnqueueMapImage non-blocking map support.
He Junyan
junyan.he at inbox.com
Wed Aug 28 20:10:27 PDT 2013
OK, this patch LGTM
First step to get the PTR and second to flush the GPU, so the ptr will
not change.
On Fri, 2013-08-23 at 11:04 +0800, Yang Rong wrote:
> There is a unsync map function drm_intel_gem_bo_map_unsynchronized in drm, that can
> be used to do non-blocking map. But this function only map gtt, so force to use map
> gtt for all clEnqueueMapBuffer and clEnqueueMapImage.
>
> V2: refined comment, and using map_gtt_unsync in clEnqueueMapBuffer/Image
> instead of map_auto to avoid confuse.
>
> Signed-off-by: Yang Rong <rong.r.yang at intel.com>
> ---
> src/cl_api.c | 81 ++++++++++++++++++++++++++++++++++++++++++++++--
> src/cl_driver.h | 4 +++
> src/cl_driver_defs.c | 1 +
> src/cl_enqueue.c | 76 ++++++---------------------------------------
> src/cl_mem.c | 8 +++++
> src/cl_mem.h | 3 ++
> src/intel/intel_driver.c | 1 +
> 7 files changed, 105 insertions(+), 69 deletions(-)
>
> diff --git a/src/cl_api.c b/src/cl_api.c
> index 4f048ee..67446e2 100644
> --- a/src/cl_api.c
> +++ b/src/cl_api.c
> @@ -1576,6 +1576,9 @@ clEnqueueMapBuffer(cl_command_queue command_queue,
> cl_int * errcode_ret)
> {
> cl_int err = CL_SUCCESS;
> + void *ptr = NULL;
> + void *mem_ptr = NULL;
> + cl_int slot = -1;
> enqueue_data *data, no_wait_data = { 0 };
>
> CHECK_QUEUE(command_queue);
> @@ -1602,6 +1605,69 @@ clEnqueueMapBuffer(cl_command_queue command_queue,
> goto error;
> }
>
> + if (!(ptr = cl_mem_map_gtt_unsync(buffer))) {
> + err = CL_MAP_FAILURE;
> + goto error;
> + }
> +
> + ptr = (char*)ptr + offset;
> +
> + if(buffer->flags & CL_MEM_USE_HOST_PTR) {
> + assert(buffer->host_ptr);
> + //only calc ptr here, will do memcpy in enqueue
> + mem_ptr = buffer->host_ptr + offset;
> + } else {
> + mem_ptr = ptr;
> + }
> +
> + /* Record the mapped address. */
> + if (!buffer->mapped_ptr_sz) {
> + buffer->mapped_ptr_sz = 16;
> + buffer->mapped_ptr = (cl_mapped_ptr *)malloc(
> + sizeof(cl_mapped_ptr) * buffer->mapped_ptr_sz);
> + if (!buffer->mapped_ptr) {
> + cl_mem_unmap_gtt (buffer);
> + err = CL_OUT_OF_HOST_MEMORY;
> + ptr = NULL;
> + goto error;
> + }
> +
> + memset(buffer->mapped_ptr, 0, buffer->mapped_ptr_sz * sizeof(cl_mapped_ptr));
> + slot = 0;
> + } else {
> + int i = 0;
> + for (; i < buffer->mapped_ptr_sz; i++) {
> + if (buffer->mapped_ptr[i].ptr == NULL) {
> + slot = i;
> + break;
> + }
> + }
> +
> + if (i == buffer->mapped_ptr_sz) {
> + cl_mapped_ptr *new_ptr = (cl_mapped_ptr *)malloc(
> + sizeof(cl_mapped_ptr) * buffer->mapped_ptr_sz * 2);
> + if (!new_ptr) {
> + cl_mem_unmap_gtt (buffer);
> + err = CL_OUT_OF_HOST_MEMORY;
> + ptr = NULL;
> + goto error;
> + }
> + memset(new_ptr, 0, 2 * buffer->mapped_ptr_sz * sizeof(cl_mapped_ptr));
> + memcpy(new_ptr, buffer->mapped_ptr,
> + buffer->mapped_ptr_sz * sizeof(cl_mapped_ptr));
> + slot = buffer->mapped_ptr_sz;
> + buffer->mapped_ptr_sz *= 2;
> + free(buffer->mapped_ptr);
> + buffer->mapped_ptr = new_ptr;
> + }
> + }
> +
> + assert(slot != -1);
> + buffer->mapped_ptr[slot].ptr = mem_ptr;
> + buffer->mapped_ptr[slot].v_ptr = ptr;
> + buffer->mapped_ptr[slot].size = size;
> + buffer->map_ref++;
> +
> TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, buffer->ctx);
>
> data = &no_wait_data;
> @@ -1610,6 +1676,7 @@ clEnqueueMapBuffer(cl_command_queue command_queue,
> data->offset = offset;
> data->size = size;
> data->map_flags = map_flags;
> + data->ptr = ptr;
>
> if(handle_events(command_queue, num_events_in_wait_list, event_wait_list,
> event, data, CL_COMMAND_READ_BUFFER) == CL_ENQUEUE_EXECUTE_IMM) {
> @@ -1620,7 +1687,7 @@ clEnqueueMapBuffer(cl_command_queue command_queue,
> error:
> if (errcode_ret)
> *errcode_ret = err;
> - return data->ptr;
> + return mem_ptr;
> }
>
> void *
> @@ -1638,6 +1705,7 @@ clEnqueueMapImage(cl_command_queue command_queue,
> cl_int * errcode_ret)
> {
> cl_int err = CL_SUCCESS;
> + void *ptr = NULL;
> enqueue_data *data, no_wait_data = { 0 };
>
> CHECK_QUEUE(command_queue);
> @@ -1673,6 +1741,14 @@ clEnqueueMapImage(cl_command_queue command_queue,
> goto error;
> }
>
> + if (!(ptr = cl_mem_map_gtt_unsync(image))) {
> + err = CL_MAP_FAILURE;
> + goto error;
> + }
> +
> + size_t offset = image->bpp*origin[0] + image->row_pitch*origin[1] + image->slice_pitch*origin[2];
> + ptr = (char*)ptr + offset;
> +
> TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, image->ctx);
>
> data = &no_wait_data;
> @@ -1683,6 +1759,7 @@ clEnqueueMapImage(cl_command_queue command_queue,
> data->row_pitch = *image_row_pitch;
> data->slice_pitch = *image_slice_pitch;
> data->map_flags = map_flags;
> + data->ptr = ptr;
>
> if(handle_events(command_queue, num_events_in_wait_list, event_wait_list,
> event, data, CL_COMMAND_READ_BUFFER) == CL_ENQUEUE_EXECUTE_IMM) {
> @@ -1693,7 +1770,7 @@ clEnqueueMapImage(cl_command_queue command_queue,
> error:
> if (errcode_ret)
> *errcode_ret = err;
> - return data->ptr; //TODO: map and unmap first
> + return ptr; //TODO: map and unmap first
> }
>
> cl_int
> diff --git a/src/cl_driver.h b/src/cl_driver.h
> index 1a0ec38..0ce03fe 100644
> --- a/src/cl_driver.h
> +++ b/src/cl_driver.h
> @@ -257,6 +257,10 @@ extern cl_buffer_unmap_cb *cl_buffer_unmap;
> typedef int (cl_buffer_map_gtt_cb)(cl_buffer);
> extern cl_buffer_map_gtt_cb *cl_buffer_map_gtt;
>
> +/* Map a buffer in the GTT domain, non waiting the GPU read or write*/
> +typedef int (cl_buffer_map_gtt_unsync_cb)(cl_buffer);
> +extern cl_buffer_map_gtt_unsync_cb *cl_buffer_map_gtt_unsync;
> +
> /* Unmap a buffer in the GTT domain */
> typedef int (cl_buffer_unmap_gtt_cb)(cl_buffer);
> extern cl_buffer_unmap_gtt_cb *cl_buffer_unmap_gtt;
> diff --git a/src/cl_driver_defs.c b/src/cl_driver_defs.c
> index e7412de..7c4c866 100644
> --- a/src/cl_driver_defs.c
> +++ b/src/cl_driver_defs.c
> @@ -36,6 +36,7 @@ LOCAL cl_buffer_unreference_cb *cl_buffer_unreference = NULL;
> LOCAL cl_buffer_map_cb *cl_buffer_map = NULL;
> LOCAL cl_buffer_unmap_cb *cl_buffer_unmap = NULL;
> LOCAL cl_buffer_map_gtt_cb *cl_buffer_map_gtt = NULL;
> +LOCAL cl_buffer_map_gtt_unsync_cb *cl_buffer_map_gtt_unsync = NULL;
> LOCAL cl_buffer_unmap_gtt_cb *cl_buffer_unmap_gtt = NULL;
> LOCAL cl_buffer_get_virtual_cb *cl_buffer_get_virtual = NULL;
> LOCAL cl_buffer_get_size_cb *cl_buffer_get_size = NULL;
> diff --git a/src/cl_enqueue.c b/src/cl_enqueue.c
> index a112cc4..a914e26 100644
> --- a/src/cl_enqueue.c
> +++ b/src/cl_enqueue.c
> @@ -156,93 +156,35 @@ cl_int cl_enqueue_map_buffer(enqueue_data *data)
>
> void *ptr = NULL;
> cl_int err = CL_SUCCESS;
> - void *mem_ptr = NULL;
> - cl_int slot = -1;
> cl_mem buffer = data->mem_obj;
> -
> - if (!(ptr = cl_mem_map_auto(buffer))) {
> + //because using unsync map in clEnqueueMapBuffer, so force use map_gtt here
> + if (!(ptr = cl_mem_map_gtt(buffer))) {
> err = CL_MAP_FAILURE;
> + goto error;
> }
> -
> ptr = (char*)ptr + data->offset;
> + assert(data->ptr == ptr);
>
> if(buffer->flags & CL_MEM_USE_HOST_PTR) {
> assert(buffer->host_ptr);
> memcpy(buffer->host_ptr + data->offset, ptr, data->size);
> - mem_ptr = buffer->host_ptr + data->offset;
> - } else {
> - mem_ptr = ptr;
> - }
> -
> - /* Record the mapped address. */
> - if (!buffer->mapped_ptr_sz) {
> - buffer->mapped_ptr_sz = 16;
> - buffer->mapped_ptr = (cl_mapped_ptr *)malloc(
> - sizeof(cl_mapped_ptr) * buffer->mapped_ptr_sz);
> - if (!buffer->mapped_ptr) {
> - cl_mem_unmap_auto (buffer);
> - err = CL_OUT_OF_HOST_MEMORY;
> - ptr = NULL;
> - goto error;
> - }
> -
> - memset(buffer->mapped_ptr, 0, buffer->mapped_ptr_sz * sizeof(cl_mapped_ptr));
> - slot = 0;
> - } else {
> - int i = 0;
> - for (; i < buffer->mapped_ptr_sz; i++) {
> - if (buffer->mapped_ptr[i].ptr == NULL) {
> - slot = i;
> - break;
> - }
> - }
> -
> - if (i == buffer->mapped_ptr_sz) {
> - cl_mapped_ptr *new_ptr = (cl_mapped_ptr *)malloc(
> - sizeof(cl_mapped_ptr) * buffer->mapped_ptr_sz * 2);
> - if (!new_ptr) {
> - cl_mem_unmap_auto (buffer);
> - err = CL_OUT_OF_HOST_MEMORY;
> - ptr = NULL;
> - goto error;
> - }
> - memset(new_ptr, 0, 2 * buffer->mapped_ptr_sz * sizeof(cl_mapped_ptr));
> - memcpy(new_ptr, buffer->mapped_ptr,
> - buffer->mapped_ptr_sz * sizeof(cl_mapped_ptr));
> - slot = buffer->mapped_ptr_sz;
> - buffer->mapped_ptr_sz *= 2;
> - free(buffer->mapped_ptr);
> - buffer->mapped_ptr = new_ptr;
> - }
> }
>
> - assert(slot != -1);
> - buffer->mapped_ptr[slot].ptr = mem_ptr;
> - buffer->mapped_ptr[slot].v_ptr = ptr;
> - buffer->mapped_ptr[slot].size = data->size;
> - buffer->map_ref++;
> -
> - data->ptr = mem_ptr;
> -
> error:
> return err;
> }
>
> cl_int cl_enqueue_map_image(enqueue_data *data)
> {
> - void *ptr = NULL;
> cl_int err = CL_SUCCESS;
> -
> cl_mem image = data->mem_obj;
> - const size_t *origin = data->origin;
> -
> - if (!(ptr = cl_mem_map_auto(image))) {
> + void *ptr = NULL;
> + //because using unsync map in clEnqueueMapImage, so force use map_gtt here
> + if (!(ptr = cl_mem_map_gtt(image))) {
> err = CL_MAP_FAILURE;
> goto error;
> }
> -
> - size_t offset = image->bpp*origin[0] + image->row_pitch*origin[1] + image->slice_pitch*origin[2];
> - data->ptr = (char*)ptr + offset;
> + assert(data->ptr == (char*)ptr + data->offset);
>
> error:
> return err;
> @@ -282,7 +224,7 @@ cl_int cl_enqueue_unmap_mem_object(enqueue_data *data)
> assert(v_ptr == mapped_ptr);
> }
>
> - cl_mem_unmap_auto(memobj);
> + cl_mem_unmap_gtt(memobj);
>
> /* shrink the mapped slot. */
> if (memobj->mapped_ptr_sz/2 > memobj->map_ref) {
> diff --git a/src/cl_mem.c b/src/cl_mem.c
> index f794ce7..4819096 100644
> --- a/src/cl_mem.c
> +++ b/src/cl_mem.c
> @@ -552,6 +552,14 @@ cl_mem_map_gtt(cl_mem mem)
> return cl_buffer_get_virtual(mem->bo);
> }
>
> +LOCAL void *
> +cl_mem_map_gtt_unsync(cl_mem mem)
> +{
> + cl_buffer_map_gtt_unsync(mem->bo);
> + assert(cl_buffer_get_virtual(mem->bo));
> + return cl_buffer_get_virtual(mem->bo);
> +}
> +
> LOCAL cl_int
> cl_mem_unmap_gtt(cl_mem mem)
> {
> diff --git a/src/cl_mem.h b/src/cl_mem.h
> index 1b1709a..d25f0ed 100644
> --- a/src/cl_mem.h
> +++ b/src/cl_mem.h
> @@ -123,6 +123,9 @@ extern cl_int cl_mem_unmap(cl_mem);
> /* Directly map a memory object in GTT mode */
> extern void *cl_mem_map_gtt(cl_mem);
>
> +/* Directly map a memory object in GTT mode, with out waiting gpu idle */
> +extern void *cl_mem_map_gtt_unsync(cl_mem);
> +
> /* Unmap a memory object in GTT mode */
> extern cl_int cl_mem_unmap_gtt(cl_mem);
>
> diff --git a/src/intel/intel_driver.c b/src/intel/intel_driver.c
> index 6c6b9fb..9959447 100644
> --- a/src/intel/intel_driver.c
> +++ b/src/intel/intel_driver.c
> @@ -519,6 +519,7 @@ intel_setup_callbacks(void)
> cl_buffer_unmap = (cl_buffer_unmap_cb *) drm_intel_bo_unmap;
> cl_buffer_map_gtt = (cl_buffer_map_gtt_cb *) drm_intel_gem_bo_map_gtt;
> cl_buffer_unmap_gtt = (cl_buffer_unmap_gtt_cb *) drm_intel_gem_bo_unmap_gtt;
> + cl_buffer_map_gtt_unsync = (cl_buffer_map_gtt_unsync_cb *) drm_intel_gem_bo_map_unsynchronized;
> cl_buffer_get_virtual = (cl_buffer_get_virtual_cb *) drm_intel_bo_get_virtual;
> cl_buffer_get_size = (cl_buffer_get_size_cb *) drm_intel_bo_get_size;
> cl_buffer_pin = (cl_buffer_pin_cb *) drm_intel_bo_pin;
More information about the Beignet
mailing list