[Beignet] [PATCH 1/2] Add clEnqueueMapBuffer and clEnqueueMapImage non-blocking map support.
Yang Rong
rong.r.yang at intel.com
Thu Aug 22 01:05:22 PDT 2013
There is an unsynchronized map function, drm_intel_gem_bo_map_unsynchronized, in drm that can
be used to do a non-blocking map. But this function only maps through the GTT, so force the use
of GTT mapping for all clEnqueueMapBuffer and clEnqueueMapImage calls.
Signed-off-by: Yang Rong <rong.r.yang at intel.com>
---
src/cl_api.c | 81 +++++++++++++++++++++++++++++++++++++++++++++--
src/cl_driver.h | 4 +++
src/cl_driver_defs.c | 1 +
src/cl_enqueue.c | 82 +++++++-----------------------------------------
src/cl_mem.c | 14 +++++++--
src/cl_mem.h | 5 ++-
src/intel/intel_driver.c | 1 +
7 files changed, 113 insertions(+), 75 deletions(-)
diff --git a/src/cl_api.c b/src/cl_api.c
index 4f048ee..d4fdb7f 100644
--- a/src/cl_api.c
+++ b/src/cl_api.c
@@ -1576,6 +1576,9 @@ clEnqueueMapBuffer(cl_command_queue command_queue,
cl_int * errcode_ret)
{
cl_int err = CL_SUCCESS;
+ void *ptr = NULL;
+ void *mem_ptr = NULL;
+ cl_int slot = -1;
enqueue_data *data, no_wait_data = { 0 };
CHECK_QUEUE(command_queue);
@@ -1602,6 +1605,69 @@ clEnqueueMapBuffer(cl_command_queue command_queue,
goto error;
}
+ if (!(ptr = cl_mem_map_auto(buffer, CL_FALSE))) {
+ err = CL_MAP_FAILURE;
+ goto error;
+ }
+
+ ptr = (char*)ptr + offset;
+
+ if(buffer->flags & CL_MEM_USE_HOST_PTR) {
+ assert(buffer->host_ptr);
+ //memcpy(buffer->host_ptr + offset, ptr, size);
+ mem_ptr = buffer->host_ptr + offset;
+ } else {
+ mem_ptr = ptr;
+ }
+
+ /* Record the mapped address. */
+ if (!buffer->mapped_ptr_sz) {
+ buffer->mapped_ptr_sz = 16;
+ buffer->mapped_ptr = (cl_mapped_ptr *)malloc(
+ sizeof(cl_mapped_ptr) * buffer->mapped_ptr_sz);
+ if (!buffer->mapped_ptr) {
+ cl_mem_unmap_auto (buffer);
+ err = CL_OUT_OF_HOST_MEMORY;
+ ptr = NULL;
+ goto error;
+ }
+
+ memset(buffer->mapped_ptr, 0, buffer->mapped_ptr_sz * sizeof(cl_mapped_ptr));
+ slot = 0;
+ } else {
+ int i = 0;
+ for (; i < buffer->mapped_ptr_sz; i++) {
+ if (buffer->mapped_ptr[i].ptr == NULL) {
+ slot = i;
+ break;
+ }
+ }
+
+ if (i == buffer->mapped_ptr_sz) {
+ cl_mapped_ptr *new_ptr = (cl_mapped_ptr *)malloc(
+ sizeof(cl_mapped_ptr) * buffer->mapped_ptr_sz * 2);
+ if (!new_ptr) {
+ cl_mem_unmap_auto (buffer);
+ err = CL_OUT_OF_HOST_MEMORY;
+ ptr = NULL;
+ goto error;
+ }
+ memset(new_ptr, 0, 2 * buffer->mapped_ptr_sz * sizeof(cl_mapped_ptr));
+ memcpy(new_ptr, buffer->mapped_ptr,
+ buffer->mapped_ptr_sz * sizeof(cl_mapped_ptr));
+ slot = buffer->mapped_ptr_sz;
+ buffer->mapped_ptr_sz *= 2;
+ free(buffer->mapped_ptr);
+ buffer->mapped_ptr = new_ptr;
+ }
+ }
+
+ assert(slot != -1);
+ buffer->mapped_ptr[slot].ptr = mem_ptr;
+ buffer->mapped_ptr[slot].v_ptr = ptr;
+ buffer->mapped_ptr[slot].size = size;
+ buffer->map_ref++;
+
TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, buffer->ctx);
data = &no_wait_data;
@@ -1610,6 +1676,7 @@ clEnqueueMapBuffer(cl_command_queue command_queue,
data->offset = offset;
data->size = size;
data->map_flags = map_flags;
+ data->ptr = ptr;
if(handle_events(command_queue, num_events_in_wait_list, event_wait_list,
event, data, CL_COMMAND_READ_BUFFER) == CL_ENQUEUE_EXECUTE_IMM) {
@@ -1620,7 +1687,7 @@ clEnqueueMapBuffer(cl_command_queue command_queue,
error:
if (errcode_ret)
*errcode_ret = err;
- return data->ptr;
+ return mem_ptr;
}
void *
@@ -1638,6 +1705,7 @@ clEnqueueMapImage(cl_command_queue command_queue,
cl_int * errcode_ret)
{
cl_int err = CL_SUCCESS;
+ void *ptr = NULL;
enqueue_data *data, no_wait_data = { 0 };
CHECK_QUEUE(command_queue);
@@ -1673,6 +1741,14 @@ clEnqueueMapImage(cl_command_queue command_queue,
goto error;
}
+ if (!(ptr = cl_mem_map_auto(image, CL_FALSE))) {
+ err = CL_MAP_FAILURE;
+ goto error;
+ }
+
+ size_t offset = image->bpp*origin[0] + image->row_pitch*origin[1] + image->slice_pitch*origin[2];
+ ptr = (char*)ptr + offset;
+
TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, image->ctx);
data = &no_wait_data;
@@ -1683,6 +1759,7 @@ clEnqueueMapImage(cl_command_queue command_queue,
data->row_pitch = *image_row_pitch;
data->slice_pitch = *image_slice_pitch;
data->map_flags = map_flags;
+ data->ptr = ptr;
if(handle_events(command_queue, num_events_in_wait_list, event_wait_list,
event, data, CL_COMMAND_READ_BUFFER) == CL_ENQUEUE_EXECUTE_IMM) {
@@ -1693,7 +1770,7 @@ clEnqueueMapImage(cl_command_queue command_queue,
error:
if (errcode_ret)
*errcode_ret = err;
- return data->ptr; //TODO: map and unmap first
+ return ptr; //TODO: map and unmap first
}
cl_int
diff --git a/src/cl_driver.h b/src/cl_driver.h
index 1a0ec38..0ce03fe 100644
--- a/src/cl_driver.h
+++ b/src/cl_driver.h
@@ -257,6 +257,10 @@ extern cl_buffer_unmap_cb *cl_buffer_unmap;
typedef int (cl_buffer_map_gtt_cb)(cl_buffer);
extern cl_buffer_map_gtt_cb *cl_buffer_map_gtt;
+/* Map a buffer in the GTT domain without waiting for pending GPU reads or writes */
+typedef int (cl_buffer_map_gtt_unsync_cb)(cl_buffer);
+extern cl_buffer_map_gtt_unsync_cb *cl_buffer_map_gtt_unsync;
+
/* Unmap a buffer in the GTT domain */
typedef int (cl_buffer_unmap_gtt_cb)(cl_buffer);
extern cl_buffer_unmap_gtt_cb *cl_buffer_unmap_gtt;
diff --git a/src/cl_driver_defs.c b/src/cl_driver_defs.c
index e7412de..7c4c866 100644
--- a/src/cl_driver_defs.c
+++ b/src/cl_driver_defs.c
@@ -36,6 +36,7 @@ LOCAL cl_buffer_unreference_cb *cl_buffer_unreference = NULL;
LOCAL cl_buffer_map_cb *cl_buffer_map = NULL;
LOCAL cl_buffer_unmap_cb *cl_buffer_unmap = NULL;
LOCAL cl_buffer_map_gtt_cb *cl_buffer_map_gtt = NULL;
+LOCAL cl_buffer_map_gtt_unsync_cb *cl_buffer_map_gtt_unsync = NULL;
LOCAL cl_buffer_unmap_gtt_cb *cl_buffer_unmap_gtt = NULL;
LOCAL cl_buffer_get_virtual_cb *cl_buffer_get_virtual = NULL;
LOCAL cl_buffer_get_size_cb *cl_buffer_get_size = NULL;
diff --git a/src/cl_enqueue.c b/src/cl_enqueue.c
index a112cc4..a1c2be9 100644
--- a/src/cl_enqueue.c
+++ b/src/cl_enqueue.c
@@ -32,7 +32,7 @@ cl_int cl_enqueue_read_buffer(enqueue_data* data)
cl_int err = CL_SUCCESS;
void* src_ptr;
- if (!(src_ptr = cl_mem_map_auto(data->mem_obj))) {
+ if (!(src_ptr = cl_mem_map_auto(data->mem_obj, CL_TRUE))) {
err = CL_MAP_FAILURE;
goto error;
}
@@ -50,7 +50,7 @@ cl_int cl_enqueue_write_buffer(enqueue_data *data)
cl_int err = CL_SUCCESS;
void* dst_ptr;
- if (!(dst_ptr = cl_mem_map_auto(data->mem_obj))) {
+ if (!(dst_ptr = cl_mem_map_auto(data->mem_obj, CL_TRUE))) {
err = CL_MAP_FAILURE;
goto error;
}
@@ -72,7 +72,7 @@ cl_int cl_enqueue_read_image(enqueue_data *data)
const size_t* origin = data->origin;
const size_t* region = data->region;
- if (!(src_ptr = cl_mem_map_auto(image))) {
+ if (!(src_ptr = cl_mem_map_auto(image, CL_TRUE))) {
err = CL_MAP_FAILURE;
goto error;
}
@@ -116,7 +116,7 @@ cl_int cl_enqueue_write_image(enqueue_data *data)
const size_t *origin = data->origin;
const size_t *region = data->region;
- if (!(dst_ptr = cl_mem_map_auto(image))) {
+ if (!(dst_ptr = cl_mem_map_auto(image, CL_TRUE))) {
err = CL_MAP_FAILURE;
goto error;
}
@@ -156,93 +156,35 @@ cl_int cl_enqueue_map_buffer(enqueue_data *data)
void *ptr = NULL;
cl_int err = CL_SUCCESS;
- void *mem_ptr = NULL;
- cl_int slot = -1;
cl_mem buffer = data->mem_obj;
-
- if (!(ptr = cl_mem_map_auto(buffer))) {
+ //clEnqueueMapBuffer already mapped the buffer with the unsync GTT map, so force map_gtt here
+ if (!(ptr = cl_mem_map_gtt(buffer))) {
err = CL_MAP_FAILURE;
+ goto error;
}
-
ptr = (char*)ptr + data->offset;
+ assert(data->ptr == ptr);
if(buffer->flags & CL_MEM_USE_HOST_PTR) {
assert(buffer->host_ptr);
memcpy(buffer->host_ptr + data->offset, ptr, data->size);
- mem_ptr = buffer->host_ptr + data->offset;
- } else {
- mem_ptr = ptr;
- }
-
- /* Record the mapped address. */
- if (!buffer->mapped_ptr_sz) {
- buffer->mapped_ptr_sz = 16;
- buffer->mapped_ptr = (cl_mapped_ptr *)malloc(
- sizeof(cl_mapped_ptr) * buffer->mapped_ptr_sz);
- if (!buffer->mapped_ptr) {
- cl_mem_unmap_auto (buffer);
- err = CL_OUT_OF_HOST_MEMORY;
- ptr = NULL;
- goto error;
- }
-
- memset(buffer->mapped_ptr, 0, buffer->mapped_ptr_sz * sizeof(cl_mapped_ptr));
- slot = 0;
- } else {
- int i = 0;
- for (; i < buffer->mapped_ptr_sz; i++) {
- if (buffer->mapped_ptr[i].ptr == NULL) {
- slot = i;
- break;
- }
- }
-
- if (i == buffer->mapped_ptr_sz) {
- cl_mapped_ptr *new_ptr = (cl_mapped_ptr *)malloc(
- sizeof(cl_mapped_ptr) * buffer->mapped_ptr_sz * 2);
- if (!new_ptr) {
- cl_mem_unmap_auto (buffer);
- err = CL_OUT_OF_HOST_MEMORY;
- ptr = NULL;
- goto error;
- }
- memset(new_ptr, 0, 2 * buffer->mapped_ptr_sz * sizeof(cl_mapped_ptr));
- memcpy(new_ptr, buffer->mapped_ptr,
- buffer->mapped_ptr_sz * sizeof(cl_mapped_ptr));
- slot = buffer->mapped_ptr_sz;
- buffer->mapped_ptr_sz *= 2;
- free(buffer->mapped_ptr);
- buffer->mapped_ptr = new_ptr;
- }
}
- assert(slot != -1);
- buffer->mapped_ptr[slot].ptr = mem_ptr;
- buffer->mapped_ptr[slot].v_ptr = ptr;
- buffer->mapped_ptr[slot].size = data->size;
- buffer->map_ref++;
-
- data->ptr = mem_ptr;
-
error:
return err;
}
cl_int cl_enqueue_map_image(enqueue_data *data)
{
- void *ptr = NULL;
cl_int err = CL_SUCCESS;
-
cl_mem image = data->mem_obj;
- const size_t *origin = data->origin;
-
- if (!(ptr = cl_mem_map_auto(image))) {
+ void *ptr = NULL;
+ //clEnqueueMapImage already mapped the image with the unsync GTT map, so force map_gtt here
+ if (!(ptr = cl_mem_map_gtt(image))) {
err = CL_MAP_FAILURE;
goto error;
}
-
- size_t offset = image->bpp*origin[0] + image->row_pitch*origin[1] + image->slice_pitch*origin[2];
- data->ptr = (char*)ptr + offset;
+ assert(data->ptr == (char*)ptr + data->offset);
error:
return err;
diff --git a/src/cl_mem.c b/src/cl_mem.c
index f794ce7..5ce25e4 100644
--- a/src/cl_mem.c
+++ b/src/cl_mem.c
@@ -291,7 +291,7 @@ cl_mem_copy_image(cl_mem image,
size_t slice_pitch,
void* host_ptr)
{
- char* dst_ptr = cl_mem_map_auto(image);
+ char* dst_ptr = cl_mem_map_auto(image, CL_TRUE);
if (row_pitch == image->row_pitch &&
(image->depth == 1 || slice_pitch == image->slice_pitch))
@@ -552,6 +552,14 @@ cl_mem_map_gtt(cl_mem mem)
return cl_buffer_get_virtual(mem->bo);
}
+LOCAL void *
+cl_mem_map_gtt_unsync(cl_mem mem)
+{
+ cl_buffer_map_gtt_unsync(mem->bo);
+ assert(cl_buffer_get_virtual(mem->bo));
+ return cl_buffer_get_virtual(mem->bo);
+}
+
LOCAL cl_int
cl_mem_unmap_gtt(cl_mem mem)
{
@@ -560,8 +568,10 @@ cl_mem_unmap_gtt(cl_mem mem)
}
LOCAL void*
-cl_mem_map_auto(cl_mem mem)
+cl_mem_map_auto(cl_mem mem, cl_bool sync)
{
+ if(sync == CL_FALSE)
+ return cl_mem_map_gtt_unsync(mem); //drm only support map gtt unsync map
if (mem->is_image && mem->tiling != CL_NO_TILE)
return cl_mem_map_gtt(mem);
else
diff --git a/src/cl_mem.h b/src/cl_mem.h
index 1b1709a..1826306 100644
--- a/src/cl_mem.h
+++ b/src/cl_mem.h
@@ -123,11 +123,14 @@ extern cl_int cl_mem_unmap(cl_mem);
/* Directly map a memory object in GTT mode */
extern void *cl_mem_map_gtt(cl_mem);
+/* Directly map a memory object in GTT mode, without waiting for the GPU to go idle */
+extern void *cl_mem_map_gtt_unsync(cl_mem);
+
/* Unmap a memory object in GTT mode */
extern cl_int cl_mem_unmap_gtt(cl_mem);
/* Directly map a memory object - tiled images are mapped in GTT mode */
-extern void *cl_mem_map_auto(cl_mem);
+extern void *cl_mem_map_auto(cl_mem, cl_bool);
/* Unmap a memory object - tiled images are unmapped in GTT mode */
extern cl_int cl_mem_unmap_auto(cl_mem);
diff --git a/src/intel/intel_driver.c b/src/intel/intel_driver.c
index 6c6b9fb..9959447 100644
--- a/src/intel/intel_driver.c
+++ b/src/intel/intel_driver.c
@@ -519,6 +519,7 @@ intel_setup_callbacks(void)
cl_buffer_unmap = (cl_buffer_unmap_cb *) drm_intel_bo_unmap;
cl_buffer_map_gtt = (cl_buffer_map_gtt_cb *) drm_intel_gem_bo_map_gtt;
cl_buffer_unmap_gtt = (cl_buffer_unmap_gtt_cb *) drm_intel_gem_bo_unmap_gtt;
+ cl_buffer_map_gtt_unsync = (cl_buffer_map_gtt_unsync_cb *) drm_intel_gem_bo_map_unsynchronized;
cl_buffer_get_virtual = (cl_buffer_get_virtual_cb *) drm_intel_bo_get_virtual;
cl_buffer_get_size = (cl_buffer_get_size_cb *) drm_intel_bo_get_size;
cl_buffer_pin = (cl_buffer_pin_cb *) drm_intel_bo_pin;
--
1.8.1.2
More information about the Beignet
mailing list