[Beignet] [PATCH] Fix map gtt fail when memory object size is too large.
Yang Rong
rong.r.yang at intel.com
Mon May 19 19:46:19 PDT 2014
After the max allocation size was changed to 256M, mapping a large memory object
through the GTT can fail on some systems. So when the image size is larger than 128M,
disable tiling and use a normal map instead. But in clEnqueueMapBuffer/clEnqueueMapImage
the map may still fail because of the unsynchronized map.
Signed-off-by: Yang Rong <rong.r.yang at intel.com>
---
src/cl_api.c | 55 ++++++++++++++++++++++++++++++++-----------------------
src/cl_enqueue.c | 27 ++++++++++++++++++---------
src/cl_enqueue.h | 1 +
src/cl_mem.c | 18 ++++++++++++++++--
src/cl_mem.h | 1 +
5 files changed, 68 insertions(+), 34 deletions(-)
diff --git a/src/cl_api.c b/src/cl_api.c
index 03a1cda..0800c09 100644
--- a/src/cl_api.c
+++ b/src/cl_api.c
@@ -2161,7 +2161,7 @@ error:
return err;
}
-static cl_int _cl_map_mem(cl_mem mem, void **ptr, void **mem_ptr, size_t offset, size_t size)
+static cl_int _cl_map_mem(cl_mem mem, void *ptr, void **mem_ptr, size_t offset, size_t size)
{
cl_int slot = -1;
int err = CL_SUCCESS;
@@ -2172,17 +2172,13 @@ static cl_int _cl_map_mem(cl_mem mem, void **ptr, void **mem_ptr, size_t offset,
sub_offset = buffer->sub_offset;
}
- if (!(*ptr = cl_mem_map_gtt_unsync(mem))) {
- err = CL_MAP_FAILURE;
- goto error;
- }
- *ptr = (char*)(*ptr) + offset + sub_offset;
+ ptr = (char*)ptr + offset + sub_offset;
if(mem->flags & CL_MEM_USE_HOST_PTR) {
assert(mem->host_ptr);
//only calc ptr here, will do memcpy in enqueue
*mem_ptr = mem->host_ptr + offset + sub_offset;
} else {
- *mem_ptr = *ptr;
+ *mem_ptr = ptr;
}
/* Record the mapped address. */
if (!mem->mapped_ptr_sz) {
@@ -2190,7 +2186,7 @@ static cl_int _cl_map_mem(cl_mem mem, void **ptr, void **mem_ptr, size_t offset,
mem->mapped_ptr = (cl_mapped_ptr *)malloc(
sizeof(cl_mapped_ptr) * mem->mapped_ptr_sz);
if (!mem->mapped_ptr) {
- cl_mem_unmap_gtt(mem);
+ cl_mem_unmap_auto(mem);
err = CL_OUT_OF_HOST_MEMORY;
goto error;
}
@@ -2208,7 +2204,7 @@ static cl_int _cl_map_mem(cl_mem mem, void **ptr, void **mem_ptr, size_t offset,
cl_mapped_ptr *new_ptr = (cl_mapped_ptr *)malloc(
sizeof(cl_mapped_ptr) * mem->mapped_ptr_sz * 2);
if (!new_ptr) {
- cl_mem_unmap_gtt (mem);
+ cl_mem_unmap_auto(mem);
err = CL_OUT_OF_HOST_MEMORY;
goto error;
}
@@ -2223,7 +2219,7 @@ static cl_int _cl_map_mem(cl_mem mem, void **ptr, void **mem_ptr, size_t offset,
}
assert(slot != -1);
mem->mapped_ptr[slot].ptr = *mem_ptr;
- mem->mapped_ptr[slot].v_ptr = *ptr;
+ mem->mapped_ptr[slot].v_ptr = ptr;
mem->mapped_ptr[slot].size = size;
mem->map_ref++;
error:
@@ -2270,10 +2266,6 @@ clEnqueueMapBuffer(cl_command_queue command_queue,
goto error;
}
- err = _cl_map_mem(buffer, &ptr, &mem_ptr, offset, size);
- if (err != CL_SUCCESS)
- goto error;
-
TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, buffer->ctx);
data = &no_wait_data;
@@ -2282,12 +2274,25 @@ clEnqueueMapBuffer(cl_command_queue command_queue,
data->offset = offset;
data->size = size;
data->ptr = ptr;
+ data->unsync_map = 1;
if(handle_events(command_queue, num_events_in_wait_list, event_wait_list,
event, data, CL_COMMAND_MAP_BUFFER) == CL_ENQUEUE_EXECUTE_IMM) {
+ data->unsync_map = 0;
err = cl_enqueue_handle(event ? *event : NULL, data);
+ if (err != CL_SUCCESS)
+ goto error;
+ ptr = data->ptr;
if(event) cl_event_set_status(*event, CL_COMPLETE);
+ } else {
+ if ((ptr = cl_mem_map_gtt_unsync(buffer)) == NULL) {
+ err = CL_MAP_FAILURE;
+ goto error;
+ }
}
+ err = _cl_map_mem(buffer, ptr, &mem_ptr, offset, size);
+ if (err != CL_SUCCESS)
+ goto error;
error:
if (errcode_ret)
@@ -2344,11 +2349,6 @@ clEnqueueMapImage(cl_command_queue command_queue,
goto error;
}
- if (!(ptr = cl_mem_map_gtt_unsync(mem))) {
- err = CL_MAP_FAILURE;
- goto error;
- }
-
size_t offset = image->bpp*origin[0] + image->row_pitch*origin[1] + image->slice_pitch*origin[2];
size_t size;
if(region[2] == 1) {
@@ -2362,10 +2362,6 @@ clEnqueueMapImage(cl_command_queue command_queue,
size += image->bpp * (origin[0] + region[0]);
}
- err = _cl_map_mem(mem, &ptr, &mem_ptr, offset, size);
- if (err != CL_SUCCESS)
- goto error;
-
TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, mem->ctx);
data = &no_wait_data;
@@ -2378,12 +2374,25 @@ clEnqueueMapImage(cl_command_queue command_queue,
data->slice_pitch = *image_slice_pitch;
data->ptr = ptr;
data->offset = offset;
+ data->unsync_map = 1;
if(handle_events(command_queue, num_events_in_wait_list, event_wait_list,
event, data, CL_COMMAND_MAP_IMAGE) == CL_ENQUEUE_EXECUTE_IMM) {
+ data->unsync_map = 0;
err = cl_enqueue_handle(event ? *event : NULL, data);
+ if (err != CL_SUCCESS)
+ goto error;
+ ptr = data->ptr;
if(event) cl_event_set_status(*event, CL_COMPLETE);
+ } else {
+ if ((ptr = cl_mem_map_gtt_unsync(mem)) == NULL) {
+ err = CL_MAP_FAILURE;
+ goto error;
+ }
}
+ err = _cl_map_mem(mem, ptr, &mem_ptr, offset, size);
+ if (err != CL_SUCCESS)
+ goto error;
error:
if (errcode_ret)
diff --git a/src/cl_enqueue.c b/src/cl_enqueue.c
index 330d230..800668d 100644
--- a/src/cl_enqueue.c
+++ b/src/cl_enqueue.c
@@ -246,17 +246,21 @@ cl_int cl_enqueue_map_buffer(enqueue_data *data)
mem->type == CL_MEM_SUBBUFFER_TYPE);
struct _cl_mem_buffer* buffer = (struct _cl_mem_buffer*)mem;
- //because using unsync map in clEnqueueMapBuffer, so force use map_gtt here
- if (!(ptr = cl_mem_map_gtt(mem))) {
+ if(data->unsync_map == 1)
+    //because clEnqueueMapBuffer used an unsynchronized GTT map, force map_gtt here
+ ptr = cl_mem_map_gtt(mem);
+ else
+ ptr = cl_mem_map_auto(mem);
+
+ if (ptr == NULL) {
err = CL_MAP_FAILURE;
goto error;
}
-
- ptr = (char*)ptr + data->offset + buffer->sub_offset;
- assert(data->ptr == ptr);
+ data->ptr = ptr;
if(mem->flags & CL_MEM_USE_HOST_PTR) {
assert(mem->host_ptr);
+ ptr = (char*)ptr + data->offset + buffer->sub_offset;
memcpy(mem->host_ptr + data->offset, ptr, data->size);
}
@@ -271,12 +275,17 @@ cl_int cl_enqueue_map_image(enqueue_data *data)
void *ptr = NULL;
CHECK_IMAGE(mem, image);
- if (!(ptr = cl_mem_map_gtt(mem))) {
+ if(data->unsync_map == 1)
+    //because clEnqueueMapImage used an unsynchronized GTT map, force map_gtt here
+ ptr = cl_mem_map_gtt(mem);
+ else
+ ptr = cl_mem_map_auto(mem);
+
+ if (ptr == NULL) {
err = CL_MAP_FAILURE;
goto error;
}
-
- assert(data->ptr == (char*)ptr + data->offset);
+ data->ptr = ptr;
if(mem->flags & CL_MEM_USE_HOST_PTR) {
assert(mem->host_ptr);
@@ -323,7 +332,7 @@ cl_int cl_enqueue_unmap_mem_object(enqueue_data *data)
assert(v_ptr == mapped_ptr);
}
- cl_mem_unmap_gtt(memobj);
+ cl_mem_unmap_auto(memobj);
/* shrink the mapped slot. */
if (memobj->mapped_ptr_sz/2 > memobj->map_ref) {
diff --git a/src/cl_enqueue.h b/src/cl_enqueue.h
index 1d3ae5f..c7e33da 100644
--- a/src/cl_enqueue.h
+++ b/src/cl_enqueue.h
@@ -60,6 +60,7 @@ typedef struct _enqueue_data {
const void * const_ptr; /* Const ptr for memory read */
void * ptr; /* Ptr for write and return value */
const cl_mem* mem_list; /* mem_list of clEnqueueNativeKernel */
+ uint8_t unsync_map; /* Indicate the clEnqueueMapBuffer/Image is unsync map */
void (*user_func)(void *); /* pointer to a host-callable user function */
} enqueue_data;
diff --git a/src/cl_mem.c b/src/cl_mem.c
index 6b8ca7c..7092385 100644
--- a/src/cl_mem.c
+++ b/src/cl_mem.c
@@ -48,6 +48,8 @@
#define CL_MEM_OBJECT_IMAGE2D 0x10F1
#define CL_MEM_OBJECT_IMAGE3D 0x10F2
+#define MAX_TILING_SIZE 128 * MB
+
static cl_mem_object_type
cl_get_mem_object_type(cl_mem mem)
{
@@ -622,6 +624,15 @@ _cl_mem_new_image(cl_context ctx,
sz = aligned_pitch * aligned_h * depth;
+  /* If sz is larger than 128MB, map gtt may fail in some system.
+     Because there is no obvious performance drop, disable tiling. */
+ if(tiling != CL_NO_TILE && sz > MAX_TILING_SIZE) {
+ tiling = CL_NO_TILE;
+ aligned_pitch = w * bpp;
+ aligned_h = h;
+ sz = aligned_pitch * aligned_h * depth;
+ }
+
mem = cl_mem_allocate(CL_MEM_IMAGE_TYPE, ctx, flags, sz, tiling != CL_NO_TILE, &err);
if (mem == NULL || err != CL_SUCCESS)
goto error;
@@ -714,7 +725,7 @@ cl_mem_delete(cl_mem mem)
for(i=0; i<mem->mapped_ptr_sz; i++) {
if(mem->mapped_ptr[i].ptr != NULL) {
mem->map_ref--;
- cl_mem_unmap_gtt(mem);
+ cl_mem_unmap_auto(mem);
}
}
assert(mem->map_ref == 0);
@@ -1326,6 +1337,7 @@ cl_mem_map_gtt(cl_mem mem)
{
cl_buffer_map_gtt(mem->bo);
assert(cl_buffer_get_virtual(mem->bo));
+ mem->mapped_gtt = 1;
return cl_buffer_get_virtual(mem->bo);
}
@@ -1356,8 +1368,10 @@ cl_mem_map_auto(cl_mem mem)
LOCAL cl_int
cl_mem_unmap_auto(cl_mem mem)
{
- if (IS_IMAGE(mem) && cl_mem_image(mem)->tiling != CL_NO_TILE)
+ if (mem->mapped_gtt == 1) {
cl_buffer_unmap_gtt(mem->bo);
+ mem->mapped_gtt = 0;
+ }
else
cl_buffer_unmap(mem->bo);
return CL_SUCCESS;
diff --git a/src/cl_mem.h b/src/cl_mem.h
index 47a30dc..5719c60 100644
--- a/src/cl_mem.h
+++ b/src/cl_mem.h
@@ -87,6 +87,7 @@ typedef struct _cl_mem {
cl_mapped_ptr* mapped_ptr;/* Store the mapped addresses and size by caller. */
int mapped_ptr_sz; /* The array size of mapped_ptr. */
int map_ref; /* The mapped count. */
+ uint8_t mapped_gtt; /* This object has mapped gtt, for unmap. */
cl_mem_dstr_cb *dstr_cb; /* The destroy callback. */
} _cl_mem;
--
1.9.1
More information about the Beignet
mailing list