[Beignet] [PATCH] Fix map gtt fail when memory object size is too large.
Zhigang Gong
zhigang.gong at linux.intel.com
Wed May 21 22:45:36 PDT 2014
LGTM, will push later. Thanks.
On Tue, May 20, 2014 at 10:46:19AM +0800, Yang Rong wrote:
> After the max allocation size was changed to 256M, mapping GTT for a large memory
> object would fail on some systems. So when the image size is larger than 128M,
> disable tiling and use a normal map. Note that clEnqueueMapBuffer/Image may still
> fail because of the unsynchronized map.
>
> Signed-off-by: Yang Rong <rong.r.yang at intel.com>
> ---
> src/cl_api.c | 55 ++++++++++++++++++++++++++++++++-----------------------
> src/cl_enqueue.c | 27 ++++++++++++++++++---------
> src/cl_enqueue.h | 1 +
> src/cl_mem.c | 18 ++++++++++++++++--
> src/cl_mem.h | 1 +
> 5 files changed, 68 insertions(+), 34 deletions(-)
>
> diff --git a/src/cl_api.c b/src/cl_api.c
> index 03a1cda..0800c09 100644
> --- a/src/cl_api.c
> +++ b/src/cl_api.c
> @@ -2161,7 +2161,7 @@ error:
> return err;
> }
>
> -static cl_int _cl_map_mem(cl_mem mem, void **ptr, void **mem_ptr, size_t offset, size_t size)
> +static cl_int _cl_map_mem(cl_mem mem, void *ptr, void **mem_ptr, size_t offset, size_t size)
> {
> cl_int slot = -1;
> int err = CL_SUCCESS;
> @@ -2172,17 +2172,13 @@ static cl_int _cl_map_mem(cl_mem mem, void **ptr, void **mem_ptr, size_t offset,
> sub_offset = buffer->sub_offset;
> }
>
> - if (!(*ptr = cl_mem_map_gtt_unsync(mem))) {
> - err = CL_MAP_FAILURE;
> - goto error;
> - }
> - *ptr = (char*)(*ptr) + offset + sub_offset;
> + ptr = (char*)ptr + offset + sub_offset;
> if(mem->flags & CL_MEM_USE_HOST_PTR) {
> assert(mem->host_ptr);
> //only calc ptr here, will do memcpy in enqueue
> *mem_ptr = mem->host_ptr + offset + sub_offset;
> } else {
> - *mem_ptr = *ptr;
> + *mem_ptr = ptr;
> }
> /* Record the mapped address. */
> if (!mem->mapped_ptr_sz) {
> @@ -2190,7 +2186,7 @@ static cl_int _cl_map_mem(cl_mem mem, void **ptr, void **mem_ptr, size_t offset,
> mem->mapped_ptr = (cl_mapped_ptr *)malloc(
> sizeof(cl_mapped_ptr) * mem->mapped_ptr_sz);
> if (!mem->mapped_ptr) {
> - cl_mem_unmap_gtt(mem);
> + cl_mem_unmap_auto(mem);
> err = CL_OUT_OF_HOST_MEMORY;
> goto error;
> }
> @@ -2208,7 +2204,7 @@ static cl_int _cl_map_mem(cl_mem mem, void **ptr, void **mem_ptr, size_t offset,
> cl_mapped_ptr *new_ptr = (cl_mapped_ptr *)malloc(
> sizeof(cl_mapped_ptr) * mem->mapped_ptr_sz * 2);
> if (!new_ptr) {
> - cl_mem_unmap_gtt (mem);
> + cl_mem_unmap_auto(mem);
> err = CL_OUT_OF_HOST_MEMORY;
> goto error;
> }
> @@ -2223,7 +2219,7 @@ static cl_int _cl_map_mem(cl_mem mem, void **ptr, void **mem_ptr, size_t offset,
> }
> assert(slot != -1);
> mem->mapped_ptr[slot].ptr = *mem_ptr;
> - mem->mapped_ptr[slot].v_ptr = *ptr;
> + mem->mapped_ptr[slot].v_ptr = ptr;
> mem->mapped_ptr[slot].size = size;
> mem->map_ref++;
> error:
> @@ -2270,10 +2266,6 @@ clEnqueueMapBuffer(cl_command_queue command_queue,
> goto error;
> }
>
> - err = _cl_map_mem(buffer, &ptr, &mem_ptr, offset, size);
> - if (err != CL_SUCCESS)
> - goto error;
> -
> TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, buffer->ctx);
>
> data = &no_wait_data;
> @@ -2282,12 +2274,25 @@ clEnqueueMapBuffer(cl_command_queue command_queue,
> data->offset = offset;
> data->size = size;
> data->ptr = ptr;
> + data->unsync_map = 1;
>
> if(handle_events(command_queue, num_events_in_wait_list, event_wait_list,
> event, data, CL_COMMAND_MAP_BUFFER) == CL_ENQUEUE_EXECUTE_IMM) {
> + data->unsync_map = 0;
> err = cl_enqueue_handle(event ? *event : NULL, data);
> + if (err != CL_SUCCESS)
> + goto error;
> + ptr = data->ptr;
> if(event) cl_event_set_status(*event, CL_COMPLETE);
> + } else {
> + if ((ptr = cl_mem_map_gtt_unsync(buffer)) == NULL) {
> + err = CL_MAP_FAILURE;
> + goto error;
> + }
> }
> + err = _cl_map_mem(buffer, ptr, &mem_ptr, offset, size);
> + if (err != CL_SUCCESS)
> + goto error;
>
> error:
> if (errcode_ret)
> @@ -2344,11 +2349,6 @@ clEnqueueMapImage(cl_command_queue command_queue,
> goto error;
> }
>
> - if (!(ptr = cl_mem_map_gtt_unsync(mem))) {
> - err = CL_MAP_FAILURE;
> - goto error;
> - }
> -
> size_t offset = image->bpp*origin[0] + image->row_pitch*origin[1] + image->slice_pitch*origin[2];
> size_t size;
> if(region[2] == 1) {
> @@ -2362,10 +2362,6 @@ clEnqueueMapImage(cl_command_queue command_queue,
> size += image->bpp * (origin[0] + region[0]);
> }
>
> - err = _cl_map_mem(mem, &ptr, &mem_ptr, offset, size);
> - if (err != CL_SUCCESS)
> - goto error;
> -
> TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, mem->ctx);
>
> data = &no_wait_data;
> @@ -2378,12 +2374,25 @@ clEnqueueMapImage(cl_command_queue command_queue,
> data->slice_pitch = *image_slice_pitch;
> data->ptr = ptr;
> data->offset = offset;
> + data->unsync_map = 1;
>
> if(handle_events(command_queue, num_events_in_wait_list, event_wait_list,
> event, data, CL_COMMAND_MAP_IMAGE) == CL_ENQUEUE_EXECUTE_IMM) {
> + data->unsync_map = 0;
> err = cl_enqueue_handle(event ? *event : NULL, data);
> + if (err != CL_SUCCESS)
> + goto error;
> + ptr = data->ptr;
> if(event) cl_event_set_status(*event, CL_COMPLETE);
> + } else {
> + if ((ptr = cl_mem_map_gtt_unsync(mem)) == NULL) {
> + err = CL_MAP_FAILURE;
> + goto error;
> + }
> }
> + err = _cl_map_mem(mem, ptr, &mem_ptr, offset, size);
> + if (err != CL_SUCCESS)
> + goto error;
>
> error:
> if (errcode_ret)
> diff --git a/src/cl_enqueue.c b/src/cl_enqueue.c
> index 330d230..800668d 100644
> --- a/src/cl_enqueue.c
> +++ b/src/cl_enqueue.c
> @@ -246,17 +246,21 @@ cl_int cl_enqueue_map_buffer(enqueue_data *data)
> mem->type == CL_MEM_SUBBUFFER_TYPE);
> struct _cl_mem_buffer* buffer = (struct _cl_mem_buffer*)mem;
>
> - //because using unsync map in clEnqueueMapBuffer, so force use map_gtt here
> - if (!(ptr = cl_mem_map_gtt(mem))) {
> + if(data->unsync_map == 1)
> + //because using unsync map in clEnqueueMapBuffer, so force use map_gtt here
> + ptr = cl_mem_map_gtt(mem);
> + else
> + ptr = cl_mem_map_auto(mem);
> +
> + if (ptr == NULL) {
> err = CL_MAP_FAILURE;
> goto error;
> }
> -
> - ptr = (char*)ptr + data->offset + buffer->sub_offset;
> - assert(data->ptr == ptr);
> + data->ptr = ptr;
>
> if(mem->flags & CL_MEM_USE_HOST_PTR) {
> assert(mem->host_ptr);
> + ptr = (char*)ptr + data->offset + buffer->sub_offset;
> memcpy(mem->host_ptr + data->offset, ptr, data->size);
> }
>
> @@ -271,12 +275,17 @@ cl_int cl_enqueue_map_image(enqueue_data *data)
> void *ptr = NULL;
> CHECK_IMAGE(mem, image);
>
> - if (!(ptr = cl_mem_map_gtt(mem))) {
> + if(data->unsync_map == 1)
> + //because using unsync map in clEnqueueMapBuffer, so force use map_gtt here
> + ptr = cl_mem_map_gtt(mem);
> + else
> + ptr = cl_mem_map_auto(mem);
> +
> + if (ptr == NULL) {
> err = CL_MAP_FAILURE;
> goto error;
> }
> -
> - assert(data->ptr == (char*)ptr + data->offset);
> + data->ptr = ptr;
>
> if(mem->flags & CL_MEM_USE_HOST_PTR) {
> assert(mem->host_ptr);
> @@ -323,7 +332,7 @@ cl_int cl_enqueue_unmap_mem_object(enqueue_data *data)
> assert(v_ptr == mapped_ptr);
> }
>
> - cl_mem_unmap_gtt(memobj);
> + cl_mem_unmap_auto(memobj);
>
> /* shrink the mapped slot. */
> if (memobj->mapped_ptr_sz/2 > memobj->map_ref) {
> diff --git a/src/cl_enqueue.h b/src/cl_enqueue.h
> index 1d3ae5f..c7e33da 100644
> --- a/src/cl_enqueue.h
> +++ b/src/cl_enqueue.h
> @@ -60,6 +60,7 @@ typedef struct _enqueue_data {
> const void * const_ptr; /* Const ptr for memory read */
> void * ptr; /* Ptr for write and return value */
> const cl_mem* mem_list; /* mem_list of clEnqueueNativeKernel */
> + uint8_t unsync_map; /* Indicate the clEnqueueMapBuffer/Image is unsync map */
> void (*user_func)(void *); /* pointer to a host-callable user function */
> } enqueue_data;
>
> diff --git a/src/cl_mem.c b/src/cl_mem.c
> index 6b8ca7c..7092385 100644
> --- a/src/cl_mem.c
> +++ b/src/cl_mem.c
> @@ -48,6 +48,8 @@
> #define CL_MEM_OBJECT_IMAGE2D 0x10F1
> #define CL_MEM_OBJECT_IMAGE3D 0x10F2
>
> +#define MAX_TILING_SIZE 128 * MB
> +
> static cl_mem_object_type
> cl_get_mem_object_type(cl_mem mem)
> {
> @@ -622,6 +624,15 @@ _cl_mem_new_image(cl_context ctx,
>
> sz = aligned_pitch * aligned_h * depth;
>
> +  /* If sz is larger than 128MB, map gtt may fail on some systems.
> +     Because there is no obvious performance drop, disable tiling. */
> + if(tiling != CL_NO_TILE && sz > MAX_TILING_SIZE) {
> + tiling = CL_NO_TILE;
> + aligned_pitch = w * bpp;
> + aligned_h = h;
> + sz = aligned_pitch * aligned_h * depth;
> + }
> +
> mem = cl_mem_allocate(CL_MEM_IMAGE_TYPE, ctx, flags, sz, tiling != CL_NO_TILE, &err);
> if (mem == NULL || err != CL_SUCCESS)
> goto error;
> @@ -714,7 +725,7 @@ cl_mem_delete(cl_mem mem)
> for(i=0; i<mem->mapped_ptr_sz; i++) {
> if(mem->mapped_ptr[i].ptr != NULL) {
> mem->map_ref--;
> - cl_mem_unmap_gtt(mem);
> + cl_mem_unmap_auto(mem);
> }
> }
> assert(mem->map_ref == 0);
> @@ -1326,6 +1337,7 @@ cl_mem_map_gtt(cl_mem mem)
> {
> cl_buffer_map_gtt(mem->bo);
> assert(cl_buffer_get_virtual(mem->bo));
> + mem->mapped_gtt = 1;
> return cl_buffer_get_virtual(mem->bo);
> }
>
> @@ -1356,8 +1368,10 @@ cl_mem_map_auto(cl_mem mem)
> LOCAL cl_int
> cl_mem_unmap_auto(cl_mem mem)
> {
> - if (IS_IMAGE(mem) && cl_mem_image(mem)->tiling != CL_NO_TILE)
> + if (mem->mapped_gtt == 1) {
> cl_buffer_unmap_gtt(mem->bo);
> + mem->mapped_gtt = 0;
> + }
> else
> cl_buffer_unmap(mem->bo);
> return CL_SUCCESS;
> diff --git a/src/cl_mem.h b/src/cl_mem.h
> index 47a30dc..5719c60 100644
> --- a/src/cl_mem.h
> +++ b/src/cl_mem.h
> @@ -87,6 +87,7 @@ typedef struct _cl_mem {
> cl_mapped_ptr* mapped_ptr;/* Store the mapped addresses and size by caller. */
> int mapped_ptr_sz; /* The array size of mapped_ptr. */
> int map_ref; /* The mapped count. */
> + uint8_t mapped_gtt; /* This object has mapped gtt, for unmap. */
> cl_mem_dstr_cb *dstr_cb; /* The destroy callback. */
> } _cl_mem;
>
> --
> 1.9.1
>
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list