[Beignet] [PATCH] Fix map gtt fail when memory object size is too large.

Zhigang Gong zhigang.gong at linux.intel.com
Wed May 21 22:45:36 PDT 2014


LGTM, will push latter. Thanks.

On Tue, May 20, 2014 at 10:46:19AM +0800, Yang Rong wrote:
> After max allocate size is changed to 256M, the large memory object would map gtt
> fail  in some system. So when image size is large then 128M, disable tiling, and
> used normal map. But in function clEnqueueMapBuffer/Image, may still fail because
> unsync map.
> 
> Signed-off-by: Yang Rong <rong.r.yang at intel.com>
> ---
>  src/cl_api.c     | 55 ++++++++++++++++++++++++++++++++-----------------------
>  src/cl_enqueue.c | 27 ++++++++++++++++++---------
>  src/cl_enqueue.h |  1 +
>  src/cl_mem.c     | 18 ++++++++++++++++--
>  src/cl_mem.h     |  1 +
>  5 files changed, 68 insertions(+), 34 deletions(-)
> 
> diff --git a/src/cl_api.c b/src/cl_api.c
> index 03a1cda..0800c09 100644
> --- a/src/cl_api.c
> +++ b/src/cl_api.c
> @@ -2161,7 +2161,7 @@ error:
>    return err;
>  }
>  
> -static cl_int _cl_map_mem(cl_mem mem, void **ptr, void **mem_ptr, size_t offset, size_t size)
> +static cl_int _cl_map_mem(cl_mem mem, void *ptr, void **mem_ptr, size_t offset, size_t size)
>  {
>    cl_int slot = -1;
>    int err = CL_SUCCESS;
> @@ -2172,17 +2172,13 @@ static cl_int _cl_map_mem(cl_mem mem, void **ptr, void **mem_ptr, size_t offset,
>      sub_offset = buffer->sub_offset;
>    }
>  
> -  if (!(*ptr = cl_mem_map_gtt_unsync(mem))) {
> -    err = CL_MAP_FAILURE;
> -    goto error;
> -  }
> -  *ptr = (char*)(*ptr) + offset + sub_offset;
> +  ptr = (char*)ptr + offset + sub_offset;
>    if(mem->flags & CL_MEM_USE_HOST_PTR) {
>      assert(mem->host_ptr);
>      //only calc ptr here, will do memcpy in enqueue
>      *mem_ptr = mem->host_ptr + offset + sub_offset;
>    } else {
> -    *mem_ptr = *ptr;
> +    *mem_ptr = ptr;
>    }
>    /* Record the mapped address. */
>    if (!mem->mapped_ptr_sz) {
> @@ -2190,7 +2186,7 @@ static cl_int _cl_map_mem(cl_mem mem, void **ptr, void **mem_ptr, size_t offset,
>      mem->mapped_ptr = (cl_mapped_ptr *)malloc(
>            sizeof(cl_mapped_ptr) * mem->mapped_ptr_sz);
>      if (!mem->mapped_ptr) {
> -      cl_mem_unmap_gtt(mem);
> +      cl_mem_unmap_auto(mem);
>        err = CL_OUT_OF_HOST_MEMORY;
>        goto error;
>      }
> @@ -2208,7 +2204,7 @@ static cl_int _cl_map_mem(cl_mem mem, void **ptr, void **mem_ptr, size_t offset,
>        cl_mapped_ptr *new_ptr = (cl_mapped_ptr *)malloc(
>            sizeof(cl_mapped_ptr) * mem->mapped_ptr_sz * 2);
>        if (!new_ptr) {
> -        cl_mem_unmap_gtt (mem);
> +        cl_mem_unmap_auto(mem);
>          err = CL_OUT_OF_HOST_MEMORY;
>          goto error;
>        }
> @@ -2223,7 +2219,7 @@ static cl_int _cl_map_mem(cl_mem mem, void **ptr, void **mem_ptr, size_t offset,
>    }
>    assert(slot != -1);
>    mem->mapped_ptr[slot].ptr = *mem_ptr;
> -  mem->mapped_ptr[slot].v_ptr = *ptr;
> +  mem->mapped_ptr[slot].v_ptr = ptr;
>    mem->mapped_ptr[slot].size = size;
>    mem->map_ref++;
>  error:
> @@ -2270,10 +2266,6 @@ clEnqueueMapBuffer(cl_command_queue  command_queue,
>      goto error;
>    }
>  
> -  err = _cl_map_mem(buffer, &ptr, &mem_ptr, offset, size);
> -  if (err != CL_SUCCESS)
> -    goto error;
> -
>    TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, buffer->ctx);
>  
>    data = &no_wait_data;
> @@ -2282,12 +2274,25 @@ clEnqueueMapBuffer(cl_command_queue  command_queue,
>    data->offset      = offset;
>    data->size        = size;
>    data->ptr         = ptr;
> +  data->unsync_map  = 1;
>  
>    if(handle_events(command_queue, num_events_in_wait_list, event_wait_list,
>                     event, data, CL_COMMAND_MAP_BUFFER) == CL_ENQUEUE_EXECUTE_IMM) {
> +    data->unsync_map = 0;
>      err = cl_enqueue_handle(event ? *event : NULL, data);
> +    if (err != CL_SUCCESS)
> +      goto error;
> +    ptr = data->ptr;
>      if(event) cl_event_set_status(*event, CL_COMPLETE);
> +  } else {
> +    if ((ptr = cl_mem_map_gtt_unsync(buffer)) == NULL) {
> +      err = CL_MAP_FAILURE;
> +      goto error;
> +    }
>    }
> +  err = _cl_map_mem(buffer, ptr, &mem_ptr, offset, size);
> +  if (err != CL_SUCCESS)
> +    goto error;
>  
>  error:
>    if (errcode_ret)
> @@ -2344,11 +2349,6 @@ clEnqueueMapImage(cl_command_queue   command_queue,
>      goto error;
>    }
>  
> -  if (!(ptr = cl_mem_map_gtt_unsync(mem))) {
> -    err = CL_MAP_FAILURE;
> -    goto error;
> -  }
> -
>    size_t offset = image->bpp*origin[0] + image->row_pitch*origin[1] + image->slice_pitch*origin[2];
>    size_t size;
>    if(region[2] == 1) {
> @@ -2362,10 +2362,6 @@ clEnqueueMapImage(cl_command_queue   command_queue,
>      size += image->bpp * (origin[0] + region[0]);
>    }
>  
> -  err = _cl_map_mem(mem, &ptr, &mem_ptr, offset, size);
> -  if (err != CL_SUCCESS)
> -    goto error;
> -
>    TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, mem->ctx);
>  
>    data = &no_wait_data;
> @@ -2378,12 +2374,25 @@ clEnqueueMapImage(cl_command_queue   command_queue,
>      data->slice_pitch = *image_slice_pitch;
>    data->ptr         = ptr;
>    data->offset      = offset;
> +  data->unsync_map  = 1;
>  
>    if(handle_events(command_queue, num_events_in_wait_list, event_wait_list,
>                     event, data, CL_COMMAND_MAP_IMAGE) == CL_ENQUEUE_EXECUTE_IMM) {
> +    data->unsync_map = 0;
>      err = cl_enqueue_handle(event ? *event : NULL, data);
> +    if (err != CL_SUCCESS)
> +      goto error;
> +    ptr = data->ptr;
>      if(event) cl_event_set_status(*event, CL_COMPLETE);
> +  } else {
> +    if ((ptr = cl_mem_map_gtt_unsync(mem)) == NULL) {
> +      err = CL_MAP_FAILURE;
> +      goto error;
> +    }
>    }
> +  err = _cl_map_mem(mem, ptr, &mem_ptr, offset, size);
> +  if (err != CL_SUCCESS)
> +    goto error;
>  
>  error:
>    if (errcode_ret)
> diff --git a/src/cl_enqueue.c b/src/cl_enqueue.c
> index 330d230..800668d 100644
> --- a/src/cl_enqueue.c
> +++ b/src/cl_enqueue.c
> @@ -246,17 +246,21 @@ cl_int cl_enqueue_map_buffer(enqueue_data *data)
>           mem->type == CL_MEM_SUBBUFFER_TYPE);
>    struct _cl_mem_buffer* buffer = (struct _cl_mem_buffer*)mem;
>  
> -  //because using unsync map in clEnqueueMapBuffer, so force use map_gtt here
> -  if (!(ptr = cl_mem_map_gtt(mem))) {
> +  if(data->unsync_map == 1)
> +    //because using unsync map in clEnqueueMapBuffer, so force use map_gtt here
> +    ptr = cl_mem_map_gtt(mem);
> +  else
> +    ptr = cl_mem_map_auto(mem);
> +
> +  if (ptr == NULL) {
>      err = CL_MAP_FAILURE;
>      goto error;
>    }
> -
> -  ptr = (char*)ptr + data->offset + buffer->sub_offset;
> -  assert(data->ptr == ptr);
> +  data->ptr = ptr;
>  
>    if(mem->flags & CL_MEM_USE_HOST_PTR) {
>      assert(mem->host_ptr);
> +    ptr = (char*)ptr + data->offset + buffer->sub_offset;
>      memcpy(mem->host_ptr + data->offset, ptr, data->size);
>    }
>  
> @@ -271,12 +275,17 @@ cl_int cl_enqueue_map_image(enqueue_data *data)
>    void *ptr = NULL;
>    CHECK_IMAGE(mem, image);
>  
> -  if (!(ptr = cl_mem_map_gtt(mem))) {
> +  if(data->unsync_map == 1)
> +    //because using unsync map in clEnqueueMapBuffer, so force use map_gtt here
> +    ptr = cl_mem_map_gtt(mem);
> +  else
> +    ptr = cl_mem_map_auto(mem);
> +
> +  if (ptr == NULL) {
>      err = CL_MAP_FAILURE;
>      goto error;
>    }
> -
> -  assert(data->ptr == (char*)ptr + data->offset);
> +  data->ptr = ptr;
>  
>    if(mem->flags & CL_MEM_USE_HOST_PTR) {
>      assert(mem->host_ptr);
> @@ -323,7 +332,7 @@ cl_int cl_enqueue_unmap_mem_object(enqueue_data *data)
>      assert(v_ptr == mapped_ptr);
>    }
>  
> -  cl_mem_unmap_gtt(memobj);
> +  cl_mem_unmap_auto(memobj);
>  
>    /* shrink the mapped slot. */
>    if (memobj->mapped_ptr_sz/2 > memobj->map_ref) {
> diff --git a/src/cl_enqueue.h b/src/cl_enqueue.h
> index 1d3ae5f..c7e33da 100644
> --- a/src/cl_enqueue.h
> +++ b/src/cl_enqueue.h
> @@ -60,6 +60,7 @@ typedef struct _enqueue_data {
>    const void *      const_ptr;        /* Const ptr for memory read */
>    void *            ptr;              /* Ptr for write and return value */
>    const cl_mem*     mem_list;         /* mem_list of clEnqueueNativeKernel */
> +  uint8_t           unsync_map;       /* Indicate the clEnqueueMapBuffer/Image is unsync map */
>    void (*user_func)(void *);          /* pointer to a host-callable user function */
>  } enqueue_data;
>  
> diff --git a/src/cl_mem.c b/src/cl_mem.c
> index 6b8ca7c..7092385 100644
> --- a/src/cl_mem.c
> +++ b/src/cl_mem.c
> @@ -48,6 +48,8 @@
>  #define CL_MEM_OBJECT_IMAGE2D                       0x10F1
>  #define CL_MEM_OBJECT_IMAGE3D                       0x10F2
>  
> +#define MAX_TILING_SIZE                             128 * MB
> +
>  static cl_mem_object_type
>  cl_get_mem_object_type(cl_mem mem)
>  {
> @@ -622,6 +624,15 @@ _cl_mem_new_image(cl_context ctx,
>  
>    sz = aligned_pitch * aligned_h * depth;
>  
> +  /* If sz is large than 128MB, map gtt may fail in some system.
> +     Because there is no obviours performance drop, disable tiling. */
> +  if(tiling != CL_NO_TILE && sz > MAX_TILING_SIZE) {
> +    tiling = CL_NO_TILE;
> +    aligned_pitch = w * bpp;
> +    aligned_h     = h;
> +    sz = aligned_pitch * aligned_h * depth;
> +  }
> +
>    mem = cl_mem_allocate(CL_MEM_IMAGE_TYPE, ctx, flags, sz, tiling != CL_NO_TILE, &err);
>    if (mem == NULL || err != CL_SUCCESS)
>      goto error;
> @@ -714,7 +725,7 @@ cl_mem_delete(cl_mem mem)
>      for(i=0; i<mem->mapped_ptr_sz; i++) {
>        if(mem->mapped_ptr[i].ptr != NULL) {
>          mem->map_ref--;
> -        cl_mem_unmap_gtt(mem);
> +        cl_mem_unmap_auto(mem);
>        }
>      }
>      assert(mem->map_ref == 0);
> @@ -1326,6 +1337,7 @@ cl_mem_map_gtt(cl_mem mem)
>  {
>    cl_buffer_map_gtt(mem->bo);
>    assert(cl_buffer_get_virtual(mem->bo));
> +  mem->mapped_gtt = 1;
>    return cl_buffer_get_virtual(mem->bo);
>  }
>  
> @@ -1356,8 +1368,10 @@ cl_mem_map_auto(cl_mem mem)
>  LOCAL cl_int
>  cl_mem_unmap_auto(cl_mem mem)
>  {
> -  if (IS_IMAGE(mem) && cl_mem_image(mem)->tiling != CL_NO_TILE)
> +  if (mem->mapped_gtt == 1) {
>      cl_buffer_unmap_gtt(mem->bo);
> +    mem->mapped_gtt = 0;
> +  }
>    else
>      cl_buffer_unmap(mem->bo);
>    return CL_SUCCESS;
> diff --git a/src/cl_mem.h b/src/cl_mem.h
> index 47a30dc..5719c60 100644
> --- a/src/cl_mem.h
> +++ b/src/cl_mem.h
> @@ -87,6 +87,7 @@ typedef  struct _cl_mem {
>    cl_mapped_ptr* mapped_ptr;/* Store the mapped addresses and size by caller. */
>    int mapped_ptr_sz;        /* The array size of mapped_ptr. */
>    int map_ref;              /* The mapped count. */
> +  uint8_t mapped_gtt;       /* This object has mapped gtt, for unmap. */
>    cl_mem_dstr_cb *dstr_cb;  /* The destroy callback. */
>  } _cl_mem;
>  
> -- 
> 1.9.1
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list