[Beignet] [patch v2 2/3] enable create image 2d from buffer in clCreateImage.

Matt Turner mattst88 at gmail.com
Fri Aug 28 14:11:09 PDT 2015


On Fri, Aug 28, 2015 at 12:52 AM,  <xionghu.luo at intel.com> wrote:
> From: Luo Xionghu <xionghu.luo at intel.com>
>
> this patch allows create 2d image with a cl buffer with zero copy.
> v2: should use reference to manage the release the buffer and image.
> After being created, the buffer reference count is 2, and image reference
> count is 1.
> if image is released first, decrease the image reference count and
> buffer reference count both, release the bo when the buffer is released
> at last;
> if buffer is released first, decrease the buffer reference count only,
> release the buffer when the image is released.
> add CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT in cl_device_info.
>
> Signed-off-by: Luo Xionghu <xionghu.luo at intel.com>
> ---
>  src/cl_api.c        |   3 +-
>  src/cl_device_id.c  |   2 +
>  src/cl_device_id.h  |   2 +
>  src/cl_extensions.c |   2 +
>  src/cl_gt_device.h  |   3 +-
>  src/cl_mem.c        | 109 +++++++++++++++++++++++++++++++++++++++-------------
>  src/cl_mem.h        |   1 +
>  7 files changed, 93 insertions(+), 29 deletions(-)
>
> diff --git a/src/cl_api.c b/src/cl_api.c
> index 5c9b250..0690af4 100644
> --- a/src/cl_api.c
> +++ b/src/cl_api.c
> @@ -549,8 +549,9 @@ clCreateImage(cl_context context,
>      goto error;
>    }
>    /* buffer refers to a valid buffer memory object if image_type is
> -     CL_MEM_OBJECT_IMAGE1D_BUFFER. Otherwise it must be NULL. */
> +     CL_MEM_OBJECT_IMAGE1D_BUFFER or CL_MEM_OBJECT_IMAGE2D. Otherwise it must be NULL. */
>    if (image_desc->image_type != CL_MEM_OBJECT_IMAGE1D_BUFFER &&
> +      image_desc->image_type != CL_MEM_OBJECT_IMAGE2D &&
>           image_desc->buffer) {
>      err = CL_INVALID_IMAGE_DESCRIPTOR;
>      goto error;
> diff --git a/src/cl_device_id.c b/src/cl_device_id.c
> index 1778292..78d2cf4 100644
> --- a/src/cl_device_id.c
> +++ b/src/cl_device_id.c
> @@ -810,6 +810,8 @@ cl_get_device_info(cl_device_id     device,
>      DECL_FIELD(PARTITION_AFFINITY_DOMAIN, affinity_domain)
>      DECL_FIELD(PARTITION_TYPE, partition_type)
>      DECL_FIELD(REFERENCE_COUNT, device_reference_count)
> +    DECL_FIELD(IMAGE_PITCH_ALIGNMENT, image_pitch_alignment)
> +    DECL_FIELD(IMAGE_BASE_ADDRESS_ALIGNMENT, image_base_address_alignment)
>
>      case CL_DRIVER_VERSION:
>        if (param_value_size_ret) {
> diff --git a/src/cl_device_id.h b/src/cl_device_id.h
> index b5db91c..02d1e0f 100644
> --- a/src/cl_device_id.h
> +++ b/src/cl_device_id.h
> @@ -116,6 +116,8 @@ struct _cl_device_id {
>    cl_device_partition_property partition_type[3];
>    cl_uint      device_reference_count;
>    uint32_t atomic_test_result;
> +  uint32_t image_pitch_alignment;
> +  uint32_t image_base_address_alignment;
>  };
>
>  /* Get a device from the given platform */
> diff --git a/src/cl_extensions.c b/src/cl_extensions.c
> index 3eb303f..6cb1579 100644
> --- a/src/cl_extensions.c
> +++ b/src/cl_extensions.c
> @@ -46,6 +46,8 @@ void check_opt1_extension(cl_extensions_t *extensions)
>      if (id == EXT_ID(khr_spir))
>        extensions->extensions[id].base.ext_enabled = 1;
>  #endif
> +    if (id == EXT_ID(khr_image2d_from_buffer))
> +      extensions->extensions[id].base.ext_enabled = 1;
>    }
>  }
>
> diff --git a/src/cl_gt_device.h b/src/cl_gt_device.h
> index a51843d..c2f9f56 100644
> --- a/src/cl_gt_device.h
> +++ b/src/cl_gt_device.h
> @@ -126,4 +126,5 @@ DECL_INFO_STRING(driver_version, LIBCL_DRIVER_VERSION_STRING)
>  .affinity_domain = 0,
>  .partition_type = {0},
>  .device_reference_count = 1,
> -
> +.image_pitch_alignment = 1,
> +.image_base_address_alignment = 4096,
> diff --git a/src/cl_mem.c b/src/cl_mem.c
> index b5671bd..bb065f5 100644
> --- a/src/cl_mem.c
> +++ b/src/cl_mem.c
> @@ -264,6 +264,7 @@ cl_mem_allocate(enum cl_mem_type type,
>    SET_ICD(mem->dispatch)
>    mem->ref_n = 1;
>    mem->magic = CL_MAGIC_MEM_HEADER;
> +  mem->is_image_from_buffer = 0;
>    mem->flags = flags;
>    mem->is_userptr = 0;
>    mem->offset = 0;
> @@ -308,10 +309,19 @@ cl_mem_allocate(enum cl_mem_type type,
>        }
>      }
>
> -    if (!mem->is_userptr)
> +    if(type == CL_MEM_IMAGE_TYPE && host_ptr && ((cl_mem)host_ptr)->magic == CL_MAGIC_MEM_HEADER) {
> +      // if the image if created from buffer, should use the bo directly to share same bo.
> +      mem->bo = ((cl_mem)host_ptr)->bo;
> +      mem->is_image_from_buffer = 1;
> +    } else  if (!mem->is_userptr)
>        mem->bo = cl_buffer_alloc(bufmgr, "CL memory object", sz, alignment);
>  #else
> -    mem->bo = cl_buffer_alloc(bufmgr, "CL memory object", sz, alignment);
> +    if(type == CL_MEM_IMAGE_TYPE && host_ptr && ((cl_mem)host_ptr)->magic == CL_MAGIC_MEM_HEADER) {
> +      // if the image if created from buffer, should use the bo directly to share same bo.
> +      mem->bo = ((cl_mem)host_ptr)->bo;
> +      mem->is_image_from_buffer = 1;
> +    } else
> +      mem->bo = cl_buffer_alloc(bufmgr, "CL memory object", sz, alignment);
>  #endif
>
>      if (UNLIKELY(mem->bo == NULL)) {
> @@ -756,6 +766,8 @@ _cl_mem_new_image(cl_context ctx,
>        h = (w + ctx->device->image2d_max_width - 1) / ctx->device->image2d_max_width;
>        w = w > ctx->device->image2d_max_width ? ctx->device->image2d_max_width : w;
>        tiling = CL_NO_TILE;
> +    } else if(image_type == CL_MEM_OBJECT_IMAGE2D && data && ((cl_mem)data)->magic == CL_MAGIC_MEM_HEADER) {
> +      tiling = CL_NO_TILE;
>      } else if (cl_driver_get_ver(ctx->drv) != 6) {
>        /* Pick up tiling mode (we do only linear on SNB) */
>        tiling = cl_get_default_tiling(ctx->drv);
> @@ -804,7 +816,10 @@ _cl_mem_new_image(cl_context ctx,
>    /* Tiling requires to align both pitch and height */
>    if (tiling == CL_NO_TILE) {
>      aligned_pitch = w * bpp;
> -    aligned_h  = ALIGN(h, cl_buffer_get_tiling_align(ctx, CL_NO_TILE, 1));
> +    if(image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY || image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY)
> +      aligned_h  = ALIGN(h, cl_buffer_get_tiling_align(ctx, CL_NO_TILE, 1));
> +    else
> +      aligned_h = h;
>    } else if (tiling == CL_TILE_X) {
>      aligned_pitch = ALIGN(w * bpp, cl_buffer_get_tiling_align(ctx, CL_TILE_X, 0));
>      aligned_h     = ALIGN(h, cl_buffer_get_tiling_align(ctx, CL_TILE_X, 1));
> @@ -814,6 +829,11 @@ _cl_mem_new_image(cl_context ctx,
>    }
>
>    sz = aligned_pitch * aligned_h * depth;
> +  if(image_type == CL_MEM_OBJECT_IMAGE2D && data && ((cl_mem)data)->magic == CL_MAGIC_MEM_HEADER)  {
> +    //image 2d created from buffer: the buffer sz maybe larger than the image 2d.
> +    if( ((cl_mem)data)->size > sz)
> +      sz = ((cl_mem)data)->size;
> +  }
>
>    /* If sz is large than 128MB, map gtt may fail in some system.
>       Because there is no obviours performance drop, disable tiling. */
> @@ -825,7 +845,7 @@ _cl_mem_new_image(cl_context ctx,
>    }
>
>    if (image_type != CL_MEM_OBJECT_IMAGE1D_BUFFER)
> -    mem = cl_mem_allocate(CL_MEM_IMAGE_TYPE, ctx, flags, sz, tiling != CL_NO_TILE, NULL, &err);
> +    mem = cl_mem_allocate(CL_MEM_IMAGE_TYPE, ctx, flags, sz, tiling != CL_NO_TILE, data, &err);
>    else {
>      mem = cl_mem_allocate(CL_MEM_BUFFER1D_IMAGE_TYPE, ctx, flags, sz, tiling != CL_NO_TILE, NULL, &err);
>      if (mem != NULL && err == CL_SUCCESS) {
> @@ -837,7 +857,11 @@ _cl_mem_new_image(cl_context ctx,
>    if (mem == NULL || err != CL_SUCCESS)
>      goto error;
>
> -  cl_buffer_set_tiling(mem->bo, tiling, aligned_pitch);
> +  if(!(image_type == CL_MEM_OBJECT_IMAGE2D && data && ((cl_mem)data)->magic == CL_MAGIC_MEM_HEADER))  {
> +    //no need set tiling if image 2d created from buffer since share same bo.
> +    cl_buffer_set_tiling(mem->bo, tiling, aligned_pitch);
> +  }
> +
>    if (image_type == CL_MEM_OBJECT_IMAGE1D ||
>        image_type == CL_MEM_OBJECT_IMAGE2D ||
>        image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER)
> @@ -971,33 +995,51 @@ _cl_mem_new_image_from_buffer(cl_context ctx,
>    if (UNLIKELY((err = cl_image_byte_per_pixel(image_format, &bpp)) != CL_SUCCESS))
>      goto error;
>
> -  // Per bspec, a image should has a at least 2 line vertical alignment,
> -  // thus we can't simply attach a buffer to a 1d image surface which has the same size.
> -  // We have to create a new image, and copy the buffer data to this new image.
> -  // And replace all the buffer object's reference to this image.
> -  image = _cl_mem_new_image(ctx, flags, image_format, image_desc->image_type,
> +  if(image_desc->image_type == CL_MEM_OBJECT_IMAGE2D) {
> +    image = _cl_mem_new_image(ctx, flags, image_format, image_desc->image_type,
> +                             image_desc->image_width, image_desc->image_height, image_desc->image_depth,
> +                             image_desc->image_row_pitch, image_desc->image_slice_pitch,
> +                             image_desc->buffer, errcode_ret);
> +  } else if (image_desc->image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER) {
> +    // Per bspec, a image should has a at least 2 line vertical alignment,
> +    // thus we can't simply attach a buffer to a 1d image surface which has the same size.
> +    // We have to create a new image, and copy the buffer data to this new image.
> +    // And replace all the buffer object's reference to this image.
> +    image = _cl_mem_new_image(ctx, flags, image_format, image_desc->image_type,
>                      mem_buffer->base.size / bpp, 0, 0, 0, 0, NULL, errcode_ret);
> +  }
> +  else
> +    assert(0);
> +
>    if (image == NULL)
>      return NULL;
> -  void *src = cl_mem_map(buffer, 0);
> -  void *dst = cl_mem_map(image, 1);
> -  //
> -  // FIXME, we could use copy buffer to image to do this on GPU latter.
> -  // currently the copy buffer to image function doesn't support 1D image.
> -  //
> -  // There is a potential risk that this buffer was mapped and the caller
> -  // still hold the pointer and want to access it again. This scenario is
> -  // not explicitly forbidden in the spec, although it should not be permitted.
> -  memcpy(dst, src, mem_buffer->base.size);
> -  cl_mem_unmap(buffer);
> -  cl_mem_unmap(image);
> +
> +  if(image_desc->image_type == CL_MEM_OBJECT_IMAGE2D)  {
> +    //no need copy since the image 2d and buffer share same bo.
> +  }
> +  else if (image_desc->image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER)  {
> +    // FIXME, we could use copy buffer to image to do this on GPU latter.
> +    // currently the copy buffer to image function doesn't support 1D image.
> +    //
> +    // There is a potential risk that this buffer was mapped and the caller
> +    // still hold the pointer and want to access it again. This scenario is
> +    // not explicitly forbidden in the spec, although it should not be permitted.
> +    void *src = cl_mem_map(buffer, 0);
> +    void *dst = cl_mem_map(image, 1);
> +    memcpy(dst, src, mem_buffer->base.size);
> +    cl_mem_unmap(image);
> +    cl_mem_unmap(buffer);
> +  }
> +  else
> +    assert(0);
>
>    if (err != 0)
>      goto error;
>
>    // Now replace buffer's bo to this new bo, need to take care of sub buffer
>    // case.
> -  cl_mem_replace_buffer(buffer, image->bo);
> +  if (image_desc->image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER)
> +    cl_mem_replace_buffer(buffer, image->bo);
>    /* Now point to the right offset if buffer is a SUB_BUFFER. */
>    if (buffer->flags & CL_MEM_USE_HOST_PTR)
>      image->host_ptr = buffer->host_ptr + offset;
> @@ -1025,12 +1067,20 @@ cl_mem_new_image(cl_context context,
>  {
>    switch (image_desc->image_type) {
>    case CL_MEM_OBJECT_IMAGE1D:
> -  case CL_MEM_OBJECT_IMAGE2D:
>    case CL_MEM_OBJECT_IMAGE3D:
>      return _cl_mem_new_image(context, flags, image_format, image_desc->image_type,
>                               image_desc->image_width, image_desc->image_height, image_desc->image_depth,
>                               image_desc->image_row_pitch, image_desc->image_slice_pitch,
>                               host_ptr, errcode_ret);
> +  case CL_MEM_OBJECT_IMAGE2D:
> +    if(image_desc->buffer)
> +      return _cl_mem_new_image_from_buffer(context, flags, image_format,
> +                             image_desc, errcode_ret);
> +    else
> +      return _cl_mem_new_image(context, flags, image_format, image_desc->image_type,
> +                             image_desc->image_width, image_desc->image_height, image_desc->image_depth,
> +                             image_desc->image_row_pitch, image_desc->image_slice_pitch,
> +                             host_ptr, errcode_ret);
>    case CL_MEM_OBJECT_IMAGE1D_ARRAY:
>    case CL_MEM_OBJECT_IMAGE2D_ARRAY:
>      return _cl_mem_new_image(context, flags, image_format, image_desc->image_type,
> @@ -1065,9 +1115,14 @@ cl_mem_delete(cl_mem mem)
>    /* iff we are a image, delete the 1d buffer if has. */
>    if (IS_IMAGE(mem)) {
>      if (cl_mem_image(mem)->buffer_1d) {
> -      assert(cl_mem_image(mem)->image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER);
> -      cl_mem_delete(cl_mem_image(mem)->buffer_1d);
> -      cl_mem_image(mem)->buffer_1d = NULL;
> +      assert(cl_mem_image(mem)->image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER ||
> +          cl_mem_image(mem)->image_type == CL_MEM_OBJECT_IMAGE2D);
> +        cl_mem_delete(cl_mem_image(mem)->buffer_1d);
> +        if(cl_mem_image(mem)->image_type == CL_MEM_OBJECT_IMAGE2D && mem->is_image_from_buffer == 1)
> +        {
> +          cl_mem_image(mem)->buffer_1d = NULL;
> +          mem->bo = NULL;
> +        }

Indentation looks wrong here.


More information about the Beignet mailing list