[Beignet] [patch v2 2/3] enable create image 2d from buffer in clCreateImage.
Matt Turner
mattst88 at gmail.com
Fri Aug 28 14:11:09 PDT 2015
On Fri, Aug 28, 2015 at 12:52 AM, <xionghu.luo at intel.com> wrote:
> From: Luo Xionghu <xionghu.luo at intel.com>
>
> this patch allows create 2d image with a cl buffer with zero copy.
> v2: should use reference to manage the release the buffer and image.
> After being created, the buffer reference count is 2, and image reference
> count is 1.
> if image is released first, decrease the image reference count and
> buffer reference count both, release the bo when the buffer is released
> at last;
> if buffer is released first, decrease the buffer reference count only,
> release the buffer when the image is released.
> add CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT in cl_device_info.
>
> Signed-off-by: Luo Xionghu <xionghu.luo at intel.com>
> ---
> src/cl_api.c | 3 +-
> src/cl_device_id.c | 2 +
> src/cl_device_id.h | 2 +
> src/cl_extensions.c | 2 +
> src/cl_gt_device.h | 3 +-
> src/cl_mem.c | 109 +++++++++++++++++++++++++++++++++++++++-------------
> src/cl_mem.h | 1 +
> 7 files changed, 93 insertions(+), 29 deletions(-)
>
> diff --git a/src/cl_api.c b/src/cl_api.c
> index 5c9b250..0690af4 100644
> --- a/src/cl_api.c
> +++ b/src/cl_api.c
> @@ -549,8 +549,9 @@ clCreateImage(cl_context context,
> goto error;
> }
> /* buffer refers to a valid buffer memory object if image_type is
> - CL_MEM_OBJECT_IMAGE1D_BUFFER. Otherwise it must be NULL. */
> + CL_MEM_OBJECT_IMAGE1D_BUFFER or CL_MEM_OBJECT_IMAGE2D. Otherwise it must be NULL. */
> if (image_desc->image_type != CL_MEM_OBJECT_IMAGE1D_BUFFER &&
> + image_desc->image_type != CL_MEM_OBJECT_IMAGE2D &&
> image_desc->buffer) {
> err = CL_INVALID_IMAGE_DESCRIPTOR;
> goto error;
> diff --git a/src/cl_device_id.c b/src/cl_device_id.c
> index 1778292..78d2cf4 100644
> --- a/src/cl_device_id.c
> +++ b/src/cl_device_id.c
> @@ -810,6 +810,8 @@ cl_get_device_info(cl_device_id device,
> DECL_FIELD(PARTITION_AFFINITY_DOMAIN, affinity_domain)
> DECL_FIELD(PARTITION_TYPE, partition_type)
> DECL_FIELD(REFERENCE_COUNT, device_reference_count)
> + DECL_FIELD(IMAGE_PITCH_ALIGNMENT, image_pitch_alignment)
> + DECL_FIELD(IMAGE_BASE_ADDRESS_ALIGNMENT, image_base_address_alignment)
>
> case CL_DRIVER_VERSION:
> if (param_value_size_ret) {
> diff --git a/src/cl_device_id.h b/src/cl_device_id.h
> index b5db91c..02d1e0f 100644
> --- a/src/cl_device_id.h
> +++ b/src/cl_device_id.h
> @@ -116,6 +116,8 @@ struct _cl_device_id {
> cl_device_partition_property partition_type[3];
> cl_uint device_reference_count;
> uint32_t atomic_test_result;
> + uint32_t image_pitch_alignment;
> + uint32_t image_base_address_alignment;
> };
>
> /* Get a device from the given platform */
> diff --git a/src/cl_extensions.c b/src/cl_extensions.c
> index 3eb303f..6cb1579 100644
> --- a/src/cl_extensions.c
> +++ b/src/cl_extensions.c
> @@ -46,6 +46,8 @@ void check_opt1_extension(cl_extensions_t *extensions)
> if (id == EXT_ID(khr_spir))
> extensions->extensions[id].base.ext_enabled = 1;
> #endif
> + if (id == EXT_ID(khr_image2d_from_buffer))
> + extensions->extensions[id].base.ext_enabled = 1;
> }
> }
>
> diff --git a/src/cl_gt_device.h b/src/cl_gt_device.h
> index a51843d..c2f9f56 100644
> --- a/src/cl_gt_device.h
> +++ b/src/cl_gt_device.h
> @@ -126,4 +126,5 @@ DECL_INFO_STRING(driver_version, LIBCL_DRIVER_VERSION_STRING)
> .affinity_domain = 0,
> .partition_type = {0},
> .device_reference_count = 1,
> -
> +.image_pitch_alignment = 1,
> +.image_base_address_alignment = 4096,
> diff --git a/src/cl_mem.c b/src/cl_mem.c
> index b5671bd..bb065f5 100644
> --- a/src/cl_mem.c
> +++ b/src/cl_mem.c
> @@ -264,6 +264,7 @@ cl_mem_allocate(enum cl_mem_type type,
> SET_ICD(mem->dispatch)
> mem->ref_n = 1;
> mem->magic = CL_MAGIC_MEM_HEADER;
> + mem->is_image_from_buffer = 0;
> mem->flags = flags;
> mem->is_userptr = 0;
> mem->offset = 0;
> @@ -308,10 +309,19 @@ cl_mem_allocate(enum cl_mem_type type,
> }
> }
>
> - if (!mem->is_userptr)
> + if(type == CL_MEM_IMAGE_TYPE && host_ptr && ((cl_mem)host_ptr)->magic == CL_MAGIC_MEM_HEADER) {
> + // if the image if created from buffer, should use the bo directly to share same bo.
> + mem->bo = ((cl_mem)host_ptr)->bo;
> + mem->is_image_from_buffer = 1;
> + } else if (!mem->is_userptr)
> mem->bo = cl_buffer_alloc(bufmgr, "CL memory object", sz, alignment);
> #else
> - mem->bo = cl_buffer_alloc(bufmgr, "CL memory object", sz, alignment);
> + if(type == CL_MEM_IMAGE_TYPE && host_ptr && ((cl_mem)host_ptr)->magic == CL_MAGIC_MEM_HEADER) {
> + // if the image if created from buffer, should use the bo directly to share same bo.
> + mem->bo = ((cl_mem)host_ptr)->bo;
> + mem->is_image_from_buffer = 1;
> + } else
> + mem->bo = cl_buffer_alloc(bufmgr, "CL memory object", sz, alignment);
> #endif
>
> if (UNLIKELY(mem->bo == NULL)) {
> @@ -756,6 +766,8 @@ _cl_mem_new_image(cl_context ctx,
> h = (w + ctx->device->image2d_max_width - 1) / ctx->device->image2d_max_width;
> w = w > ctx->device->image2d_max_width ? ctx->device->image2d_max_width : w;
> tiling = CL_NO_TILE;
> + } else if(image_type == CL_MEM_OBJECT_IMAGE2D && data && ((cl_mem)data)->magic == CL_MAGIC_MEM_HEADER) {
> + tiling = CL_NO_TILE;
> } else if (cl_driver_get_ver(ctx->drv) != 6) {
> /* Pick up tiling mode (we do only linear on SNB) */
> tiling = cl_get_default_tiling(ctx->drv);
> @@ -804,7 +816,10 @@ _cl_mem_new_image(cl_context ctx,
> /* Tiling requires to align both pitch and height */
> if (tiling == CL_NO_TILE) {
> aligned_pitch = w * bpp;
> - aligned_h = ALIGN(h, cl_buffer_get_tiling_align(ctx, CL_NO_TILE, 1));
> + if(image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY || image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY)
> + aligned_h = ALIGN(h, cl_buffer_get_tiling_align(ctx, CL_NO_TILE, 1));
> + else
> + aligned_h = h;
> } else if (tiling == CL_TILE_X) {
> aligned_pitch = ALIGN(w * bpp, cl_buffer_get_tiling_align(ctx, CL_TILE_X, 0));
> aligned_h = ALIGN(h, cl_buffer_get_tiling_align(ctx, CL_TILE_X, 1));
> @@ -814,6 +829,11 @@ _cl_mem_new_image(cl_context ctx,
> }
>
> sz = aligned_pitch * aligned_h * depth;
> + if(image_type == CL_MEM_OBJECT_IMAGE2D && data && ((cl_mem)data)->magic == CL_MAGIC_MEM_HEADER) {
> + //image 2d created from buffer: the buffer sz maybe larger than the image 2d.
> + if( ((cl_mem)data)->size > sz)
> + sz = ((cl_mem)data)->size;
> + }
>
> /* If sz is large than 128MB, map gtt may fail in some system.
> Because there is no obviours performance drop, disable tiling. */
> @@ -825,7 +845,7 @@ _cl_mem_new_image(cl_context ctx,
> }
>
> if (image_type != CL_MEM_OBJECT_IMAGE1D_BUFFER)
> - mem = cl_mem_allocate(CL_MEM_IMAGE_TYPE, ctx, flags, sz, tiling != CL_NO_TILE, NULL, &err);
> + mem = cl_mem_allocate(CL_MEM_IMAGE_TYPE, ctx, flags, sz, tiling != CL_NO_TILE, data, &err);
> else {
> mem = cl_mem_allocate(CL_MEM_BUFFER1D_IMAGE_TYPE, ctx, flags, sz, tiling != CL_NO_TILE, NULL, &err);
> if (mem != NULL && err == CL_SUCCESS) {
> @@ -837,7 +857,11 @@ _cl_mem_new_image(cl_context ctx,
> if (mem == NULL || err != CL_SUCCESS)
> goto error;
>
> - cl_buffer_set_tiling(mem->bo, tiling, aligned_pitch);
> + if(!(image_type == CL_MEM_OBJECT_IMAGE2D && data && ((cl_mem)data)->magic == CL_MAGIC_MEM_HEADER)) {
> + //no need set tiling if image 2d created from buffer since share same bo.
> + cl_buffer_set_tiling(mem->bo, tiling, aligned_pitch);
> + }
> +
> if (image_type == CL_MEM_OBJECT_IMAGE1D ||
> image_type == CL_MEM_OBJECT_IMAGE2D ||
> image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER)
> @@ -971,33 +995,51 @@ _cl_mem_new_image_from_buffer(cl_context ctx,
> if (UNLIKELY((err = cl_image_byte_per_pixel(image_format, &bpp)) != CL_SUCCESS))
> goto error;
>
> - // Per bspec, a image should has a at least 2 line vertical alignment,
> - // thus we can't simply attach a buffer to a 1d image surface which has the same size.
> - // We have to create a new image, and copy the buffer data to this new image.
> - // And replace all the buffer object's reference to this image.
> - image = _cl_mem_new_image(ctx, flags, image_format, image_desc->image_type,
> + if(image_desc->image_type == CL_MEM_OBJECT_IMAGE2D) {
> + image = _cl_mem_new_image(ctx, flags, image_format, image_desc->image_type,
> + image_desc->image_width, image_desc->image_height, image_desc->image_depth,
> + image_desc->image_row_pitch, image_desc->image_slice_pitch,
> + image_desc->buffer, errcode_ret);
> + } else if (image_desc->image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER) {
> + // Per bspec, a image should has a at least 2 line vertical alignment,
> + // thus we can't simply attach a buffer to a 1d image surface which has the same size.
> + // We have to create a new image, and copy the buffer data to this new image.
> + // And replace all the buffer object's reference to this image.
> + image = _cl_mem_new_image(ctx, flags, image_format, image_desc->image_type,
> mem_buffer->base.size / bpp, 0, 0, 0, 0, NULL, errcode_ret);
> + }
> + else
> + assert(0);
> +
> if (image == NULL)
> return NULL;
> - void *src = cl_mem_map(buffer, 0);
> - void *dst = cl_mem_map(image, 1);
> - //
> - // FIXME, we could use copy buffer to image to do this on GPU latter.
> - // currently the copy buffer to image function doesn't support 1D image.
> - //
> - // There is a potential risk that this buffer was mapped and the caller
> - // still hold the pointer and want to access it again. This scenario is
> - // not explicitly forbidden in the spec, although it should not be permitted.
> - memcpy(dst, src, mem_buffer->base.size);
> - cl_mem_unmap(buffer);
> - cl_mem_unmap(image);
> +
> + if(image_desc->image_type == CL_MEM_OBJECT_IMAGE2D) {
> + //no need copy since the image 2d and buffer share same bo.
> + }
> + else if (image_desc->image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER) {
> + // FIXME, we could use copy buffer to image to do this on GPU latter.
> + // currently the copy buffer to image function doesn't support 1D image.
> + //
> + // There is a potential risk that this buffer was mapped and the caller
> + // still hold the pointer and want to access it again. This scenario is
> + // not explicitly forbidden in the spec, although it should not be permitted.
> + void *src = cl_mem_map(buffer, 0);
> + void *dst = cl_mem_map(image, 1);
> + memcpy(dst, src, mem_buffer->base.size);
> + cl_mem_unmap(image);
> + cl_mem_unmap(buffer);
> + }
> + else
> + assert(0);
>
> if (err != 0)
> goto error;
>
> // Now replace buffer's bo to this new bo, need to take care of sub buffer
> // case.
> - cl_mem_replace_buffer(buffer, image->bo);
> + if (image_desc->image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER)
> + cl_mem_replace_buffer(buffer, image->bo);
> /* Now point to the right offset if buffer is a SUB_BUFFER. */
> if (buffer->flags & CL_MEM_USE_HOST_PTR)
> image->host_ptr = buffer->host_ptr + offset;
> @@ -1025,12 +1067,20 @@ cl_mem_new_image(cl_context context,
> {
> switch (image_desc->image_type) {
> case CL_MEM_OBJECT_IMAGE1D:
> - case CL_MEM_OBJECT_IMAGE2D:
> case CL_MEM_OBJECT_IMAGE3D:
> return _cl_mem_new_image(context, flags, image_format, image_desc->image_type,
> image_desc->image_width, image_desc->image_height, image_desc->image_depth,
> image_desc->image_row_pitch, image_desc->image_slice_pitch,
> host_ptr, errcode_ret);
> + case CL_MEM_OBJECT_IMAGE2D:
> + if(image_desc->buffer)
> + return _cl_mem_new_image_from_buffer(context, flags, image_format,
> + image_desc, errcode_ret);
> + else
> + return _cl_mem_new_image(context, flags, image_format, image_desc->image_type,
> + image_desc->image_width, image_desc->image_height, image_desc->image_depth,
> + image_desc->image_row_pitch, image_desc->image_slice_pitch,
> + host_ptr, errcode_ret);
> case CL_MEM_OBJECT_IMAGE1D_ARRAY:
> case CL_MEM_OBJECT_IMAGE2D_ARRAY:
> return _cl_mem_new_image(context, flags, image_format, image_desc->image_type,
> @@ -1065,9 +1115,14 @@ cl_mem_delete(cl_mem mem)
> /* iff we are a image, delete the 1d buffer if has. */
> if (IS_IMAGE(mem)) {
> if (cl_mem_image(mem)->buffer_1d) {
> - assert(cl_mem_image(mem)->image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER);
> - cl_mem_delete(cl_mem_image(mem)->buffer_1d);
> - cl_mem_image(mem)->buffer_1d = NULL;
> + assert(cl_mem_image(mem)->image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER ||
> + cl_mem_image(mem)->image_type == CL_MEM_OBJECT_IMAGE2D);
> + cl_mem_delete(cl_mem_image(mem)->buffer_1d);
> + if(cl_mem_image(mem)->image_type == CL_MEM_OBJECT_IMAGE2D && mem->is_image_from_buffer == 1)
> + {
> + cl_mem_image(mem)->buffer_1d = NULL;
> + mem->bo = NULL;
> + }
Indentation looks wrong here.
More information about the Beignet
mailing list