[Beignet] [PATCH 3/6] Create image with TILE_Y mode still when image size > 128MB for performance.
yan.wang at linux.intel.com
yan.wang at linux.intel.com
Tue May 9 10:02:51 UTC 2017
From: Yan Wang <yan.wang at linux.intel.com>
Copying data from the host pointer into a large TILE_Y image may fail.
So use clEnqueueCopyBufferToImage to do the copy on the GPU side.
Signed-off-by: Yan Wang <yan.wang at linux.intel.com>
---
src/cl_mem.c | 100 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
src/cl_mem.h | 2 ++
2 files changed, 97 insertions(+), 5 deletions(-)
diff --git a/src/cl_mem.c b/src/cl_mem.c
index 4a7bec8..fe0dd2f 100644
--- a/src/cl_mem.c
+++ b/src/cl_mem.c
@@ -149,6 +149,8 @@ cl_mem_allocate(enum cl_mem_type type,
mem->is_userptr = 0;
mem->offset = 0;
mem->is_svm = 0;
+ mem->is_ker_copy = 0;
+ mem->tmp_ker_buf = NULL;
mem->cmrt_mem = NULL;
if (mem->type == CL_MEM_IMAGE_TYPE) {
cl_mem_image(mem)->is_image_from_buffer = 0;
@@ -750,6 +752,77 @@ cl_image_tiling_t cl_get_default_tiling(cl_driver drv)
return tiling;
}
+cl_command_queue image_queue = NULL; /* Queue used for the host-ptr -> image copy below; created on first use and reused. No release is visible in this patch. */
+ /* Creates an image and fills it from `data` by wrapping `data` in a CL_MEM_USE_HOST_PTR buffer and enqueuing a buffer-to-image copy. */
+static cl_mem /* Returns the new image, or NULL if any step fails. */
+_cl_new_image_copy_from_host_ptr(cl_context ctx, // context used to create the queue, the staging buffer and the image
+ cl_mem_flags flags, // forwarded to cl_mem_allocate for the image
+ const cl_image_format *fmt, // dereferenced and passed to cl_mem_image_init
+ const cl_mem_object_type image_type, // CL_MEM_OBJECT_IMAGE2D gets a slice pitch of 0; other types get a computed one
+ size_t w, // image width; also region[0] of the copy
+ size_t h, // image height; also region[1] of the copy
+ size_t depth, // image depth; also region[2] of the copy
+ size_t pitch, // not referenced in this function. NOTE(review): the copy takes no host row pitch - confirm callers pass tightly packed data
+ size_t slice_pitch, // not referenced in this function
+ size_t sz, // byte size used for both the staging buffer over `data` and the image allocation. NOTE(review): confirm it cannot exceed the host allocation
+ size_t aligned_pitch, // pitch given to cl_buffer_set_tiling and cl_mem_image_init
+ uint32_t intel_fmt, // forwarded to cl_mem_image_init
+ uint32_t bpp, // forwarded to cl_mem_image_init
+ cl_image_tiling_t tiling, // tiling mode applied to the image's buffer object
+ void *data, // pointer from the application; becomes the host_ptr of the CL_MEM_USE_HOST_PTR staging buffer
+ cl_int *errcode_ret) // written only when queue or buffer creation fails; untouched on later failures and on success; dereferenced without a NULL check
+{
+ cl_int err = CL_SUCCESS;
+ cl_mem mem = NULL;
+ size_t origin[3] = {0, 0, 0}; // copy starts at the image origin
+ size_t region[3] = {w, h, depth}; // copy covers the whole image
+ size_t aligned_slice_pitch = 0;
+ // Create the shared queue on the first call, on the context's first device. NOTE(review): later calls reuse it even if ctx differs - confirm this is intended.
+ if (image_queue == NULL) {
+ image_queue = clCreateCommandQueueWithProperties(ctx, ctx->devices[0], 0, &err);
+ if (err != CL_SUCCESS) {
+ *errcode_ret = err;
+ return NULL;
+ }
+ }
+
+ // Wrap the application's host pointer in an OCL buffer of sz bytes so it can be the copy source
+ cl_mem buf = clCreateBuffer(ctx, CL_MEM_USE_HOST_PTR, sz, data, &err);
+ if (err != CL_SUCCESS) {
+ *errcode_ret = err;
+ return NULL;
+ }
+ // Allocate the image memory object itself.
+ mem = cl_mem_allocate(CL_MEM_IMAGE_TYPE, ctx, flags, sz, tiling != CL_NO_TILE, NULL, NULL, &err);
+ if (mem == NULL || err != CL_SUCCESS) { // NOTE(review): *errcode_ret is not updated here, and a non-NULL mem with err set is not released
+ clReleaseMemObject(buf);
+ return NULL;
+ }
+ // Apply the requested tiling mode and pitch to the image's buffer object.
+ cl_buffer_set_tiling(mem->bo, tiling, aligned_pitch);
+ // 2D images use a slice pitch of 0; other types use aligned_pitch times the tiling-aligned height.
+ if (image_type == CL_MEM_OBJECT_IMAGE2D)
+ aligned_slice_pitch = 0;
+ else
+ // SKL needs the tiling's aligned_h to calculate slice_pitch; IVB to BDW need CL_NO_TILE's aligned_h.
+ aligned_slice_pitch = aligned_pitch * ALIGN(h, cl_buffer_get_tiling_align(ctx, tiling, 2));
+ // Initialize the image descriptor with the geometry, format, pitches and tiling computed above.
+ cl_mem_image_init(cl_mem_image(mem), w, h, image_type, depth, *fmt,
+ intel_fmt, bpp, aligned_pitch, aligned_slice_pitch, tiling,
+ 0, 0, 0);
+ // Enqueue the copy of the full region from buffer offset 0 into the image on image_queue.
+ err = clEnqueueCopyBufferToImage(image_queue, buf, mem, 0, origin, region, 0, NULL, NULL);
+ if(err != CL_SUCCESS) { // NOTE(review): *errcode_ret is not updated on this path either
+ clReleaseMemObject(buf);
+ clReleaseMemObject(mem);
+ return NULL;
+ }
+ // NOTE(review): no wait on the enqueued copy is visible before buf is released and mem is returned - confirm the copy has completed or that `data` outlives it.
+ mem->is_ker_copy = 1; // flag the image as having been filled by the kernel-side copy
+ clReleaseMemObject(buf);
+ return mem;
+}
+
static cl_mem
_cl_mem_new_image(cl_context ctx,
cl_mem_flags flags,
@@ -765,6 +838,7 @@ _cl_mem_new_image(cl_context ctx,
cl_int *errcode_ret)
{
cl_int err = CL_SUCCESS;
+ cl_bool is_ker_copy = 0;
cl_mem mem = NULL;
cl_mem_object_type image_type = orig_image_type;
uint32_t bpp = 0, intel_fmt = INTEL_UNSUPPORTED_FORMAT;
@@ -931,11 +1005,22 @@ _cl_mem_new_image(cl_context ctx,
/* If sz is large than 128MB, map gtt may fail in some system.
Because there is no obviours performance drop, disable tiling. */
- if(tiling != CL_NO_TILE && sz > MAX_TILING_SIZE) {
- tiling = CL_NO_TILE;
- aligned_pitch = w * bpp;
- aligned_h = ALIGN(h, cl_buffer_get_tiling_align(ctx, CL_NO_TILE, 1));
- sz = aligned_pitch * aligned_h * depth;
+ if (tiling != CL_NO_TILE && sz > MAX_TILING_SIZE) {
+ if ((image_type == CL_MEM_OBJECT_IMAGE2D || image_type == CL_MEM_OBJECT_IMAGE3D) &&
+ (flags & CL_MEM_COPY_HOST_PTR)) {
+ mem = _cl_new_image_copy_from_host_ptr(ctx, flags, fmt, image_type, w, h, depth, pitch,
+ slice_pitch, sz, aligned_pitch, intel_fmt, bpp, tiling, data, &err);
+ if (mem != NULL)
+ goto exit;
+ }
+
+ if (flags & (CL_MEM_USE_HOST_PTR | CL_MEM_COPY_HOST_PTR)) {
+ tiling = CL_NO_TILE;
+ aligned_pitch = w * bpp;
+ aligned_h = ALIGN(h, cl_buffer_get_tiling_align(ctx, CL_NO_TILE, 1));
+ sz = aligned_pitch * aligned_h * depth;
+ } else
+ is_ker_copy = 1;
}
if (image_type != CL_MEM_OBJECT_IMAGE1D_BUFFER) {
@@ -992,6 +1077,8 @@ _cl_mem_new_image(cl_context ctx,
cl_mem_copy_image(cl_mem_image(mem), pitch, slice_pitch, data);
}
+ mem->is_ker_copy = is_ker_copy;
+
exit:
if (errcode_ret)
*errcode_ret = err;
@@ -1368,6 +1455,9 @@ cl_mem_delete(cl_mem mem)
cl_free(cb);
}
+ if (mem->tmp_ker_buf)
+ cl_mem_delete(mem->tmp_ker_buf);
+
/* iff we are a image, delete the 1d buffer if has. */
if (IS_IMAGE(mem)) {
if (cl_mem_image(mem)->buffer_1d) {
diff --git a/src/cl_mem.h b/src/cl_mem.h
index edfd043..20afe23 100644
--- a/src/cl_mem.h
+++ b/src/cl_mem.h
@@ -98,6 +98,8 @@ typedef struct _cl_mem {
list_head dstr_cb_head; /* All destroy callbacks. */
uint8_t is_userptr; /* CL_MEM_USE_HOST_PTR is enabled */
cl_bool is_svm; /* This object is svm */
+ cl_bool is_ker_copy; /* this object is copied by OCL kernel */
+ cl_mem tmp_ker_buf; /* this object is tmp buffer for OCL kernel copying */
size_t offset; /* offset of host_ptr to the page beginning, only for CL_MEM_USE_HOST_PTR*/
uint8_t cmrt_mem_type; /* CmBuffer, CmSurface2D, ... */
--
2.7.4
More information about the Beignet
mailing list