[Beignet] [PATCH 1/2] add extension intel_planar_yuv.
xionghu.luo at intel.com
xionghu.luo at intel.com
Tue Feb 21 15:26:15 UTC 2017
From: Luo Xionghu <xionghu.luo at intel.com>
Create a bo of size w * (3/2 * h) for the whole CL_NV12_INTEL format
surface. The Y surface (format CL_R) shares the first w * h part and
the UV surface (format CL_RG) shares the remaining w * (1/2 * h) part;
the bo offset of the UV surface is set correctly for each platform.
Signed-off-by: Luo Xionghu <xionghu.luo at intel.com>
---
include/CL/cl_ext.h | 7 +++
src/cl_api.c | 9 ++-
src/cl_device_id.c | 2 +
src/cl_extensions.h | 5 +-
src/cl_image.c | 7 +++
src/cl_mem.c | 150 +++++++++++++++++++++++++++++++++++++++++++---
src/cl_mem.h | 2 +
src/intel/intel_defines.h | 1 +
src/intel/intel_gpgpu.c | 38 ++++++++++++
src/intel/intel_structs.h | 19 ++----
10 files changed, 216 insertions(+), 24 deletions(-)
diff --git a/include/CL/cl_ext.h b/include/CL/cl_ext.h
index fa34cba..14dfc01 100644
--- a/include/CL/cl_ext.h
+++ b/include/CL/cl_ext.h
@@ -243,6 +243,13 @@ typedef struct _cl_motion_estimation_desc_intel {
#define CL_ME_SEARCH_PATH_RADIUS_4_4_INTEL 0x1
#define CL_ME_SEARCH_PATH_RADIUS_16_12_INTEL 0x5
+#define CL_MEM_NO_ACCESS_INTEL (1 << 24)
+#define CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL (1 << 25)
+
+#define CL_NV12_INTEL 0x410E
+#define CL_DEVICE_PLANAR_YUV_MAX_WIDTH_INTEL 0x417E
+#define CL_DEVICE_PLANAR_YUV_MAX_HEIGHT_INTEL 0x417F
+
extern CL_API_ENTRY cl_accelerator_intel CL_API_CALL
clCreateAcceleratorINTEL(
cl_context /* context */,
diff --git a/src/cl_api.c b/src/cl_api.c
index 24b8b3d..d7d8778 100644
--- a/src/cl_api.c
+++ b/src/cl_api.c
@@ -134,7 +134,7 @@ clCreateImage(cl_context context,
goto error;
}
if (image_format->image_channel_order < CL_R ||
- image_format->image_channel_order > CL_sBGRA) {
+ (image_format->image_channel_order > CL_ABGR && image_format->image_channel_order != CL_NV12_INTEL)) {
err = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
goto error;
}
@@ -166,6 +166,13 @@ clCreateImage(cl_context context,
goto error;
}
+  if (image_format->image_channel_order == CL_NV12_INTEL && /* NV12 needs UNORM_INT8 and w/h multiples of 4 */
+      (image_format->image_channel_data_type != CL_UNORM_INT8 ||
+       (image_desc->image_width % 4) || (image_desc->image_height % 4))) {
+    err = CL_INVALID_IMAGE_DESCRIPTOR;
+    goto error;
+  }
+
/* Other details check for image_desc will leave to image create. */
mem = cl_mem_new_image(context,
flags,
diff --git a/src/cl_device_id.c b/src/cl_device_id.c
index d4f4208..50ed0d9 100644
--- a/src/cl_device_id.c
+++ b/src/cl_device_id.c
@@ -1075,10 +1075,12 @@ cl_get_device_info(cl_device_id device,
src_size = sizeof(device->image_max_array_size);
break;
case CL_DEVICE_IMAGE2D_MAX_WIDTH:
+ case CL_DEVICE_PLANAR_YUV_MAX_WIDTH_INTEL:
src_ptr = &device->image2d_max_width;
src_size = sizeof(device->image2d_max_width);
break;
case CL_DEVICE_IMAGE2D_MAX_HEIGHT:
+ case CL_DEVICE_PLANAR_YUV_MAX_HEIGHT_INTEL:
src_ptr = &device->image2d_max_height;
src_size = sizeof(device->image2d_max_height);
break;
diff --git a/src/cl_extensions.h b/src/cl_extensions.h
index 52a4953..55747a7 100644
--- a/src/cl_extensions.h
+++ b/src/cl_extensions.h
@@ -29,7 +29,8 @@
DECL_EXT(intel_accelerator) \
DECL_EXT(intel_motion_estimation) \
DECL_EXT(intel_subgroups) \
- DECL_EXT(intel_subgroups_short)
+ DECL_EXT(intel_subgroups_short) \
+ DECL_EXT(intel_planar_yuv)
#define DECL_GL_EXTENSIONS \
DECL_EXT(khr_gl_sharing)\
@@ -64,7 +65,7 @@ cl_khr_extension_id_max
#define OPT1_EXT_START_ID EXT_ID(khr_int64_base_atomics)
#define OPT1_EXT_END_ID EXT_ID(khr_icd)
#define INTEL_EXT_START_ID EXT_ID(intel_accelerator)
-#define INTEL_EXT_END_ID EXT_ID(intel_subgroups_short)
+#define INTEL_EXT_END_ID EXT_ID(intel_planar_yuv)
#define GL_EXT_START_ID EXT_ID(khr_gl_sharing)
#define GL_EXT_END_ID EXT_ID(khr_gl_msaa_sharing)
diff --git a/src/cl_image.c b/src/cl_image.c
index 5ff459a..fbdc17b 100644
--- a/src/cl_image.c
+++ b/src/cl_image.c
@@ -17,6 +17,7 @@
* Author: Benjamin Segovia <benjamin.segovia at intel.com>
*/
+#include "CL/cl_ext.h"
#include "cl_image.h"
#include "cl_utils.h"
#include "intel/intel_defines.h"
@@ -97,6 +98,7 @@ cl_image_byte_per_pixel(const cl_image_format *fmt, uint32_t *bpp)
return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
*bpp *= 4;
break;
+ case CL_NV12_INTEL: break;
default: return CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
};
@@ -208,6 +210,11 @@ cl_image_get_intel_format(const cl_image_format *fmt)
case CL_UNORM_INT8: return I965_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB;
default: return INTEL_UNSUPPORTED_FORMAT;
};
+ case CL_NV12_INTEL:
+ switch (type) {
+ case CL_UNORM_INT8: return I965_SURFACEFORMAT_PLANAR_420_8;
+ default: return INTEL_UNSUPPORTED_FORMAT;
+ };
default: return INTEL_UNSUPPORTED_FORMAT;
};
}
diff --git a/src/cl_mem.c b/src/cl_mem.c
index 0278b7f..9d63a98 100644
--- a/src/cl_mem.c
+++ b/src/cl_mem.c
@@ -152,6 +152,7 @@ cl_mem_allocate(enum cl_mem_type type,
mem->cmrt_mem = NULL;
if (mem->type == CL_MEM_IMAGE_TYPE) {
cl_mem_image(mem)->is_image_from_buffer = 0;
+ cl_mem_image(mem)->is_image_from_nv12_image = 0;
}
if (sz != 0) {
@@ -230,7 +231,11 @@ cl_mem_allocate(enum cl_mem_type type,
}
// if the image if created from buffer, should use the bo directly to share same bo.
mem->bo = buffer->bo;
- cl_mem_image(mem)->is_image_from_buffer = 1;
+ if (IS_IMAGE(buffer) && cl_mem_image(buffer)->fmt.image_channel_order == CL_NV12_INTEL) {
+ cl_mem_image(mem)->is_image_from_nv12_image = 1;
+ } else {
+ cl_mem_image(mem)->is_image_from_buffer = 1;
+ }
bufCreated = 1;
}
@@ -827,7 +832,7 @@ _cl_mem_new_image(cl_context ctx,
h = (w + ctx->devices[0]->image2d_max_width - 1) / ctx->devices[0]->image2d_max_width;
w = w > ctx->devices[0]->image2d_max_width ? ctx->devices[0]->image2d_max_width : w;
tiling = CL_NO_TILE;
- } else if(image_type == CL_MEM_OBJECT_IMAGE2D && buffer != NULL) {
+ } else if(image_type == CL_MEM_OBJECT_IMAGE2D && buffer != NULL && !IS_IMAGE(buffer)) {
tiling = CL_NO_TILE;
} else if (cl_driver_get_ver(ctx->drv) != 6) {
/* Pick up tiling mode (we do only linear on SNB) */
@@ -873,6 +878,9 @@ _cl_mem_new_image(cl_context ctx,
assert(0);
#undef DO_IMAGE_ERROR
+ if (fmt->image_channel_order == CL_NV12_INTEL) {
+ h += h/2;
+ }
uint8_t enableUserptr = 0;
if (enable_true_hostptr && ctx->devices[0]->host_unified_memory && data != NULL && (flags & CL_MEM_USE_HOST_PTR)) {
@@ -894,7 +902,7 @@ _cl_mem_new_image(cl_context ctx,
aligned_pitch = pitch;
//no need align the height if 2d image from buffer.
//the pitch should be same with buffer's pitch as they share same bo.
- if (image_type == CL_MEM_OBJECT_IMAGE2D && buffer != NULL) {
+ if (image_type == CL_MEM_OBJECT_IMAGE2D && buffer != NULL && !IS_IMAGE(buffer)) {
if(aligned_pitch < pitch) {
aligned_pitch = pitch;
}
@@ -911,7 +919,7 @@ _cl_mem_new_image(cl_context ctx,
}
sz = aligned_pitch * aligned_h * depth;
- if (image_type == CL_MEM_OBJECT_IMAGE2D && buffer != NULL) {
+ if (image_type == CL_MEM_OBJECT_IMAGE2D && buffer != NULL && !IS_IMAGE(buffer)) {
//image 2d created from buffer: per spec, the buffer sz maybe larger than the image 2d.
if (buffer->size >= sz)
sz = buffer->size;
@@ -979,6 +987,11 @@ _cl_mem_new_image(cl_context ctx,
cl_mem_copy_image(cl_mem_image(mem), pitch, slice_pitch, data);
}
+ /* copy yuv data if required */
+ if(fmt->image_channel_order == CL_NV12_INTEL && data) {
+ cl_mem_copy_image(cl_mem_image(mem), pitch, slice_pitch, data);
+ }
+
exit:
if (errcode_ret)
*errcode_ret = err;
@@ -990,6 +1003,113 @@ error:
}
static cl_mem
+_cl_mem_new_image_from_nv12_image(cl_context ctx,
+                                  cl_mem_flags flags,
+                                  const cl_image_format* image_format,
+                                  const cl_image_desc *image_desc,
+                                  cl_int *errcode_ret)
+{
+  /* Create a 2D image aliasing one plane of the NV12 image image_desc->mem_object
+   * (image_depth 0: Y plane, 1: UV plane) and take a reference on that parent.
+   * On failure return NULL with the error in *errcode_ret, which must not be NULL. */
+  cl_mem image = NULL;
+  cl_mem imageIn = image_desc->mem_object;
+  cl_int err = CL_SUCCESS;
+  *errcode_ret = err;
+  uint32_t bpp;
+  uint32_t intel_fmt = INTEL_UNSUPPORTED_FORMAT;
+  size_t width = 0;
+  size_t height = 0;
+  size_t depth = 0;
+  /* Get the size of each pixel */
+  if (UNLIKELY((err = cl_image_byte_per_pixel(image_format, &bpp)) != CL_SUCCESS))
+    goto error;
+
+  /* Only a sub-set of the formats are supported */
+  intel_fmt = cl_image_get_intel_format(image_format);
+  if (UNLIKELY(intel_fmt == INTEL_UNSUPPORTED_FORMAT)) {
+    err = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR;
+    goto error;
+  }
+  /* The parent must be a valid NV12 image; check for NULL before dereferencing it. */
+  if (!imageIn || cl_mem_image(imageIn)->fmt.image_channel_order != CL_NV12_INTEL) {
+    err = CL_INVALID_IMAGE_DESCRIPTOR;
+    goto error;
+  }
+  /* Parent h spans both planes (3/2 * luma height): Y is the first 2/3 of the rows, UV the rest. */
+  width = cl_mem_image(imageIn)->w;
+  if (image_desc->image_depth == 0) {
+    height = cl_mem_image(imageIn)->h * 2 / 3;
+  } else if (image_desc->image_depth == 1) {
+    width = cl_mem_image(imageIn)->w / 2;
+    height = cl_mem_image(imageIn)->h / 3;
+  } else {
+    err = CL_INVALID_IMAGE_DESCRIPTOR;
+    goto error;
+  }
+
+  /* Host-pointer flags are not accepted for a plane image. */
+  if ((flags & CL_MEM_USE_HOST_PTR) || (flags & CL_MEM_ALLOC_HOST_PTR) ||
+      (flags & CL_MEM_COPY_HOST_PTR)) {
+    err = CL_INVALID_VALUE;
+    goto error;
+  }
+
+  if (!(imageIn->flags & CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL)) {
+    if (((flags & CL_MEM_READ_WRITE) || (flags & CL_MEM_READ_ONLY)) &&
+        (imageIn->flags & CL_MEM_WRITE_ONLY)) { /* readable view of a write-only parent */
+      err = CL_INVALID_VALUE;
+      goto error;
+    }
+    if (((flags & CL_MEM_READ_WRITE) || (flags & CL_MEM_WRITE_ONLY)) &&
+        (imageIn->flags & CL_MEM_READ_ONLY)) { /* writable view of a read-only parent */
+      err = CL_INVALID_VALUE;
+      goto error;
+    }
+    if (((flags & CL_MEM_READ_WRITE) || (flags & CL_MEM_WRITE_ONLY) || (flags & CL_MEM_READ_ONLY)) &&
+        (imageIn->flags & CL_MEM_NO_ACCESS_INTEL)) { /* device access to a no-access parent */
+      err = CL_INVALID_VALUE;
+      goto error;
+    }
+    if ((flags & CL_MEM_HOST_READ_ONLY) &&
+        (imageIn->flags & CL_MEM_HOST_WRITE_ONLY)) { /* host-readable view of a host-write-only parent */
+      err = CL_INVALID_VALUE;
+      goto error;
+    }
+    if ((flags & CL_MEM_HOST_WRITE_ONLY) &&
+        (imageIn->flags & CL_MEM_HOST_READ_ONLY)) { /* host-writable view of a host-read-only parent */
+      err = CL_INVALID_VALUE;
+      goto error;
+    }
+    if (((flags & CL_MEM_HOST_READ_ONLY) || (flags & CL_MEM_HOST_WRITE_ONLY)) &&
+        (imageIn->flags & CL_MEM_HOST_NO_ACCESS)) { /* host access to a host-no-access parent */
+      err = CL_INVALID_VALUE;
+      goto error;
+    }
+  }
+
+  image = _cl_mem_new_image(ctx, flags, image_format, image_desc->image_type,
+                            width, height, depth, cl_mem_image(imageIn)->row_pitch,
+                            0, NULL,
+                            imageIn, errcode_ret);
+  if (image == NULL)
+    return NULL;
+
+  /* The UV plane starts after the Y rows: 2 * height rows of row_pitch bytes each. */
+  if (image_desc->image_depth == 1) {
+    cl_mem_image(image)->offset = cl_mem_image(imageIn)->row_pitch * height * 2;
+  }
+  cl_mem_image(image)->nv12_image = imageIn;
+  cl_mem_add_ref(imageIn);
+  return image;
+error:
+  if (image)
+    cl_mem_delete(image);
+  *errcode_ret = err;
+  return NULL;
+}
+
+static cl_mem
_cl_mem_new_image_from_buffer(cl_context ctx,
cl_mem_flags flags,
const cl_image_format* image_format,
@@ -1034,7 +1154,7 @@ _cl_mem_new_image_from_buffer(cl_context ctx,
goto error;
}
if ((buffer->flags & CL_MEM_READ_ONLY) &&
- (flags & (CL_MEM_READ_WRITE|CL_MEM_WRITE_ONLY))) {
+ (flags & (CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY))) {
err = CL_INVALID_VALUE;
goto error;
}
@@ -1169,9 +1289,14 @@ cl_mem_new_image(cl_context context,
image_desc->image_row_pitch, image_desc->image_slice_pitch,
host_ptr, NULL, errcode_ret);
case CL_MEM_OBJECT_IMAGE2D:
- if(image_desc->buffer)
- return _cl_mem_new_image_from_buffer(context, flags, image_format,
- image_desc, errcode_ret);
+ if (image_desc->buffer) {
+ if (IS_IMAGE(image_desc->buffer)) {
+ return _cl_mem_new_image_from_nv12_image(context, flags, image_format,
+ image_desc, errcode_ret);
+ } else
+ return _cl_mem_new_image_from_buffer(context, flags, image_format,
+ image_desc, errcode_ret);
+ }
else
return _cl_mem_new_image(context, flags, image_format, image_desc->image_type,
image_desc->image_width, image_desc->image_height, image_desc->image_depth,
@@ -1247,6 +1372,15 @@ cl_mem_delete(cl_mem mem)
mem->bo = NULL;
}
}
+ if (cl_mem_image(mem)->nv12_image) {
+ assert(cl_mem_image(mem)->image_type == CL_MEM_OBJECT_IMAGE2D);
+ cl_mem_delete(cl_mem_image(mem)->nv12_image);
+ if(cl_mem_image(mem)->image_type == CL_MEM_OBJECT_IMAGE2D && cl_mem_image(mem)->is_image_from_nv12_image == 1)
+ {
+ cl_mem_image(mem)->nv12_image = NULL;
+ mem->bo = NULL;
+ }
+ }
}
/* Someone still mapped, unmap */
diff --git a/src/cl_mem.h b/src/cl_mem.h
index 4764401..edfd043 100644
--- a/src/cl_mem.h
+++ b/src/cl_mem.h
@@ -143,6 +143,8 @@ struct _cl_mem_image {
size_t offset; /* offset for dri_bo, used when it's reloc. */
cl_mem buffer_1d; /* if the image is created from buffer, it point to the buffer.*/
uint8_t is_image_from_buffer; /* IMAGE from Buffer*/
+ cl_mem nv12_image; /* if the image is created from nv12 Image, it point to the image.*/
+ uint8_t is_image_from_nv12_image; /* IMAGE from NV12 Image*/
};
struct _cl_mem_gl_image {
diff --git a/src/intel/intel_defines.h b/src/intel/intel_defines.h
index 6ada30c..36be4fb 100644
--- a/src/intel/intel_defines.h
+++ b/src/intel/intel_defines.h
@@ -267,6 +267,7 @@
#define I965_SURFACEFORMAT_R16G16B16_SNORM 0x19D
#define I965_SURFACEFORMAT_R16G16B16_SSCALED 0x19E
#define I965_SURFACEFORMAT_R16G16B16_USCALED 0x19F
+#define I965_SURFACEFORMAT_PLANAR_420_8 0x1A5
#define I965_SURFACEFORMAT_RAW 0x1FF
#define I965_MAPFILTER_NEAREST 0x0
diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c
index 283b07a..041938f 100644
--- a/src/intel/intel_gpgpu.c
+++ b/src/intel/intel_gpgpu.c
@@ -1258,6 +1258,14 @@ intel_gpgpu_bind_image_gen7(intel_gpgpu_t *gpgpu,
ss->ss0.surface_array = 1;
ss->ss0.surface_array_spacing = 1;
}
+
+ if (obj_bo_offset && tiling != GPGPU_NO_TILE) {
+ uint32_t unaligned = obj_bo_offset;
+ obj_bo_offset = (obj_bo_offset / 0x1000) * 0x1000;
+ uint32_t h_ = (unaligned - obj_bo_offset )/ pitch;
+ ss->ss5.y_offset = h_ / 2;
+ }
+
ss->ss0.surface_format = format;
ss->ss1.base_addr = obj_bo->offset + obj_bo_offset;
ss->ss2.width = w - 1;
@@ -1354,6 +1362,14 @@ intel_gpgpu_bind_image_gen75(intel_gpgpu_t *gpgpu,
ss->ss0.surface_array = 1;
ss->ss0.surface_array_spacing = 1;
}
+
+ if (obj_bo_offset && tiling != GPGPU_NO_TILE) {
+ uint32_t unaligned = obj_bo_offset;
+ obj_bo_offset = (obj_bo_offset / 0x1000) * 0x1000;
+ uint32_t h_ = (unaligned - obj_bo_offset )/ pitch;
+ ss->ss5.y_offset = h_ / 2;
+ }
+
ss->ss0.surface_format = format;
ss->ss1.base_addr = obj_bo->offset + obj_bo_offset;
ss->ss2.width = w - 1;
@@ -1419,6 +1435,13 @@ intel_gpgpu_bind_image_gen8(intel_gpgpu_t *gpgpu,
ss->ss2.height = h - 1;
ss->ss3.depth = depth - 1;
+ if(obj_bo_offset && tiling != GPGPU_NO_TILE) {
+ uint32_t unaligned = obj_bo_offset;
+ obj_bo_offset = (obj_bo_offset / 0x1000) * 0x1000;
+ uint32_t h_ = (unaligned - obj_bo_offset) / pitch;
+ ss->ss5.y_offset = h_ / 4;
+ }
+
ss->ss8.surface_base_addr_lo = (obj_bo->offset64 + obj_bo_offset) & 0xffffffff;
ss->ss9.surface_base_addr_hi = ((obj_bo->offset64 + obj_bo_offset) >> 32) & 0xffffffff;
@@ -1427,6 +1450,10 @@ intel_gpgpu_bind_image_gen8(intel_gpgpu_t *gpgpu,
ss->ss3.surface_pitch = pitch - 1;
ss->ss1.mem_obj_ctrl_state = cl_gpgpu_get_cache_ctrl();
+ //NV12 surface: h covers both planes (3/2 of the luma height), so the UV plane starts at row h * 2 / 3.
+ if (format == I965_SURFACEFORMAT_PLANAR_420_8)
+ ss->ss6.uv_plane_y_offset = h * 2 / 3;
+
ss->ss7.shader_channel_select_red = I965_SURCHAN_SELECT_RED;
ss->ss7.shader_channel_select_green = I965_SURCHAN_SELECT_GREEN;
ss->ss7.shader_channel_select_blue = I965_SURCHAN_SELECT_BLUE;
@@ -1495,6 +1522,13 @@ intel_gpgpu_bind_image_gen9(intel_gpgpu_t *gpgpu,
ss->ss2.height = h - 1;
ss->ss3.depth = depth - 1;
+ if (obj_bo_offset && tiling != GPGPU_NO_TILE) {
+ uint32_t unaligned = obj_bo_offset;
+ obj_bo_offset = (obj_bo_offset / 0x1000) * 0x1000;
+ uint32_t h_ = (unaligned - obj_bo_offset )/ pitch;
+ ss->ss5.y_offset = h_ / 4;
+ }
+
ss->ss8.surface_base_addr_lo = (obj_bo->offset64 + obj_bo_offset) & 0xffffffff;
ss->ss9.surface_base_addr_hi = ((obj_bo->offset64 + obj_bo_offset) >> 32) & 0xffffffff;
@@ -1502,6 +1536,10 @@ intel_gpgpu_bind_image_gen9(intel_gpgpu_t *gpgpu,
ss->ss4.min_array_elt = 0;
ss->ss3.surface_pitch = pitch - 1;
+ //NV12 surface: h covers both planes (3/2 of the luma height), so the UV plane starts at row h * 2 / 3.
+ if (format == I965_SURFACEFORMAT_PLANAR_420_8)
+ ss->ss6.uv_plane_y_offset = h * 2 / 3;
+
ss->ss1.mem_obj_ctrl_state = cl_gpgpu_get_cache_ctrl();
ss->ss7.shader_channel_select_red = I965_SURCHAN_SELECT_RED;
ss->ss7.shader_channel_select_green = I965_SURCHAN_SELECT_GREEN;
diff --git a/src/intel/intel_structs.h b/src/intel/intel_structs.h
index c112a16..b38cc42 100644
--- a/src/intel/intel_structs.h
+++ b/src/intel/intel_structs.h
@@ -310,29 +310,22 @@ typedef struct gen8_surface_state
} ss5;
struct {
- union {
union {
struct {
uint32_t aux_surface_mode:3;
uint32_t aux_surface_pitch:9;
uint32_t pad3:4;
+ uint32_t aux_sruface_qpitch:15;
+ uint32_t pad2:1;
};
- struct {
- uint32_t uv_plane_y_offset:14;
- uint32_t pad2:2;
- };
- };
struct {
- uint32_t uv_plane_x_offset:14;
- uint32_t pad1:1;
- uint32_t seperate_uv_plane_enable:1;
+ uint32_t uv_plane_y_offset : 14;
+ uint32_t pad1 : 2;
+ uint32_t uv_plane_x_offset : 14;
+ uint32_t pad0 : 2;
};
- struct {
- uint32_t aux_sruface_qpitch:15;
- uint32_t pad0:1;
};
- };
} ss6;
struct {
--
2.5.0
More information about the Beignet
mailing list