[Libva] [PATCH intel-driver 5/5] decoder: h264: optimize support for grayscale surfaces.
Gwenole Beauchesne
gb.devel at gmail.com
Fri May 9 23:03:34 PDT 2014
Optimize support for grayscale surfaces in two respects: (i) space,
by allocating only the luma component; (ii) speed, by avoiding
initialization of the (now nonexistent) chroma planes.
Keep backward compatibility with older codec layers that properly
supported only YUV 4:2:0, and not grayscale formats.
Signed-off-by: Gwenole Beauchesne <gwenole.beauchesne at intel.com>
---
src/gen6_mfd.c | 21 +++++---------
src/gen75_mfd.c | 19 ++++---------
src/gen7_mfd.c | 19 ++++---------
src/gen8_mfd.c | 19 ++++---------
src/i965_decoder_utils.c | 71 ++++++++++++++++++++++++++++++++++++++++++++++--
src/i965_decoder_utils.h | 8 ++++++
src/i965_drv_video.c | 43 +++++++++++++++++++++++++----
src/i965_drv_video.h | 12 ++++++++
8 files changed, 151 insertions(+), 61 deletions(-)
diff --git a/src/gen6_mfd.c b/src/gen6_mfd.c
index 22d8a51..6ec2278 100755
--- a/src/gen6_mfd.c
+++ b/src/gen6_mfd.c
@@ -130,7 +130,11 @@ gen6_mfd_surface_state(VADriverContextP ctx,
{
struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
struct object_surface *obj_surface = decode_state->render_object;
-
+ unsigned int surface_format;
+
+ surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
+ MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
+
BEGIN_BCS_BATCH(batch, 6);
OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
OUT_BCS_BATCH(batch, 0);
@@ -138,7 +142,7 @@ gen6_mfd_surface_state(VADriverContextP ctx,
((obj_surface->orig_height - 1) << 19) |
((obj_surface->orig_width - 1) << 6));
OUT_BCS_BATCH(batch,
- (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
+ (surface_format << 28) | /* 420 planar YUV surface */
(1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
(0 << 22) | /* surface object control state, FIXME??? */
((obj_surface->width - 1) << 3) | /* pitch */
@@ -842,18 +846,7 @@ gen6_mfd_avc_decode_init(VADriverContextP ctx,
obj_surface->flags |= SURFACE_REFERENCED;
else
obj_surface->flags &= ~SURFACE_REFERENCED;
- i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
-
- /* initial uv component for YUV400 case */
- if (pic_param->seq_fields.bits.chroma_format_idc == 0) {
- unsigned int uv_offset = obj_surface->width * obj_surface->height;
- unsigned int uv_size = obj_surface->width * obj_surface->height / 2;
-
- drm_intel_gem_bo_map_gtt(obj_surface->bo);
- memset(obj_surface->bo->virtual + uv_offset, 0x80, uv_size);
- drm_intel_gem_bo_unmap_gtt(obj_surface->bo);
- }
-
+ avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param);
gen6_mfd_init_avc_surface(ctx, pic_param, obj_surface);
dri_bo_unreference(gen6_mfd_context->post_deblocking_output.bo);
diff --git a/src/gen75_mfd.c b/src/gen75_mfd.c
index cb85996..d2dbb69 100644
--- a/src/gen75_mfd.c
+++ b/src/gen75_mfd.c
@@ -137,12 +137,16 @@ gen75_mfd_surface_state(VADriverContextP ctx,
struct object_surface *obj_surface = decode_state->render_object;
unsigned int y_cb_offset;
unsigned int y_cr_offset;
+ unsigned int surface_format;
assert(obj_surface);
y_cb_offset = obj_surface->y_cb_offset;
y_cr_offset = obj_surface->y_cr_offset;
+ surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
+ MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
+
BEGIN_BCS_BATCH(batch, 6);
OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
OUT_BCS_BATCH(batch, 0);
@@ -150,7 +154,7 @@ gen75_mfd_surface_state(VADriverContextP ctx,
((obj_surface->orig_height - 1) << 18) |
((obj_surface->orig_width - 1) << 4));
OUT_BCS_BATCH(batch,
- (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
+ (surface_format << 28) | /* 420 planar YUV surface */
((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
(0 << 22) | /* surface object control state, ignored */
((obj_surface->width - 1) << 3) | /* pitch */
@@ -1086,18 +1090,7 @@ gen75_mfd_avc_decode_init(VADriverContextP ctx,
obj_surface->flags |= SURFACE_REFERENCED;
else
obj_surface->flags &= ~SURFACE_REFERENCED;
- i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
-
- /* initial uv component for YUV400 case */
- if (pic_param->seq_fields.bits.chroma_format_idc == 0) {
- unsigned int uv_offset = obj_surface->width * obj_surface->height;
- unsigned int uv_size = obj_surface->width * obj_surface->height / 2;
-
- drm_intel_gem_bo_map_gtt(obj_surface->bo);
- memset(obj_surface->bo->virtual + uv_offset, 0x80, uv_size);
- drm_intel_gem_bo_unmap_gtt(obj_surface->bo);
- }
-
+ avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param);
gen75_mfd_init_avc_surface(ctx, pic_param, obj_surface);
dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
diff --git a/src/gen7_mfd.c b/src/gen7_mfd.c
index 9891cee..7f92142 100755
--- a/src/gen7_mfd.c
+++ b/src/gen7_mfd.c
@@ -135,12 +135,16 @@ gen7_mfd_surface_state(VADriverContextP ctx,
struct object_surface *obj_surface = decode_state->render_object;
unsigned int y_cb_offset;
unsigned int y_cr_offset;
+ unsigned int surface_format;
assert(obj_surface);
y_cb_offset = obj_surface->y_cb_offset;
y_cr_offset = obj_surface->y_cr_offset;
+ surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
+ MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
+
BEGIN_BCS_BATCH(batch, 6);
OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
OUT_BCS_BATCH(batch, 0);
@@ -148,7 +152,7 @@ gen7_mfd_surface_state(VADriverContextP ctx,
((obj_surface->orig_height - 1) << 18) |
((obj_surface->orig_width - 1) << 4));
OUT_BCS_BATCH(batch,
- (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
+ (surface_format << 28) | /* 420 planar YUV surface */
((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
(0 << 22) | /* surface object control state, ignored */
((obj_surface->width - 1) << 3) | /* pitch */
@@ -760,18 +764,7 @@ gen7_mfd_avc_decode_init(VADriverContextP ctx,
obj_surface->flags |= SURFACE_REFERENCED;
else
obj_surface->flags &= ~SURFACE_REFERENCED;
- i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
-
- /* initial uv component for YUV400 case */
- if (pic_param->seq_fields.bits.chroma_format_idc == 0) {
- unsigned int uv_offset = obj_surface->width * obj_surface->height;
- unsigned int uv_size = obj_surface->width * obj_surface->height / 2;
-
- drm_intel_gem_bo_map_gtt(obj_surface->bo);
- memset(obj_surface->bo->virtual + uv_offset, 0x80, uv_size);
- drm_intel_gem_bo_unmap_gtt(obj_surface->bo);
- }
-
+ avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param);
gen7_mfd_init_avc_surface(ctx, pic_param, obj_surface);
dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
diff --git a/src/gen8_mfd.c b/src/gen8_mfd.c
index c1f80e4..32731f5 100644
--- a/src/gen8_mfd.c
+++ b/src/gen8_mfd.c
@@ -145,12 +145,16 @@ gen8_mfd_surface_state(VADriverContextP ctx,
struct object_surface *obj_surface = decode_state->render_object;
unsigned int y_cb_offset;
unsigned int y_cr_offset;
+ unsigned int surface_format;
assert(obj_surface);
y_cb_offset = obj_surface->y_cb_offset;
y_cr_offset = obj_surface->y_cr_offset;
+ surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
+ MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
+
BEGIN_BCS_BATCH(batch, 6);
OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
OUT_BCS_BATCH(batch, 0);
@@ -158,7 +162,7 @@ gen8_mfd_surface_state(VADriverContextP ctx,
((obj_surface->orig_height - 1) << 18) |
((obj_surface->orig_width - 1) << 4));
OUT_BCS_BATCH(batch,
- (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
+ (surface_format << 28) | /* 420 planar YUV surface */
((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
(0 << 22) | /* surface object control state, ignored */
((obj_surface->width - 1) << 3) | /* pitch */
@@ -847,18 +851,7 @@ gen8_mfd_avc_decode_init(VADriverContextP ctx,
obj_surface->flags |= SURFACE_REFERENCED;
else
obj_surface->flags &= ~SURFACE_REFERENCED;
- i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
-
- /* initial uv component for YUV400 case */
- if (pic_param->seq_fields.bits.chroma_format_idc == 0) {
- unsigned int uv_offset = obj_surface->width * obj_surface->height;
- unsigned int uv_size = obj_surface->width * obj_surface->height / 2;
-
- drm_intel_gem_bo_map_gtt(obj_surface->bo);
- memset(obj_surface->bo->virtual + uv_offset, 0x80, uv_size);
- drm_intel_gem_bo_unmap_gtt(obj_surface->bo);
- }
-
+ avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param);
gen8_mfd_init_avc_surface(ctx, pic_param, obj_surface);
dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
diff --git a/src/i965_decoder_utils.c b/src/i965_decoder_utils.c
index 525efca..a36fbdb 100644
--- a/src/i965_decoder_utils.c
+++ b/src/i965_decoder_utils.c
@@ -174,6 +174,73 @@ mpeg2_set_reference_surfaces(
}
}
+/* Ensure the supplied VA surface has valid storage for decoding the current picture:
+ pick Y800 or NV12 from chroma_format_idc and decode_state->base.chroma_formats, (re-)allocating if needed */
+VAStatus
+avc_ensure_surface_bo(
+ VADriverContextP ctx,
+ struct decode_state *decode_state,
+ struct object_surface *obj_surface,
+ const VAPictureParameterBufferH264 *pic_param
+)
+{
+ VAStatus va_status;
+ uint32_t hw_fourcc, fourcc, subsample, chroma_format;
+
+ /* Map chroma_format_idc to its natural fourcc/subsampling; only 4:0:0 and 4:2:0 are accepted */
+ switch (pic_param->seq_fields.bits.chroma_format_idc) {
+ case 0: // Grayscale
+ fourcc = VA_FOURCC_Y800;
+ subsample = SUBSAMPLE_YUV400;
+ chroma_format = VA_RT_FORMAT_YUV400;
+ break;
+ case 1: // YUV 4:2:0
+ fourcc = VA_FOURCC_NV12;
+ subsample = SUBSAMPLE_YUV420;
+ chroma_format = VA_RT_FORMAT_YUV420;
+ break;
+ default:
+ return VA_STATUS_ERROR_UNSUPPORTED_RT_FORMAT;
+ }
+
+ /* Use the natural format when decode_state->base.chroma_formats allows it, else try a fallback */
+ if ((decode_state->base.chroma_formats & chroma_format) == chroma_format)
+ hw_fourcc = fourcc;
+ else {
+ hw_fourcc = 0;
+ switch (fourcc) {
+ case VA_FOURCC_Y800: // Fall back to an NV12 surface when only YUV 4:2:0 is allowed
+ if (decode_state->base.chroma_formats & VA_RT_FORMAT_YUV420) {
+ hw_fourcc = VA_FOURCC_NV12;
+ subsample = SUBSAMPLE_YUV420;
+ }
+ break;
+ }
+ }
+ if (!hw_fourcc) /* neither the natural format nor a fallback is allowed */
+ return VA_STATUS_ERROR_UNSUPPORTED_RT_FORMAT;
+
+ /* Drop any existing storage and allocate anew when there is no bo or its fourcc differs */
+ if (!obj_surface->bo || obj_surface->fourcc != hw_fourcc) {
+ i965_destroy_surface_storage(obj_surface);
+ va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1,
+ hw_fourcc, subsample);
+ if (va_status != VA_STATUS_SUCCESS)
+ return va_status;
+ }
+
+ /* Grayscale on top of NV12: fill the chroma area that follows the luma plane with 0x80 */
+ if (fourcc == VA_FOURCC_Y800 && hw_fourcc == VA_FOURCC_NV12) {
+ const uint32_t uv_offset = obj_surface->width * obj_surface->height;
+ const uint32_t uv_size = obj_surface->width * obj_surface->height / 2;
+
+ drm_intel_gem_bo_map_gtt(obj_surface->bo); /* NOTE(review): return value is not checked */
+ memset(obj_surface->bo->virtual + uv_offset, 0x80, uv_size);
+ drm_intel_gem_bo_unmap_gtt(obj_surface->bo);
+ }
+ return VA_STATUS_SUCCESS;
+}
+
/* Generate flat scaling matrices for H.264 decoding */
void
avc_gen_default_iq_matrix(VAIQMatrixBufferH264 *iq_matrix)
@@ -561,8 +628,8 @@ intel_decoder_check_avc_parameter(VADriverContextP ctx,
* sure the store buffer is allocated for this reference
* frame
*/
- va_status = i965_check_alloc_surface_bo(ctx, obj_surface, 1,
- VA_FOURCC_NV12, SUBSAMPLE_YUV420);
+ va_status = avc_ensure_surface_bo(ctx, decode_state, obj_surface,
+ pic_param);
if (va_status != VA_STATUS_SUCCESS)
return va_status;
}
diff --git a/src/i965_decoder_utils.h b/src/i965_decoder_utils.h
index b7b72b3..14a45fb 100644
--- a/src/i965_decoder_utils.h
+++ b/src/i965_decoder_utils.h
@@ -43,6 +43,14 @@ mpeg2_set_reference_surfaces(
VAPictureParameterBufferMPEG2 *pic_param
);
+VAStatus
+avc_ensure_surface_bo(
+ VADriverContextP ctx,
+ struct decode_state *decode_state,
+ struct object_surface *obj_surface,
+ const VAPictureParameterBufferH264 *pic_param
+);
+
void
avc_gen_default_iq_matrix(VAIQMatrixBufferH264 *iq_matrix);
diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c
index 5e1adfc..7dd38c9 100755
--- a/src/i965_drv_video.c
+++ b/src/i965_drv_video.c
@@ -214,6 +214,10 @@ get_subpic_format(const VAImageFormat *va_format)
return NULL;
}
+/* Extra set of chroma formats supported for H.264 decoding (beyond YUV 4:2:0) */
+#define EXTRA_H264_DEC_CHROMA_FORMATS \
+ (VA_RT_FORMAT_YUV400)
+
/* Extra set of chroma formats supported for JPEG decoding (beyond YUV 4:2:0) */
#define EXTRA_JPEG_DEC_CHROMA_FORMATS \
(VA_RT_FORMAT_YUV411 | VA_RT_FORMAT_YUV422 | VA_RT_FORMAT_YUV444)
@@ -257,6 +261,8 @@ static struct hw_codec_info gen6_hw_codec_info = {
.max_width = 2048,
.max_height = 2048,
+ .h264_dec_chroma_formats = EXTRA_H264_DEC_CHROMA_FORMATS,
+
.has_mpeg2_decoding = 1,
.has_h264_decoding = 1,
.has_h264_encoding = 1,
@@ -282,6 +288,7 @@ static struct hw_codec_info gen7_hw_codec_info = {
.max_width = 4096,
.max_height = 4096,
+ .h264_dec_chroma_formats = EXTRA_H264_DEC_CHROMA_FORMATS,
.jpeg_dec_chroma_formats = EXTRA_JPEG_DEC_CHROMA_FORMATS,
.has_mpeg2_decoding = 1,
@@ -311,6 +318,7 @@ static struct hw_codec_info gen75_hw_codec_info = {
.max_width = 4096,
.max_height = 4096,
+ .h264_dec_chroma_formats = EXTRA_H264_DEC_CHROMA_FORMATS,
.jpeg_dec_chroma_formats = EXTRA_JPEG_DEC_CHROMA_FORMATS,
.has_mpeg2_decoding = 1,
@@ -344,6 +352,7 @@ static struct hw_codec_info gen8_hw_codec_info = {
.max_width = 4096,
.max_height = 4096,
+ .h264_dec_chroma_formats = EXTRA_H264_DEC_CHROMA_FORMATS,
.jpeg_dec_chroma_formats = EXTRA_JPEG_DEC_CHROMA_FORMATS,
.has_mpeg2_decoding = 1,
@@ -602,6 +611,13 @@ i965_get_default_chroma_formats(VADriverContextP ctx, VAProfile profile,
uint32_t chroma_formats = VA_RT_FORMAT_YUV420;
switch (profile) {
+ case VAProfileH264ConstrainedBaseline:
+ case VAProfileH264Main:
+ case VAProfileH264High:
+ if (HAS_JPEG_DECODING(i965) && entrypoint == VAEntrypointVLD)
+ chroma_formats |= i965->codec_info->h264_dec_chroma_formats;
+ break;
+
case VAProfileJPEGBaseline:
if (HAS_JPEG_DECODING(i965) && entrypoint == VAEntrypointVLD)
chroma_formats |= i965->codec_info->jpeg_dec_chroma_formats;
@@ -817,10 +833,11 @@ VAStatus i965_QueryConfigAttributes(VADriverContextP ctx,
return vaStatus;
}
-static void
-i965_destroy_surface(struct object_heap *heap, struct object_base *obj)
+void
+i965_destroy_surface_storage(struct object_surface *obj_surface)
{
- struct object_surface *obj_surface = (struct object_surface *)obj;
+ if (!obj_surface)
+ return;
dri_bo_unreference(obj_surface->bo);
obj_surface->bo = NULL;
@@ -829,7 +846,14 @@ i965_destroy_surface(struct object_heap *heap, struct object_base *obj)
obj_surface->free_private_data(&obj_surface->private_data);
obj_surface->private_data = NULL;
}
+}
+static void
+i965_destroy_surface(struct object_heap *heap, struct object_base *obj)
+{
+ struct object_surface *obj_surface = (struct object_surface *)obj;
+
+ i965_destroy_surface_storage(obj_surface);
object_heap_free(heap, obj);
}
@@ -1075,6 +1099,7 @@ bpp_1stplane_by_fourcc(unsigned int fourcc)
case VA_FOURCC_YUY2:
return 2;
+ case VA_FOURCC_Y800:
case VA_FOURCC_YV12:
case VA_FOURCC_IMC3:
case VA_FOURCC_IYUV:
@@ -1677,6 +1702,7 @@ i965_CreateContext(VADriverContextP ctx,
struct i965_render_state *render_state = &i965->render_state;
struct object_config *obj_config = CONFIG(config_id);
struct object_context *obj_context = NULL;
+ VAConfigAttrib *attrib;
VAStatus vaStatus = VA_STATUS_SUCCESS;
int contextID;
int i;
@@ -1770,6 +1796,11 @@ i965_CreateContext(VADriverContextP ctx,
}
}
+ attrib = i965_lookup_config_attribute(obj_config, VAConfigAttribRTFormat);
+ if (!attrib)
+ return VA_STATUS_ERROR_INVALID_CONFIG;
+ obj_context->codec_state.base.chroma_formats = attrib->value;
+
/* Error recovery */
if (VA_STATUS_SUCCESS != vaStatus) {
i965_destroy_context(&i965->context_heap, (struct object_base *)obj_context);
@@ -3083,10 +3114,10 @@ i965_check_alloc_surface_bo(VADriverContextP ctx,
obj_surface->cb_cr_pitch = obj_surface->width;
obj_surface->cb_cr_width = 0;
obj_surface->cb_cr_height = 0;
- obj_surface->y_cb_offset = obj_surface->height;
- obj_surface->y_cr_offset = obj_surface->y_cb_offset + ALIGN(obj_surface->cb_cr_height, 32);
+ obj_surface->y_cb_offset = 0;
+ obj_surface->y_cr_offset = 0;
region_width = obj_surface->width;
- region_height = obj_surface->height + ALIGN(obj_surface->cb_cr_height, 32) * 2;
+ region_height = obj_surface->height;
break;
diff --git a/src/i965_drv_video.h b/src/i965_drv_video.h
index c66712f..d902ddb 100644
--- a/src/i965_drv_video.h
+++ b/src/i965_drv_video.h
@@ -101,8 +101,13 @@ struct object_config
#define NUM_SLICES 10
+struct codec_state_base {
+ uint32_t chroma_formats;
+};
+
struct decode_state
{
+ struct codec_state_base base;
struct buffer_store *pic_param;
struct buffer_store **slice_params;
struct buffer_store *iq_matrix;
@@ -122,6 +127,7 @@ struct decode_state
struct encode_state
{
+ struct codec_state_base base;
struct buffer_store *seq_param;
struct buffer_store *pic_param;
struct buffer_store *pic_control;
@@ -152,6 +158,7 @@ struct encode_state
struct proc_state
{
+ struct codec_state_base base;
struct buffer_store *pipeline_param;
VASurfaceID current_render_target;
@@ -163,6 +170,7 @@ struct proc_state
union codec_state
{
+ struct codec_state_base base;
struct decode_state decode;
struct encode_state encode;
struct proc_state proc;
@@ -285,6 +293,7 @@ struct hw_codec_info
int max_width;
int max_height;
+ unsigned int h264_dec_chroma_formats;
unsigned int jpeg_dec_chroma_formats;
unsigned int has_mpeg2_decoding:1;
@@ -421,4 +430,7 @@ extern VAStatus i965_DestroySurfaces(VADriverContextP ctx,
#define I965_SURFACE_MEM_GEM_FLINK 1
#define I965_SURFACE_MEM_DRM_PRIME 2
+void
+i965_destroy_surface_storage(struct object_surface *obj_surface);
+
#endif /* _I965_DRV_VIDEO_H_ */
--
1.9.1
More information about the Libva
mailing list