[Libva] [PATCH v2 intel-driver 8/8] decoder: h264: optimize support for grayscale surfaces.
Zhao, Yakui
yakui.zhao at intel.com
Wed May 14 18:34:21 PDT 2014
On Wed, 2014-05-14 at 07:13 -0600, Gwenole Beauchesne wrote:
> Optimize support for grayscale surfaces in two aspects: (i) space
> by only allocating the luminance component ; (ii) speed by avoiding
> initialization of the (now inexistent) chrominance planes.
>
> Keep backward compatibility with older codec layers that only
> supported YUV 4:2:0 and not grayscale formats properly.
As a whole, I am OK to this version patch except two concerns.
>
> Signed-off-by: Gwenole Beauchesne <gwenole.beauchesne at intel.com>
> ---
> src/gen6_mfd.c | 8 ++++++--
> src/gen75_mfd.c | 6 +++++-
> src/gen7_mfd.c | 6 +++++-
> src/gen8_mfd.c | 6 +++++-
> src/i965_decoder_utils.c | 23 +++++++++++++++++++----
> src/i965_drv_video.c | 22 ++++++++++++++++++++++
> src/i965_drv_video.h | 9 +++++++++
> 7 files changed, 71 insertions(+), 9 deletions(-)
>
> diff --git a/src/gen6_mfd.c b/src/gen6_mfd.c
> index 2092f69..f925d98 100755
> --- a/src/gen6_mfd.c
> +++ b/src/gen6_mfd.c
> @@ -130,7 +130,11 @@ gen6_mfd_surface_state(VADriverContextP ctx,
> {
> struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
> struct object_surface *obj_surface = decode_state->render_object;
> -
> + unsigned int surface_format;
> +
> + surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
> + MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
> +
Can it work if it still set the PLANAR_420_8 format for the Y800
surface?
If yes, I suggest that this is not updated as the original code follows
the spec.
> BEGIN_BCS_BATCH(batch, 6);
> OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
> OUT_BCS_BATCH(batch, 0);
> @@ -138,7 +142,7 @@ gen6_mfd_surface_state(VADriverContextP ctx,
> ((obj_surface->orig_height - 1) << 19) |
> ((obj_surface->orig_width - 1) << 6));
> OUT_BCS_BATCH(batch,
> - (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
> + (surface_format << 28) | /* 420 planar YUV surface */
> (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
> (0 << 22) | /* surface object control state, FIXME??? */
> ((obj_surface->width - 1) << 3) | /* pitch */
> diff --git a/src/gen75_mfd.c b/src/gen75_mfd.c
> index 5b023cf..895b194 100644
> --- a/src/gen75_mfd.c
> +++ b/src/gen75_mfd.c
> @@ -137,12 +137,16 @@ gen75_mfd_surface_state(VADriverContextP ctx,
> struct object_surface *obj_surface = decode_state->render_object;
> unsigned int y_cb_offset;
> unsigned int y_cr_offset;
> + unsigned int surface_format;
>
> assert(obj_surface);
>
> y_cb_offset = obj_surface->y_cb_offset;
> y_cr_offset = obj_surface->y_cr_offset;
>
> + surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
> + MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
> +
> BEGIN_BCS_BATCH(batch, 6);
> OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
> OUT_BCS_BATCH(batch, 0);
> @@ -150,7 +154,7 @@ gen75_mfd_surface_state(VADriverContextP ctx,
> ((obj_surface->orig_height - 1) << 18) |
> ((obj_surface->orig_width - 1) << 4));
> OUT_BCS_BATCH(batch,
> - (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
> + (surface_format << 28) | /* 420 planar YUV surface */
> ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
> (0 << 22) | /* surface object control state, ignored */
> ((obj_surface->width - 1) << 3) | /* pitch */
> diff --git a/src/gen7_mfd.c b/src/gen7_mfd.c
> index 70b1cec..2e0d653 100755
> --- a/src/gen7_mfd.c
> +++ b/src/gen7_mfd.c
> @@ -135,12 +135,16 @@ gen7_mfd_surface_state(VADriverContextP ctx,
> struct object_surface *obj_surface = decode_state->render_object;
> unsigned int y_cb_offset;
> unsigned int y_cr_offset;
> + unsigned int surface_format;
>
> assert(obj_surface);
>
> y_cb_offset = obj_surface->y_cb_offset;
> y_cr_offset = obj_surface->y_cr_offset;
>
> + surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
> + MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
> +
> BEGIN_BCS_BATCH(batch, 6);
> OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
> OUT_BCS_BATCH(batch, 0);
> @@ -148,7 +152,7 @@ gen7_mfd_surface_state(VADriverContextP ctx,
> ((obj_surface->orig_height - 1) << 18) |
> ((obj_surface->orig_width - 1) << 4));
> OUT_BCS_BATCH(batch,
> - (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
> + (surface_format << 28) | /* 420 planar YUV surface */
> ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
> (0 << 22) | /* surface object control state, ignored */
> ((obj_surface->width - 1) << 3) | /* pitch */
> diff --git a/src/gen8_mfd.c b/src/gen8_mfd.c
> index e3e71fb..10495d8 100644
> --- a/src/gen8_mfd.c
> +++ b/src/gen8_mfd.c
> @@ -145,12 +145,16 @@ gen8_mfd_surface_state(VADriverContextP ctx,
> struct object_surface *obj_surface = decode_state->render_object;
> unsigned int y_cb_offset;
> unsigned int y_cr_offset;
> + unsigned int surface_format;
>
> assert(obj_surface);
>
> y_cb_offset = obj_surface->y_cb_offset;
> y_cr_offset = obj_surface->y_cr_offset;
>
> + surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
> + MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
> +
> BEGIN_BCS_BATCH(batch, 6);
> OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
> OUT_BCS_BATCH(batch, 0);
> @@ -158,7 +162,7 @@ gen8_mfd_surface_state(VADriverContextP ctx,
> ((obj_surface->orig_height - 1) << 18) |
> ((obj_surface->orig_width - 1) << 4));
> OUT_BCS_BATCH(batch,
> - (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
> + (surface_format << 28) | /* 420 planar YUV surface */
> ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
> (0 << 22) | /* surface object control state, ignored */
> ((obj_surface->width - 1) << 3) | /* pitch */
> diff --git a/src/i965_decoder_utils.c b/src/i965_decoder_utils.c
> index 6cec08b..ae01d13 100644
> --- a/src/i965_decoder_utils.c
> +++ b/src/i965_decoder_utils.c
> @@ -185,25 +185,40 @@ avc_ensure_surface_bo(
> )
> {
> VAStatus va_status;
> - uint32_t hw_fourcc, fourcc, subsample;
> + uint32_t hw_fourcc, fourcc, subsample, chroma_format;
>
> /* Validate chroma format */
> switch (pic_param->seq_fields.bits.chroma_format_idc) {
> case 0: // Grayscale
> fourcc = VA_FOURCC_Y800;
> subsample = SUBSAMPLE_YUV400;
> + chroma_format = VA_RT_FORMAT_YUV400;
> break;
> case 1: // YUV 4:2:0
> fourcc = VA_FOURCC_NV12;
> subsample = SUBSAMPLE_YUV420;
> + chroma_format = VA_RT_FORMAT_YUV420;
> break;
> default:
> return VA_STATUS_ERROR_UNSUPPORTED_RT_FORMAT;
> }
>
When calling the vaCreateSurfaces to create the surface for the decoding
purpose, the chroma_format will also be passed. In such case can we use
the passed chroma_format for the following operation instead of the
chroma_format derived in the driver?
That is to say: if the YUV420 is passed, we still allocate the NV12
surface. If the YUV400 is passed, we can allocate the Y800 surface for
the decoding.
How do you think?
> - /* XXX: always allocate NV12 (YUV 4:2:0) surfaces for now */
> - hw_fourcc = VA_FOURCC_NV12;
> - subsample = SUBSAMPLE_YUV420;
> + /* Determine the HW surface format, bound to VA config needs */
> + if ((decode_state->base.chroma_formats & chroma_format) == chroma_format)
> + hw_fourcc = fourcc;
> + else {
> + hw_fourcc = 0;
> + switch (fourcc) {
> + case VA_FOURCC_Y800: // Implement with an NV12 surface
> + if (decode_state->base.chroma_formats & VA_RT_FORMAT_YUV420) {
> + hw_fourcc = VA_FOURCC_NV12;
> + subsample = SUBSAMPLE_YUV420;
> + }
> + break;
> + }
> + }
> + if (!hw_fourcc)
> + return VA_STATUS_ERROR_UNSUPPORTED_RT_FORMAT;
>
> /* (Re-)allocate the underlying surface buffer store, if necessary */
> if (!obj_surface->bo || obj_surface->fourcc != hw_fourcc) {
> diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c
> index 768469a..686f4e3 100755
> --- a/src/i965_drv_video.c
> +++ b/src/i965_drv_video.c
> @@ -214,6 +214,10 @@ get_subpic_format(const VAImageFormat *va_format)
> return NULL;
> }
>
> +/* Extra set of chroma formats supported for H.264 decoding (beyond YUV 4:2:0) */
> +#define EXTRA_H264_DEC_CHROMA_FORMATS \
> + (VA_RT_FORMAT_YUV400)
> +
> /* Extra set of chroma formats supported for JPEG decoding (beyond YUV 4:2:0) */
> #define EXTRA_JPEG_DEC_CHROMA_FORMATS \
> (VA_RT_FORMAT_YUV411 | VA_RT_FORMAT_YUV422 | VA_RT_FORMAT_YUV444)
> @@ -257,6 +261,8 @@ static struct hw_codec_info gen6_hw_codec_info = {
> .max_width = 2048,
> .max_height = 2048,
>
> + .h264_dec_chroma_formats = EXTRA_H264_DEC_CHROMA_FORMATS,
> +
> .has_mpeg2_decoding = 1,
> .has_h264_decoding = 1,
> .has_h264_encoding = 1,
> @@ -282,6 +288,7 @@ static struct hw_codec_info gen7_hw_codec_info = {
> .max_width = 4096,
> .max_height = 4096,
>
> + .h264_dec_chroma_formats = EXTRA_H264_DEC_CHROMA_FORMATS,
> .jpeg_dec_chroma_formats = EXTRA_JPEG_DEC_CHROMA_FORMATS,
>
> .has_mpeg2_decoding = 1,
> @@ -311,6 +318,7 @@ static struct hw_codec_info gen75_hw_codec_info = {
> .max_width = 4096,
> .max_height = 4096,
>
> + .h264_dec_chroma_formats = EXTRA_H264_DEC_CHROMA_FORMATS,
> .jpeg_dec_chroma_formats = EXTRA_JPEG_DEC_CHROMA_FORMATS,
>
> .has_mpeg2_decoding = 1,
> @@ -344,6 +352,7 @@ static struct hw_codec_info gen8_hw_codec_info = {
> .max_width = 4096,
> .max_height = 4096,
>
> + .h264_dec_chroma_formats = EXTRA_H264_DEC_CHROMA_FORMATS,
> .jpeg_dec_chroma_formats = EXTRA_JPEG_DEC_CHROMA_FORMATS,
>
> .has_mpeg2_decoding = 1,
> @@ -602,6 +611,13 @@ i965_get_default_chroma_formats(VADriverContextP ctx, VAProfile profile,
> uint32_t chroma_formats = VA_RT_FORMAT_YUV420;
>
> switch (profile) {
> + case VAProfileH264ConstrainedBaseline:
> + case VAProfileH264Main:
> + case VAProfileH264High:
> + if (HAS_JPEG_DECODING(i965) && entrypoint == VAEntrypointVLD)
> + chroma_formats |= i965->codec_info->h264_dec_chroma_formats;
> + break;
> +
> case VAProfileJPEGBaseline:
> if (HAS_JPEG_DECODING(i965) && entrypoint == VAEntrypointVLD)
> chroma_formats |= i965->codec_info->jpeg_dec_chroma_formats;
> @@ -1686,6 +1702,7 @@ i965_CreateContext(VADriverContextP ctx,
> struct i965_render_state *render_state = &i965->render_state;
> struct object_config *obj_config = CONFIG(config_id);
> struct object_context *obj_context = NULL;
> + VAConfigAttrib *attrib;
> VAStatus vaStatus = VA_STATUS_SUCCESS;
> int contextID;
> int i;
> @@ -1779,6 +1796,11 @@ i965_CreateContext(VADriverContextP ctx,
> }
> }
>
> + attrib = i965_lookup_config_attribute(obj_config, VAConfigAttribRTFormat);
> + if (!attrib)
> + return VA_STATUS_ERROR_INVALID_CONFIG;
> + obj_context->codec_state.base.chroma_formats = attrib->value;
> +
> /* Error recovery */
> if (VA_STATUS_SUCCESS != vaStatus) {
> i965_destroy_context(&i965->context_heap, (struct object_base *)obj_context);
> diff --git a/src/i965_drv_video.h b/src/i965_drv_video.h
> index e70852b..e14dcf8 100644
> --- a/src/i965_drv_video.h
> +++ b/src/i965_drv_video.h
> @@ -101,8 +101,13 @@ struct object_config
>
> #define NUM_SLICES 10
>
> +struct codec_state_base {
> + uint32_t chroma_formats;
> +};
> +
> struct decode_state
> {
> + struct codec_state_base base;
> struct buffer_store *pic_param;
> struct buffer_store **slice_params;
> struct buffer_store *iq_matrix;
> @@ -122,6 +127,7 @@ struct decode_state
>
> struct encode_state
> {
> + struct codec_state_base base;
> struct buffer_store *seq_param;
> struct buffer_store *pic_param;
> struct buffer_store *pic_control;
> @@ -152,6 +158,7 @@ struct encode_state
>
> struct proc_state
> {
> + struct codec_state_base base;
> struct buffer_store *pipeline_param;
>
> VASurfaceID current_render_target;
> @@ -163,6 +170,7 @@ struct proc_state
>
> union codec_state
> {
> + struct codec_state_base base;
> struct decode_state decode;
> struct encode_state encode;
> struct proc_state proc;
> @@ -289,6 +297,7 @@ struct hw_codec_info
> int max_width;
> int max_height;
>
> + unsigned int h264_dec_chroma_formats;
> unsigned int jpeg_dec_chroma_formats;
>
> unsigned int has_mpeg2_decoding:1;
More information about the Libva
mailing list