[Libva] [PATCH v2 intel-driver 8/8] decoder: h264: optimize support for grayscale surfaces.

Gwenole Beauchesne gb.devel at gmail.com
Wed May 14 21:28:37 PDT 2014


Hi,

2014-05-15 3:34 GMT+02:00 Zhao, Yakui <yakui.zhao at intel.com>:
> On Wed, 2014-05-14 at 07:13 -0600, Gwenole Beauchesne wrote:
>> Optimize support for grayscale surfaces in two aspects: (i) space
>> by only allocating the luminance component ; (ii) speed by avoiding
>> initialization of the (now inexistent) chrominance planes.
>>
>> Keep backward compatibility with older codec layers that only
>> supported YUV 4:2:0 and not grayscale formats properly.
>
> As a whole, I am OK to this version patch except two concerns.
>
>>
>> Signed-off-by: Gwenole Beauchesne <gwenole.beauchesne at intel.com>
>> ---
>>  src/gen6_mfd.c           |    8 ++++++--
>>  src/gen75_mfd.c          |    6 +++++-
>>  src/gen7_mfd.c           |    6 +++++-
>>  src/gen8_mfd.c           |    6 +++++-
>>  src/i965_decoder_utils.c |   23 +++++++++++++++++++----
>>  src/i965_drv_video.c     |   22 ++++++++++++++++++++++
>>  src/i965_drv_video.h     |    9 +++++++++
>>  7 files changed, 71 insertions(+), 9 deletions(-)
>>
>> diff --git a/src/gen6_mfd.c b/src/gen6_mfd.c
>> index 2092f69..f925d98 100755
>> --- a/src/gen6_mfd.c
>> +++ b/src/gen6_mfd.c
>> @@ -130,7 +130,11 @@ gen6_mfd_surface_state(VADriverContextP ctx,
>>  {
>>      struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
>>      struct object_surface *obj_surface = decode_state->render_object;
>> -
>> +    unsigned int surface_format;
>> +
>> +    surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
>> +        MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
>> +
>
> Can it work if it still set the PLANAR_420_8 format for the Y800
> surface?

No, MONO is the only specified and supported format to tell the MFX
engine to disregard the chroma components. It does not seem to care of
the supplied chroma_format_idc field to AVC_IMG_STATE.

>>      BEGIN_BCS_BATCH(batch, 6);
>>      OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
>>      OUT_BCS_BATCH(batch, 0);
>> @@ -138,7 +142,7 @@ gen6_mfd_surface_state(VADriverContextP ctx,
>>                    ((obj_surface->orig_height - 1) << 19) |
>>                    ((obj_surface->orig_width - 1) << 6));
>>      OUT_BCS_BATCH(batch,
>> -                  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
>> +                  (surface_format << 28) | /* 420 planar YUV surface */
>>                    (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
>>                    (0 << 22) | /* surface object control state, FIXME??? */
>>                    ((obj_surface->width - 1) << 3) | /* pitch */
>> diff --git a/src/gen75_mfd.c b/src/gen75_mfd.c
>> index 5b023cf..895b194 100644
>> --- a/src/gen75_mfd.c
>> +++ b/src/gen75_mfd.c
>> @@ -137,12 +137,16 @@ gen75_mfd_surface_state(VADriverContextP ctx,
>>      struct object_surface *obj_surface = decode_state->render_object;
>>      unsigned int y_cb_offset;
>>      unsigned int y_cr_offset;
>> +    unsigned int surface_format;
>>
>>      assert(obj_surface);
>>
>>      y_cb_offset = obj_surface->y_cb_offset;
>>      y_cr_offset = obj_surface->y_cr_offset;
>>
>> +    surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
>> +        MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
>> +
>>      BEGIN_BCS_BATCH(batch, 6);
>>      OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
>>      OUT_BCS_BATCH(batch, 0);
>> @@ -150,7 +154,7 @@ gen75_mfd_surface_state(VADriverContextP ctx,
>>                    ((obj_surface->orig_height - 1) << 18) |
>>                    ((obj_surface->orig_width - 1) << 4));
>>      OUT_BCS_BATCH(batch,
>> -                  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
>> +                  (surface_format << 28) | /* 420 planar YUV surface */
>>                    ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
>>                    (0 << 22) | /* surface object control state, ignored */
>>                    ((obj_surface->width - 1) << 3) | /* pitch */
>> diff --git a/src/gen7_mfd.c b/src/gen7_mfd.c
>> index 70b1cec..2e0d653 100755
>> --- a/src/gen7_mfd.c
>> +++ b/src/gen7_mfd.c
>> @@ -135,12 +135,16 @@ gen7_mfd_surface_state(VADriverContextP ctx,
>>      struct object_surface *obj_surface = decode_state->render_object;
>>      unsigned int y_cb_offset;
>>      unsigned int y_cr_offset;
>> +    unsigned int surface_format;
>>
>>      assert(obj_surface);
>>
>>      y_cb_offset = obj_surface->y_cb_offset;
>>      y_cr_offset = obj_surface->y_cr_offset;
>>
>> +    surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
>> +        MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
>> +
>>      BEGIN_BCS_BATCH(batch, 6);
>>      OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
>>      OUT_BCS_BATCH(batch, 0);
>> @@ -148,7 +152,7 @@ gen7_mfd_surface_state(VADriverContextP ctx,
>>                    ((obj_surface->orig_height - 1) << 18) |
>>                    ((obj_surface->orig_width - 1) << 4));
>>      OUT_BCS_BATCH(batch,
>> -                  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
>> +                  (surface_format << 28) | /* 420 planar YUV surface */
>>                    ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
>>                    (0 << 22) | /* surface object control state, ignored */
>>                    ((obj_surface->width - 1) << 3) | /* pitch */
>> diff --git a/src/gen8_mfd.c b/src/gen8_mfd.c
>> index e3e71fb..10495d8 100644
>> --- a/src/gen8_mfd.c
>> +++ b/src/gen8_mfd.c
>> @@ -145,12 +145,16 @@ gen8_mfd_surface_state(VADriverContextP ctx,
>>      struct object_surface *obj_surface = decode_state->render_object;
>>      unsigned int y_cb_offset;
>>      unsigned int y_cr_offset;
>> +    unsigned int surface_format;
>>
>>      assert(obj_surface);
>>
>>      y_cb_offset = obj_surface->y_cb_offset;
>>      y_cr_offset = obj_surface->y_cr_offset;
>>
>> +    surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
>> +        MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
>> +
>>      BEGIN_BCS_BATCH(batch, 6);
>>      OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
>>      OUT_BCS_BATCH(batch, 0);
>> @@ -158,7 +162,7 @@ gen8_mfd_surface_state(VADriverContextP ctx,
>>                    ((obj_surface->orig_height - 1) << 18) |
>>                    ((obj_surface->orig_width - 1) << 4));
>>      OUT_BCS_BATCH(batch,
>> -                  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
>> +                  (surface_format << 28) | /* 420 planar YUV surface */
>>                    ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
>>                    (0 << 22) | /* surface object control state, ignored */
>>                    ((obj_surface->width - 1) << 3) | /* pitch */
>> diff --git a/src/i965_decoder_utils.c b/src/i965_decoder_utils.c
>> index 6cec08b..ae01d13 100644
>> --- a/src/i965_decoder_utils.c
>> +++ b/src/i965_decoder_utils.c
>> @@ -185,25 +185,40 @@ avc_ensure_surface_bo(
>>  )
>>  {
>>      VAStatus va_status;
>> -    uint32_t hw_fourcc, fourcc, subsample;
>> +    uint32_t hw_fourcc, fourcc, subsample, chroma_format;
>>
>>      /* Validate chroma format */
>>      switch (pic_param->seq_fields.bits.chroma_format_idc) {
>>      case 0: // Grayscale
>>          fourcc = VA_FOURCC_Y800;
>>          subsample = SUBSAMPLE_YUV400;
>> +        chroma_format = VA_RT_FORMAT_YUV400;
>>          break;
>>      case 1: // YUV 4:2:0
>>          fourcc = VA_FOURCC_NV12;
>>          subsample = SUBSAMPLE_YUV420;
>> +        chroma_format = VA_RT_FORMAT_YUV420;
>>          break;
>>      default:
>>          return VA_STATUS_ERROR_UNSUPPORTED_RT_FORMAT;
>>      }
>>
>
> When calling the vaCreateSurfaces to create the surface for the decoding
> purpose, the chroma_format will also be passed. In such case can we use
> the passed chroma_format for the following operation instead of the
> chroma_format derived in the driver?
> That is to say: if the YUV420 is passed, we still allocate the NV12
> surface. If the YUV400 is passed, we can allocate the Y800 surface for
> the decoding.
>
> How do you think?

We used to disregard the user specified format, and defer allocation
until it is actually needed so that to cope with possible misguided
parameters. If we want to trust user provided formats now, we'd need
to review and provide additional patches to cover robustness needs in
other areas not limited to H.264 decoding. This is beyond the scope of
this patch series.

>> -    /* XXX: always allocate NV12 (YUV 4:2:0) surfaces for now */
>> -    hw_fourcc = VA_FOURCC_NV12;
>> -    subsample = SUBSAMPLE_YUV420;
>> +    /* Determine the HW surface format, bound to VA config needs */
>> +    if ((decode_state->base.chroma_formats & chroma_format) == chroma_format)
>> +        hw_fourcc = fourcc;
>> +    else {
>> +        hw_fourcc = 0;
>> +        switch (fourcc) {
>> +        case VA_FOURCC_Y800: // Implement with an NV12 surface
>> +            if (decode_state->base.chroma_formats & VA_RT_FORMAT_YUV420) {
>> +                hw_fourcc = VA_FOURCC_NV12;
>> +                subsample = SUBSAMPLE_YUV420;
>> +            }
>> +            break;
>> +        }
>> +    }
>> +    if (!hw_fourcc)
>> +        return VA_STATUS_ERROR_UNSUPPORTED_RT_FORMAT;
>>
>>      /* (Re-)allocate the underlying surface buffer store, if necessary */
>>      if (!obj_surface->bo || obj_surface->fourcc != hw_fourcc) {
>> diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c
>> index 768469a..686f4e3 100755
>> --- a/src/i965_drv_video.c
>> +++ b/src/i965_drv_video.c
>> @@ -214,6 +214,10 @@ get_subpic_format(const VAImageFormat *va_format)
>>      return NULL;
>>  }
>>
>> +/* Extra set of chroma formats supported for H.264 decoding (beyond YUV 4:2:0) */
>> +#define EXTRA_H264_DEC_CHROMA_FORMATS \
>> +    (VA_RT_FORMAT_YUV400)
>> +
>>  /* Extra set of chroma formats supported for JPEG decoding (beyond YUV 4:2:0) */
>>  #define EXTRA_JPEG_DEC_CHROMA_FORMATS \
>>      (VA_RT_FORMAT_YUV411 | VA_RT_FORMAT_YUV422 | VA_RT_FORMAT_YUV444)
>> @@ -257,6 +261,8 @@ static struct hw_codec_info gen6_hw_codec_info = {
>>      .max_width = 2048,
>>      .max_height = 2048,
>>
>> +    .h264_dec_chroma_formats = EXTRA_H264_DEC_CHROMA_FORMATS,
>> +
>>      .has_mpeg2_decoding = 1,
>>      .has_h264_decoding = 1,
>>      .has_h264_encoding = 1,
>> @@ -282,6 +288,7 @@ static struct hw_codec_info gen7_hw_codec_info = {
>>      .max_width = 4096,
>>      .max_height = 4096,
>>
>> +    .h264_dec_chroma_formats = EXTRA_H264_DEC_CHROMA_FORMATS,
>>      .jpeg_dec_chroma_formats = EXTRA_JPEG_DEC_CHROMA_FORMATS,
>>
>>      .has_mpeg2_decoding = 1,
>> @@ -311,6 +318,7 @@ static struct hw_codec_info gen75_hw_codec_info = {
>>      .max_width = 4096,
>>      .max_height = 4096,
>>
>> +    .h264_dec_chroma_formats = EXTRA_H264_DEC_CHROMA_FORMATS,
>>      .jpeg_dec_chroma_formats = EXTRA_JPEG_DEC_CHROMA_FORMATS,
>>
>>      .has_mpeg2_decoding = 1,
>> @@ -344,6 +352,7 @@ static struct hw_codec_info gen8_hw_codec_info = {
>>      .max_width = 4096,
>>      .max_height = 4096,
>>
>> +    .h264_dec_chroma_formats = EXTRA_H264_DEC_CHROMA_FORMATS,
>>      .jpeg_dec_chroma_formats = EXTRA_JPEG_DEC_CHROMA_FORMATS,
>>
>>      .has_mpeg2_decoding = 1,
>> @@ -602,6 +611,13 @@ i965_get_default_chroma_formats(VADriverContextP ctx, VAProfile profile,
>>      uint32_t chroma_formats = VA_RT_FORMAT_YUV420;
>>
>>      switch (profile) {
>> +    case VAProfileH264ConstrainedBaseline:
>> +    case VAProfileH264Main:
>> +    case VAProfileH264High:
>> +        if (HAS_JPEG_DECODING(i965) && entrypoint == VAEntrypointVLD)
>> +            chroma_formats |= i965->codec_info->h264_dec_chroma_formats;
>> +        break;
>> +
>>      case VAProfileJPEGBaseline:
>>          if (HAS_JPEG_DECODING(i965) && entrypoint == VAEntrypointVLD)
>>              chroma_formats |= i965->codec_info->jpeg_dec_chroma_formats;
>> @@ -1686,6 +1702,7 @@ i965_CreateContext(VADriverContextP ctx,
>>      struct i965_render_state *render_state = &i965->render_state;
>>      struct object_config *obj_config = CONFIG(config_id);
>>      struct object_context *obj_context = NULL;
>> +    VAConfigAttrib *attrib;
>>      VAStatus vaStatus = VA_STATUS_SUCCESS;
>>      int contextID;
>>      int i;
>> @@ -1779,6 +1796,11 @@ i965_CreateContext(VADriverContextP ctx,
>>          }
>>      }
>>
>> +    attrib = i965_lookup_config_attribute(obj_config, VAConfigAttribRTFormat);
>> +    if (!attrib)
>> +        return VA_STATUS_ERROR_INVALID_CONFIG;
>> +    obj_context->codec_state.base.chroma_formats = attrib->value;
>> +
>>      /* Error recovery */
>>      if (VA_STATUS_SUCCESS != vaStatus) {
>>          i965_destroy_context(&i965->context_heap, (struct object_base *)obj_context);
>> diff --git a/src/i965_drv_video.h b/src/i965_drv_video.h
>> index e70852b..e14dcf8 100644
>> --- a/src/i965_drv_video.h
>> +++ b/src/i965_drv_video.h
>> @@ -101,8 +101,13 @@ struct object_config
>>
>>  #define NUM_SLICES     10
>>
>> +struct codec_state_base {
>> +    uint32_t chroma_formats;
>> +};
>> +
>>  struct decode_state
>>  {
>> +    struct codec_state_base base;
>>      struct buffer_store *pic_param;
>>      struct buffer_store **slice_params;
>>      struct buffer_store *iq_matrix;
>> @@ -122,6 +127,7 @@ struct decode_state
>>
>>  struct encode_state
>>  {
>> +    struct codec_state_base base;
>>      struct buffer_store *seq_param;
>>      struct buffer_store *pic_param;
>>      struct buffer_store *pic_control;
>> @@ -152,6 +158,7 @@ struct encode_state
>>
>>  struct proc_state
>>  {
>> +    struct codec_state_base base;
>>      struct buffer_store *pipeline_param;
>>
>>      VASurfaceID current_render_target;
>> @@ -163,6 +170,7 @@ struct proc_state
>>
>>  union codec_state
>>  {
>> +    struct codec_state_base base;
>>      struct decode_state decode;
>>      struct encode_state encode;
>>      struct proc_state proc;
>> @@ -289,6 +297,7 @@ struct hw_codec_info
>>      int max_width;
>>      int max_height;
>>
>> +    unsigned int h264_dec_chroma_formats;
>>      unsigned int jpeg_dec_chroma_formats;
>>
>>      unsigned int has_mpeg2_decoding:1;
>
>


More information about the Libva mailing list