[Libva] [PATCH v2 intel-driver 8/8] decoder: h264: optimize support for grayscale surfaces.

Gwenole Beauchesne gb.devel at gmail.com
Thu May 15 01:36:02 PDT 2014


2014-05-15 10:25 GMT+02:00 Xiang, Haihao <haihao.xiang at intel.com>:
> On Wed, 2014-05-14 at 15:13 +0200, Gwenole Beauchesne wrote:
>> Optimize support for grayscale surfaces in two aspects: (i) space
>> by only allocating the luminance component ; (ii) speed by avoiding
>> initialization of the (now inexistent) chrominance planes.
>>
>> Keep backward compatibility with older codec layers that only
>> supported YUV 4:2:0 and not grayscale formats properly.
>>
>> Signed-off-by: Gwenole Beauchesne <gwenole.beauchesne at intel.com>
>> ---
>>  src/gen6_mfd.c           |    8 ++++++--
>>  src/gen75_mfd.c          |    6 +++++-
>>  src/gen7_mfd.c           |    6 +++++-
>>  src/gen8_mfd.c           |    6 +++++-
>>  src/i965_decoder_utils.c |   23 +++++++++++++++++++----
>>  src/i965_drv_video.c     |   22 ++++++++++++++++++++++
>>  src/i965_drv_video.h     |    9 +++++++++
>>  7 files changed, 71 insertions(+), 9 deletions(-)
>>
>> diff --git a/src/gen6_mfd.c b/src/gen6_mfd.c
>> index 2092f69..f925d98 100755
>> --- a/src/gen6_mfd.c
>> +++ b/src/gen6_mfd.c
>> @@ -130,7 +130,11 @@ gen6_mfd_surface_state(VADriverContextP ctx,
>>  {
>>      struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
>>      struct object_surface *obj_surface = decode_state->render_object;
>> -
>> +    unsigned int surface_format;
>> +
>> +    surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
>> +        MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
>> +
>>      BEGIN_BCS_BATCH(batch, 6);
>>      OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
>>      OUT_BCS_BATCH(batch, 0);
>> @@ -138,7 +142,7 @@ gen6_mfd_surface_state(VADriverContextP ctx,
>>                    ((obj_surface->orig_height - 1) << 19) |
>>                    ((obj_surface->orig_width - 1) << 6));
>>      OUT_BCS_BATCH(batch,
>> -                  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
>> +                  (surface_format << 28) | /* 420 planar YUV surface */
>>                    (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
>>                    (0 << 22) | /* surface object control state, FIXME??? */
>>                    ((obj_surface->width - 1) << 3) | /* pitch */
>> diff --git a/src/gen75_mfd.c b/src/gen75_mfd.c
>> index 5b023cf..895b194 100644
>> --- a/src/gen75_mfd.c
>> +++ b/src/gen75_mfd.c
>> @@ -137,12 +137,16 @@ gen75_mfd_surface_state(VADriverContextP ctx,
>>      struct object_surface *obj_surface = decode_state->render_object;
>>      unsigned int y_cb_offset;
>>      unsigned int y_cr_offset;
>> +    unsigned int surface_format;
>>
>>      assert(obj_surface);
>>
>>      y_cb_offset = obj_surface->y_cb_offset;
>>      y_cr_offset = obj_surface->y_cr_offset;
>>
>> +    surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
>> +        MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
>> +
>>      BEGIN_BCS_BATCH(batch, 6);
>>      OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
>>      OUT_BCS_BATCH(batch, 0);
>> @@ -150,7 +154,7 @@ gen75_mfd_surface_state(VADriverContextP ctx,
>>                    ((obj_surface->orig_height - 1) << 18) |
>>                    ((obj_surface->orig_width - 1) << 4));
>>      OUT_BCS_BATCH(batch,
>> -                  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
>> +                  (surface_format << 28) | /* 420 planar YUV surface */
>>                    ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
>>                    (0 << 22) | /* surface object control state, ignored */
>>                    ((obj_surface->width - 1) << 3) | /* pitch */
>> diff --git a/src/gen7_mfd.c b/src/gen7_mfd.c
>> index 70b1cec..2e0d653 100755
>> --- a/src/gen7_mfd.c
>> +++ b/src/gen7_mfd.c
>> @@ -135,12 +135,16 @@ gen7_mfd_surface_state(VADriverContextP ctx,
>>      struct object_surface *obj_surface = decode_state->render_object;
>>      unsigned int y_cb_offset;
>>      unsigned int y_cr_offset;
>> +    unsigned int surface_format;
>>
>>      assert(obj_surface);
>>
>>      y_cb_offset = obj_surface->y_cb_offset;
>>      y_cr_offset = obj_surface->y_cr_offset;
>>
>> +    surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
>> +        MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
>> +
>>      BEGIN_BCS_BATCH(batch, 6);
>>      OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
>>      OUT_BCS_BATCH(batch, 0);
>> @@ -148,7 +152,7 @@ gen7_mfd_surface_state(VADriverContextP ctx,
>>                    ((obj_surface->orig_height - 1) << 18) |
>>                    ((obj_surface->orig_width - 1) << 4));
>>      OUT_BCS_BATCH(batch,
>> -                  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
>> +                  (surface_format << 28) | /* 420 planar YUV surface */
>>                    ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
>>                    (0 << 22) | /* surface object control state, ignored */
>>                    ((obj_surface->width - 1) << 3) | /* pitch */
>> diff --git a/src/gen8_mfd.c b/src/gen8_mfd.c
>> index e3e71fb..10495d8 100644
>> --- a/src/gen8_mfd.c
>> +++ b/src/gen8_mfd.c
>> @@ -145,12 +145,16 @@ gen8_mfd_surface_state(VADriverContextP ctx,
>>      struct object_surface *obj_surface = decode_state->render_object;
>>      unsigned int y_cb_offset;
>>      unsigned int y_cr_offset;
>> +    unsigned int surface_format;
>>
>>      assert(obj_surface);
>>
>>      y_cb_offset = obj_surface->y_cb_offset;
>>      y_cr_offset = obj_surface->y_cr_offset;
>>
>> +    surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
>> +        MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
>> +
>>      BEGIN_BCS_BATCH(batch, 6);
>>      OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
>>      OUT_BCS_BATCH(batch, 0);
>> @@ -158,7 +162,7 @@ gen8_mfd_surface_state(VADriverContextP ctx,
>>                    ((obj_surface->orig_height - 1) << 18) |
>>                    ((obj_surface->orig_width - 1) << 4));
>>      OUT_BCS_BATCH(batch,
>> -                  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
>> +                  (surface_format << 28) | /* 420 planar YUV surface */
>>                    ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
>>                    (0 << 22) | /* surface object control state, ignored */
>>                    ((obj_surface->width - 1) << 3) | /* pitch */
>> diff --git a/src/i965_decoder_utils.c b/src/i965_decoder_utils.c
>> index 6cec08b..ae01d13 100644
>> --- a/src/i965_decoder_utils.c
>> +++ b/src/i965_decoder_utils.c
>> @@ -185,25 +185,40 @@ avc_ensure_surface_bo(
>>  )
>>  {
>>      VAStatus va_status;
>> -    uint32_t hw_fourcc, fourcc, subsample;
>> +    uint32_t hw_fourcc, fourcc, subsample, chroma_format;
>>
>>      /* Validate chroma format */
>>      switch (pic_param->seq_fields.bits.chroma_format_idc) {
>>      case 0: // Grayscale
>>          fourcc = VA_FOURCC_Y800;
>>          subsample = SUBSAMPLE_YUV400;
>> +        chroma_format = VA_RT_FORMAT_YUV400;
>>          break;
>>      case 1: // YUV 4:2:0
>>          fourcc = VA_FOURCC_NV12;
>>          subsample = SUBSAMPLE_YUV420;
>> +        chroma_format = VA_RT_FORMAT_YUV420;
>>          break;
>>      default:
>>          return VA_STATUS_ERROR_UNSUPPORTED_RT_FORMAT;
>>      }
>>
>> -    /* XXX: always allocate NV12 (YUV 4:2:0) surfaces for now */
>> -    hw_fourcc = VA_FOURCC_NV12;
>> -    subsample = SUBSAMPLE_YUV420;
>> +    /* Determine the HW surface format, bound to VA config needs */
>> +    if ((decode_state->base.chroma_formats & chroma_format) == chroma_format)
>> +        hw_fourcc = fourcc;
>> +    else {
>> +        hw_fourcc = 0;
>> +        switch (fourcc) {
>> +        case VA_FOURCC_Y800: // Implement with an NV12 surface
>> +            if (decode_state->base.chroma_formats & VA_RT_FORMAT_YUV420) {
>> +                hw_fourcc = VA_FOURCC_NV12;
>> +                subsample = SUBSAMPLE_YUV420;
>> +            }
>> +            break;
>> +        }
>> +    }
>> +    if (!hw_fourcc)
>> +        return VA_STATUS_ERROR_UNSUPPORTED_RT_FORMAT;
>>
>>      /* (Re-)allocate the underlying surface buffer store, if necessary */
>>      if (!obj_surface->bo || obj_surface->fourcc != hw_fourcc) {
>> diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c
>> index 768469a..686f4e3 100755
>> --- a/src/i965_drv_video.c
>> +++ b/src/i965_drv_video.c
>> @@ -214,6 +214,10 @@ get_subpic_format(const VAImageFormat *va_format)
>>      return NULL;
>>  }
>>
>> +/* Extra set of chroma formats supported for H.264 decoding (beyond YUV 4:2:0) */
>> +#define EXTRA_H264_DEC_CHROMA_FORMATS \
>> +    (VA_RT_FORMAT_YUV400)
>> +
>>  /* Extra set of chroma formats supported for JPEG decoding (beyond YUV 4:2:0) */
>>  #define EXTRA_JPEG_DEC_CHROMA_FORMATS \
>>      (VA_RT_FORMAT_YUV411 | VA_RT_FORMAT_YUV422 | VA_RT_FORMAT_YUV444)
>> @@ -257,6 +261,8 @@ static struct hw_codec_info gen6_hw_codec_info = {
>>      .max_width = 2048,
>>      .max_height = 2048,
>>
>> +    .h264_dec_chroma_formats = EXTRA_H264_DEC_CHROMA_FORMATS,
>> +
>>      .has_mpeg2_decoding = 1,
>>      .has_h264_decoding = 1,
>>      .has_h264_encoding = 1,
>> @@ -282,6 +288,7 @@ static struct hw_codec_info gen7_hw_codec_info = {
>>      .max_width = 4096,
>>      .max_height = 4096,
>>
>> +    .h264_dec_chroma_formats = EXTRA_H264_DEC_CHROMA_FORMATS,
>>      .jpeg_dec_chroma_formats = EXTRA_JPEG_DEC_CHROMA_FORMATS,
>>
>>      .has_mpeg2_decoding = 1,
>> @@ -311,6 +318,7 @@ static struct hw_codec_info gen75_hw_codec_info = {
>>      .max_width = 4096,
>>      .max_height = 4096,
>>
>> +    .h264_dec_chroma_formats = EXTRA_H264_DEC_CHROMA_FORMATS,
>>      .jpeg_dec_chroma_formats = EXTRA_JPEG_DEC_CHROMA_FORMATS,
>>
>>      .has_mpeg2_decoding = 1,
>> @@ -344,6 +352,7 @@ static struct hw_codec_info gen8_hw_codec_info = {
>>      .max_width = 4096,
>>      .max_height = 4096,
>>
>> +    .h264_dec_chroma_formats = EXTRA_H264_DEC_CHROMA_FORMATS,
>>      .jpeg_dec_chroma_formats = EXTRA_JPEG_DEC_CHROMA_FORMATS,
>>
>>      .has_mpeg2_decoding = 1,
>> @@ -602,6 +611,13 @@ i965_get_default_chroma_formats(VADriverContextP ctx, VAProfile profile,
>>      uint32_t chroma_formats = VA_RT_FORMAT_YUV420;
>>
>>      switch (profile) {
>> +    case VAProfileH264ConstrainedBaseline:
>> +    case VAProfileH264Main:
>> +    case VAProfileH264High:
>> +        if (HAS_JPEG_DECODING(i965) && entrypoint == VAEntrypointVLD)
>
> s/JPEG/H264

Thanks for spotting this stupid error. Locally fixed.

Regards,
Gwenole.

>
>> +            chroma_formats |= i965->codec_info->h264_dec_chroma_formats;
>> +        break;
>> +
>>      case VAProfileJPEGBaseline:
>>          if (HAS_JPEG_DECODING(i965) && entrypoint == VAEntrypointVLD)
>>              chroma_formats |= i965->codec_info->jpeg_dec_chroma_formats;
>> @@ -1686,6 +1702,7 @@ i965_CreateContext(VADriverContextP ctx,
>>      struct i965_render_state *render_state = &i965->render_state;
>>      struct object_config *obj_config = CONFIG(config_id);
>>      struct object_context *obj_context = NULL;
>> +    VAConfigAttrib *attrib;
>>      VAStatus vaStatus = VA_STATUS_SUCCESS;
>>      int contextID;
>>      int i;
>> @@ -1779,6 +1796,11 @@ i965_CreateContext(VADriverContextP ctx,
>>          }
>>      }
>>
>> +    attrib = i965_lookup_config_attribute(obj_config, VAConfigAttribRTFormat);
>> +    if (!attrib)
>> +        return VA_STATUS_ERROR_INVALID_CONFIG;
>> +    obj_context->codec_state.base.chroma_formats = attrib->value;
>> +
>>      /* Error recovery */
>>      if (VA_STATUS_SUCCESS != vaStatus) {
>>          i965_destroy_context(&i965->context_heap, (struct object_base *)obj_context);
>> diff --git a/src/i965_drv_video.h b/src/i965_drv_video.h
>> index e70852b..e14dcf8 100644
>> --- a/src/i965_drv_video.h
>> +++ b/src/i965_drv_video.h
>> @@ -101,8 +101,13 @@ struct object_config
>>
>>  #define NUM_SLICES     10
>>
>> +struct codec_state_base {
>> +    uint32_t chroma_formats;
>> +};
>> +
>>  struct decode_state
>>  {
>> +    struct codec_state_base base;
>>      struct buffer_store *pic_param;
>>      struct buffer_store **slice_params;
>>      struct buffer_store *iq_matrix;
>> @@ -122,6 +127,7 @@ struct decode_state
>>
>>  struct encode_state
>>  {
>> +    struct codec_state_base base;
>>      struct buffer_store *seq_param;
>>      struct buffer_store *pic_param;
>>      struct buffer_store *pic_control;
>> @@ -152,6 +158,7 @@ struct encode_state
>>
>>  struct proc_state
>>  {
>> +    struct codec_state_base base;
>>      struct buffer_store *pipeline_param;
>>
>>      VASurfaceID current_render_target;
>> @@ -163,6 +170,7 @@ struct proc_state
>>
>>  union codec_state
>>  {
>> +    struct codec_state_base base;
>>      struct decode_state decode;
>>      struct encode_state encode;
>>      struct proc_state proc;
>> @@ -289,6 +297,7 @@ struct hw_codec_info
>>      int max_width;
>>      int max_height;
>>
>> +    unsigned int h264_dec_chroma_formats;
>>      unsigned int jpeg_dec_chroma_formats;
>>
>>      unsigned int has_mpeg2_decoding:1;
>
>


More information about the Libva mailing list