[Libva] [PATCH] H.264 encoder: add a simple VBR rate control mode

Xiang, Haihao haihao.xiang at intel.com
Fri Jan 6 07:09:50 UTC 2017


> Signed-off-by: Mark Thompson <sw at jkqxz.net>
> ---
> From the comment in the code:
> 
> // This implements a simple reactive VBR rate controller for single-layer
> H.264.
> // The main idea here is to try to keep the HRD buffer above the target level
> most of the time,
> // so that when a large frame is generated (on a scene change) we have plenty
> of slack to be
> // able to encode it without compromising quality on the following frames.  It
> is optimistic
> // about the complexity of future frames, so after generating a large frame on
> a significant
> // change (particularly whole-screen transitions) it will try to keep the QP
> at its current
> // level unless the HRD buffer bounds force a change to maintain the intended
> rate.
> 
> The primary aim of this is to avoid the problematic behaviour that the CBR
> rate controller has on scene changes, where the QP can get pushed up by a
> large amount and compromise the quality of following frames to a very visible
> degree.
> 
> To visualise the effect of it, here is the QP and frame sizes of same sequence
> encoded with the same parameters with the CBR and VBR RC modes:
> 
> <http://ixia.jkqxz.net/~mrt/libva/rc/cbr.svg>
> <http://ixia.jkqxz.net/~mrt/libva/rc/vbr.svg>
> 
> (The two graphs have identical scales.  The sequence is the first 10000 frames
> of Big Buck Bunny (which usefully has very varied complexity): 1280x720 at
> 60fps, target bitrate 2Mbps, HRD buffer 12Mb, 250 frame GOP, 2 B frames, min
> QP 18, initial QP 32.)
> 
> Note in particular how the spikes in QP from the CBR rate controller are
> mostly avoided (around frames 1600, 3100, 6300, 9100 in the example), and how
> the VBR mode has much less variation in the QP level.  Also note how the VBR
> mode often has higher average QP than the CBR mode does, particularly when
> complexity is decreasing - this is what is lost in the attempt to improve the
> worst-case behaviour.
> 
> Written and tested on gen9; hopefully it works on the older platforms too
> though I haven't actually tested it.  It only works for single-layer video, I
> haven't considered multiple-layer video at all - probably it wants some code
> to at least reject that case, but I don't have any test setup for that so I've
> avoided it for now.

Could you add the above message to your commit log? I don't see it in the
commit message after applying this patch to my local branch.

The expected target bitrate for VBR is target_percentage percent of
bits_per_second, i.e. (bits_per_second * target_percentage / 100). For
example, for VP9:

vp9_state->target_bit_rate = vp9_state->max_bit_rate * encoder_context->brc.target_percentage[0] / 100

We should keep the same behavior for all codecs.

Thanks
Haihao


> 
> Thanks,
> 
> - Mark
> 
> 
>  src/gen6_mfc.c        |  10 ++---
>  src/gen6_mfc_common.c | 118 ++++++++++++++++++++++++++++++++++++++++++++++++-
> -
>  src/gen75_mfc.c       |  10 ++---
>  src/gen8_mfc.c        |  10 ++---
>  src/i965_drv_video.c  |   5 ++-
>  5 files changed, 133 insertions(+), 20 deletions(-)
> 
> diff --git a/src/gen6_mfc.c b/src/gen6_mfc.c
> index 8077c14..1765530 100644
> --- a/src/gen6_mfc.c
> +++ b/src/gen6_mfc.c
> @@ -798,7 +798,7 @@ gen6_mfc_avc_pipeline_slice_programing(VADriverContextP
> ctx,
>      int qp_mb;
>  
>      qp_slice = qp;
> -    if (rate_control_mode == VA_RC_CBR) {
> +    if (rate_control_mode != VA_RC_CQP) {
>          qp = mfc_context->brc.qp_prime_y[encoder_context-
> >layer.curr_frame_layer_id][slice_type];
>          if (encode_state->slice_header_index[slice_index] == 0) {
>              pSliceParameter->slice_qp_delta = qp - pPicParameter-
> >pic_init_qp;
> @@ -816,7 +816,7 @@ gen6_mfc_avc_pipeline_slice_programing(VADriverContextP
> ctx,
>                               pPicParameter,
>                               pSliceParameter,
>                               encode_state, encoder_context,
> -                             (rate_control_mode == VA_RC_CBR), qp_slice,
> slice_batch);
> +                             (rate_control_mode != VA_RC_CQP), qp_slice,
> slice_batch);
>  
>      if ( slice_index == 0) 
>          intel_mfc_avc_pipeline_header_programing(ctx, encode_state,
> encoder_context, slice_batch);
> @@ -1188,7 +1188,7 @@ gen6_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
>      int qp_slice;
>  
>      qp_slice = qp;
> -    if (rate_control_mode == VA_RC_CBR) {
> +    if (rate_control_mode != VA_RC_CQP) {
>          qp = mfc_context->brc.qp_prime_y[encoder_context-
> >layer.curr_frame_layer_id][slice_type];
>          if (encode_state->slice_header_index[slice_index] == 0) {
>              pSliceParameter->slice_qp_delta = qp - pPicParameter-
> >pic_init_qp;
> @@ -1209,7 +1209,7 @@ gen6_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
>                               pSliceParameter,
>                               encode_state,
>                               encoder_context,
> -                             (rate_control_mode == VA_RC_CBR),
> +                             (rate_control_mode != VA_RC_CQP),
>                               qp_slice,
>                               slice_batch);
>  
> @@ -1368,7 +1368,7 @@ gen6_mfc_avc_encode_picture(VADriverContextP ctx,
>          /*Programing bcs pipeline*/
>          gen6_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context);
> 	//filling the pipeline
>          gen6_mfc_run(ctx, encode_state, encoder_context);
> -        if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode ==
> VA_RC_VBR*/) {
> +        if (rate_control_mode == VA_RC_CBR || rate_control_mode == VA_RC_VBR)
> {
>              gen6_mfc_stop(ctx, encode_state, encoder_context,
> &current_frame_bits_size);
>              sts = intel_mfc_brc_postpack(encode_state, encoder_context,
> current_frame_bits_size);
>              if (sts == BRC_NO_HRD_VIOLATION) {
> diff --git a/src/gen6_mfc_common.c b/src/gen6_mfc_common.c
> index 8907751..95afa36 100644
> --- a/src/gen6_mfc_common.c
> +++ b/src/gen6_mfc_common.c
> @@ -218,9 +218,9 @@ int intel_mfc_update_hrd(struct encode_state
> *encode_state,
>      return BRC_NO_HRD_VIOLATION;
>  }
>  
> -int intel_mfc_brc_postpack(struct encode_state *encode_state,
> -                           struct intel_encoder_context *encoder_context,
> -                           int frame_bits)
> +static int intel_mfc_brc_postpack_cbr(struct encode_state *encode_state,
> +                                      struct intel_encoder_context
> *encoder_context,
> +                                      int frame_bits)
>  {
>      struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
>      gen6_brc_status sts = BRC_NO_HRD_VIOLATION;
> @@ -366,6 +366,116 @@ int intel_mfc_brc_postpack(struct encode_state
> *encode_state,
>      return sts;
>  }
>  
> +static int intel_mfc_brc_postpack_vbr(struct encode_state *encode_state,
> +                                      struct intel_encoder_context
> *encoder_context,
> +                                      int frame_bits)
> +{
> +    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
> +    gen6_brc_status sts;
> +    VAEncSliceParameterBufferH264 *pSliceParameter =
> (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
> +    int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter-
> >slice_type);
> +    int *qp = mfc_context->brc.qp_prime_y[0];
> +    int qp_delta, large_frame_adjustment;
> +
> +    // This implements a simple reactive VBR rate controller for single-layer 
> H.264.
> +    // The main idea here is to try to keep the HRD buffer above the target
> level most of the time,
> +    // so that when a large frame is generated (on a scene change) we have
> plenty of slack to be
> +    // able to encode it without compromising quality on the following
> frames.  It is optimistic
> +    // about the complexity of future frames, so after generating a large
> frame on a significant
> +    // change (particularly whole-screen transitions) it will try to keep the
> QP at its current
> +    // level unless the HRD buffer bounds force a change to maintain the
> intended rate.
> +
> +    sts = intel_mfc_update_hrd(encode_state, encoder_context, frame_bits);
> +
> +    // This adjustment is applied to increase the QP by more than we normally
> would if a very
> +    // large frame is encountered and we are in danger of running out of
> slack.
> +    large_frame_adjustment = rint(2.0 * log(frame_bits / mfc_context-
> >brc.target_frame_size[0][slice_type]));
> +
> +    if (sts == BRC_UNDERFLOW) {
> +        // The frame is far too big and we don't have the bits available to
> send it, so it will
> +        // have to be re-encoded at a higher QP.
> +        qp_delta = +2;
> +        if (frame_bits > mfc_context->brc.target_frame_size[0][slice_type])
> +            qp_delta += large_frame_adjustment;
> +    } else if (sts == BRC_OVERFLOW) {
> +        // The frame is very small and we are now overflowing the HRD
> buffer.  Currently this case
> +        // does not occur because we ignore overflow in VBR mode.
> +        assert(0 && "Overflow in VBR mode");
> +    } else if (frame_bits <= mfc_context-
> >brc.target_frame_size[0][slice_type]) {
> +        // The frame is smaller than the average size expected for this frame
> type.
> +        if (mfc_context->hrd.current_buffer_fullness[0] >
> +            (mfc_context->hrd.target_buffer_fullness[0] + mfc_context-
> >hrd.buffer_size[0]) / 2.0) {
> +            // We currently have lots of bits available, so decrease the QP
> slightly for the next
> +            // frame.
> +            qp_delta = -1;
> +        } else {
> +            // The HRD buffer fullness is increasing, so do nothing.  (We may
> be under the target
> +            // level here, but are moving in the right direction.)
> +            qp_delta = 0;
> +        }
> +    } else {
> +        // The frame is larger than the average size expected for this frame
> type.
> +        if (mfc_context->hrd.current_buffer_fullness[0] > mfc_context-
> >hrd.target_buffer_fullness[0]) {
> +            // We are currently over the target level, so do nothing.
> +            qp_delta = 0;
> +        } else if (mfc_context->hrd.current_buffer_fullness[0] > mfc_context-
> >hrd.target_buffer_fullness[0] / 2.0) {
> +            // We are under the target level, but not critically.  Increase
> the QP by one step if
> +            // continuing like this would underflow soon (currently within
> one second).
> +            if (mfc_context->hrd.current_buffer_fullness[0] /
> +                (double)(frame_bits - mfc_context-
> >brc.target_frame_size[0][slice_type] + 1) <
> +                ((double)encoder_context->brc.framerate[0].num /
> (double)encoder_context->brc.framerate[0].den))
> +                qp_delta = +1;
> +            else
> +                qp_delta = 0;
> +        } else {
> +            // We are a long way under the target level.  Always increase the
> QP, possibly by a
> +            // larger amount dependent on how big the frame we just made
> actually was.
> +            qp_delta = +1 + large_frame_adjustment;
> +        }
> +    }
> +
> +    switch (slice_type) {
> +    case SLICE_TYPE_I:
> +        qp[SLICE_TYPE_I] += qp_delta;
> +        qp[SLICE_TYPE_P]  = qp[SLICE_TYPE_I] + BRC_I_P_QP_DIFF;
> +        qp[SLICE_TYPE_B]  = qp[SLICE_TYPE_I] + BRC_I_B_QP_DIFF;
> +        break;
> +    case SLICE_TYPE_P:
> +        qp[SLICE_TYPE_P] += qp_delta;
> +        qp[SLICE_TYPE_I]  = qp[SLICE_TYPE_P] - BRC_I_P_QP_DIFF;
> +        qp[SLICE_TYPE_B]  = qp[SLICE_TYPE_P] + BRC_P_B_QP_DIFF;
> +        break;
> +    case SLICE_TYPE_B:
> +        qp[SLICE_TYPE_B] += qp_delta;
> +        qp[SLICE_TYPE_I]  = qp[SLICE_TYPE_B] - BRC_I_B_QP_DIFF;
> +        qp[SLICE_TYPE_P]  = qp[SLICE_TYPE_B] - BRC_P_B_QP_DIFF;
> +        break;
> +    }
> +    BRC_CLIP(mfc_context->brc.qp_prime_y[0][SLICE_TYPE_I],
> (int)encoder_context->brc.min_qp, 51);
> +    BRC_CLIP(mfc_context->brc.qp_prime_y[0][SLICE_TYPE_P],
> (int)encoder_context->brc.min_qp, 51);
> +    BRC_CLIP(mfc_context->brc.qp_prime_y[0][SLICE_TYPE_B],
> (int)encoder_context->brc.min_qp, 51);
> +
> +    if (sts == BRC_UNDERFLOW && qp[slice_type] == 51)
> +        sts = BRC_UNDERFLOW_WITH_MAX_QP;
> +    if (sts == BRC_OVERFLOW && qp[slice_type] == encoder_context->brc.min_qp)
> +        sts = BRC_OVERFLOW_WITH_MIN_QP;
> +
> +    return sts;
> +}
> +
> +int intel_mfc_brc_postpack(struct encode_state *encode_state,
> +                           struct intel_encoder_context *encoder_context,
> +                           int frame_bits)
> +{
> +    switch (encoder_context->rate_control_mode) {
> +    case VA_RC_CBR:
> +        return intel_mfc_brc_postpack_cbr(encode_state, encoder_context,
> frame_bits);
> +    case VA_RC_VBR:
> +        return intel_mfc_brc_postpack_vbr(encode_state, encoder_context,
> frame_bits);
> +    }
> +    assert(0 && "Invalid RC mode");
> +}
> +
>  static void intel_mfc_hrd_context_init(struct encode_state *encode_state,
>                                         struct intel_encoder_context
> *encoder_context)
>  {
> @@ -425,7 +535,7 @@ void intel_mfc_brc_prepare(struct encode_state
> *encode_state,
>          encoder_context->codec != CODEC_H264_MVC)
>          return;
>  
> -    if (rate_control_mode == VA_RC_CBR) {
> +    if (rate_control_mode != VA_RC_CQP) {
>          /*Programing bit rate control */
>          if (encoder_context->brc.need_reset) {
>              intel_mfc_bit_rate_control_context_init(encode_state,
> encoder_context);
> diff --git a/src/gen75_mfc.c b/src/gen75_mfc.c
> index 0fbbe76..7b76b99 100644
> --- a/src/gen75_mfc.c
> +++ b/src/gen75_mfc.c
> @@ -1174,7 +1174,7 @@ gen75_mfc_avc_pipeline_slice_programing(VADriverContextP
> ctx,
>      int qp_mb;
>  
>      qp_slice = qp;
> -    if (rate_control_mode == VA_RC_CBR) {
> +    if (rate_control_mode != VA_RC_CQP) {
>          qp = mfc_context->brc.qp_prime_y[encoder_context-
> >layer.curr_frame_layer_id][slice_type];
>          if (encode_state->slice_header_index[slice_index] == 0) {
>              pSliceParameter->slice_qp_delta = qp - pPicParameter-
> >pic_init_qp;
> @@ -1192,7 +1192,7 @@ gen75_mfc_avc_pipeline_slice_programing(VADriverContextP
> ctx,
>                                pPicParameter,
>                                pSliceParameter,
>                                encode_state, encoder_context,
> -                              (rate_control_mode == VA_RC_CBR), qp_slice,
> slice_batch);
> +                              (rate_control_mode != VA_RC_CQP), qp_slice,
> slice_batch);
>  
>      if ( slice_index == 0)
>          intel_mfc_avc_pipeline_header_programing(ctx, encode_state,
> encoder_context, slice_batch);
> @@ -1521,7 +1521,7 @@ gen75_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
>      int qp_slice;
>  
>      qp_slice = qp;
> -    if (rate_control_mode == VA_RC_CBR) {
> +    if (rate_control_mode != VA_RC_CQP) {
>          qp = mfc_context->brc.qp_prime_y[encoder_context-
> >layer.curr_frame_layer_id][slice_type];
>          if (encode_state->slice_header_index[slice_index] == 0) {
>              pSliceParameter->slice_qp_delta = qp - pPicParameter-
> >pic_init_qp;
> @@ -1540,7 +1540,7 @@ gen75_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
>                                pSliceParameter,
>                                encode_state,
>                                encoder_context,
> -                              (rate_control_mode == VA_RC_CBR),
> +                              (rate_control_mode != VA_RC_CQP),
>                                qp_slice,
>                                slice_batch);
>  
> @@ -1702,7 +1702,7 @@ gen75_mfc_avc_encode_picture(VADriverContextP ctx,
>          /*Programing bcs pipeline*/
>          gen75_mfc_avc_pipeline_programing(ctx, encode_state,
> encoder_context);	//filling the pipeline
>          gen75_mfc_run(ctx, encode_state, encoder_context);
> -        if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode ==
> VA_RC_VBR*/) {
> +        if (rate_control_mode == VA_RC_CBR || rate_control_mode == VA_RC_VBR)
> {
>              gen75_mfc_stop(ctx, encode_state, encoder_context,
> &current_frame_bits_size);
>              sts = intel_mfc_brc_postpack(encode_state, encoder_context,
> current_frame_bits_size);
>              if (sts == BRC_NO_HRD_VIOLATION) {
> diff --git a/src/gen8_mfc.c b/src/gen8_mfc.c
> index 90119d7..8e68c7c 100644
> --- a/src/gen8_mfc.c
> +++ b/src/gen8_mfc.c
> @@ -1177,7 +1177,7 @@ gen8_mfc_avc_pipeline_slice_programing(VADriverContextP
> ctx,
>      int qp_mb;
>  
>      qp_slice = qp;
> -    if (rate_control_mode == VA_RC_CBR) {
> +    if (rate_control_mode != VA_RC_CQP) {
>          qp = mfc_context->brc.qp_prime_y[encoder_context-
> >layer.curr_frame_layer_id][slice_type];
>          if (encode_state->slice_header_index[slice_index] == 0) {
>              pSliceParameter->slice_qp_delta = qp - pPicParameter-
> >pic_init_qp;
> @@ -1195,7 +1195,7 @@ gen8_mfc_avc_pipeline_slice_programing(VADriverContextP
> ctx,
>                               pPicParameter,
>                               pSliceParameter,
>                               encode_state, encoder_context,
> -                             (rate_control_mode == VA_RC_CBR), qp_slice,
> slice_batch);
> +                             (rate_control_mode != VA_RC_CQP), qp_slice,
> slice_batch);
>  
>      if ( slice_index == 0)
>          intel_mfc_avc_pipeline_header_programing(ctx, encode_state,
> encoder_context, slice_batch);
> @@ -1534,7 +1534,7 @@ gen8_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
>      int qp_slice;
>  
>      qp_slice = qp;
> -    if (rate_control_mode == VA_RC_CBR) {
> +    if (rate_control_mode != VA_RC_CQP) {
>          qp = mfc_context->brc.qp_prime_y[encoder_context-
> >layer.curr_frame_layer_id][slice_type];
>          if (encode_state->slice_header_index[slice_index] == 0) {
>              pSliceParameter->slice_qp_delta = qp - pPicParameter-
> >pic_init_qp;
> @@ -1553,7 +1553,7 @@ gen8_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
>                                pSliceParameter,
>                                encode_state,
>                                encoder_context,
> -                              (rate_control_mode == VA_RC_CBR),
> +                              (rate_control_mode != VA_RC_CQP),
>                                qp_slice,
>                                slice_batch);
>  
> @@ -1729,7 +1729,7 @@ gen8_mfc_avc_encode_picture(VADriverContextP ctx,
>          /*Programing bcs pipeline*/
>          gen8_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context);
> 	//filling the pipeline
>          gen8_mfc_run(ctx, encode_state, encoder_context);
> -        if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode ==
> VA_RC_VBR*/) {
> +        if (rate_control_mode == VA_RC_CBR || rate_control_mode == VA_RC_VBR)
> {
>              gen8_mfc_stop(ctx, encode_state, encoder_context,
> &current_frame_bits_size);
>              sts = intel_mfc_brc_postpack(encode_state, encoder_context,
> current_frame_bits_size);
>              if (sts == BRC_NO_HRD_VIOLATION) {
> diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c
> index 51a708c..b5e4c17 100644
> --- a/src/i965_drv_video.c
> +++ b/src/i965_drv_video.c
> @@ -936,7 +936,10 @@ i965_GetConfigAttributes(VADriverContextP ctx,
>                      profile != VAProfileMPEG2Simple)
>                      attrib_list[i].value |= VA_RC_CBR;
>  
> -                if (profile == VAProfileVP9Profile0)
> +                if (profile == VAProfileVP9Profile0 ||
> +                    profile == VAProfileH264ConstrainedBaseline ||
> +                    profile == VAProfileH264Main ||
> +                    profile == VAProfileH264High)
>                      attrib_list[i].value |= VA_RC_VBR;
>  
>                  break;


More information about the Libva mailing list