[Libva] [PATCH] H.264 encoder: add a simple VBR rate control mode

Mark Thompson sw at jkqxz.net
Sun Jan 1 14:28:24 UTC 2017


Signed-off-by: Mark Thompson <sw at jkqxz.net>
---
>From the comment in the code:

// This implements a simple reactive VBR rate controller for single-layer H.264.
// The main idea here is to try to keep the HRD buffer above the target level most of the time,
// so that when a large frame is generated (on a scene change) we have plenty of slack to be
// able to encode it without compromising quality on the following frames.  It is optimistic
// about the complexity of future frames, so after generating a large frame on a significant
// change (particularly whole-screen transitions) it will try to keep the QP at its current
// level unless the HRD buffer bounds force a change to maintain the intended rate.

The primary aim of this is to avoid the problematic behaviour that the CBR rate controller has on scene changes, where the QP can get pushed up by a large amount and compromise the quality of following frames to a very visible degree.

To visualise the effect of it, here is the QP and frame sizes of same sequence encoded with the same parameters with the CBR and VBR RC modes:

<http://ixia.jkqxz.net/~mrt/libva/rc/cbr.svg>
<http://ixia.jkqxz.net/~mrt/libva/rc/vbr.svg>

(The two graphs have identical scales.  The sequence is the first 10000 frames of Big Buck Bunny (which usefully has very varied complexity): 1280x720 at 60fps, target bitrate 2Mbps, HRD buffer 12Mb, 250 frame GOP, 2 B frames, min QP 18, initial QP 32.)

Note in particular how the spikes in QP from the CBR rate controller are mostly avoided (around frames 1600, 3100, 6300, 9100 in the example), and how the VBR mode has much less variation in the QP level.  Also note how the VBR mode often has higher average QP than the CBR mode does, particularly when complexity is decreasing - this is what is lost in the attempt to improve the worst-case behaviour.

Written and tested on gen9; hopefully it works on the older platforms too though I haven't actually tested it.  It only works for single-layer video, I haven't considered multiple-layer video at all - probably it wants some code to at least reject that case, but I don't have any test setup for that so I've avoided it for now.

Thanks,

- Mark


 src/gen6_mfc.c        |  10 ++---
 src/gen6_mfc_common.c | 118 ++++++++++++++++++++++++++++++++++++++++++++++++--
 src/gen75_mfc.c       |  10 ++---
 src/gen8_mfc.c        |  10 ++---
 src/i965_drv_video.c  |   5 ++-
 5 files changed, 133 insertions(+), 20 deletions(-)

diff --git a/src/gen6_mfc.c b/src/gen6_mfc.c
index 8077c14..1765530 100644
--- a/src/gen6_mfc.c
+++ b/src/gen6_mfc.c
@@ -798,7 +798,7 @@ gen6_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
     int qp_mb;
 
     qp_slice = qp;
-    if (rate_control_mode == VA_RC_CBR) {
+    if (rate_control_mode != VA_RC_CQP) {
         qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
         if (encode_state->slice_header_index[slice_index] == 0) {
             pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
@@ -816,7 +816,7 @@ gen6_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
                              pPicParameter,
                              pSliceParameter,
                              encode_state, encoder_context,
-                             (rate_control_mode == VA_RC_CBR), qp_slice, slice_batch);
+                             (rate_control_mode != VA_RC_CQP), qp_slice, slice_batch);
 
     if ( slice_index == 0) 
         intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
@@ -1188,7 +1188,7 @@ gen6_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
     int qp_slice;
 
     qp_slice = qp;
-    if (rate_control_mode == VA_RC_CBR) {
+    if (rate_control_mode != VA_RC_CQP) {
         qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
         if (encode_state->slice_header_index[slice_index] == 0) {
             pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
@@ -1209,7 +1209,7 @@ gen6_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
                              pSliceParameter,
                              encode_state,
                              encoder_context,
-                             (rate_control_mode == VA_RC_CBR),
+                             (rate_control_mode != VA_RC_CQP),
                              qp_slice,
                              slice_batch);
 
@@ -1368,7 +1368,7 @@ gen6_mfc_avc_encode_picture(VADriverContextP ctx,
         /*Programing bcs pipeline*/
         gen6_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context);	//filling the pipeline
         gen6_mfc_run(ctx, encode_state, encoder_context);
-        if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) {
+        if (rate_control_mode == VA_RC_CBR || rate_control_mode == VA_RC_VBR) {
             gen6_mfc_stop(ctx, encode_state, encoder_context, &current_frame_bits_size);
             sts = intel_mfc_brc_postpack(encode_state, encoder_context, current_frame_bits_size);
             if (sts == BRC_NO_HRD_VIOLATION) {
diff --git a/src/gen6_mfc_common.c b/src/gen6_mfc_common.c
index 8907751..95afa36 100644
--- a/src/gen6_mfc_common.c
+++ b/src/gen6_mfc_common.c
@@ -218,9 +218,9 @@ int intel_mfc_update_hrd(struct encode_state *encode_state,
     return BRC_NO_HRD_VIOLATION;
 }
 
-int intel_mfc_brc_postpack(struct encode_state *encode_state,
-                           struct intel_encoder_context *encoder_context,
-                           int frame_bits)
+static int intel_mfc_brc_postpack_cbr(struct encode_state *encode_state,
+                                      struct intel_encoder_context *encoder_context,
+                                      int frame_bits)
 {
     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
     gen6_brc_status sts = BRC_NO_HRD_VIOLATION;
@@ -366,6 +366,116 @@ int intel_mfc_brc_postpack(struct encode_state *encode_state,
     return sts;
 }
 
+static int intel_mfc_brc_postpack_vbr(struct encode_state *encode_state,
+                                      struct intel_encoder_context *encoder_context,
+                                      int frame_bits)
+{
+    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
+    gen6_brc_status sts;
+    VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
+    int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
+    int *qp = mfc_context->brc.qp_prime_y[0];
+    int qp_delta, large_frame_adjustment;
+
+    // This implements a simple reactive VBR rate controller for single-layer H.264.
+    // The main idea here is to try to keep the HRD buffer above the target level most of the time,
+    // so that when a large frame is generated (on a scene change) we have plenty of slack to be
+    // able to encode it without compromising quality on the following frames.  It is optimistic
+    // about the complexity of future frames, so after generating a large frame on a significant
+    // change (particularly whole-screen transitions) it will try to keep the QP at its current
+    // level unless the HRD buffer bounds force a change to maintain the intended rate.
+
+    sts = intel_mfc_update_hrd(encode_state, encoder_context, frame_bits);
+
+    // This adjustment is applied to increase the QP by more than we normally would if a very
+    // large frame is encountered and we are in danger of running out of slack.
+    large_frame_adjustment = rint(2.0 * log(frame_bits / mfc_context->brc.target_frame_size[0][slice_type]));
+
+    if (sts == BRC_UNDERFLOW) {
+        // The frame is far too big and we don't have the bits available to send it, so it will
+        // have to be re-encoded at a higher QP.
+        qp_delta = +2;
+        if (frame_bits > mfc_context->brc.target_frame_size[0][slice_type])
+            qp_delta += large_frame_adjustment;
+    } else if (sts == BRC_OVERFLOW) {
+        // The frame is very small and we are now overflowing the HRD buffer.  Currently this case
+        // does not occur because we ignore overflow in VBR mode.
+        assert(0 && "Overflow in VBR mode");
+    } else if (frame_bits <= mfc_context->brc.target_frame_size[0][slice_type]) {
+        // The frame is smaller than the average size expected for this frame type.
+        if (mfc_context->hrd.current_buffer_fullness[0] >
+            (mfc_context->hrd.target_buffer_fullness[0] + mfc_context->hrd.buffer_size[0]) / 2.0) {
+            // We currently have lots of bits available, so decrease the QP slightly for the next
+            // frame.
+            qp_delta = -1;
+        } else {
+            // The HRD buffer fullness is increasing, so do nothing.  (We may be under the target
+            // level here, but are moving in the right direction.)
+            qp_delta = 0;
+        }
+    } else {
+        // The frame is larger than the average size expected for this frame type.
+        if (mfc_context->hrd.current_buffer_fullness[0] > mfc_context->hrd.target_buffer_fullness[0]) {
+            // We are currently over the target level, so do nothing.
+            qp_delta = 0;
+        } else if (mfc_context->hrd.current_buffer_fullness[0] > mfc_context->hrd.target_buffer_fullness[0] / 2.0) {
+            // We are under the target level, but not critically.  Increase the QP by one step if
+            // continuing like this would underflow soon (currently within one second).
+            if (mfc_context->hrd.current_buffer_fullness[0] /
+                (double)(frame_bits - mfc_context->brc.target_frame_size[0][slice_type] + 1) <
+                ((double)encoder_context->brc.framerate[0].num / (double)encoder_context->brc.framerate[0].den))
+                qp_delta = +1;
+            else
+                qp_delta = 0;
+        } else {
+            // We are a long way under the target level.  Always increase the QP, possibly by a
+            // larger amount dependent on how big the frame we just made actually was.
+            qp_delta = +1 + large_frame_adjustment;
+        }
+    }
+
+    switch (slice_type) {
+    case SLICE_TYPE_I:
+        qp[SLICE_TYPE_I] += qp_delta;
+        qp[SLICE_TYPE_P]  = qp[SLICE_TYPE_I] + BRC_I_P_QP_DIFF;
+        qp[SLICE_TYPE_B]  = qp[SLICE_TYPE_I] + BRC_I_B_QP_DIFF;
+        break;
+    case SLICE_TYPE_P:
+        qp[SLICE_TYPE_P] += qp_delta;
+        qp[SLICE_TYPE_I]  = qp[SLICE_TYPE_P] - BRC_I_P_QP_DIFF;
+        qp[SLICE_TYPE_B]  = qp[SLICE_TYPE_P] + BRC_P_B_QP_DIFF;
+        break;
+    case SLICE_TYPE_B:
+        qp[SLICE_TYPE_B] += qp_delta;
+        qp[SLICE_TYPE_I]  = qp[SLICE_TYPE_B] - BRC_I_B_QP_DIFF;
+        qp[SLICE_TYPE_P]  = qp[SLICE_TYPE_B] - BRC_P_B_QP_DIFF;
+        break;
+    }
+    BRC_CLIP(mfc_context->brc.qp_prime_y[0][SLICE_TYPE_I], (int)encoder_context->brc.min_qp, 51);
+    BRC_CLIP(mfc_context->brc.qp_prime_y[0][SLICE_TYPE_P], (int)encoder_context->brc.min_qp, 51);
+    BRC_CLIP(mfc_context->brc.qp_prime_y[0][SLICE_TYPE_B], (int)encoder_context->brc.min_qp, 51);
+
+    if (sts == BRC_UNDERFLOW && qp[slice_type] == 51)
+        sts = BRC_UNDERFLOW_WITH_MAX_QP;
+    if (sts == BRC_OVERFLOW && qp[slice_type] == encoder_context->brc.min_qp)
+        sts = BRC_OVERFLOW_WITH_MIN_QP;
+
+    return sts;
+}
+
+int intel_mfc_brc_postpack(struct encode_state *encode_state,
+                           struct intel_encoder_context *encoder_context,
+                           int frame_bits)
+{
+    switch (encoder_context->rate_control_mode) {
+    case VA_RC_CBR:
+        return intel_mfc_brc_postpack_cbr(encode_state, encoder_context, frame_bits);
+    case VA_RC_VBR:
+        return intel_mfc_brc_postpack_vbr(encode_state, encoder_context, frame_bits);
+    }
+    assert(0 && "Invalid RC mode");
+}
+
 static void intel_mfc_hrd_context_init(struct encode_state *encode_state,
                                        struct intel_encoder_context *encoder_context)
 {
@@ -425,7 +535,7 @@ void intel_mfc_brc_prepare(struct encode_state *encode_state,
         encoder_context->codec != CODEC_H264_MVC)
         return;
 
-    if (rate_control_mode == VA_RC_CBR) {
+    if (rate_control_mode != VA_RC_CQP) {
         /*Programing bit rate control */
         if (encoder_context->brc.need_reset) {
             intel_mfc_bit_rate_control_context_init(encode_state, encoder_context);
diff --git a/src/gen75_mfc.c b/src/gen75_mfc.c
index 0fbbe76..7b76b99 100644
--- a/src/gen75_mfc.c
+++ b/src/gen75_mfc.c
@@ -1174,7 +1174,7 @@ gen75_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
     int qp_mb;
 
     qp_slice = qp;
-    if (rate_control_mode == VA_RC_CBR) {
+    if (rate_control_mode != VA_RC_CQP) {
         qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
         if (encode_state->slice_header_index[slice_index] == 0) {
             pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
@@ -1192,7 +1192,7 @@ gen75_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
                               pPicParameter,
                               pSliceParameter,
                               encode_state, encoder_context,
-                              (rate_control_mode == VA_RC_CBR), qp_slice, slice_batch);
+                              (rate_control_mode != VA_RC_CQP), qp_slice, slice_batch);
 
     if ( slice_index == 0)
         intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
@@ -1521,7 +1521,7 @@ gen75_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
     int qp_slice;
 
     qp_slice = qp;
-    if (rate_control_mode == VA_RC_CBR) {
+    if (rate_control_mode != VA_RC_CQP) {
         qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
         if (encode_state->slice_header_index[slice_index] == 0) {
             pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
@@ -1540,7 +1540,7 @@ gen75_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
                               pSliceParameter,
                               encode_state,
                               encoder_context,
-                              (rate_control_mode == VA_RC_CBR),
+                              (rate_control_mode != VA_RC_CQP),
                               qp_slice,
                               slice_batch);
 
@@ -1702,7 +1702,7 @@ gen75_mfc_avc_encode_picture(VADriverContextP ctx,
         /*Programing bcs pipeline*/
         gen75_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context);	//filling the pipeline
         gen75_mfc_run(ctx, encode_state, encoder_context);
-        if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) {
+        if (rate_control_mode == VA_RC_CBR || rate_control_mode == VA_RC_VBR) {
             gen75_mfc_stop(ctx, encode_state, encoder_context, &current_frame_bits_size);
             sts = intel_mfc_brc_postpack(encode_state, encoder_context, current_frame_bits_size);
             if (sts == BRC_NO_HRD_VIOLATION) {
diff --git a/src/gen8_mfc.c b/src/gen8_mfc.c
index 90119d7..8e68c7c 100644
--- a/src/gen8_mfc.c
+++ b/src/gen8_mfc.c
@@ -1177,7 +1177,7 @@ gen8_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
     int qp_mb;
 
     qp_slice = qp;
-    if (rate_control_mode == VA_RC_CBR) {
+    if (rate_control_mode != VA_RC_CQP) {
         qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
         if (encode_state->slice_header_index[slice_index] == 0) {
             pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
@@ -1195,7 +1195,7 @@ gen8_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
                              pPicParameter,
                              pSliceParameter,
                              encode_state, encoder_context,
-                             (rate_control_mode == VA_RC_CBR), qp_slice, slice_batch);
+                             (rate_control_mode != VA_RC_CQP), qp_slice, slice_batch);
 
     if ( slice_index == 0)
         intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
@@ -1534,7 +1534,7 @@ gen8_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
     int qp_slice;
 
     qp_slice = qp;
-    if (rate_control_mode == VA_RC_CBR) {
+    if (rate_control_mode != VA_RC_CQP) {
         qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
         if (encode_state->slice_header_index[slice_index] == 0) {
             pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
@@ -1553,7 +1553,7 @@ gen8_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
                               pSliceParameter,
                               encode_state,
                               encoder_context,
-                              (rate_control_mode == VA_RC_CBR),
+                              (rate_control_mode != VA_RC_CQP),
                               qp_slice,
                               slice_batch);
 
@@ -1729,7 +1729,7 @@ gen8_mfc_avc_encode_picture(VADriverContextP ctx,
         /*Programing bcs pipeline*/
         gen8_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context);	//filling the pipeline
         gen8_mfc_run(ctx, encode_state, encoder_context);
-        if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) {
+        if (rate_control_mode == VA_RC_CBR || rate_control_mode == VA_RC_VBR) {
             gen8_mfc_stop(ctx, encode_state, encoder_context, &current_frame_bits_size);
             sts = intel_mfc_brc_postpack(encode_state, encoder_context, current_frame_bits_size);
             if (sts == BRC_NO_HRD_VIOLATION) {
diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c
index 51a708c..b5e4c17 100644
--- a/src/i965_drv_video.c
+++ b/src/i965_drv_video.c
@@ -936,7 +936,10 @@ i965_GetConfigAttributes(VADriverContextP ctx,
                     profile != VAProfileMPEG2Simple)
                     attrib_list[i].value |= VA_RC_CBR;
 
-                if (profile == VAProfileVP9Profile0)
+                if (profile == VAProfileVP9Profile0 ||
+                    profile == VAProfileH264ConstrainedBaseline ||
+                    profile == VAProfileH264Main ||
+                    profile == VAProfileH264High)
                     attrib_list[i].value |= VA_RC_VBR;
 
                 break;
-- 
2.11.0



More information about the Libva mailing list