[Libva] [PATCH v3] H.264 encoder: add a simple reactive VBR rate control mode

Mark Thompson sw at jkqxz.net
Tue Jan 10 00:04:43 UTC 2017


This implements a simple reactive VBR rate control mode for single-layer H.264.
The primary aim here is to avoid the problematic behaviour that the CBR rate
controller displays on scene changes, where the QP can get pushed up by a large
amount in a short period and compromise the quality of following frames to a
very visible degree.

The main idea, then, is to try to keep the HRD buffering above the target level
most of the time, so that when a large frame is generated (on a scene change or
when the stream complexity increases) we have plenty of slack to be able to
encode the more difficult region without compromising quality immediately on
the following frames.  It is optimistic about the complexity of future frames,
so even after generating one or more large frames on a significant change it
will try to keep the QP at its current level until the HRD buffer bounds force
a change to maintain the intended rate.

Compared to the CBR rate controller, it keeps the quality level much more
stable - QP does not always spike up as large frames are generated when the
complexity of the stream increases transiently, but equally it does not reduce
as quickly when the complexity of the stream decreases.

Signed-off-by: Mark Thompson <sw at jkqxz.net>
---
On 09/01/17 05:23, Xiang, Haihao wrote:
>> +    BRC_CLIP(mfc_context->brc.qp_prime_y[0][SLICE_TYPE_I], (int)encoder_context->brc.min_qp, 51);
>> +    BRC_CLIP(mfc_context->brc.qp_prime_y[0][SLICE_TYPE_P], (int)encoder_context->brc.min_qp, 51);
>> +    BRC_CLIP(mfc_context->brc.qp_prime_y[0][SLICE_TYPE_B], (int)encoder_context->brc.min_qp, 51);
> 
> The lower bound is 1 when encoder_context->brc.min_qp is equal to 0.
> 
>> +
>> +    if (sts == BRC_UNDERFLOW && qp[slice_type] == 51)
>> +        sts = BRC_UNDERFLOW_WITH_MAX_QP;
>> +    if (sts == BRC_OVERFLOW && qp[slice_type] == encoder_context->brc.min_qp)
> 
> Same as above

Apologies, I missed updating it to match 33a32935ac9e2622adc5c59045d565b4e5904749.

Fixed in the same way as that patch in this version.

Thanks,

- Mark


 src/gen6_mfc.c        |  10 ++--
 src/gen6_mfc_common.c | 126 ++++++++++++++++++++++++++++++++++++++++++++++++--
 src/gen75_mfc.c       |  10 ++--
 src/gen8_mfc.c        |  10 ++--
 src/i965_drv_video.c  |   5 +-
 5 files changed, 141 insertions(+), 20 deletions(-)

diff --git a/src/gen6_mfc.c b/src/gen6_mfc.c
index 8077c14..1765530 100644
--- a/src/gen6_mfc.c
+++ b/src/gen6_mfc.c
@@ -798,7 +798,7 @@ gen6_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
     int qp_mb;
 
     qp_slice = qp;
-    if (rate_control_mode == VA_RC_CBR) {
+    if (rate_control_mode != VA_RC_CQP) {
         qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
         if (encode_state->slice_header_index[slice_index] == 0) {
             pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
@@ -816,7 +816,7 @@ gen6_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
                              pPicParameter,
                              pSliceParameter,
                              encode_state, encoder_context,
-                             (rate_control_mode == VA_RC_CBR), qp_slice, slice_batch);
+                             (rate_control_mode != VA_RC_CQP), qp_slice, slice_batch);
 
     if ( slice_index == 0) 
         intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
@@ -1188,7 +1188,7 @@ gen6_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
     int qp_slice;
 
     qp_slice = qp;
-    if (rate_control_mode == VA_RC_CBR) {
+    if (rate_control_mode != VA_RC_CQP) {
         qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
         if (encode_state->slice_header_index[slice_index] == 0) {
             pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
@@ -1209,7 +1209,7 @@ gen6_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
                              pSliceParameter,
                              encode_state,
                              encoder_context,
-                             (rate_control_mode == VA_RC_CBR),
+                             (rate_control_mode != VA_RC_CQP),
                              qp_slice,
                              slice_batch);
 
@@ -1368,7 +1368,7 @@ gen6_mfc_avc_encode_picture(VADriverContextP ctx,
         /*Programing bcs pipeline*/
         gen6_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context);	//filling the pipeline
         gen6_mfc_run(ctx, encode_state, encoder_context);
-        if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) {
+        if (rate_control_mode == VA_RC_CBR || rate_control_mode == VA_RC_VBR) {
             gen6_mfc_stop(ctx, encode_state, encoder_context, &current_frame_bits_size);
             sts = intel_mfc_brc_postpack(encode_state, encoder_context, current_frame_bits_size);
             if (sts == BRC_NO_HRD_VIOLATION) {
diff --git a/src/gen6_mfc_common.c b/src/gen6_mfc_common.c
index fbedc94..0d21a11 100644
--- a/src/gen6_mfc_common.c
+++ b/src/gen6_mfc_common.c
@@ -127,6 +127,9 @@ static void intel_mfc_brc_init(struct encode_state *encode_state,
                 ((double)encoder_context->brc.framerate[i - 1].num / (double)encoder_context->brc.framerate[i - 1].den);
         }
 
+        if (mfc_context->brc.mode == VA_RC_VBR && encoder_context->brc.target_percentage[i])
+            bitrate = bitrate * encoder_context->brc.target_percentage[i] / 100;
+
         if (i == encoder_context->layer.num_layers - 1)
             factor = 1.0;
         else {
@@ -219,9 +222,9 @@ int intel_mfc_update_hrd(struct encode_state *encode_state,
     return BRC_NO_HRD_VIOLATION;
 }
 
-int intel_mfc_brc_postpack(struct encode_state *encode_state,
-                           struct intel_encoder_context *encoder_context,
-                           int frame_bits)
+static int intel_mfc_brc_postpack_cbr(struct encode_state *encode_state,
+                                      struct intel_encoder_context *encoder_context,
+                                      int frame_bits)
 {
     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
     gen6_brc_status sts = BRC_NO_HRD_VIOLATION;
@@ -368,6 +371,121 @@ int intel_mfc_brc_postpack(struct encode_state *encode_state,
     return sts;
 }
 
+// Simple reactive VBR rate control for single-layer H.264 (only layer 0 state is used).
+// Calls intel_mfc_update_hrd() with frame_bits, then picks the QP for the next frame in
+// brc.qp_prime_y[0][] and returns the resulting gen6_brc_status.
+// The aim is to avoid the CBR rate controller's behaviour on scene changes, where the QP can
+// get pushed up by a large amount in a short period and visibly compromise the following
+// frames.  To do so it tries to keep the HRD buffering above the target level most of the
+// time, leaving slack for a large frame, and it is optimistic about future complexity: even
+// after one or more large frames it holds the QP at its current level until the HRD buffer
+// bounds force a change to maintain the intended rate.
+static int intel_mfc_brc_postpack_vbr(struct encode_state *encode_state,
+                                      struct intel_encoder_context *encoder_context,
+                                      int frame_bits)
+{
+    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
+    gen6_brc_status sts;
+    VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
+    int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
+    int *qp = mfc_context->brc.qp_prime_y[0];
+    int min_qp = MAX(1, encoder_context->brc.min_qp);
+    // Both default to "no change", so qp_delta stays defined in the overflow branch even when
+    // assert() is compiled out with NDEBUG.
+    int qp_delta = 0, large_frame_adjustment = 0;
+
+    sts = intel_mfc_update_hrd(encode_state, encoder_context, frame_bits);
+
+    // This adjustment is applied to increase the QP by more than we normally would if a very
+    // large frame is encountered and we are in danger of running out of slack.  It is computed
+    // only for frames above the target size (the only case that uses it): for a smaller frame
+    // the quotient can be 0, and converting rint(log(0)), i.e. -infinity, to int is undefined.
+    if (frame_bits > mfc_context->brc.target_frame_size[0][slice_type])
+        large_frame_adjustment = rint(2.0 * log(frame_bits / mfc_context->brc.target_frame_size[0][slice_type]));
+
+    if (sts == BRC_UNDERFLOW) {
+        // The frame is far too big and we don't have the bits available to send it, so it will
+        // have to be re-encoded at a higher QP.
+        qp_delta = +2 + large_frame_adjustment;
+    } else if (sts == BRC_OVERFLOW) {
+        // The frame is very small and we are now overflowing the HRD buffer.  Currently this case
+        // does not occur because we ignore overflow in VBR mode.
+        assert(0 && "Overflow in VBR mode");
+    } else if (frame_bits <= mfc_context->brc.target_frame_size[0][slice_type]) {
+        // The frame is smaller than the average size expected for this frame type.
+        if (mfc_context->hrd.current_buffer_fullness[0] >
+            (mfc_context->hrd.target_buffer_fullness[0] + mfc_context->hrd.buffer_size[0]) / 2.0) {
+            // We currently have lots of bits available, so decrease the QP slightly for the next
+            // frame.
+            qp_delta = -1;
+        } else {
+            // The HRD buffer fullness is increasing, so do nothing.  (We may be under the target
+            // level here, but are moving in the right direction.)
+            qp_delta = 0;
+        }
+    } else {
+        // The frame is larger than the average size expected for this frame type.
+        if (mfc_context->hrd.current_buffer_fullness[0] > mfc_context->hrd.target_buffer_fullness[0]) {
+            // We are currently over the target level, so do nothing.
+            qp_delta = 0;
+        } else if (mfc_context->hrd.current_buffer_fullness[0] > mfc_context->hrd.target_buffer_fullness[0] / 2.0) {
+            // We are under the target level, but not critically.  Increase the QP by one step if
+            // continuing like this would underflow soon (currently within one second).
+            if (mfc_context->hrd.current_buffer_fullness[0] /
+                (double)(frame_bits - mfc_context->brc.target_frame_size[0][slice_type] + 1) <
+                ((double)encoder_context->brc.framerate[0].num / (double)encoder_context->brc.framerate[0].den))
+                qp_delta = +1;
+            else
+                qp_delta = 0;
+        } else {
+            // We are a long way under the target level.  Always increase the QP, possibly by a
+            // larger amount dependent on how big the frame we just made actually was.
+            qp_delta = +1 + large_frame_adjustment;
+        }
+    }
+
+    switch (slice_type) {
+    case SLICE_TYPE_I:
+        qp[SLICE_TYPE_I] += qp_delta;
+        qp[SLICE_TYPE_P]  = qp[SLICE_TYPE_I] + BRC_I_P_QP_DIFF;
+        qp[SLICE_TYPE_B]  = qp[SLICE_TYPE_I] + BRC_I_B_QP_DIFF;
+        break;
+    case SLICE_TYPE_P:
+        qp[SLICE_TYPE_P] += qp_delta;
+        qp[SLICE_TYPE_I]  = qp[SLICE_TYPE_P] - BRC_I_P_QP_DIFF;
+        qp[SLICE_TYPE_B]  = qp[SLICE_TYPE_P] + BRC_P_B_QP_DIFF;
+        break;
+    case SLICE_TYPE_B:
+        qp[SLICE_TYPE_B] += qp_delta;
+        qp[SLICE_TYPE_I]  = qp[SLICE_TYPE_B] - BRC_I_B_QP_DIFF;
+        qp[SLICE_TYPE_P]  = qp[SLICE_TYPE_B] - BRC_P_B_QP_DIFF;
+        break;
+    }
+    BRC_CLIP(mfc_context->brc.qp_prime_y[0][SLICE_TYPE_I], min_qp, 51);
+    BRC_CLIP(mfc_context->brc.qp_prime_y[0][SLICE_TYPE_P], min_qp, 51);
+    BRC_CLIP(mfc_context->brc.qp_prime_y[0][SLICE_TYPE_B], min_qp, 51);
+
+    if (sts == BRC_UNDERFLOW && qp[slice_type] == 51)
+        sts = BRC_UNDERFLOW_WITH_MAX_QP;
+    if (sts == BRC_OVERFLOW && qp[slice_type] == min_qp)
+        sts = BRC_OVERFLOW_WITH_MIN_QP;
+
+    return sts;
+}
+
+// Dispatch post-encode rate control on encoder_context->rate_control_mode (CBR or VBR).
+int intel_mfc_brc_postpack(struct encode_state *encode_state,
+                           struct intel_encoder_context *encoder_context,
+                           int frame_bits)
+{
+    if (encoder_context->rate_control_mode == VA_RC_CBR)
+        return intel_mfc_brc_postpack_cbr(encode_state, encoder_context, frame_bits);
+    if (encoder_context->rate_control_mode == VA_RC_VBR)
+        return intel_mfc_brc_postpack_vbr(encode_state, encoder_context, frame_bits);
+    assert(0 && "Invalid RC mode");
+    return BRC_NO_HRD_VIOLATION; // keeps the result defined when assert() is compiled out
+}
+
 static void intel_mfc_hrd_context_init(struct encode_state *encode_state,
                                        struct intel_encoder_context *encoder_context)
 {
@@ -427,7 +545,7 @@ void intel_mfc_brc_prepare(struct encode_state *encode_state,
         encoder_context->codec != CODEC_H264_MVC)
         return;
 
-    if (rate_control_mode == VA_RC_CBR) {
+    if (rate_control_mode != VA_RC_CQP) {
         /*Programing bit rate control */
         if (encoder_context->brc.need_reset) {
             intel_mfc_bit_rate_control_context_init(encode_state, encoder_context);
diff --git a/src/gen75_mfc.c b/src/gen75_mfc.c
index 0fbbe76..7b76b99 100644
--- a/src/gen75_mfc.c
+++ b/src/gen75_mfc.c
@@ -1174,7 +1174,7 @@ gen75_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
     int qp_mb;
 
     qp_slice = qp;
-    if (rate_control_mode == VA_RC_CBR) {
+    if (rate_control_mode != VA_RC_CQP) {
         qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
         if (encode_state->slice_header_index[slice_index] == 0) {
             pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
@@ -1192,7 +1192,7 @@ gen75_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
                               pPicParameter,
                               pSliceParameter,
                               encode_state, encoder_context,
-                              (rate_control_mode == VA_RC_CBR), qp_slice, slice_batch);
+                              (rate_control_mode != VA_RC_CQP), qp_slice, slice_batch);
 
     if ( slice_index == 0)
         intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
@@ -1521,7 +1521,7 @@ gen75_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
     int qp_slice;
 
     qp_slice = qp;
-    if (rate_control_mode == VA_RC_CBR) {
+    if (rate_control_mode != VA_RC_CQP) {
         qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
         if (encode_state->slice_header_index[slice_index] == 0) {
             pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
@@ -1540,7 +1540,7 @@ gen75_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
                               pSliceParameter,
                               encode_state,
                               encoder_context,
-                              (rate_control_mode == VA_RC_CBR),
+                              (rate_control_mode != VA_RC_CQP),
                               qp_slice,
                               slice_batch);
 
@@ -1702,7 +1702,7 @@ gen75_mfc_avc_encode_picture(VADriverContextP ctx,
         /*Programing bcs pipeline*/
         gen75_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context);	//filling the pipeline
         gen75_mfc_run(ctx, encode_state, encoder_context);
-        if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) {
+        if (rate_control_mode == VA_RC_CBR || rate_control_mode == VA_RC_VBR) {
             gen75_mfc_stop(ctx, encode_state, encoder_context, &current_frame_bits_size);
             sts = intel_mfc_brc_postpack(encode_state, encoder_context, current_frame_bits_size);
             if (sts == BRC_NO_HRD_VIOLATION) {
diff --git a/src/gen8_mfc.c b/src/gen8_mfc.c
index 90119d7..8e68c7c 100644
--- a/src/gen8_mfc.c
+++ b/src/gen8_mfc.c
@@ -1177,7 +1177,7 @@ gen8_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
     int qp_mb;
 
     qp_slice = qp;
-    if (rate_control_mode == VA_RC_CBR) {
+    if (rate_control_mode != VA_RC_CQP) {
         qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
         if (encode_state->slice_header_index[slice_index] == 0) {
             pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
@@ -1195,7 +1195,7 @@ gen8_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
                              pPicParameter,
                              pSliceParameter,
                              encode_state, encoder_context,
-                             (rate_control_mode == VA_RC_CBR), qp_slice, slice_batch);
+                             (rate_control_mode != VA_RC_CQP), qp_slice, slice_batch);
 
     if ( slice_index == 0)
         intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
@@ -1534,7 +1534,7 @@ gen8_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
     int qp_slice;
 
     qp_slice = qp;
-    if (rate_control_mode == VA_RC_CBR) {
+    if (rate_control_mode != VA_RC_CQP) {
         qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
         if (encode_state->slice_header_index[slice_index] == 0) {
             pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
@@ -1553,7 +1553,7 @@ gen8_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
                               pSliceParameter,
                               encode_state,
                               encoder_context,
-                              (rate_control_mode == VA_RC_CBR),
+                              (rate_control_mode != VA_RC_CQP),
                               qp_slice,
                               slice_batch);
 
@@ -1729,7 +1729,7 @@ gen8_mfc_avc_encode_picture(VADriverContextP ctx,
         /*Programing bcs pipeline*/
         gen8_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context);	//filling the pipeline
         gen8_mfc_run(ctx, encode_state, encoder_context);
-        if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) {
+        if (rate_control_mode == VA_RC_CBR || rate_control_mode == VA_RC_VBR) {
             gen8_mfc_stop(ctx, encode_state, encoder_context, &current_frame_bits_size);
             sts = intel_mfc_brc_postpack(encode_state, encoder_context, current_frame_bits_size);
             if (sts == BRC_NO_HRD_VIOLATION) {
diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c
index 76cb915..cc37190 100644
--- a/src/i965_drv_video.c
+++ b/src/i965_drv_video.c
@@ -936,7 +936,10 @@ i965_GetConfigAttributes(VADriverContextP ctx,
                     profile != VAProfileMPEG2Simple)
                     attrib_list[i].value |= VA_RC_CBR;
 
-                if (profile == VAProfileVP9Profile0)
+                if (profile == VAProfileVP9Profile0 ||
+                    profile == VAProfileH264ConstrainedBaseline ||
+                    profile == VAProfileH264Main ||
+                    profile == VAProfileH264High)
                     attrib_list[i].value |= VA_RC_VBR;
 
                 break;
-- 
2.11.0


More information about the Libva mailing list