[Libva] [PATCH 2/4] Set the pipeline to use the new VP8 encoding shaders on BSW

Xiang, Haihao haihao.xiang at intel.com
Wed Jan 11 01:21:06 UTC 2017


> 
> On Tue, Jan 10, 2017 at 4:21 PM, Mark Thompson <sw at jkqxz.net> wrote:
> > On 10/01/17 22:02, Sean V Kelley wrote:
> > > From: "Xiang, Haihao" <haihao.xiang at intel.com>
> > >
> > > Currently only one temporal layer is supported
> > >
> > > Signed-off-by: Xiang, Haihao <haihao.xiang at intel.com>
> > > Reviewed-by: Sean V Kelley <seanvk at posteo.de>
> > > ---
> > >  src/Makefile.am        |    3 +
> > >  src/gen8_encoder_vp8.c |  140 +
> > >  src/gen8_mfc.c         |    8 +-
> > >  src/gen8_vme.c         |    5 +
> > >  src/i965_defines.h     |   10 +
> > >  src/i965_encoder.c     |    2 +
> > >  src/i965_encoder_vp8.c | 6697
> > ++++++++++++++++++++++++++++++++++++++++++++++++
> > >  src/i965_encoder_vp8.h | 2643 +++++++++++++++++++
> > >  8 files changed, 9507 insertions(+), 1 deletion(-)
> > 
> > I had a go with this on Kaby Lake.  In general, big win - looks like it can
> > be under half the bitrate at comparable quality (though it was pretty
> > terrible before...).
> > 
> > However, the rate control seems to do odd things at low bitrate relative to
> > the frame size?  I can get GPU hangs and wildly varying output bitrate with
> > it, though it seems ok at high bitrate.
> That's a concern.  If it really is a GPU hang, I need the error report from
> the DRM card0 log.
> 
> cat /sys/class/drm/card0/error
> 
> Please rerun and capture the DRM (i915) card0 error log.
>  
> >  
> > I had a look around the rate control and found two minor issues in the RC
> > configuration, though I don't think either of them are relevant to my
> > problem (see below).  I can try to make a reproducer if this is not already
> > known?
> > 
> Please do attempt to reproduce.  That's why I've put the patches out here to
> test.

Thanks for testing the patch. Could you detail the steps to reproduce this
issue?


> 
> Thanks,
> 
> Sean
>  
> >  Thanks,
> > 
> > - Mark
> > 
> > 
> > > ...
> > > +
> > > +static void
> > > +i965_encoder_vp8_get_misc_parameters(VADriverContextP ctx,
> > > +                                     struct encode_state *encode_state,
> > > +                                     struct intel_encoder_context
> > *encoder_context)
> > > +{
> > > +    struct i965_encoder_vp8_context *vp8_context = encoder_context-
> > >vme_context;
> > > +
> > > +    if (vp8_context->internal_rate_mode == I965_BRC_CQP) {
> > > +        vp8_context->init_vbv_buffer_fullness_in_bit = 0;
> > > +        vp8_context->vbv_buffer_size_in_bit = 0;
> > > +        vp8_context->target_bit_rate = 0;
> > > +        vp8_context->max_bit_rate = 0;
> > > +        vp8_context->min_bit_rate = 0;
> > > +        vp8_context->brc_need_reset = 0;
> > > +    } else {
> > > +        vp8_context->gop_size = encoder_context->brc.gop_size;
> > > +
> > > +        if (encoder_context->brc.need_reset) {
> > > +            vp8_context->framerate = encoder_context->brc.framerate[0];
> > > +            vp8_context->vbv_buffer_size_in_bit = encoder_context-
> > >brc.hrd_buffer_size;
> > > +            vp8_context->init_vbv_buffer_fullness_in_bit =
> > encoder_context->brc.hrd_initial_buffer_fullness;
> > > +            vp8_context->max_bit_rate = encoder_context-
> > >brc.bits_per_second[0]; // currently only one layer is supported
> > > +            vp8_context->brc_need_reset = (vp8_context->brc_initted &&
> > encoder_context->brc.need_reset);
> > > +
> > > +            if (vp8_context->internal_rate_mode == I965_BRC_CBR) {
> > > +                vp8_context->min_bit_rate = vp8_context->max_bit_rate;
> > > +                vp8_context->target_bit_rate = vp8_context->max_bit_rate;
> > > +            } else {
> > > +                assert(vp8_context->internal_rate_mode == I965_BRC_VBR);
> > > +                vp8_context->min_bit_rate = vp8_context->max_bit_rate *
> > (2 * encoder_context->brc.target_percentage[0] - 100) / 100;
> > 
> > If target percentage is < 50 then
> > (2 * encoder_context->brc.target_percentage[0] - 100) is negative.  Since
> > it's unsigned, you end up with a garbage number in min_bit_rate.
> That's a concern, also we may need to reconcile this with our handling for VP9
> encode.
>  
> >  
> > > +                vp8_context->target_bit_rate = vp8_context->max_bit_rate
> > * encoder_context->brc.target_percentage[0] / 100;
> > > +            }
> > > +        }
> > > +    }
> > > +
> > > +    if (encoder_context->quality_level == ENCODER_LOW_QUALITY)
> > > +        vp8_context->hme_16x_supported = 0;
> > > +}
> > > +
> > > ...
> > > +
> > > +static void
> > > +i965_encoder_vp8_vme_brc_init_reset_set_curbe(VADriverContextP ctx,
> > > +                                              struct encode_state
> > *encode_state,
> > > +                                              struct
> > intel_encoder_context *encoder_context,
> > > +                                              struct i965_gpe_context
> > *gpe_context)
> > > +{
> > > +    struct i965_encoder_vp8_context *vp8_context = encoder_context-
> > >vme_context;
> > > +    VAEncPictureParameterBufferVP8 *pic_param =
> > (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
> > > +    struct vp8_brc_init_reset_curbe_data *pcmd =
> > i965_gpe_context_map_curbe(gpe_context);
> > > +    double input_bits_per_frame, bps_ratio;
> > > +
> > > +    memset(pcmd, 0, sizeof(*pcmd));
> > > +
> > > +    pcmd->dw0.profile_level_max_frame = vp8_context->frame_width *
> > vp8_context->frame_height;
> > > +    pcmd->dw1.init_buf_full_in_bits = vp8_context-
> > >init_vbv_buffer_fullness_in_bit;
> > > +    pcmd->dw2.buf_size_in_bits = vp8_context->vbv_buffer_size_in_bit;
> > > +    pcmd->dw3.average_bitrate = ALIGN(vp8_context->target_bit_rate,
> > VP8_BRC_KBPS) / VP8_BRC_KBPS * VP8_BRC_KBPS;
> > > +    pcmd->dw4.max_bitrate = ALIGN(vp8_context->max_bit_rate,
> > VP8_BRC_KBPS) / VP8_BRC_KBPS * VP8_BRC_KBPS;
> > 
> > VP8_BRC_KBPS is 1000 which is not a power of two, so the ALIGN macro isn't
> > doing anything sensible here.
> Agree...
>  
> >  
> > > +    pcmd->dw6.frame_rate_m = vp8_context->framerate.num;
> > > +    pcmd->dw7.frame_rate_d = vp8_context->framerate.den;
> > > +    pcmd->dw8.brc_flag = 0;
> > > +    pcmd->dw8.gop_minus1 = vp8_context->gop_size - 1;
> > > +
> > > +    if (vp8_context->internal_rate_mode == I965_BRC_CBR) {
> > > +        pcmd->dw4.max_bitrate = pcmd->dw3.average_bitrate;
> > > +
> > > +        pcmd->dw8.brc_flag = pcmd->dw8.brc_flag | BRC_KERNEL_CBR;
> > > +    } else if (vp8_context->internal_rate_mode == I965_BRC_VBR) {
> > > +        if (pcmd->dw4.max_bitrate < pcmd->dw3.average_bitrate) {
> > > +            pcmd->dw4.max_bitrate = 2 * pcmd->dw3.average_bitrate;
> > > +        }
> > > +
> > > +        pcmd->dw8.brc_flag = pcmd->dw8.brc_flag | BRC_KERNEL_VBR;
> > > +    }
> > > +
> > > +    input_bits_per_frame =
> > > +        ((double)(pcmd->dw4.max_bitrate) * (double)(pcmd-
> > >dw7.frame_rate_d) /
> > > +         (double)(pcmd->dw6.frame_rate_m));
> > > +
> > > +    if (pcmd->dw2.buf_size_in_bits < (unsigned int)input_bits_per_frame *
> > 4) {
> > > +        pcmd->dw2.buf_size_in_bits = (unsigned int)input_bits_per_frame *
> > 4;
> > > +    }
> > > +
> > > +    if (pcmd->dw1.init_buf_full_in_bits == 0) {
> > > +        pcmd->dw1.init_buf_full_in_bits = 7 * pcmd->dw2.buf_size_in_bits
> > / 8;
> > > +    }
> > > +
> > > +    if (pcmd->dw1.init_buf_full_in_bits < (unsigned
> > int)(input_bits_per_frame * 2)) {
> > > +        pcmd->dw1.init_buf_full_in_bits = (unsigned
> > int)(input_bits_per_frame * 2);
> > > +    }
> > > +
> > > +    if (pcmd->dw1.init_buf_full_in_bits > pcmd->dw2.buf_size_in_bits) {
> > > +        pcmd->dw1.init_buf_full_in_bits = pcmd->dw2.buf_size_in_bits;
> > > +    }
> > > +
> > > +    bps_ratio = input_bits_per_frame / ((double)(pcmd-
> > >dw2.buf_size_in_bits) / 30);
> > > +    bps_ratio = (bps_ratio < 0.1) ? 0.1 : (bps_ratio > 3.5) ? 3.5 :
> > bps_ratio;
> > > +
> > > +    pcmd->dw9.frame_width_in_bytes = vp8_context->frame_width;
> > > +    pcmd->dw10.frame_height_in_bytes = vp8_context->frame_height;
> > > +    pcmd->dw10.avbr_accuracy = 30;
> > > +    pcmd->dw11.avbr_convergence = 150;
> > > +    pcmd->dw11.min_qp = pic_param->clamp_qindex_low;
> > > +    pcmd->dw12.max_qp = pic_param->clamp_qindex_high;
> > > +    pcmd->dw12.level_qp = 60;
> > > +
> > > +    // DW13 default 100
> > > +    pcmd->dw13.max_section_pct = 100;
> > > +    pcmd->dw13.under_shoot_cbr_pct = 115;
> > > +
> > > +    // DW14 default 100
> > > +    pcmd->dw14.min_section_pct = 100;
> > > +    pcmd->dw14.vbr_bias_pct = 100;
> > > +    pcmd->dw15.instant_rate_threshold_0_for_p = 30;
> > > +    pcmd->dw15.instant_rate_threshold_1_for_p = 50;
> > > +    pcmd->dw15.instant_rate_threshold_2_for_p = 70;
> > > +    pcmd->dw15.instant_rate_threshold_3_for_p = 120;
> > > +
> > > +    pcmd->dw17.instant_rate_threshold_0_for_i = 30;
> > > +    pcmd->dw17.instant_rate_threshold_1_for_i = 50;
> > > +    pcmd->dw17.instant_rate_threshold_2_for_i = 90;
> > > +    pcmd->dw17.instant_rate_threshold_3_for_i = 115;
> > > +    pcmd->dw18.deviation_threshold_0_for_p = (unsigned int)(-50 *
> > pow(0.9, bps_ratio));
> > > +    pcmd->dw18.deviation_threshold_1_for_p = (unsigned int)(-50 *
> > pow(0.66, bps_ratio));
> > > +    pcmd->dw18.deviation_threshold_2_for_p = (unsigned int)(-50 *
> > pow(0.46, bps_ratio));
> > > +    pcmd->dw18.deviation_threshold_3_for_p = (unsigned int)(-50 *
> > pow(0.3, bps_ratio));
> > > +    pcmd->dw19.deviation_threshold_4_for_p = (unsigned int)(50 * pow(0.3,
> > bps_ratio));
> > > +    pcmd->dw19.deviation_threshold_5_for_p = (unsigned int)(50 *
> > pow(0.46, bps_ratio));
> > > +    pcmd->dw19.deviation_threshold_6_for_p = (unsigned int)(50 * pow(0.7,
> > bps_ratio));
> > > +    pcmd->dw19.deviation_threshold_7_for_p = (unsigned int)(50 * pow(0.9,
> > bps_ratio));
> > > +    pcmd->dw20.deviation_threshold_0_for_vbr = (unsigned int)(-50 *
> > pow(0.9, bps_ratio));
> > > +    pcmd->dw20.deviation_threshold_1_for_vbr = (unsigned int)(-50 *
> > pow(0.7, bps_ratio));
> > > +    pcmd->dw20.deviation_threshold_2_for_vbr = (unsigned int)(-50 *
> > pow(0.5, bps_ratio));
> > > +    pcmd->dw20.deviation_threshold_3_for_vbr = (unsigned int)(-50 *
> > pow(0.3, bps_ratio));
> > > +    pcmd->dw21.deviation_threshold_4_for_vbr = (unsigned int)(100 *
> > pow(0.4, bps_ratio));
> > > +    pcmd->dw21.deviation_threshold_5_for_vbr = (unsigned int)(100 *
> > pow(0.5, bps_ratio));
> > > +    pcmd->dw21.deviation_threshold_6_for_vbr = (unsigned int)(100 *
> > pow(0.75, bps_ratio));
> > > +    pcmd->dw21.deviation_threshold_7_for_vbr = (unsigned int)(100 *
> > pow(0.9, bps_ratio));
> > > +    pcmd->dw22.deviation_threshold_0_for_i = (unsigned int)(-50 *
> > pow(0.8, bps_ratio));
> > > +    pcmd->dw22.deviation_threshold_1_for_i = (unsigned int)(-50 *
> > pow(0.6, bps_ratio));
> > > +    pcmd->dw22.deviation_threshold_2_for_i = (unsigned int)(-50 *
> > pow(0.34, bps_ratio));
> > > +    pcmd->dw22.deviation_threshold_3_for_i = (unsigned int)(-50 *
> > pow(0.2, bps_ratio));
> > > +    pcmd->dw23.deviation_threshold_4_for_i = (unsigned int)(50 * pow(0.2,
> > bps_ratio));
> > > +    pcmd->dw23.deviation_threshold_5_for_i = (unsigned int)(50 * pow(0.4,
> > bps_ratio));
> > > +    pcmd->dw23.deviation_threshold_6_for_i = (unsigned int)(50 *
> > pow(0.66, bps_ratio));
> > > +    pcmd->dw23.deviation_threshold_7_for_i = (unsigned int)(50 * pow(0.9,
> > bps_ratio));
> > > +
> > > +    // Default: 1
> > > +    pcmd->dw24.num_t_levels = 1;
> > > +
> > > +    if (!vp8_context->brc_initted) {
> > > +        vp8_context->brc_init_current_target_buf_full_in_bits = pcmd-
> > >dw1.init_buf_full_in_bits;
> > > +    }
> > > +
> > > +    vp8_context->brc_init_reset_buf_size_in_bits = pcmd-
> > >dw2.buf_size_in_bits;
> > > +    vp8_context->brc_init_reset_input_bits_per_frame =
> > input_bits_per_frame;
> > > +
> > > +    pcmd->dw26.history_buffer_bti = VP8_BTI_BRC_INIT_RESET_HISTORY;
> > > +    pcmd->dw27.distortion_buffer_bti = VP8_BTI_BRC_INIT_RESET_DISTORTION;
> > > +
> > > +    i965_gpe_context_unmap_curbe(gpe_context);
> > > +}
> > > +
> > > ...
> > _______________________________________________
> > Libva mailing list
> > Libva at lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/libva
> > 
> 
> 
> -- 
> Sean V. Kelley <sean.v.kelley at intel.com>
> Open Source Technology Center / SSG
> Intel Corp.
> _______________________________________________
> Libva mailing list
> Libva at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/libva


More information about the Libva mailing list