[Libva] [Libva-intel-driver][PATCH] Support AVC VDEnc on KBL

Kelley, Sean V sean.v.kelley at intel.com
Tue Dec 27 17:21:03 UTC 2016



On 12/23/16, 8:58 AM, "Libva on behalf of Xiang, Haihao" <libva-bounces at lists.freedesktop.org on behalf of haihao.xiang at intel.com> wrote:

    I verified AVC VDEnc on KBL with the HuC loading patch from
    https://patchwork.freedesktop.org/api/1.0/series/16584/revisions/1/mbox/

lgtm

Sean
    
    Signed-off-by: Xiang, Haihao <haihao.xiang at intel.com>
    ---
     src/gen9_vdenc.c       | 231 +++++++++++++++++++++++++++++++++++++++++++++----
     src/gen9_vdenc.h       |   7 +-
     src/i965_defines.h     |   1 +
     src/i965_device_info.c |   3 +
     4 files changed, 224 insertions(+), 18 deletions(-)
    
    diff --git a/src/gen9_vdenc.c b/src/gen9_vdenc.c
    index 6402d41..1913a67 100644
    --- a/src/gen9_vdenc.c
    +++ b/src/gen9_vdenc.c
    @@ -41,6 +41,9 @@
     #include "intel_media.h"
     #include "gen9_vdenc.h"
     
    +extern int
    +intel_avc_enc_slice_type_fixup(int slice_type);
    +
     static const uint8_t buf_rate_adj_tab_i_lowdelay[72] = {
         0,   0, -8, -12, -16, -20, -28, -36,
         0,   0, -4,  -8, -12, -16, -24, -32,
    @@ -2050,6 +2053,7 @@ gen9_vdenc_init_vdenc_img_state(VADriverContextP ctx,
         }
     
         pstate->dw1.transform_8x8_flag = vdenc_context->transform_8x8_mode_enable;
    +    pstate->dw1.extended_pak_obj_cmd_enable = !!vdenc_context->use_extended_pak_obj_cmd;
     
         pstate->dw3.picture_width = vdenc_context->frame_width_in_mbs;
     
    @@ -2752,6 +2756,86 @@ gen9_vdenc_vdenc_walker_state(VADriverContextP ctx,
     }
     
     static void
    +gen95_vdenc_vdecn_weihgtsoffsets_state(VADriverContextP ctx,
    +                                       struct encode_state *encode_state,
    +                                       struct intel_encoder_context *encoder_context,
    +                                       VAEncSliceParameterBufferH264 *slice_param)
    +{
    +    struct intel_batchbuffer *batch = encoder_context->base.batch;
    +    VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
    +
    +    BEGIN_BCS_BATCH(batch, 3);
    +
    +    OUT_BCS_BATCH(batch, VDENC_WEIGHTSOFFSETS_STATE | (3 - 2));
    +
    +    if (pic_param->pic_fields.bits.weighted_pred_flag == 1) {
    +        OUT_BCS_BATCH(batch, (slice_param->luma_offset_l0[1] << 24 |
    +                              slice_param->luma_weight_l0[1] << 16 |
    +                              slice_param->luma_offset_l0[0] << 8 |
    +                              slice_param->luma_weight_l0[0] << 0));
    +        OUT_BCS_BATCH(batch, (slice_param->luma_offset_l0[2] << 8 |
    +                              slice_param->luma_weight_l0[2] << 0));
    +    } else {
    +        OUT_BCS_BATCH(batch, (0 << 24 |
    +                              1 << 16 |
    +                              0 << 8 |
    +                              1 << 0));
    +        OUT_BCS_BATCH(batch, (0 << 8 |
    +                              1 << 0));
    +    }
    +
    +
    +    ADVANCE_BCS_BATCH(batch);
    +}
    +
    +static void
    +gen95_vdenc_vdenc_walker_state(VADriverContextP ctx,
    +                               struct encode_state *encode_state,
    +                               struct intel_encoder_context *encoder_context,
    +                               VAEncSliceParameterBufferH264 *slice_param,
    +                               VAEncSliceParameterBufferH264 *next_slice_param)
    +{
    +    struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
    +    struct intel_batchbuffer *batch = encoder_context->base.batch;
    +    VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
    +    int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
    +    int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
    +    int luma_log2_weight_denom, weighted_pred_idc;
    +
    +    slice_hor_pos = slice_param->macroblock_address % vdenc_context->frame_width_in_mbs;
    +    slice_ver_pos = slice_param->macroblock_address / vdenc_context->frame_height_in_mbs;
    +
    +    if (next_slice_param) {
    +        next_slice_hor_pos = next_slice_param->macroblock_address % vdenc_context->frame_width_in_mbs;
    +        next_slice_ver_pos = next_slice_param->macroblock_address / vdenc_context->frame_height_in_mbs;
    +    } else {
    +        next_slice_hor_pos = 0;
    +        next_slice_ver_pos = vdenc_context->frame_height_in_mbs;
    +    }
    +
    +    if (slice_type == SLICE_TYPE_P)
    +        weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
    +    else
    +        weighted_pred_idc = 0;
    +
    +    if (weighted_pred_idc == 1)
    +        luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
    +    else
    +        luma_log2_weight_denom = 0;
    +
    +    BEGIN_BCS_BATCH(batch, 4);
    +
    +    OUT_BCS_BATCH(batch, VDENC_WALKER_STATE | (4 - 2));
    +    OUT_BCS_BATCH(batch, (slice_hor_pos << 16 |
    +                          slice_ver_pos));
    +    OUT_BCS_BATCH(batch, (next_slice_hor_pos << 16 |
    +                          next_slice_ver_pos));
    +    OUT_BCS_BATCH(batch, luma_log2_weight_denom);
    +
    +    ADVANCE_BCS_BATCH(batch);
    +}
    +
    +static void
     gen9_vdenc_vdenc_img_state(VADriverContextP ctx,
                                struct encode_state *encode_state,
                                struct intel_encoder_context *encoder_context)
    @@ -2766,9 +2850,6 @@ gen9_vdenc_vdenc_img_state(VADriverContextP ctx,
         ADVANCE_BCS_BATCH(batch);
     }
     
    -extern int
    -intel_avc_enc_slice_type_fixup(int slice_type);
    -
     static void
     gen9_vdenc_mfx_avc_insert_object(VADriverContextP ctx,
                                      struct intel_encoder_context *encoder_context,
    @@ -2802,7 +2883,8 @@ static void
     gen9_vdenc_mfx_avc_insert_slice_packed_data(VADriverContextP ctx,
                                                 struct encode_state *encode_state,
                                                 struct intel_encoder_context *encoder_context,
    -                                            int slice_index)
    +                                            int slice_index,
    +                                            unsigned int insert_one_zero_byte)
     {
         VAEncPackedHeaderParameterBuffer *param = NULL;
         unsigned int length_in_bits;
    @@ -2846,13 +2928,28 @@ gen9_vdenc_mfx_avc_insert_slice_packed_data(VADriverContextP ctx,
                                              0,
                                              !param->has_emulation_bytes,
                                              0);
    +
    +        insert_one_zero_byte = 0;
    +    }
    +
    +    /* Insert one zero byte before the slice header if no other NAL unit has been inserted; this is required on KBL */
    +    if (insert_one_zero_byte) {
    +        unsigned int insert_data[] = { 0, };
    +
    +        gen9_vdenc_mfx_avc_insert_object(ctx,
    +                                         encoder_context,
    +                                         insert_data,
    +                                         1,
    +                                         8,
    +                                         1,
    +                                         0, 0, 0, 0);
         }
     
         if (slice_header_index == -1) {
             VAEncSequenceParameterBufferH264 *seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
             VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
             VAEncSliceParameterBufferH264 *slice_params = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
    -        unsigned char *slice_header = NULL;
    +        unsigned char *slice_header = NULL, *slice_header1 = NULL;
             int slice_header_length_in_bits = 0;
     
             /* No slice header data is passed. And the driver needs to generate it */
    @@ -2861,9 +2958,17 @@ gen9_vdenc_mfx_avc_insert_slice_packed_data(VADriverContextP ctx,
                                                                  pic_param,
                                                                  slice_params,
                                                                  &slice_header);
    +
    +        slice_header1 = slice_header;
    +
    +        if (insert_one_zero_byte) {
    +            slice_header1 += 1;
    +            slice_header_length_in_bits -= 8;
    +        }
    +
             gen9_vdenc_mfx_avc_insert_object(ctx,
                                              encoder_context,
    -                                         (unsigned int *)slice_header,
    +                                         (unsigned int *)slice_header1,
                                              ALIGN(slice_header_length_in_bits, 32) >> 5,
                                              slice_header_length_in_bits & 0x1f,
                                              5,  /* first 5 bytes are start code + nal unit type */
    @@ -2873,20 +2978,31 @@ gen9_vdenc_mfx_avc_insert_slice_packed_data(VADriverContextP ctx,
             free(slice_header);
         } else {
             unsigned int skip_emul_byte_cnt;
    +        unsigned char *slice_header1 = NULL;
     
             header_data = (unsigned int *)encode_state->packed_header_data_ext[slice_header_index]->buffer;
     
             param = (VAEncPackedHeaderParameterBuffer *)(encode_state->packed_header_params_ext[slice_header_index]->buffer);
             length_in_bits = param->bit_length;
     
    +        slice_header1 = (unsigned char *)header_data;
    +
    +        if (insert_one_zero_byte) {
    +            slice_header1 += 1;
    +            length_in_bits -= 8;
    +        }
    +
             /* as the slice header is the last header data for one slice,
              * the last header flag is set to one.
              */
             skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
     
    +        if (insert_one_zero_byte)
    +            skip_emul_byte_cnt -= 1;
    +
             gen9_vdenc_mfx_avc_insert_object(ctx,
                                              encoder_context,
    -                                         header_data,
    +                                         (unsigned int *)slice_header1,
                                              ALIGN(length_in_bits, 32) >> 5,
                                              length_in_bits & 0x1f,
                                              skip_emul_byte_cnt,
    @@ -2910,8 +3026,11 @@ gen9_vdenc_mfx_avc_inset_headers(VADriverContextP ctx,
         int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SPS);
         unsigned int internal_rate_mode = vdenc_context->internal_rate_mode;
         unsigned int skip_emul_byte_cnt;
    +    unsigned int insert_one_zero_byte = 0;
     
         if (slice_index == 0) {
    +        insert_one_zero_byte = 1;
    +
             if (encode_state->packed_header_data[idx]) {
                 VAEncPackedHeaderParameterBuffer *param = NULL;
                 unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
    @@ -2932,6 +3051,8 @@ gen9_vdenc_mfx_avc_inset_headers(VADriverContextP ctx,
                                                  0,
                                                  !param->has_emulation_bytes,
                                                  0);
    +
    +            insert_one_zero_byte = 0;
             }
     
             idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_PPS);
    @@ -2957,6 +3078,8 @@ gen9_vdenc_mfx_avc_inset_headers(VADriverContextP ctx,
                                                  0,
                                                  !param->has_emulation_bytes,
                                                  0);
    +
    +            insert_one_zero_byte = 0;
             }
     
             idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SEI);
    @@ -2981,15 +3104,21 @@ gen9_vdenc_mfx_avc_inset_headers(VADriverContextP ctx,
                                                  0,
                                                  !param->has_emulation_bytes,
                                                  0);
    +
    +            insert_one_zero_byte = 0;
             } else if (internal_rate_mode == I965_BRC_CBR) {
                 /* TODO: insert others */
             }
         }
     
    +    if (vdenc_context->is_frame_level_vdenc)
    +        insert_one_zero_byte = 0;
    +
         gen9_vdenc_mfx_avc_insert_slice_packed_data(ctx,
                                                     encode_state,
                                                     encoder_context,
    -                                                slice_index);
    +                                                slice_index,
    +                                                insert_one_zero_byte);
     }
     
     static void
    @@ -3233,6 +3362,7 @@ gen9_vdenc_mfx_avc_single_slice(VADriverContextP ctx,
                                     VAEncSliceParameterBufferH264 *next_slice_param,
                                     int slice_index)
     {
    +    struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
         VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
     
         gen9_vdenc_mfx_avc_ref_idx_state(ctx, encode_state, encoder_context, slice_param);
    @@ -3252,6 +3382,18 @@ gen9_vdenc_mfx_avc_single_slice(VADriverContextP ctx,
                                          encoder_context,
                                          slice_param,
                                          slice_index);
    +
    +    if (!vdenc_context->is_frame_level_vdenc) {
    +        gen95_vdenc_vdecn_weihgtsoffsets_state(ctx,
    +                                               encode_state,
    +                                               encoder_context,
    +                                               slice_param);
    +        gen95_vdenc_vdenc_walker_state(ctx,
    +                                       encode_state,
    +                                       encoder_context,
    +                                       slice_param,
    +                                       next_slice_param);
    +    }
     }
     
     static void
    @@ -3259,12 +3401,12 @@ gen9_vdenc_mfx_vdenc_avc_slices(VADriverContextP ctx,
                                     struct encode_state *encode_state,
                                     struct intel_encoder_context *encoder_context)
     {
    +    struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
         struct intel_batchbuffer *batch = encoder_context->base.batch;
         struct gpe_mi_flush_dw_parameter mi_flush_dw_params;
         VAEncSliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
         int i, j;
         int slice_index = 0;
    -    int is_frame_level_vdenc = 1;       /* TODO: check it for SKL */
         int has_tail = 0;                   /* TODO: check it later */
     
         for (j = 0; j < encode_state->num_slice_params_ext; j++) {
    @@ -3287,22 +3429,47 @@ gen9_vdenc_mfx_vdenc_avc_slices(VADriverContextP ctx,
                                                 slice_param,
                                                 next_slice_param,
                                                 slice_index);
    -            slice_param++;
    -            slice_index++;
     
    -            if (is_frame_level_vdenc)
    +            if (vdenc_context->is_frame_level_vdenc)
                     break;
                 else {
    -                /* TODO: remove assert(0) and add other commands here */
    -                assert(0);
    +                struct vd_pipeline_flush_parameter pipeline_flush_params;
    +                int insert_mi_flush;
    +
    +                memset(&pipeline_flush_params, 0, sizeof(pipeline_flush_params));
    +
    +                if (next_slice_group_param) {
    +                    pipeline_flush_params.mfx_pipeline_done = 0;
    +                    insert_mi_flush = 1;
    +                } else if (i < encode_state->slice_params_ext[j]->num_elements - 1) {
    +                    pipeline_flush_params.mfx_pipeline_done = 0;
    +                    insert_mi_flush = 1;
    +                } else {
    +                    pipeline_flush_params.mfx_pipeline_done = !has_tail;
    +                    insert_mi_flush = 0;
    +                }
    +
    +                pipeline_flush_params.vdenc_pipeline_done = 1;
    +                pipeline_flush_params.vdenc_pipeline_command_flush = 1;
    +                pipeline_flush_params.vd_command_message_parser_done = 1;
    +                gen9_vdenc_vd_pipeline_flush(ctx, encoder_context, &pipeline_flush_params);
    +
    +                if (insert_mi_flush) {
    +                    memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
    +                    mi_flush_dw_params.video_pipeline_cache_invalidate = 1;
    +                    gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);
    +                }
                 }
    +
    +            slice_param++;
    +            slice_index++;
             }
     
    -        if (is_frame_level_vdenc)
    +        if (vdenc_context->is_frame_level_vdenc)
                 break;
         }
     
    -    if (is_frame_level_vdenc) {
    +    if (vdenc_context->is_frame_level_vdenc) {
             struct vd_pipeline_flush_parameter pipeline_flush_params;
     
             gen9_vdenc_vdenc_walker_state(ctx, encode_state, encoder_context);
    @@ -3653,6 +3820,36 @@ gen9_vdenc_allocate_resources(VADriverContextP ctx,
                                     "HuC Status buffer");
     }
     
    +static void
    +gen9_vdenc_hw_interfaces_init(VADriverContextP ctx,
    +                              struct intel_encoder_context *encoder_context,
    +                              struct gen9_vdenc_context *vdenc_context)
    +{
    +    vdenc_context->is_frame_level_vdenc = 1;
    +}
    +
    +static void
    +gen95_vdenc_hw_interfaces_init(VADriverContextP ctx,
    +                               struct intel_encoder_context *encoder_context,
    +                               struct gen9_vdenc_context *vdenc_context)
    +{
    +    vdenc_context->use_extended_pak_obj_cmd = 1;
    +}
    +
    +static void
    +vdenc_hw_interfaces_init(VADriverContextP ctx,
    +                         struct intel_encoder_context *encoder_context,
    +                         struct gen9_vdenc_context *vdenc_context)
    +{
    +    struct i965_driver_data *i965 = i965_driver_data(ctx);
    +
    +    if (IS_KBL(i965->intel.device_info)) {
    +        gen95_vdenc_hw_interfaces_init(ctx, encoder_context, vdenc_context);
    +    } else {
    +        gen9_vdenc_hw_interfaces_init(ctx, encoder_context, vdenc_context);
    +    }
    +}
    +
     static VAStatus
     gen9_vdenc_context_get_status(VADriverContextP ctx,
                                   struct intel_encoder_context *encoder_context,
    @@ -3680,7 +3877,9 @@ gen9_vdenc_context_init(VADriverContextP ctx, struct intel_encoder_context *enco
         vdenc_context->num_passes = 1;
         vdenc_context->vdenc_streamin_enable = 0;
         vdenc_context->vdenc_pak_threshold_check_enable = 0;
    +    vdenc_context->is_frame_level_vdenc = 0;
     
    +    vdenc_hw_interfaces_init(ctx, encoder_context, vdenc_context);
         gen9_vdenc_allocate_resources(ctx, encoder_context, vdenc_context);
     
         encoder_context->mfc_context = vdenc_context;
    diff --git a/src/gen9_vdenc.h b/src/gen9_vdenc.h
    index 41e4362..ad0f2ae 100644
    --- a/src/gen9_vdenc.h
    +++ b/src/gen9_vdenc.h
    @@ -304,7 +304,8 @@ struct gen9_vdenc_img_state
             uint32_t bidirectional_mix_disable:1;
             uint32_t pad1:1;
             uint32_t time_budget_overflow_check:1;
    -        uint32_t pad2:2;
    +        uint32_t pad2:1;
    +        uint32_t extended_pak_obj_cmd_enable:1;
             uint32_t transform_8x8_flag:1;
             uint32_t vdenc_l1_cache_priority:2;
             uint32_t pad3:22;
    @@ -776,7 +777,9 @@ struct gen9_vdenc_context
         uint32_t    frame_type:2;
     
         uint32_t    mb_brc_enabled:1;
    -    uint32_t    pad0:31;
    +    uint32_t    is_frame_level_vdenc:1;
    +    uint32_t    use_extended_pak_obj_cmd:1;
    +    uint32_t    pad0:29;
     
         struct i965_gpe_resource brc_init_reset_dmem_res;
         struct i965_gpe_resource brc_history_buffer_res;
    diff --git a/src/i965_defines.h b/src/i965_defines.h
    index f86ac8e..941ad4e 100755
    --- a/src/i965_defines.h
    +++ b/src/i965_defines.h
    @@ -959,6 +959,7 @@
     #define VDENC_IMG_STATE                 VDENC(1, 0, 5)
     #define VDENC_CONST_QPT_STATE           VDENC(1, 0, 6)
     #define VDENC_WALKER_STATE              VDENC(1, 0, 7)
    +#define VDENC_WEIGHTSOFFSETS_STATE      VDENC(1, 0, 8)
     
     #define VDENC_CODEC_AVC                 2
     
    diff --git a/src/i965_device_info.c b/src/i965_device_info.c
    index 73602ae..0fc8930 100644
    --- a/src/i965_device_info.c
    +++ b/src/i965_device_info.c
    @@ -481,6 +481,9 @@ static struct hw_codec_info kbl_hw_codec_info = {
         .has_vp9_decoding = 1,
         .has_vpp_p010 = 1,
         .has_vp9_encoding = 1,
    +    .has_lp_h264_encoding = 1,
    +
    +    .lp_h264_brc_mode = VA_RC_CQP,
     
         .num_filters = 5,
         .filters = {
    -- 
    1.9.1
    
    _______________________________________________
    Libva mailing list
    Libva at lists.freedesktop.org
    https://lists.freedesktop.org/mailman/listinfo/libva
    



More information about the Libva mailing list