[Libva] [PATCH 19/31] ENC: MBENC kernel for AVC encoder
Sean V Kelley
seanvk at posteo.de
Tue Jan 10 23:38:01 UTC 2017
From: Pengfei Qu <Pengfei.Qu at intel.com>
Signed-off-by: Pengfei Qu <Pengfei.Qu at intel.com>
Reviewed-by: Sean V Kelley <seanvk at posteo.de>
---
src/gen9_avc_encoder.c | 927 +++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 927 insertions(+)
diff --git a/src/gen9_avc_encoder.c b/src/gen9_avc_encoder.c
index cd847a88..af581fc7 100755
--- a/src/gen9_avc_encoder.c
+++ b/src/gen9_avc_encoder.c
@@ -2844,3 +2844,930 @@ gen9_avc_load_mb_brc_const_data(VADriverContextP ctx,
}
i965_unmap_gpe_resource(gpe_resource);
}
+
+/*
+ * Fill the CURBE data of the AVC MBENC kernel, or of the BRC I-frame
+ * distortion kernel when param->mbenc_i_frame_dist_in_use is set.
+ *
+ * The buffer is seeded from a per-frame-type init table and then patched
+ * with preset-, QP- and picture-dependent values.  param must point to a
+ * struct mbenc_param.  A successful CURBE mapping is always unmapped again.
+ */
+static void
+gen9_avc_set_curbe_mbenc(VADriverContextP ctx,
+                         struct encode_state *encode_state,
+                         struct i965_gpe_context *gpe_context,
+                         struct intel_encoder_context *encoder_context,
+                         void * param)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    gen9_avc_mbenc_curbe_data *cmd;
+    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
+    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
+    struct avc_enc_state * avc_state = (struct avc_enc_state * )vme_context->private_enc_state;
+
+    VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
+    VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param;
+    VASurfaceID surface_id;
+    struct object_surface *obj_surface;
+
+    struct mbenc_param * curbe_param = (struct mbenc_param *)param ;
+    unsigned char qp = 0;
+    unsigned char me_method = 0;
+    unsigned int mbenc_i_frame_dist_in_use = curbe_param->mbenc_i_frame_dist_in_use;
+    unsigned int table_idx = 0;
+
+    unsigned int preset = generic_state->preset;
+    me_method = (generic_state->frame_type == SLICE_TYPE_B)? gen9_avc_b_me_method[preset]:gen9_avc_p_me_method[preset];
+    qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
+
+    cmd = (gen9_avc_mbenc_curbe_data *)i965_gpe_context_map_curbe(gpe_context);
+    if(!cmd)
+        return;
+    memset(cmd,0,sizeof(gen9_avc_mbenc_curbe_data));
+
+    /* start from the init table matching the kernel variant / frame type */
+    if(mbenc_i_frame_dist_in_use)
+    {
+        memcpy(cmd,gen9_avc_mbenc_curbe_i_frame_dist_init_data,sizeof(gen9_avc_mbenc_curbe_data));
+    }else
+    {
+        switch(generic_state->frame_type)
+        {
+        case SLICE_TYPE_I:
+            memcpy(cmd,gen9_avc_mbenc_curbe_normal_i_frame_init_data,sizeof(gen9_avc_mbenc_curbe_data));
+            break;
+        case SLICE_TYPE_P:
+            memcpy(cmd,gen9_avc_mbenc_curbe_normal_p_frame_init_data,sizeof(gen9_avc_mbenc_curbe_data));
+            break;
+        case SLICE_TYPE_B:
+            memcpy(cmd,gen9_avc_mbenc_curbe_normal_b_frame_init_data,sizeof(gen9_avc_mbenc_curbe_data));
+            break;
+        default:
+            assert(0);
+        }
+    }
+    cmd->dw0.adaptive_enable = gen9_avc_enable_adaptive_search[preset];
+    cmd->dw37.adaptive_enable = gen9_avc_enable_adaptive_search[preset];
+    cmd->dw0.t8x8_flag_for_inter_enable = pic_param->pic_fields.bits.transform_8x8_mode_flag;
+    cmd->dw37.t8x8_flag_for_inter_enable = pic_param->pic_fields.bits.transform_8x8_mode_flag;
+
+    cmd->dw2.max_len_sp = gen9_avc_max_len_sp[preset];
+    cmd->dw38.max_len_sp = 0;
+
+    cmd->dw3.src_access = 0;
+    cmd->dw3.ref_access = 0;
+
+    /* FTQ is never used on I frames; without an override the preset table decides (bit0: P, bit1: B) */
+    if(avc_state->ftq_enable && (generic_state->frame_type != SLICE_TYPE_I))
+    {
+        if(avc_state->ftq_override)
+        {
+            cmd->dw3.ftq_enable = avc_state->ftq_enable;
+        }else
+        {
+            if(generic_state->frame_type == SLICE_TYPE_P)
+            {
+                cmd->dw3.ftq_enable = gen9_avc_max_ftq_based_skip[preset] & 0x01;
+            }else
+            {
+                cmd->dw3.ftq_enable = (gen9_avc_max_ftq_based_skip[preset] >> 1) & 0x01;
+            }
+        }
+    }else
+    {
+        cmd->dw3.ftq_enable = 0;
+    }
+
+    if(avc_state->disable_sub_mb_partion)
+        cmd->dw3.sub_mb_part_mask = 0x7;
+
+    if(mbenc_i_frame_dist_in_use)
+    {
+        /* the distortion kernel works on the 4x downscaled picture */
+        cmd->dw2.pitch_width = generic_state->downscaled_width_4x_in_mb;
+        cmd->dw4.picture_height_minus1 = generic_state->downscaled_height_4x_in_mb - 1;
+        cmd->dw5.slice_mb_height = (avc_state->slice_height + 4 - 1)/4;
+        cmd->dw6.batch_buffer_end = 0;
+        cmd->dw31.intra_compute_type = 1;
+
+    }else
+    {
+        cmd->dw2.pitch_width = generic_state->frame_width_in_mbs;
+        cmd->dw4.picture_height_minus1 = generic_state->frame_height_in_mbs - 1;
+        cmd->dw5.slice_mb_height = (avc_state->arbitrary_num_mbs_in_slice)?generic_state->frame_height_in_mbs:avc_state->slice_height;
+
+        {
+            /* dw8..dw15: mode / MV cost table selected by kernel slice type and QP */
+            memcpy(&(cmd->dw8),gen9_avc_mode_mv_cost_table[slice_type_kernel[generic_state->frame_type]][qp],8*sizeof(unsigned int));
+            if((generic_state->frame_type == SLICE_TYPE_I) && avc_state->old_mode_cost_enable)
+            {
+                /* old intra mode cost is not applied: cmd->dw8 = gen9_avc_old_intra_mode_cost[qp]; */
+            }else if(avc_state->skip_bias_adjustment_enable)
+            {
+                /* Load different MvCost for P picture when SkipBiasAdjustment is enabled.
+                 * No need to check for P picture as the flag is only enabled for P picture */
+                cmd->dw11.value = gen9_avc_mv_cost_p_skip_adjustment[qp];
+            }
+        }
+
+        table_idx = (generic_state->frame_type == SLICE_TYPE_B)?1:0;
+        memcpy(&(cmd->dw16),table_enc_search_path[table_idx][me_method],16*sizeof(unsigned int));
+    }
+    cmd->dw4.enable_fbr_bypass = avc_state->fbr_bypass_enable;
+    cmd->dw4.enable_intra_cost_scaling_for_static_frame = avc_state->sfd_enable && generic_state->hme_enabled;
+    cmd->dw4.field_parity_flag = 0;//bottom field
+    cmd->dw4.enable_cur_fld_idr = 0;//field related
+    cmd->dw4.contrained_intra_pred_flag = pic_param->pic_fields.bits.constrained_intra_pred_flag;
+    cmd->dw4.hme_enable = generic_state->hme_enabled;
+    cmd->dw4.picture_type = slice_type_kernel[generic_state->frame_type];
+    cmd->dw4.use_actual_ref_qp_value = generic_state->hme_enabled && (gen9_avc_mr_disable_qp_check[preset] == 0);
+
+    cmd->dw7.intra_part_mask = pic_param->pic_fields.bits.transform_8x8_mode_flag?0:0x02;
+    cmd->dw7.src_field_polarity = 0;//field related
+
+    /*ftq_skip_threshold_lut set,dw14 /15*/
+
+    /*r5 disable NonFTQSkipThresholdLUT*/
+    if(generic_state->frame_type == SLICE_TYPE_P)
+    {
+        cmd->dw32.skip_val = gen9_avc_skip_value_p[avc_state->block_based_skip_enable][pic_param->pic_fields.bits.transform_8x8_mode_flag][qp];
+    }else if(generic_state->frame_type == SLICE_TYPE_B)
+    {
+        cmd->dw32.skip_val = gen9_avc_skip_value_b[avc_state->block_based_skip_enable][pic_param->pic_fields.bits.transform_8x8_mode_flag][qp];
+    }
+
+    cmd->dw13.qp_prime_y = qp;
+    cmd->dw13.qp_prime_cb = qp;
+    cmd->dw13.qp_prime_cr = qp;
+    cmd->dw13.target_size_in_word = 0xff;//hardcode for brc disable
+
+    if((generic_state->frame_type != SLICE_TYPE_I)&& avc_state->multi_pre_enable)
+    {
+        switch(gen9_avc_multi_pred[preset])
+        {
+        case 0:
+            cmd->dw32.mult_pred_l0_disable = 128;
+            cmd->dw32.mult_pred_l1_disable = 128;
+            break;
+        case 1:
+            cmd->dw32.mult_pred_l0_disable = (generic_state->frame_type == SLICE_TYPE_P)?1:128;
+            cmd->dw32.mult_pred_l1_disable = 128;
+            break;
+        case 2:
+            cmd->dw32.mult_pred_l0_disable = (generic_state->frame_type == SLICE_TYPE_B)?1:128;
+            cmd->dw32.mult_pred_l1_disable = (generic_state->frame_type == SLICE_TYPE_B)?1:128;
+            break;
+        case 3:
+            cmd->dw32.mult_pred_l0_disable = 1;
+            cmd->dw32.mult_pred_l1_disable = (generic_state->frame_type == SLICE_TYPE_B)?1:128;
+            break;
+        }
+    }else
+    {
+        cmd->dw32.mult_pred_l0_disable = 128;
+        cmd->dw32.mult_pred_l1_disable = 128;
+    }
+
+    /*field setting for dw33 34, ignored*/
+
+    if(avc_state->adaptive_transform_decision_enable)
+    {
+        if(generic_state->frame_type != SLICE_TYPE_I)
+        {
+            cmd->dw34.enable_adaptive_tx_decision = 1;
+        }
+
+        cmd->dw58.mb_texture_threshold = 1024;
+        cmd->dw58.tx_decision_threshold = 128;
+    }
+
+
+    if(generic_state->frame_type == SLICE_TYPE_B)
+    {
+        cmd->dw34.list1_ref_id0_frm_field_parity = 0; //frame only
+        /* NOTE(review): this field was assigned 0 twice here; a second parity field may have been intended - confirm */
+        cmd->dw34.b_direct_mode = slice_param->direct_spatial_mv_pred_flag;
+    }
+    cmd->dw34.b_original_bff = 0; //frame only
+    cmd->dw34.enable_mb_flatness_check_optimization = avc_state->flatness_check_enable;
+    cmd->dw34.roi_enable_flag = curbe_param->roi_enabled;
+    cmd->dw34.mad_enable_falg = avc_state->mad_enable;
+    cmd->dw34.mb_brc_enable = avc_state->mb_qp_data_enable || generic_state->mb_brc_enabled;
+    cmd->dw34.arbitray_num_mbs_per_slice = avc_state->arbitrary_num_mbs_in_slice;
+    cmd->dw34.force_non_skip_check = avc_state->mb_disable_skip_map_enable;
+
+    if(cmd->dw34.force_non_skip_check)
+    {
+        cmd->dw34.disable_enc_skip_check = avc_state->skip_check_disable;
+    }
+
+    cmd->dw36.check_all_fractional_enable = avc_state->caf_enable;
+    cmd->dw38.ref_threshold = 400;
+    cmd->dw39.hme_ref_windows_comb_threshold = (generic_state->frame_type == SLICE_TYPE_B)?gen9_avc_hme_b_combine_len[preset]:gen9_avc_hme_combine_len[preset];
+
+    /* Default:2 used for MBBRC (MB QP Surface width and height are 4x downscaled picture in MB unit * 4 bytes)
+       0 used for MBQP data surface (MB QP Surface width and height are same as the input picture size in MB unit * 1bytes)
+       starting GEN9, BRC use split kernel, MB QP surface is same size as input picture */
+    cmd->dw47.mb_qp_read_factor = (avc_state->mb_qp_data_enable || generic_state->mb_brc_enabled)?0:2;
+
+    if(mbenc_i_frame_dist_in_use)
+    {
+        cmd->dw13.qp_prime_y = 0;
+        cmd->dw13.qp_prime_cb = 0;
+        cmd->dw13.qp_prime_cr = 0;
+        cmd->dw33.intra_16x16_nondc_penalty = 0;
+        cmd->dw33.intra_8x8_nondc_penalty = 0;
+        cmd->dw33.intra_4x4_nondc_penalty = 0;
+
+    }
+    if(cmd->dw4.use_actual_ref_qp_value)
+    {
+        cmd->dw44.actual_qp_value_for_ref_id0_list0 = gen9_avc_get_qp_from_ref_list(ctx,slice_param,0,0);
+        cmd->dw44.actual_qp_value_for_ref_id1_list0 = gen9_avc_get_qp_from_ref_list(ctx,slice_param,0,1);
+        cmd->dw44.actual_qp_value_for_ref_id2_list0 = gen9_avc_get_qp_from_ref_list(ctx,slice_param,0,2);
+        cmd->dw44.actual_qp_value_for_ref_id3_list0 = gen9_avc_get_qp_from_ref_list(ctx,slice_param,0,3);
+        cmd->dw45.actual_qp_value_for_ref_id4_list0 = gen9_avc_get_qp_from_ref_list(ctx,slice_param,0,4);
+        cmd->dw45.actual_qp_value_for_ref_id5_list0 = gen9_avc_get_qp_from_ref_list(ctx,slice_param,0,5);
+        cmd->dw45.actual_qp_value_for_ref_id6_list0 = gen9_avc_get_qp_from_ref_list(ctx,slice_param,0,6);
+        cmd->dw45.actual_qp_value_for_ref_id7_list0 = gen9_avc_get_qp_from_ref_list(ctx,slice_param,0,7);
+        cmd->dw46.actual_qp_value_for_ref_id0_list1 = gen9_avc_get_qp_from_ref_list(ctx,slice_param,1,0);
+        cmd->dw46.actual_qp_value_for_ref_id1_list1 = gen9_avc_get_qp_from_ref_list(ctx,slice_param,1,1);
+    }
+
+    table_idx = slice_type_kernel[generic_state->frame_type];
+    cmd->dw46.ref_cost = gen9_avc_ref_cost[table_idx][qp];
+
+    if(generic_state->frame_type == SLICE_TYPE_I)
+    {
+        cmd->dw0.skip_mode_enable = 0;
+        cmd->dw37.skip_mode_enable = 0;
+        cmd->dw36.hme_combine_overlap = 0;
+        cmd->dw47.intra_cost_sf = 16;
+        cmd->dw34.enable_direct_bias_adjustment = 0;
+        cmd->dw34.enable_global_motion_bias_adjustment = 0;
+
+    }else if(generic_state->frame_type == SLICE_TYPE_P)
+    {
+        cmd->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(avc_state->seq_param->level_idc)/2;
+        cmd->dw3.bme_disable_fbr = 1;
+        cmd->dw5.ref_width = gen9_avc_search_x[preset];
+        cmd->dw5.ref_height = gen9_avc_search_y[preset];
+        cmd->dw7.non_skip_zmv_added = 1;
+        cmd->dw7.non_skip_mode_added = 1;
+        cmd->dw7.skip_center_mask = 1;
+        cmd->dw47.intra_cost_sf = (avc_state->adaptive_intra_scaling_enable)?gen9_avc_adaptive_intra_scaling_factor[qp]:gen9_avc_intra_scaling_factor[qp];
+        cmd->dw47.max_vmv_r = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only
+        cmd->dw36.hme_combine_overlap = 1;
+        cmd->dw36.num_ref_idx_l0_minus_one = (avc_state->multi_pre_enable)?slice_param->num_ref_idx_l0_active_minus1:0;
+        cmd->dw39.ref_width = gen9_avc_search_x[preset];
+        cmd->dw39.ref_height = gen9_avc_search_y[preset];
+        cmd->dw34.enable_direct_bias_adjustment = 0;
+        cmd->dw34.enable_global_motion_bias_adjustment = avc_state->global_motion_bias_adjustment_enable;
+        if(avc_state->global_motion_bias_adjustment_enable)
+            cmd->dw59.hme_mv_cost_scaling_factor = avc_state->hme_mv_cost_scaling_factor;
+
+    }else
+    {
+        cmd->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(avc_state->seq_param->level_idc)/2;
+        cmd->dw1.bi_weight = avc_state->bi_weight;
+        cmd->dw3.search_ctrl = 7;
+        cmd->dw3.skip_type = 1;
+        cmd->dw5.ref_width = gen9_avc_b_search_x[preset];
+        cmd->dw5.ref_height = gen9_avc_b_search_y[preset];
+        cmd->dw7.skip_center_mask = 0xff;
+        cmd->dw47.intra_cost_sf = (avc_state->adaptive_intra_scaling_enable)?gen9_avc_adaptive_intra_scaling_factor[qp]:gen9_avc_intra_scaling_factor[qp];
+        cmd->dw47.max_vmv_r = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only
+        cmd->dw36.hme_combine_overlap = 1;
+        surface_id = slice_param->RefPicList1[0].picture_id;
+        obj_surface = SURFACE(surface_id);
+        if (!obj_surface)
+        {
+            WARN_ONCE("Invalid backward reference frame\n");
+            i965_gpe_context_unmap_curbe(gpe_context); /* do not leave the CURBE mapped on the error path */
+            return;
+        }
+        cmd->dw36.is_fwd_frame_short_term_ref = !!( slice_param->RefPicList1[0].flags & VA_PICTURE_H264_SHORT_TERM_REFERENCE);
+        cmd->dw36.num_ref_idx_l0_minus_one = (avc_state->multi_pre_enable)?slice_param->num_ref_idx_l0_active_minus1:0;
+        cmd->dw36.num_ref_idx_l1_minus_one = (avc_state->multi_pre_enable)?slice_param->num_ref_idx_l1_active_minus1:0;
+        cmd->dw39.ref_width = gen9_avc_b_search_x[preset];
+        cmd->dw39.ref_height = gen9_avc_b_search_y[preset];
+        cmd->dw40.dist_scale_factor_ref_id0_list0 = avc_state->dist_scale_factor_list0[0];
+        cmd->dw40.dist_scale_factor_ref_id1_list0 = avc_state->dist_scale_factor_list0[1];
+        cmd->dw41.dist_scale_factor_ref_id2_list0 = avc_state->dist_scale_factor_list0[2];
+        cmd->dw41.dist_scale_factor_ref_id3_list0 = avc_state->dist_scale_factor_list0[3];
+        cmd->dw42.dist_scale_factor_ref_id4_list0 = avc_state->dist_scale_factor_list0[4];
+        cmd->dw42.dist_scale_factor_ref_id5_list0 = avc_state->dist_scale_factor_list0[5];
+        cmd->dw43.dist_scale_factor_ref_id6_list0 = avc_state->dist_scale_factor_list0[6];
+        cmd->dw43.dist_scale_factor_ref_id7_list0 = avc_state->dist_scale_factor_list0[7];
+
+        cmd->dw34.enable_direct_bias_adjustment = avc_state->direct_bias_adjustment_enable;
+        if(cmd->dw34.enable_direct_bias_adjustment)
+        {
+            cmd->dw7.non_skip_zmv_added = 1;
+            cmd->dw7.non_skip_mode_added = 1;
+        }
+
+        cmd->dw34.enable_global_motion_bias_adjustment = avc_state->global_motion_bias_adjustment_enable;
+        if(avc_state->global_motion_bias_adjustment_enable)
+            cmd->dw59.hme_mv_cost_scaling_factor = avc_state->hme_mv_cost_scaling_factor;
+
+    }
+
+    /* remember the value that ended up in the CURBE (it comes from the init table) */
+    avc_state->block_based_skip_enable = cmd->dw3.block_based_skip_enable;
+
+    if(avc_state->rolling_intra_refresh_enable)
+    {
+        /* the flag is forwarded as-is; no other intra-refresh field is set in this function */
+        cmd->dw34.widi_intra_refresh_en = avc_state->rolling_intra_refresh_enable;
+
+    }else
+    {
+        cmd->dw34.widi_intra_refresh_en = 0;
+    }
+
+    cmd->dw34.enable_per_mb_static_check = avc_state->sfd_enable && generic_state->hme_enabled;
+    cmd->dw34.enable_adaptive_search_window_size = avc_state->adaptive_search_window_enable;
+
+    /* ROI rectangles: dw49-dw56, per-ROI delta QP: dw57 */
+    if(curbe_param->roi_enabled)
+    {
+        cmd->dw49.roi_1_x_left = generic_state->roi[0].left;
+        cmd->dw49.roi_1_y_top = generic_state->roi[0].top;
+        cmd->dw50.roi_1_x_right = generic_state->roi[0].right;
+        cmd->dw50.roi_1_y_bottom = generic_state->roi[0].bottom;
+
+        cmd->dw51.roi_2_x_left = generic_state->roi[1].left;
+        cmd->dw51.roi_2_y_top = generic_state->roi[1].top;
+        cmd->dw52.roi_2_x_right = generic_state->roi[1].right;
+        cmd->dw52.roi_2_y_bottom = generic_state->roi[1].bottom;
+
+        cmd->dw53.roi_3_x_left = generic_state->roi[2].left;
+        cmd->dw53.roi_3_y_top = generic_state->roi[2].top;
+        cmd->dw54.roi_3_x_right = generic_state->roi[2].right;
+        cmd->dw54.roi_3_y_bottom = generic_state->roi[2].bottom;
+
+        cmd->dw55.roi_4_x_left = generic_state->roi[3].left;
+        cmd->dw55.roi_4_y_top = generic_state->roi[3].top;
+        cmd->dw56.roi_4_x_right = generic_state->roi[3].right;
+        cmd->dw56.roi_4_y_bottom = generic_state->roi[3].bottom;
+
+        if(!generic_state->brc_enabled)
+        {
+            /* without BRC the ROI delta QP is applied directly, clipped so that qp + delta stays within [0, 52] */
+            char tmp = 0;
+            tmp = generic_state->roi[0].value;
+            CLIP(tmp,-qp,52-qp);
+            cmd->dw57.roi_1_dqp_prime_y = tmp;
+            tmp = generic_state->roi[1].value;
+            CLIP(tmp,-qp,52-qp);
+            cmd->dw57.roi_2_dqp_prime_y = tmp;
+            tmp = generic_state->roi[2].value;
+            CLIP(tmp,-qp,52-qp);
+            cmd->dw57.roi_3_dqp_prime_y = tmp;
+            tmp = generic_state->roi[3].value;
+            CLIP(tmp,-qp,52-qp);
+            cmd->dw57.roi_4_dqp_prime_y = tmp;
+        }else
+        {
+            cmd->dw34.roi_enable_flag = 0;
+        }
+    }
+
+    /* surface indices the kernel uses to address its surfaces */
+    cmd->dw64.mb_data_surf_index = GEN9_AVC_MBENC_MFC_AVC_PAK_OBJ_INDEX;
+    cmd->dw65.mv_data_surf_index = GEN9_AVC_MBENC_IND_MV_DATA_INDEX;
+    cmd->dw66.i_dist_surf_index = GEN9_AVC_MBENC_BRC_DISTORTION_INDEX;
+    cmd->dw67.src_y_surf_index = GEN9_AVC_MBENC_CURR_Y_INDEX;
+    cmd->dw68.mb_specific_data_surf_index = GEN9_AVC_MBENC_MB_SPECIFIC_DATA_INDEX;
+    cmd->dw69.aux_vme_out_surf_index = GEN9_AVC_MBENC_AUX_VME_OUT_INDEX;
+    cmd->dw70.curr_ref_pic_sel_surf_index = GEN9_AVC_MBENC_REFPICSELECT_L0_INDEX;
+    cmd->dw71.hme_mv_pred_fwd_bwd_surf_index = GEN9_AVC_MBENC_MV_DATA_FROM_ME_INDEX;
+    cmd->dw72.hme_dist_surf_index = GEN9_AVC_MBENC_4XME_DISTORTION_INDEX;
+    cmd->dw73.slice_map_surf_index = GEN9_AVC_MBENC_SLICEMAP_DATA_INDEX;
+    cmd->dw74.fwd_frm_mb_data_surf_index = GEN9_AVC_MBENC_FWD_MB_DATA_INDEX;
+    cmd->dw75.fwd_frm_mv_surf_index = GEN9_AVC_MBENC_FWD_MV_DATA_INDEX;
+    cmd->dw76.mb_qp_buffer = GEN9_AVC_MBENC_MBQP_INDEX;
+    cmd->dw77.mb_brc_lut = GEN9_AVC_MBENC_MBBRC_CONST_DATA_INDEX;
+    cmd->dw78.vme_inter_prediction_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX;
+    cmd->dw79.vme_inter_prediction_mr_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX;
+    cmd->dw80.mb_stats_surf_index = GEN9_AVC_MBENC_MB_STATS_INDEX;
+    cmd->dw81.mad_surf_index = GEN9_AVC_MBENC_MAD_DATA_INDEX;
+    cmd->dw82.force_non_skip_mb_map_surface = GEN9_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX;
+    cmd->dw83.widi_wa_surf_index = GEN9_AVC_MBENC_WIDI_WA_INDEX;
+    cmd->dw84.brc_curbe_surf_index = GEN9_AVC_MBENC_BRC_CURBE_DATA_INDEX;
+    cmd->dw85.static_detection_cost_table_index = GEN9_AVC_MBENC_SFD_COST_TABLE_INDEX;
+
+    i965_gpe_context_unmap_curbe(gpe_context);
+
+    return;
+}
+
+static void
+gen9_avc_send_surface_mbenc(VADriverContextP ctx,
+ struct encode_state *encode_state,
+ struct i965_gpe_context *gpe_context,
+ struct intel_encoder_context *encoder_context,
+ void * param_mbenc)
+{
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
+ struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
+ struct gen9_avc_encoder_context * avc_ctx = (struct gen9_avc_encoder_context * )vme_context->private_enc_ctx;
+ struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
+ struct avc_enc_state * avc_state = (struct avc_enc_state * )vme_context->private_enc_state;
+ struct object_surface *obj_surface;
+ struct gen9_surface_avc *avc_priv_surface;
+ struct i965_gpe_resource *gpe_resource;
+ struct mbenc_param * param = (struct mbenc_param *)param_mbenc ;
+ VASurfaceID surface_id;
+ unsigned int mbenc_i_frame_dist_in_use = param->mbenc_i_frame_dist_in_use;
+ unsigned int size = 0;
+ unsigned int w_mb = generic_state->frame_width_in_mbs;
+ unsigned int h_mb = generic_state->frame_height_in_mbs;
+ int i = 0;
+ VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
+ /* Purpose: add every surface the MBENC / I-frame-distortion kernel uses to gpe_context at its GEN9_AVC_MBENC_* index; param_mbenc must point to a struct mbenc_param. */
+ obj_surface = encode_state->reconstructed_object;
+ /* nothing is bound (silent return) when the reconstructed surface or its private data is missing */
+ if (!obj_surface || !obj_surface->private_data)
+ return;
+ avc_priv_surface = obj_surface->private_data;
+
+ /*pak obj command buffer output: 16 dwords per MB, taken from the reconstructed surface's private data*/
+ size = w_mb * h_mb * 16 * 4;
+ gpe_resource = &avc_priv_surface->res_mb_code_surface;
+ gen9_add_buffer_gpe_surface(ctx,
+ gpe_context,
+ gpe_resource,
+ 0,
+ size / 4,
+ 0,
+ GEN9_AVC_MBENC_MFC_AVC_PAK_OBJ_INDEX);
+
+ /*mv data buffer output: 32 dwords per MB*/
+ size = w_mb * h_mb * 32 * 4;
+ gpe_resource = &avc_priv_surface->res_mv_data_surface;
+ gen9_add_buffer_gpe_surface(ctx,
+ gpe_context,
+ gpe_resource,
+ 0,
+ size / 4,
+ 0,
+ GEN9_AVC_MBENC_IND_MV_DATA_INDEX);
+
+ /*current picture as Y and UV planes: the 4x scaled surface for I-frame distortion, the input YUV otherwise*/
+ if(mbenc_i_frame_dist_in_use)
+ {
+ obj_surface = encode_state->reconstructed_object;
+ if (!obj_surface || !obj_surface->private_data)
+ return;
+ avc_priv_surface = obj_surface->private_data;
+ obj_surface = avc_priv_surface->scaled_4x_surface_obj;
+ }else
+ {
+ obj_surface = encode_state->input_yuv_object;
+ }
+ gen9_add_2d_gpe_surface(ctx,
+ gpe_context,
+ obj_surface,
+ 0,
+ 1,
+ I965_SURFACEFORMAT_R8_UNORM,
+ GEN9_AVC_MBENC_CURR_Y_INDEX);
+
+ gen9_add_2d_gpe_surface(ctx,
+ gpe_context,
+ obj_surface,
+ 1,
+ 1,
+ I965_SURFACEFORMAT_R16_UINT,
+ GEN9_AVC_MBENC_CURR_UV_INDEX);
+ /* HME results are only bound when HME is enabled */
+ if(generic_state->hme_enabled)
+ {
+ /*memv input 4x*/
+ gpe_resource = &(avc_ctx->s4x_memv_data_buffer);
+ gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
+ gpe_resource,
+ 1,
+ I965_SURFACEFORMAT_R8_UNORM,
+ GEN9_AVC_MBENC_MV_DATA_FROM_ME_INDEX);
+ /* memv distortion input*/
+ gpe_resource = &(avc_ctx->s4x_memv_distortion_buffer);
+ gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
+ gpe_resource,
+ 1,
+ I965_SURFACEFORMAT_R8_UNORM,
+ GEN9_AVC_MBENC_4XME_DISTORTION_INDEX);
+ }
+
+ /*mbbrc const data_buffer: 16 dwords for each of 52 entries (presumably one per QP - confirm)*/
+ if(param->mb_const_data_buffer_in_use)
+ {
+ size = 16 * 52 * sizeof(unsigned int);
+ gpe_resource = &avc_ctx->res_mbbrc_const_data_buffer;
+ gen9_add_buffer_gpe_surface(ctx,
+ gpe_context,
+ gpe_resource,
+ 0,
+ size / 4,
+ 0,
+ GEN9_AVC_MBENC_MBBRC_CONST_DATA_INDEX);
+
+ }
+
+ /*mb qp data_buffer: the mb_qp_data surface when mb_qp_data_enable is set, the MBBRC QP surface otherwise*/
+ if(param->mb_qp_buffer_in_use)
+ {
+ if(avc_state->mb_qp_data_enable)
+ gpe_resource = &(avc_ctx->res_mb_qp_data_surface);
+ else
+ gpe_resource = &(avc_ctx->res_mbbrc_mb_qp_data_surface);
+ gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
+ gpe_resource,
+ 1,
+ I965_SURFACEFORMAT_R8_UNORM,
+ GEN9_AVC_MBENC_MBQP_INDEX);
+ }
+
+ /*current picture again (same selection as above), added at the first VME inter-prediction index*/
+ if(mbenc_i_frame_dist_in_use)
+ {
+ obj_surface = encode_state->reconstructed_object;
+ if (!obj_surface || !obj_surface->private_data)
+ return;
+ avc_priv_surface = obj_surface->private_data;
+ obj_surface = avc_priv_surface->scaled_4x_surface_obj;
+ }else
+ {
+ obj_surface = encode_state->input_yuv_object;
+ }
+ gen9_add_adv_gpe_surface(ctx, gpe_context,
+ obj_surface,
+ GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX);
+ /*RefPicList0 surfaces at odd offsets (i*2 + 1) from IDX_0; stops at the first invalid entry*/
+ for(i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++)
+ {
+ surface_id = slice_param->RefPicList0[i].picture_id;
+ obj_surface = SURFACE(surface_id);
+ if (!obj_surface || !obj_surface->private_data)
+ break;
+
+ gen9_add_adv_gpe_surface(ctx, gpe_context,
+ obj_surface,
+ GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX+i*2 + 1);
+ }
+ /*current picture once more, added at the second VME inter-prediction index (IDX_1)*/
+ if(mbenc_i_frame_dist_in_use)
+ {
+ obj_surface = encode_state->reconstructed_object;
+ if (!obj_surface || !obj_surface->private_data)
+ return;
+ avc_priv_surface = obj_surface->private_data;
+ obj_surface = avc_priv_surface->scaled_4x_surface_obj;
+ }else
+ {
+ obj_surface = encode_state->input_yuv_object;
+ }
+ gen9_add_adv_gpe_surface(ctx, gpe_context,
+ obj_surface,
+ GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX);
+ /* RefPicList1: effectively only entry 0 is handled (see the break below) */
+ for(i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++)
+ {
+ if(i > 0) break;// only one ref supported here for B frame
+ surface_id = slice_param->RefPicList1[i].picture_id;
+ obj_surface = SURFACE(surface_id);
+ if (!obj_surface || !obj_surface->private_data)
+ break;
+ /* the list1 reference is added both after IDX_1 (+1) and after IDX_0 (+2) */
+ gen9_add_adv_gpe_surface(ctx, gpe_context,
+ obj_surface,
+ GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX+i*2 + 1);
+ gen9_add_adv_gpe_surface(ctx, gpe_context,
+ obj_surface,
+ GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX+i*2 + 2);
+ if(i == 0)
+ {
+ avc_priv_surface = obj_surface->private_data;
+ /*mb code buffer of the RefPicList1[0] surface, added at the FWD_MB_DATA index*/
+ size = w_mb * h_mb * 16 * 4;
+ gpe_resource = &avc_priv_surface->res_mb_code_surface;
+ gen9_add_buffer_gpe_surface(ctx,
+ gpe_context,
+ gpe_resource,
+ 0,
+ size / 4,
+ 0,
+ GEN9_AVC_MBENC_FWD_MB_DATA_INDEX);
+
+ /*mv data buffer of the RefPicList1[0] surface, added at the FWD_MV_DATA index*/
+ size = w_mb * h_mb * 32 * 4;
+ gpe_resource = &avc_priv_surface->res_mv_data_surface;
+ gen9_add_buffer_gpe_surface(ctx,
+ gpe_context,
+ gpe_resource,
+ 0,
+ size / 4,
+ 0,
+ GEN9_AVC_MBENC_FWD_MV_DATA_INDEX);
+
+ }
+ /* NOTE(review): a third add of the same surface, offset by INTEL_AVC_MAX_BWD_REF_NUM - purpose not visible here, confirm */
+ if( i < INTEL_AVC_MAX_BWD_REF_NUM)
+ {
+ gen9_add_adv_gpe_surface(ctx, gpe_context,
+ obj_surface,
+ GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX+i*2 + 1 + INTEL_AVC_MAX_BWD_REF_NUM);
+ }
+
+ }
+
+ /* BRC distortion data buffer, only added when the I-frame distortion kernel is in use*/
+ if(mbenc_i_frame_dist_in_use)
+ {
+ gpe_resource = &(avc_ctx->res_brc_dist_data_surface);
+ gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
+ gpe_resource,
+ 1,
+ I965_SURFACEFORMAT_R8_UNORM,
+ GEN9_AVC_MBENC_BRC_DISTORTION_INDEX);
+ }
+
+ /* RefPicSelect surface of the current picture, added only when supported and the picture is flagged is_as_ref*/
+ obj_surface = encode_state->reconstructed_object;
+ avc_priv_surface = obj_surface->private_data;
+ if(avc_state->ref_pic_select_list_supported && avc_priv_surface->is_as_ref)
+ {
+ gpe_resource = &(avc_priv_surface->res_ref_pic_select_surface);
+ gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
+ gpe_resource,
+ 1,
+ I965_SURFACEFORMAT_R8_UNORM,
+ GEN9_AVC_MBENC_REFPICSELECT_L0_INDEX);
+
+ }
+ /* the MB_STATS index receives either the MB status buffer or, failing that, the flatness check surface */
+ if(param->mb_vproc_stats_enable)
+ {
+ /*mb status buffer input*/
+ size = w_mb * h_mb * 16 * 4;
+ gpe_resource = &(avc_ctx->res_mb_status_buffer);
+ gen9_add_buffer_gpe_surface(ctx,
+ gpe_context,
+ gpe_resource,
+ 0,
+ size / 4,
+ 0,
+ GEN9_AVC_MBENC_MB_STATS_INDEX);
+
+ }else if(avc_state->flatness_check_enable)
+ {
+
+ gpe_resource = &(avc_ctx->res_flatness_check_surface);
+ gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
+ gpe_resource,
+ 1,
+ I965_SURFACEFORMAT_R8_UNORM,
+ GEN9_AVC_MBENC_MB_STATS_INDEX);
+ }
+
+ if(param->mad_enable)
+ {
+ /*mad buffer: a single dword, cleared right after it is added*/
+ size = 4;
+ gpe_resource = &(avc_ctx->res_mad_data_buffer);
+ gen9_add_buffer_gpe_surface(ctx,
+ gpe_context,
+ gpe_resource,
+ 0,
+ size / 4,
+ 0,
+ GEN9_AVC_MBENC_MAD_DATA_INDEX);
+ i965_zero_gpe_resource(gpe_resource);
+ }
+
+ /*brc updated mbenc curbe data buffer,it is ignored*/
+
+ /*arbitrary num mbs in slice*/
+ if(avc_state->arbitrary_num_mbs_in_slice)
+ {
+ /*slice surface input*/
+ gpe_resource = &(avc_ctx->res_mbenc_slice_map_surface);
+ gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
+ gpe_resource,
+ 1,
+ I965_SURFACEFORMAT_R8_UNORM,
+ GEN9_AVC_MBENC_SLICEMAP_DATA_INDEX);
+ }
+
+ /* surfaces only added on the regular MBENC path (not for I-frame distortion): force-non-skip map and SFD cost table */
+ if(!mbenc_i_frame_dist_in_use)
+ {
+ if(avc_state->mb_disable_skip_map_enable)
+ {
+ gpe_resource = &(avc_ctx->res_mb_disable_skip_map_surface);
+ gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
+ gpe_resource,
+ 1,
+ I965_SURFACEFORMAT_R8_UNORM,
+ GEN9_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX);
+ }
+ /* static frame detection cost table: separate P and B tables, nothing for I frames */
+ if(avc_state->sfd_enable && generic_state->hme_enabled)
+ {
+ if(generic_state->frame_type == SLICE_TYPE_P)
+ {
+ gpe_resource = &(avc_ctx->res_sfd_cost_table_p_frame_buffer);
+
+ }else if(generic_state->frame_type == SLICE_TYPE_B)
+ {
+ gpe_resource = &(avc_ctx->res_sfd_cost_table_b_frame_buffer);
+ }
+ /* NOTE(review): for a frame_type other than I/P/B, gpe_resource still holds an earlier resource here */
+ if(generic_state->frame_type != SLICE_TYPE_I)
+ {
+ gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
+ gpe_resource,
+ 1,
+ I965_SURFACEFORMAT_R8_UNORM,
+ GEN9_AVC_MBENC_SFD_COST_TABLE_INDEX);
+ }
+ }
+ }
+
+ return;
+}
+
+/*
+ * Launch the MBENC kernel, or the BRC I-frame distortion kernel on the
+ * 4x-downscaled picture when i_frame_dist_in_use is set.
+ */
+static VAStatus
+gen9_avc_kernel_mbenc(VADriverContextP ctx,
+                      struct encode_state *encode_state,
+                      struct intel_encoder_context *encoder_context,
+                      bool i_frame_dist_in_use)
+{
+    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
+    struct generic_encoder_context * generic_ctx = (struct generic_encoder_context * )vme_context->generic_enc_ctx;
+    struct gen9_avc_encoder_context * avc_ctx = (struct gen9_avc_encoder_context * )vme_context->private_enc_ctx;
+    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
+    struct avc_enc_state * avc_state = (struct avc_enc_state * )vme_context->private_enc_state;
+
+    struct i965_gpe_context *gpe_context;
+    struct gpe_media_object_walker_parameter media_object_walker_param;
+    struct gpe_encoder_kernel_walker_parameter kernel_walker_param;
+    unsigned int downscaled_width_in_mb, downscaled_height_in_mb;
+    int media_function = 0;
+    int kernel_idx = 0;
+    unsigned int mb_const_data_buffer_in_use = 0;
+    unsigned int mb_qp_buffer_in_use = 0;
+    unsigned int brc_enabled = 0;
+    unsigned int roi_enable = (generic_state->num_roi > 0)?1:0;
+    /* the trailing "&& (0)" forces dirty ROI off */
+    unsigned int dirty_roi_enable = ((generic_state->dirty_num_roi > 0) && (generic_state->frame_type == SLICE_TYPE_P) && (0));
+    struct mbenc_param param ;
+
+    int mbenc_i_frame_dist_in_use = i_frame_dist_in_use;
+    int mad_enable = 0;
+    VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
+
+    mb_const_data_buffer_in_use =
+        generic_state->mb_brc_enabled ||
+        roi_enable ||
+        dirty_roi_enable ||
+        avc_state->mb_qp_data_enable ||
+        avc_state->rolling_intra_refresh_enable;
+    mb_qp_buffer_in_use =
+        generic_state->mb_brc_enabled ||
+        generic_state->brc_roi_enable ||
+        avc_state->mb_qp_data_enable;
+
+    if(mbenc_i_frame_dist_in_use)
+    {
+        /* the distortion kernel lives in the BRC context set and walks the 4x downscaled picture */
+        media_function = INTEL_MEDIA_STATE_ENC_I_FRAME_DIST;
+        kernel_idx = GEN9_AVC_KERNEL_BRC_I_FRAME_DIST;
+        downscaled_width_in_mb = generic_state->downscaled_width_4x_in_mb;
+        downscaled_height_in_mb = generic_state->downscaled_height_4x_in_mb;
+        mad_enable = 0;
+        brc_enabled = 0;
+
+        gpe_context = &(avc_ctx->context_brc.gpe_contexts[kernel_idx]);
+    }else
+    {
+        /* start from the I-frame kernel of the selected mode; P/B offsets are added below */
+        switch(generic_state->kernel_mode)
+        {
+        case INTEL_ENC_KERNEL_NORMAL :
+            {
+                media_function = INTEL_MEDIA_STATE_ENC_NORMAL;
+                kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_NORMAL_I;
+                break;
+            }
+        case INTEL_ENC_KERNEL_PERFORMANCE :
+            {
+                media_function = INTEL_MEDIA_STATE_ENC_PERFORMANCE;
+                kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_PERFORMANCE_I;
+                break;
+            }
+        case INTEL_ENC_KERNEL_QUALITY :
+            {
+                media_function = INTEL_MEDIA_STATE_ENC_QUALITY;
+                kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_QUALITY_I;
+                break;
+            }
+        default:
+            assert(0);
+        }
+
+        if(generic_state->frame_type == SLICE_TYPE_P)
+        {
+            kernel_idx += 1; /* P variant: index of the _I kernel + 1 */
+        }
+        else if(generic_state->frame_type == SLICE_TYPE_B)
+        {
+            kernel_idx += 2; /* B variant: index of the _I kernel + 2 */
+        }
+
+        downscaled_width_in_mb = generic_state->frame_width_in_mbs;
+        downscaled_height_in_mb = generic_state->frame_height_in_mbs;
+        mad_enable = avc_state->mad_enable;
+        brc_enabled = generic_state->brc_enabled;
+
+        gpe_context = &(avc_ctx->context_mbenc.gpe_contexts[kernel_idx]);
+    }
+
+    memset(&param,0,sizeof(struct mbenc_param));
+
+    param.mb_const_data_buffer_in_use = mb_const_data_buffer_in_use;
+    param.mb_qp_buffer_in_use = mb_qp_buffer_in_use;
+    param.mbenc_i_frame_dist_in_use = mbenc_i_frame_dist_in_use;
+    param.mad_enable = mad_enable;
+    param.brc_enabled = brc_enabled;
+    param.roi_enabled = roi_enable;
+    if(avc_state->mb_status_supported)
+    {
+        param.mb_vproc_stats_enable = avc_state->flatness_check_enable || avc_state->adaptive_transform_decision_enable;
+    }
+
+    /* when the CURBE was already set in the BRC update, skip both the context init and the CURBE fill */
+    if(!avc_state->mbenc_curbe_set_in_brc_update)
+    {
+        gen8_gpe_context_init(ctx, gpe_context);
+    }
+    gen9_gpe_reset_binding_table(ctx, gpe_context);
+    if(!avc_state->mbenc_curbe_set_in_brc_update)
+    {
+        /*set curbe here*/
+        generic_ctx->pfn_set_curbe_mbenc(ctx,encode_state,gpe_context,encoder_context,&param);
+    }
+
+    /* MB brc const data buffer set up*/
+    if(mb_const_data_buffer_in_use)
+    {
+        gen9_avc_load_mb_brc_const_data(ctx,encode_state,encoder_context);
+    }
+
+    /*clear the mad buffer*/
+    if(mad_enable)
+    {
+        i965_zero_gpe_resource(&(avc_ctx->res_mad_data_buffer));
+    }
+    /*send surface*/
+    generic_ctx->pfn_send_mbenc_surface(ctx,encode_state,gpe_context,encoder_context,&param);
+
+    gen8_gpe_setup_interface_data(ctx, gpe_context);
+
+    /*walker setting*/
+    memset(&kernel_walker_param, 0, sizeof(kernel_walker_param));
+    kernel_walker_param.use_scoreboard = 1;
+    kernel_walker_param.resolution_x = downscaled_width_in_mb ;
+    kernel_walker_param.resolution_y = downscaled_height_in_mb ;
+    if(mbenc_i_frame_dist_in_use)
+    {
+        kernel_walker_param.no_dependency = 1;
+    }else
+    {
+        /* I: 45 degree; P: 26 degree; B: 26 degree with spatial direct prediction, 45 degree otherwise */
+        switch(generic_state->frame_type)
+        {
+        case SLICE_TYPE_I:
+            kernel_walker_param.walker_degree = WALKER_45_DEGREE;
+            break;
+        case SLICE_TYPE_P:
+            kernel_walker_param.walker_degree = WALKER_26_DEGREE;
+            break;
+        case SLICE_TYPE_B:
+            kernel_walker_param.walker_degree = WALKER_26_DEGREE;
+            if(!slice_param->direct_spatial_mv_pred_flag)
+            {
+                kernel_walker_param.walker_degree = WALKER_45_DEGREE;
+            }
+            break;
+        default:
+            assert(0);
+        }
+        kernel_walker_param.no_dependency = 0;
+    }
+
+    i965_init_media_object_walker_parameter(&kernel_walker_param, &media_object_walker_param);
+
+    gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
+                                            gpe_context,
+                                            media_function,
+                                            &media_object_walker_param);
+    return VA_STATUS_SUCCESS;
+}
--
2.11.0
More information about the Libva
mailing list