[Libva] [PATCH 19/31] ENC: MBENC kernel for AVC encoder

Sean V Kelley seanvk at posteo.de
Tue Jan 10 23:38:01 UTC 2017


From: Pengfei Qu <Pengfei.Qu at intel.com>

Signed-off-by: Pengfei Qu <Pengfei.Qu at intel.com>
Reviewed-by: Sean V Kelley <seanvk at posteo.de>
---
 src/gen9_avc_encoder.c | 927 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 927 insertions(+)

diff --git a/src/gen9_avc_encoder.c b/src/gen9_avc_encoder.c
index cd847a88..af581fc7 100755
--- a/src/gen9_avc_encoder.c
+++ b/src/gen9_avc_encoder.c
@@ -2844,3 +2844,930 @@ gen9_avc_load_mb_brc_const_data(VADriverContextP ctx,
     }
     i965_unmap_gpe_resource(gpe_resource);
 }
+
+/*
+ * Fill the CURBE data consumed by the AVC MBENC kernel.
+ *
+ * The CURBE of gpe_context is mapped, seeded from the init table that matches
+ * the frame type (or from the I-frame-distortion table when
+ * mbenc_i_frame_dist_in_use is set in param), patched with values taken from
+ * the picture/slice parameters and the encoder state, completed with the
+ * surface indices in dw64..dw85 and unmapped again on every exit path.
+ * param must point to a struct mbenc_param; encode_state is not read here.
+ */
+static void
+gen9_avc_set_curbe_mbenc(VADriverContextP ctx,
+                         struct encode_state *encode_state,
+                         struct i965_gpe_context *gpe_context,
+                         struct intel_encoder_context *encoder_context,
+                         void * param)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    gen9_avc_mbenc_curbe_data *cmd;
+    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
+    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
+    struct avc_enc_state * avc_state = (struct avc_enc_state * )vme_context->private_enc_state;
+
+    VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
+    VAEncPictureParameterBufferH264  *pic_param = avc_state->pic_param;
+    VASurfaceID surface_id;
+    struct object_surface *obj_surface;
+
+    struct mbenc_param * curbe_param = (struct mbenc_param *)param ;
+    unsigned char qp = 0;
+    unsigned char me_method = 0;
+    unsigned int mbenc_i_frame_dist_in_use = curbe_param->mbenc_i_frame_dist_in_use;
+    unsigned int table_idx = 0;
+
+    unsigned int preset = generic_state->preset;
+    me_method = (generic_state->frame_type == SLICE_TYPE_B)? gen9_avc_b_me_method[preset]:gen9_avc_p_me_method[preset];
+    /* slice QP = pic_init_qp + slice_qp_delta of the first slice */
+    qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
+
+    cmd = (gen9_avc_mbenc_curbe_data *)i965_gpe_context_map_curbe(gpe_context);
+    memset(cmd,0,sizeof(gen9_avc_mbenc_curbe_data));
+
+    if(mbenc_i_frame_dist_in_use)
+    {
+        memcpy(cmd,gen9_avc_mbenc_curbe_i_frame_dist_init_data,sizeof(gen9_avc_mbenc_curbe_data));
+    }else
+    {
+        switch(generic_state->frame_type)
+        {
+        case SLICE_TYPE_I:
+            memcpy(cmd,gen9_avc_mbenc_curbe_normal_i_frame_init_data,sizeof(gen9_avc_mbenc_curbe_data));
+            break;
+        case SLICE_TYPE_P:
+            memcpy(cmd,gen9_avc_mbenc_curbe_normal_p_frame_init_data,sizeof(gen9_avc_mbenc_curbe_data));
+            break;
+        case SLICE_TYPE_B:
+            memcpy(cmd,gen9_avc_mbenc_curbe_normal_b_frame_init_data,sizeof(gen9_avc_mbenc_curbe_data));
+            break;
+        default:
+            assert(0);
+        }
+    }
+    cmd->dw0.adaptive_enable = gen9_avc_enable_adaptive_search[preset];
+    cmd->dw37.adaptive_enable = gen9_avc_enable_adaptive_search[preset];
+    cmd->dw0.t8x8_flag_for_inter_enable = pic_param->pic_fields.bits.transform_8x8_mode_flag;
+    cmd->dw37.t8x8_flag_for_inter_enable = pic_param->pic_fields.bits.transform_8x8_mode_flag;
+
+    cmd->dw2.max_len_sp = gen9_avc_max_len_sp[preset];
+    cmd->dw38.max_len_sp = 0;
+
+    cmd->dw3.src_access = 0;
+    cmd->dw3.ref_access = 0;
+
+    if(avc_state->ftq_enable && (generic_state->frame_type != SLICE_TYPE_I))
+    {
+        if(avc_state->ftq_override)
+        {
+            cmd->dw3.ftq_enable = avc_state->ftq_enable;
+        }else
+        {
+            if(generic_state->frame_type == SLICE_TYPE_P)
+            {
+                cmd->dw3.ftq_enable = gen9_avc_max_ftq_based_skip[preset] & 0x01;
+            }else
+            {
+                cmd->dw3.ftq_enable = (gen9_avc_max_ftq_based_skip[preset] >> 1) & 0x01;
+            }
+        }
+    }else
+    {
+        cmd->dw3.ftq_enable = 0;
+    }
+
+    if(avc_state->disable_sub_mb_partion)
+        cmd->dw3.sub_mb_part_mask = 0x7;
+
+    if(mbenc_i_frame_dist_in_use)
+    {
+        cmd->dw2.pitch_width = generic_state->downscaled_width_4x_in_mb;
+        cmd->dw4.picture_height_minus1 = generic_state->downscaled_height_4x_in_mb - 1;
+        cmd->dw5.slice_mb_height = (avc_state->slice_height + 4 - 1)/4;
+        cmd->dw6.batch_buffer_end = 0;
+        cmd->dw31.intra_compute_type = 1;
+    }else
+    {
+        cmd->dw2.pitch_width = generic_state->frame_width_in_mbs;
+        cmd->dw4.picture_height_minus1 = generic_state->frame_height_in_mbs - 1;
+        cmd->dw5.slice_mb_height = (avc_state->arbitrary_num_mbs_in_slice)?generic_state->frame_height_in_mbs:avc_state->slice_height;
+
+        {
+            memcpy(&(cmd->dw8),gen9_avc_mode_mv_cost_table[slice_type_kernel[generic_state->frame_type]][qp],8*sizeof(unsigned int));
+            if((generic_state->frame_type == SLICE_TYPE_I) && avc_state->old_mode_cost_enable)
+            {
+                //cmd->dw8 = gen9_avc_old_intra_mode_cost[qp];
+            }else if(avc_state->skip_bias_adjustment_enable)
+            {
+                /* Load different MvCost for P picture when SkipBiasAdjustment is enabled.
+                   No need to check for P picture as the flag is only enabled for P picture */
+                cmd->dw11.value = gen9_avc_mv_cost_p_skip_adjustment[qp];
+            }
+        }
+
+        table_idx = (generic_state->frame_type == SLICE_TYPE_B)?1:0;
+        memcpy(&(cmd->dw16),table_enc_search_path[table_idx][me_method],16*sizeof(unsigned int));
+    }
+    cmd->dw4.enable_fbr_bypass = avc_state->fbr_bypass_enable;
+    cmd->dw4.enable_intra_cost_scaling_for_static_frame = avc_state->sfd_enable && generic_state->hme_enabled;
+    cmd->dw4.field_parity_flag = 0;//bottom field
+    cmd->dw4.enable_cur_fld_idr = 0;//field related
+    cmd->dw4.contrained_intra_pred_flag = pic_param->pic_fields.bits.constrained_intra_pred_flag;
+    cmd->dw4.hme_enable = generic_state->hme_enabled;
+    cmd->dw4.picture_type = slice_type_kernel[generic_state->frame_type];
+    cmd->dw4.use_actual_ref_qp_value = generic_state->hme_enabled && (gen9_avc_mr_disable_qp_check[preset] == 0);
+
+    cmd->dw7.intra_part_mask = pic_param->pic_fields.bits.transform_8x8_mode_flag?0:0x02;
+    cmd->dw7.src_field_polarity = 0;//field related
+
+    /*ftq_skip_threshold_lut set,dw14 /15*/
+
+    /*r5 disable NonFTQSkipThresholdLUT*/
+    if(generic_state->frame_type == SLICE_TYPE_P)
+    {
+        cmd->dw32.skip_val = gen9_avc_skip_value_p[avc_state->block_based_skip_enable][pic_param->pic_fields.bits.transform_8x8_mode_flag][qp];
+    }else if(generic_state->frame_type == SLICE_TYPE_B)
+    {
+        cmd->dw32.skip_val = gen9_avc_skip_value_b[avc_state->block_based_skip_enable][pic_param->pic_fields.bits.transform_8x8_mode_flag][qp];
+    }
+
+    cmd->dw13.qp_prime_y = qp;
+    cmd->dw13.qp_prime_cb = qp;
+    cmd->dw13.qp_prime_cr = qp;
+    cmd->dw13.target_size_in_word = 0xff;//hardcode for brc disable
+
+    if((generic_state->frame_type != SLICE_TYPE_I)&& avc_state->multi_pre_enable)
+    {
+        switch(gen9_avc_multi_pred[preset])
+        {
+        case 0:
+            cmd->dw32.mult_pred_l0_disable = 128;
+            cmd->dw32.mult_pred_l1_disable = 128;
+            break;
+        case 1:
+            cmd->dw32.mult_pred_l0_disable = (generic_state->frame_type == SLICE_TYPE_P)?1:128;
+            cmd->dw32.mult_pred_l1_disable = 128;
+            break;
+        case 2:
+            cmd->dw32.mult_pred_l0_disable = (generic_state->frame_type == SLICE_TYPE_B)?1:128;
+            cmd->dw32.mult_pred_l1_disable = (generic_state->frame_type == SLICE_TYPE_B)?1:128;
+            break;
+        case 3:
+            cmd->dw32.mult_pred_l0_disable = 1;
+            cmd->dw32.mult_pred_l1_disable = (generic_state->frame_type == SLICE_TYPE_B)?1:128;
+            break;
+        }
+    }else
+    {
+        cmd->dw32.mult_pred_l0_disable = 128;
+        cmd->dw32.mult_pred_l1_disable = 128;
+    }
+
+    /*field setting for dw33 34, ignored*/
+
+    if(avc_state->adaptive_transform_decision_enable)
+    {
+        if(generic_state->frame_type != SLICE_TYPE_I)
+        {
+            cmd->dw34.enable_adaptive_tx_decision = 1;
+        }
+
+        cmd->dw58.mb_texture_threshold = 1024;
+        cmd->dw58.tx_decision_threshold = 128;
+    }
+
+    if(generic_state->frame_type == SLICE_TYPE_B)
+    {
+        cmd->dw34.list1_ref_id0_frm_field_parity = 0; //frame only
+        /* NOTE(review): the field above used to be cleared twice in a row; the
+           second store presumably targeted the ref_id1 parity field - confirm */
+        cmd->dw34.b_direct_mode = slice_param->direct_spatial_mv_pred_flag;
+    }
+    cmd->dw34.b_original_bff = 0; //frame only
+    cmd->dw34.enable_mb_flatness_check_optimization = avc_state->flatness_check_enable;
+    cmd->dw34.roi_enable_flag = curbe_param->roi_enabled;
+    cmd->dw34.mad_enable_falg = avc_state->mad_enable;
+    cmd->dw34.mb_brc_enable = avc_state->mb_qp_data_enable || generic_state->mb_brc_enabled;
+    cmd->dw34.arbitray_num_mbs_per_slice = avc_state->arbitrary_num_mbs_in_slice;
+    cmd->dw34.force_non_skip_check = avc_state->mb_disable_skip_map_enable;
+
+    if(cmd->dw34.force_non_skip_check)
+    {
+       cmd->dw34.disable_enc_skip_check = avc_state->skip_check_disable;
+    }
+
+    cmd->dw36.check_all_fractional_enable = avc_state->caf_enable;
+    cmd->dw38.ref_threshold = 400;
+    cmd->dw39.hme_ref_windows_comb_threshold = (generic_state->frame_type == SLICE_TYPE_B)?gen9_avc_hme_b_combine_len[preset]:gen9_avc_hme_combine_len[preset];
+
+    /* Default:2 used for MBBRC (MB QP Surface width and height are 4x downscaled picture in MB unit * 4  bytes)
+       0 used for MBQP data surface (MB QP Surface width and height are same as the input picture size in MB unit * 1bytes)
+       starting GEN9, BRC use split kernel, MB QP surface is same size as input picture */
+    cmd->dw47.mb_qp_read_factor = (avc_state->mb_qp_data_enable || generic_state->mb_brc_enabled)?0:2;
+
+    /* the I frame distortion pass overrides the QPs and non-DC penalties with 0 */
+    if(mbenc_i_frame_dist_in_use)
+    {
+        cmd->dw13.qp_prime_y = 0;
+        cmd->dw13.qp_prime_cb = 0;
+        cmd->dw13.qp_prime_cr = 0;
+        cmd->dw33.intra_16x16_nondc_penalty = 0;
+        cmd->dw33.intra_8x8_nondc_penalty = 0;
+        cmd->dw33.intra_4x4_nondc_penalty = 0;
+    }
+    if(cmd->dw4.use_actual_ref_qp_value)
+    {
+        cmd->dw44.actual_qp_value_for_ref_id0_list0 =  gen9_avc_get_qp_from_ref_list(ctx,slice_param,0,0);
+        cmd->dw44.actual_qp_value_for_ref_id1_list0 =  gen9_avc_get_qp_from_ref_list(ctx,slice_param,0,1);
+        cmd->dw44.actual_qp_value_for_ref_id2_list0 =  gen9_avc_get_qp_from_ref_list(ctx,slice_param,0,2);
+        cmd->dw44.actual_qp_value_for_ref_id3_list0 =  gen9_avc_get_qp_from_ref_list(ctx,slice_param,0,3);
+        cmd->dw45.actual_qp_value_for_ref_id4_list0 =  gen9_avc_get_qp_from_ref_list(ctx,slice_param,0,4);
+        cmd->dw45.actual_qp_value_for_ref_id5_list0 =  gen9_avc_get_qp_from_ref_list(ctx,slice_param,0,5);
+        cmd->dw45.actual_qp_value_for_ref_id6_list0 =  gen9_avc_get_qp_from_ref_list(ctx,slice_param,0,6);
+        cmd->dw45.actual_qp_value_for_ref_id7_list0 =  gen9_avc_get_qp_from_ref_list(ctx,slice_param,0,7);
+        cmd->dw46.actual_qp_value_for_ref_id0_list1 =  gen9_avc_get_qp_from_ref_list(ctx,slice_param,1,0);
+        cmd->dw46.actual_qp_value_for_ref_id1_list1 =  gen9_avc_get_qp_from_ref_list(ctx,slice_param,1,1);
+    }
+
+    table_idx = slice_type_kernel[generic_state->frame_type];
+    cmd->dw46.ref_cost = gen9_avc_ref_cost[table_idx][qp];
+
+    if(generic_state->frame_type == SLICE_TYPE_I)
+    {
+        cmd->dw0.skip_mode_enable = 0;
+        cmd->dw37.skip_mode_enable = 0;
+        cmd->dw36.hme_combine_overlap = 0;
+        cmd->dw47.intra_cost_sf = 16;
+        cmd->dw34.enable_direct_bias_adjustment = 0;
+        cmd->dw34.enable_global_motion_bias_adjustment = 0;
+    }else if(generic_state->frame_type == SLICE_TYPE_P)
+    {
+        cmd->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(avc_state->seq_param->level_idc)/2;
+        cmd->dw3.bme_disable_fbr = 1;
+        cmd->dw5.ref_width = gen9_avc_search_x[preset];
+        cmd->dw5.ref_height = gen9_avc_search_y[preset];
+        cmd->dw7.non_skip_zmv_added = 1;
+        cmd->dw7.non_skip_mode_added = 1;
+        cmd->dw7.skip_center_mask = 1;
+        cmd->dw47.intra_cost_sf = (avc_state->adaptive_intra_scaling_enable)?gen9_avc_adaptive_intra_scaling_factor[qp]:gen9_avc_intra_scaling_factor[qp];
+        cmd->dw47.max_vmv_r = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only
+        cmd->dw36.hme_combine_overlap = 1;
+        cmd->dw36.num_ref_idx_l0_minus_one = (avc_state->multi_pre_enable)?slice_param->num_ref_idx_l0_active_minus1:0;
+        cmd->dw39.ref_width = gen9_avc_search_x[preset];
+        cmd->dw39.ref_height = gen9_avc_search_y[preset];
+        cmd->dw34.enable_direct_bias_adjustment = 0;
+        cmd->dw34.enable_global_motion_bias_adjustment = avc_state->global_motion_bias_adjustment_enable;
+        if(avc_state->global_motion_bias_adjustment_enable)
+            cmd->dw59.hme_mv_cost_scaling_factor = avc_state->hme_mv_cost_scaling_factor;
+    }else
+    {
+        cmd->dw1.max_num_mvs = i965_avc_get_max_mv_per_2mb(avc_state->seq_param->level_idc)/2;
+        cmd->dw1.bi_weight = avc_state->bi_weight;
+        cmd->dw3.search_ctrl = 7;
+        cmd->dw3.skip_type = 1;
+        cmd->dw5.ref_width = gen9_avc_b_search_x[preset];
+        cmd->dw5.ref_height = gen9_avc_b_search_y[preset];
+        cmd->dw7.skip_center_mask = 0xff;
+        cmd->dw47.intra_cost_sf = (avc_state->adaptive_intra_scaling_enable)?gen9_avc_adaptive_intra_scaling_factor[qp]:gen9_avc_intra_scaling_factor[qp];
+        cmd->dw47.max_vmv_r = i965_avc_get_max_mv_len(avc_state->seq_param->level_idc) * 4;//frame only
+        cmd->dw36.hme_combine_overlap = 1;
+        surface_id = slice_param->RefPicList1[0].picture_id;
+        obj_surface = SURFACE(surface_id);
+        if (!obj_surface)
+        {
+            WARN_ONCE("Invalid backward reference frame\n");
+            /* do not leave the CURBE mapped when bailing out early */
+            i965_gpe_context_unmap_curbe(gpe_context);
+            return;
+        }
+        cmd->dw36.is_fwd_frame_short_term_ref = !!( slice_param->RefPicList1[0].flags & VA_PICTURE_H264_SHORT_TERM_REFERENCE);
+
+        cmd->dw36.num_ref_idx_l0_minus_one = (avc_state->multi_pre_enable)?slice_param->num_ref_idx_l0_active_minus1:0;
+        cmd->dw36.num_ref_idx_l1_minus_one = (avc_state->multi_pre_enable)?slice_param->num_ref_idx_l1_active_minus1:0;
+        cmd->dw39.ref_width = gen9_avc_b_search_x[preset];
+        cmd->dw39.ref_height = gen9_avc_b_search_y[preset];
+        cmd->dw40.dist_scale_factor_ref_id0_list0 = avc_state->dist_scale_factor_list0[0];
+        cmd->dw40.dist_scale_factor_ref_id1_list0 = avc_state->dist_scale_factor_list0[1];
+        cmd->dw41.dist_scale_factor_ref_id2_list0 = avc_state->dist_scale_factor_list0[2];
+        cmd->dw41.dist_scale_factor_ref_id3_list0 = avc_state->dist_scale_factor_list0[3];
+        cmd->dw42.dist_scale_factor_ref_id4_list0 = avc_state->dist_scale_factor_list0[4];
+        cmd->dw42.dist_scale_factor_ref_id5_list0 = avc_state->dist_scale_factor_list0[5];
+        cmd->dw43.dist_scale_factor_ref_id6_list0 = avc_state->dist_scale_factor_list0[6];
+        cmd->dw43.dist_scale_factor_ref_id7_list0 = avc_state->dist_scale_factor_list0[7];
+
+        cmd->dw34.enable_direct_bias_adjustment = avc_state->direct_bias_adjustment_enable;
+        if(cmd->dw34.enable_direct_bias_adjustment)
+        {
+            cmd->dw7.non_skip_zmv_added = 1;
+            cmd->dw7.non_skip_mode_added = 1;
+        }
+
+        cmd->dw34.enable_global_motion_bias_adjustment = avc_state->global_motion_bias_adjustment_enable;
+        if(avc_state->global_motion_bias_adjustment_enable)
+            cmd->dw59.hme_mv_cost_scaling_factor = avc_state->hme_mv_cost_scaling_factor;
+    }
+
+    /* remember the block based skip setting that came from the init table */
+    avc_state->block_based_skip_enable = cmd->dw3.block_based_skip_enable;
+
+    if(avc_state->rolling_intra_refresh_enable)
+    {
+        /*by now disable it*/
+        cmd->dw34.widi_intra_refresh_en = avc_state->rolling_intra_refresh_enable;
+    }else
+    {
+        cmd->dw34.widi_intra_refresh_en = 0;
+    }
+
+    cmd->dw34.enable_per_mb_static_check = avc_state->sfd_enable && generic_state->hme_enabled;
+    cmd->dw34.enable_adaptive_search_window_size = avc_state->adaptive_search_window_enable;
+
+    /*roi set disable by now. 49-56*/
+    if(curbe_param->roi_enabled)
+    {
+        cmd->dw49.roi_1_x_left   = generic_state->roi[0].left;
+        cmd->dw49.roi_1_y_top    = generic_state->roi[0].top;
+        cmd->dw50.roi_1_x_right  = generic_state->roi[0].right;
+        cmd->dw50.roi_1_y_bottom = generic_state->roi[0].bottom;
+
+        cmd->dw51.roi_2_x_left   = generic_state->roi[1].left;
+        cmd->dw51.roi_2_y_top    = generic_state->roi[1].top;
+        cmd->dw52.roi_2_x_right  = generic_state->roi[1].right;
+        cmd->dw52.roi_2_y_bottom = generic_state->roi[1].bottom;
+
+        cmd->dw53.roi_3_x_left   = generic_state->roi[2].left;
+        cmd->dw53.roi_3_y_top    = generic_state->roi[2].top;
+        cmd->dw54.roi_3_x_right  = generic_state->roi[2].right;
+        cmd->dw54.roi_3_y_bottom = generic_state->roi[2].bottom;
+
+        cmd->dw55.roi_4_x_left   = generic_state->roi[3].left;
+        cmd->dw55.roi_4_y_top    = generic_state->roi[3].top;
+        cmd->dw56.roi_4_x_right  = generic_state->roi[3].right;
+        cmd->dw56.roi_4_y_bottom = generic_state->roi[3].bottom;
+
+        if(!generic_state->brc_enabled)
+        {
+            /* without BRC each ROI value is passed as a QP delta; CLIP presumably clamps it to [-qp, 52-qp] */
+            char tmp = 0;
+            tmp = generic_state->roi[0].value;
+            CLIP(tmp,-qp,52-qp);
+            cmd->dw57.roi_1_dqp_prime_y = tmp;
+            tmp = generic_state->roi[1].value;
+            CLIP(tmp,-qp,52-qp);
+            cmd->dw57.roi_2_dqp_prime_y = tmp;
+            tmp = generic_state->roi[2].value;
+            CLIP(tmp,-qp,52-qp);
+            cmd->dw57.roi_3_dqp_prime_y = tmp;
+            tmp = generic_state->roi[3].value;
+            CLIP(tmp,-qp,52-qp);
+            cmd->dw57.roi_4_dqp_prime_y = tmp;
+        }else
+        {
+            cmd->dw34.roi_enable_flag = 0;
+        }
+    }
+
+    /* surface indices the kernel uses to locate its input/output surfaces */
+    cmd->dw64.mb_data_surf_index = GEN9_AVC_MBENC_MFC_AVC_PAK_OBJ_INDEX;
+    cmd->dw65.mv_data_surf_index = GEN9_AVC_MBENC_IND_MV_DATA_INDEX;
+    cmd->dw66.i_dist_surf_index = GEN9_AVC_MBENC_BRC_DISTORTION_INDEX;
+    cmd->dw67.src_y_surf_index = GEN9_AVC_MBENC_CURR_Y_INDEX;
+    cmd->dw68.mb_specific_data_surf_index = GEN9_AVC_MBENC_MB_SPECIFIC_DATA_INDEX;
+    cmd->dw69.aux_vme_out_surf_index = GEN9_AVC_MBENC_AUX_VME_OUT_INDEX;
+    cmd->dw70.curr_ref_pic_sel_surf_index = GEN9_AVC_MBENC_REFPICSELECT_L0_INDEX;
+    cmd->dw71.hme_mv_pred_fwd_bwd_surf_index = GEN9_AVC_MBENC_MV_DATA_FROM_ME_INDEX;
+    cmd->dw72.hme_dist_surf_index = GEN9_AVC_MBENC_4XME_DISTORTION_INDEX;
+    cmd->dw73.slice_map_surf_index = GEN9_AVC_MBENC_SLICEMAP_DATA_INDEX;
+    cmd->dw74.fwd_frm_mb_data_surf_index = GEN9_AVC_MBENC_FWD_MB_DATA_INDEX;
+    cmd->dw75.fwd_frm_mv_surf_index = GEN9_AVC_MBENC_FWD_MV_DATA_INDEX;
+    cmd->dw76.mb_qp_buffer = GEN9_AVC_MBENC_MBQP_INDEX;
+    cmd->dw77.mb_brc_lut = GEN9_AVC_MBENC_MBBRC_CONST_DATA_INDEX;
+    cmd->dw78.vme_inter_prediction_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX;
+    cmd->dw79.vme_inter_prediction_mr_surf_index = GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX;
+    cmd->dw80.mb_stats_surf_index = GEN9_AVC_MBENC_MB_STATS_INDEX;
+    cmd->dw81.mad_surf_index = GEN9_AVC_MBENC_MAD_DATA_INDEX;
+    cmd->dw82.force_non_skip_mb_map_surface = GEN9_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX;
+    cmd->dw83.widi_wa_surf_index = GEN9_AVC_MBENC_WIDI_WA_INDEX;
+    cmd->dw84.brc_curbe_surf_index = GEN9_AVC_MBENC_BRC_CURBE_DATA_INDEX;
+    cmd->dw85.static_detection_cost_table_index = GEN9_AVC_MBENC_SFD_COST_TABLE_INDEX;
+
+    i965_gpe_context_unmap_curbe(gpe_context);
+
+    return;
+}
+
+static void
+gen9_avc_send_surface_mbenc(VADriverContextP ctx,
+                            struct encode_state *encode_state,
+                            struct i965_gpe_context *gpe_context,
+                            struct intel_encoder_context *encoder_context,
+                            void * param_mbenc)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct encoder_vme_mfc_context * vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
+    struct gen9_avc_encoder_context * avc_ctx = (struct gen9_avc_encoder_context * )vme_context->private_enc_ctx;
+    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state * )vme_context->generic_enc_state;
+    struct avc_enc_state * avc_state = (struct avc_enc_state * )vme_context->private_enc_state;
+    struct object_surface *obj_surface;
+    struct gen9_surface_avc *avc_priv_surface;
+    struct i965_gpe_resource *gpe_resource;
+    struct mbenc_param * param = (struct mbenc_param *)param_mbenc ;
+     VASurfaceID surface_id;
+    unsigned int mbenc_i_frame_dist_in_use = param->mbenc_i_frame_dist_in_use;
+    unsigned int size = 0;
+    unsigned int w_mb = generic_state->frame_width_in_mbs;
+    unsigned int h_mb = generic_state->frame_height_in_mbs;
+    int i = 0;
+    VAEncSliceParameterBufferH264 * slice_param = avc_state->slice_param[0];
+
+    obj_surface = encode_state->reconstructed_object;
+
+    if (!obj_surface || !obj_surface->private_data)
+        return;
+    avc_priv_surface = obj_surface->private_data;
+
+    /*pak obj command buffer output*/
+    size = w_mb * h_mb * 16 * 4;
+    gpe_resource = &avc_priv_surface->res_mb_code_surface;
+    gen9_add_buffer_gpe_surface(ctx,
+                                gpe_context,
+                                gpe_resource,
+                                0,
+                                size / 4,
+                                0,
+                                GEN9_AVC_MBENC_MFC_AVC_PAK_OBJ_INDEX);
+
+    /*mv data buffer output*/
+    size = w_mb * h_mb * 32 * 4;
+    gpe_resource = &avc_priv_surface->res_mv_data_surface;
+    gen9_add_buffer_gpe_surface(ctx,
+                                gpe_context,
+                                gpe_resource,
+                                0,
+                                size / 4,
+                                0,
+                                GEN9_AVC_MBENC_IND_MV_DATA_INDEX);
+
+    /*input current  YUV surface, current input Y/UV object*/
+    if(mbenc_i_frame_dist_in_use)
+    {
+        obj_surface = encode_state->reconstructed_object;
+        if (!obj_surface || !obj_surface->private_data)
+            return;
+        avc_priv_surface = obj_surface->private_data;
+        obj_surface = avc_priv_surface->scaled_4x_surface_obj;
+    }else
+    {
+        obj_surface = encode_state->input_yuv_object;
+    }
+    gen9_add_2d_gpe_surface(ctx,
+                            gpe_context,
+                            obj_surface,
+                            0,
+                            1,
+                            I965_SURFACEFORMAT_R8_UNORM,
+                            GEN9_AVC_MBENC_CURR_Y_INDEX);
+
+    gen9_add_2d_gpe_surface(ctx,
+                            gpe_context,
+                            obj_surface,
+                            1,
+                            1,
+                            I965_SURFACEFORMAT_R16_UINT,
+                            GEN9_AVC_MBENC_CURR_UV_INDEX);
+
+    if(generic_state->hme_enabled)
+    {
+        /*memv input 4x*/
+        gpe_resource = &(avc_ctx->s4x_memv_data_buffer);
+        gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
+                                       gpe_resource,
+                                       1,
+                                       I965_SURFACEFORMAT_R8_UNORM,
+                                       GEN9_AVC_MBENC_MV_DATA_FROM_ME_INDEX);
+        /* memv distortion input*/
+        gpe_resource = &(avc_ctx->s4x_memv_distortion_buffer);
+        gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
+                                       gpe_resource,
+                                       1,
+                                       I965_SURFACEFORMAT_R8_UNORM,
+                                       GEN9_AVC_MBENC_4XME_DISTORTION_INDEX);
+    }
+
+    /*mbbrc const data_buffer*/
+    if(param->mb_const_data_buffer_in_use)
+    {
+        size = 16 * 52 * sizeof(unsigned int);
+        gpe_resource = &avc_ctx->res_mbbrc_const_data_buffer;
+        gen9_add_buffer_gpe_surface(ctx,
+                                    gpe_context,
+                                    gpe_resource,
+                                    0,
+                                    size / 4,
+                                    0,
+                                    GEN9_AVC_MBENC_MBBRC_CONST_DATA_INDEX);
+
+    }
+
+    /*mb qp data_buffer*/
+    if(param->mb_qp_buffer_in_use)
+    {
+        if(avc_state->mb_qp_data_enable)
+            gpe_resource = &(avc_ctx->res_mb_qp_data_surface);
+        else
+            gpe_resource = &(avc_ctx->res_mbbrc_mb_qp_data_surface);
+        gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
+                                       gpe_resource,
+                                       1,
+                                       I965_SURFACEFORMAT_R8_UNORM,
+                                       GEN9_AVC_MBENC_MBQP_INDEX);
+    }
+
+    /*input current  YUV surface, current input Y/UV object*/
+    if(mbenc_i_frame_dist_in_use)
+    {
+        obj_surface = encode_state->reconstructed_object;
+        if (!obj_surface || !obj_surface->private_data)
+            return;
+        avc_priv_surface = obj_surface->private_data;
+        obj_surface = avc_priv_surface->scaled_4x_surface_obj;
+    }else
+    {
+        obj_surface = encode_state->input_yuv_object;
+    }
+    gen9_add_adv_gpe_surface(ctx, gpe_context,
+                             obj_surface,
+                             GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX);
+    /*input ref YUV surface*/
+    for(i = 0; i < slice_param->num_ref_idx_l0_active_minus1 + 1; i++)
+    {
+        surface_id = slice_param->RefPicList0[i].picture_id;
+        obj_surface = SURFACE(surface_id);
+        if (!obj_surface || !obj_surface->private_data)
+            break;
+
+        gen9_add_adv_gpe_surface(ctx, gpe_context,
+                                 obj_surface,
+                                 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX+i*2 + 1);
+    }
+    /*input current  YUV surface, current input Y/UV object*/
+    if(mbenc_i_frame_dist_in_use)
+    {
+        obj_surface = encode_state->reconstructed_object;
+        if (!obj_surface || !obj_surface->private_data)
+            return;
+        avc_priv_surface = obj_surface->private_data;
+        obj_surface = avc_priv_surface->scaled_4x_surface_obj;
+    }else
+    {
+        obj_surface = encode_state->input_yuv_object;
+    }
+    gen9_add_adv_gpe_surface(ctx, gpe_context,
+                             obj_surface,
+                             GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX);
+
+    for(i = 0; i < slice_param->num_ref_idx_l1_active_minus1 + 1; i++)
+    {
+        if(i > 0) break;// only  one ref supported here for B frame
+        surface_id = slice_param->RefPicList1[i].picture_id;
+        obj_surface = SURFACE(surface_id);
+        if (!obj_surface || !obj_surface->private_data)
+            break;
+
+        gen9_add_adv_gpe_surface(ctx, gpe_context,
+                                 obj_surface,
+                                 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX+i*2 + 1);
+        gen9_add_adv_gpe_surface(ctx, gpe_context,
+                                 obj_surface,
+                                 GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_0_INDEX+i*2 + 2);
+        if(i == 0)
+        {
+            avc_priv_surface = obj_surface->private_data;
+            /*pak obj command buffer output(mb code)*/
+            size = w_mb * h_mb * 16 * 4;
+            gpe_resource = &avc_priv_surface->res_mb_code_surface;
+            gen9_add_buffer_gpe_surface(ctx,
+                                        gpe_context,
+                                        gpe_resource,
+                                        0,
+                                        size / 4,
+                                        0,
+                                        GEN9_AVC_MBENC_FWD_MB_DATA_INDEX);
+
+            /*mv data buffer output*/
+            size = w_mb * h_mb * 32 * 4;
+            gpe_resource = &avc_priv_surface->res_mv_data_surface;
+            gen9_add_buffer_gpe_surface(ctx,
+                                        gpe_context,
+                                        gpe_resource,
+                                        0,
+                                        size / 4,
+                                        0,
+                                        GEN9_AVC_MBENC_FWD_MV_DATA_INDEX);
+
+        }
+
+        if( i < INTEL_AVC_MAX_BWD_REF_NUM)
+        {
+            gen9_add_adv_gpe_surface(ctx, gpe_context,
+                                     obj_surface,
+                                     GEN9_AVC_MBENC_VME_INTER_PRED_CURR_PIC_IDX_1_INDEX+i*2 + 1 + INTEL_AVC_MAX_BWD_REF_NUM);
+        }
+
+    }
+
+    /* BRC distortion data buffer for I frame*/
+    if(mbenc_i_frame_dist_in_use)
+    {
+        gpe_resource = &(avc_ctx->res_brc_dist_data_surface);
+        gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
+                                       gpe_resource,
+                                       1,
+                                       I965_SURFACEFORMAT_R8_UNORM,
+                                       GEN9_AVC_MBENC_BRC_DISTORTION_INDEX);
+    }
+
+    /* as ref frame ,update later RefPicSelect of Current Picture*/
+    obj_surface = encode_state->reconstructed_object;
+    avc_priv_surface = obj_surface->private_data;
+    if(avc_state->ref_pic_select_list_supported && avc_priv_surface->is_as_ref)
+    {
+        gpe_resource = &(avc_priv_surface->res_ref_pic_select_surface);
+        gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
+                                       gpe_resource,
+                                       1,
+                                       I965_SURFACEFORMAT_R8_UNORM,
+                                       GEN9_AVC_MBENC_REFPICSELECT_L0_INDEX);
+
+    }
+
+    if(param->mb_vproc_stats_enable)
+    {
+        /*mb status buffer input*/
+        size = w_mb * h_mb * 16 * 4;
+        gpe_resource = &(avc_ctx->res_mb_status_buffer);
+        gen9_add_buffer_gpe_surface(ctx,
+                                    gpe_context,
+                                    gpe_resource,
+                                    0,
+                                    size / 4,
+                                    0,
+                                    GEN9_AVC_MBENC_MB_STATS_INDEX);
+
+    }else if(avc_state->flatness_check_enable)
+    {
+
+        gpe_resource = &(avc_ctx->res_flatness_check_surface);
+        gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
+                                       gpe_resource,
+                                       1,
+                                       I965_SURFACEFORMAT_R8_UNORM,
+                                       GEN9_AVC_MBENC_MB_STATS_INDEX);
+    }
+
+    if(param->mad_enable)
+    {
+        /*mad buffer input*/
+        size = 4;
+        gpe_resource = &(avc_ctx->res_mad_data_buffer);
+        gen9_add_buffer_gpe_surface(ctx,
+                                    gpe_context,
+                                    gpe_resource,
+                                    0,
+                                    size / 4,
+                                    0,
+                                    GEN9_AVC_MBENC_MAD_DATA_INDEX);
+        i965_zero_gpe_resource(gpe_resource);
+    }
+
+    /*brc updated mbenc curbe data buffer,it is ignored*/
+
+    /*arbitrary number of MBs in slice*/
+    if(avc_state->arbitrary_num_mbs_in_slice)
+    {
+        /*slice surface input*/
+        gpe_resource = &(avc_ctx->res_mbenc_slice_map_surface);
+        gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
+                                       gpe_resource,
+                                       1,
+                                       I965_SURFACEFORMAT_R8_UNORM,
+                                       GEN9_AVC_MBENC_SLICEMAP_DATA_INDEX);
+    }
+
+    /* BRC distortion data buffer for I frame */
+    if(!mbenc_i_frame_dist_in_use)
+    {
+        if(avc_state->mb_disable_skip_map_enable)
+        {
+            gpe_resource = &(avc_ctx->res_mb_disable_skip_map_surface);
+            gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
+                                           gpe_resource,
+                                           1,
+                                           I965_SURFACEFORMAT_R8_UNORM,
+                                           GEN9_AVC_MBENC_FORCE_NONSKIP_MB_MAP_INDEX);
+        }
+
+        if(avc_state->sfd_enable && generic_state->hme_enabled)
+        {
+            if(generic_state->frame_type == SLICE_TYPE_P)
+            {
+                gpe_resource = &(avc_ctx->res_sfd_cost_table_p_frame_buffer);
+
+            }else if(generic_state->frame_type == SLICE_TYPE_B)
+            {
+                gpe_resource = &(avc_ctx->res_sfd_cost_table_b_frame_buffer);
+            }
+
+            if(generic_state->frame_type != SLICE_TYPE_I)
+            {
+                gen9_add_buffer_2d_gpe_surface(ctx, gpe_context,
+                                               gpe_resource,
+                                               1,
+                                               I965_SURFACEFORMAT_R8_UNORM,
+                                               GEN9_AVC_MBENC_SFD_COST_TABLE_INDEX);
+            }
+        }
+    }
+
+    return;
+}
+
+/*
+ * Set up and dispatch one MBENC kernel pass.
+ *
+ * When i_frame_dist_in_use is set, the BRC I-frame distortion kernel from
+ * context_brc is used, the walker covers downscaled_{width,height}_4x_in_mb,
+ * and the MAD/BRC flags handed to the hooks stay 0. Otherwise the kernel is
+ * taken from context_mbenc: kernel_mode picks the *_I base entry and the
+ * frame type adds an offset (P: +1, B: +2); the walker covers the full frame.
+ *
+ * The GPE calls are issued in a fixed sequence: context init, binding table
+ * reset, CURBE, MB BRC constant data, MAD clear, surfaces, interface data,
+ * walker. NOTE(review): presumably order-sensitive - keep the sequence as is.
+ *
+ * ctx:                 driver context
+ * encode_state:        encode state, forwarded to the CURBE/surface hooks
+ * encoder_context:     encoder context that owns the VME/MFC state
+ * i_frame_dist_in_use: true selects the I-frame distortion kernel
+ *
+ * Always returns VA_STATUS_SUCCESS; an unknown kernel mode or frame type
+ * only trips assert(0).
+ */
+static VAStatus
+gen9_avc_kernel_mbenc(VADriverContextP ctx,
+                      struct encode_state *encode_state,
+                      struct intel_encoder_context *encoder_context,
+                      bool i_frame_dist_in_use)
+{
+    struct encoder_vme_mfc_context *vme_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
+    struct generic_encoder_context *generic_ctx = (struct generic_encoder_context *)vme_context->generic_enc_ctx;
+    struct gen9_avc_encoder_context *avc_ctx = (struct gen9_avc_encoder_context *)vme_context->private_enc_ctx;
+    struct generic_enc_codec_state *generic_state = (struct generic_enc_codec_state *)vme_context->generic_enc_state;
+    struct avc_enc_state *avc_state = (struct avc_enc_state *)vme_context->private_enc_state;
+    VAEncSliceParameterBufferH264 *slice_param = avc_state->slice_param[0];
+
+    struct i965_gpe_context *gpe_context;
+    struct gpe_media_object_walker_parameter walker_cmd;
+    struct gpe_encoder_kernel_walker_parameter walker_cfg;
+    struct mbenc_param param;
+    unsigned int walk_width_in_mb, walk_height_in_mb;
+    int media_function = 0;
+    int kernel_idx = 0;
+    /* Both stay 0 for the I-frame distortion pass. */
+    int mad_enable = 0;
+    unsigned int brc_enabled = 0;
+    unsigned int roi_enable = generic_state->num_roi > 0;
+    /* The trailing "&& (0)" keeps dirty ROI permanently disabled. */
+    unsigned int dirty_roi_enable = ((generic_state->dirty_num_roi > 0) &&
+                                     (generic_state->frame_type == SLICE_TYPE_P) &&
+                                     (0));
+    unsigned int mb_const_data_buffer_in_use;
+    unsigned int mb_qp_buffer_in_use;
+
+    mb_const_data_buffer_in_use = generic_state->mb_brc_enabled ||
+                                  roi_enable ||
+                                  dirty_roi_enable ||
+                                  avc_state->mb_qp_data_enable ||
+                                  avc_state->rolling_intra_refresh_enable;
+    mb_qp_buffer_in_use = generic_state->mb_brc_enabled ||
+                          generic_state->brc_roi_enable ||
+                          avc_state->mb_qp_data_enable;
+
+    if (!i_frame_dist_in_use) {
+        /* Regular MBENC: the kernel mode selects the *_I base entry. */
+        switch (generic_state->kernel_mode) {
+        case INTEL_ENC_KERNEL_NORMAL:
+            media_function = INTEL_MEDIA_STATE_ENC_NORMAL;
+            kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_NORMAL_I;
+            break;
+        case INTEL_ENC_KERNEL_PERFORMANCE:
+            media_function = INTEL_MEDIA_STATE_ENC_PERFORMANCE;
+            kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_PERFORMANCE_I;
+            break;
+        case INTEL_ENC_KERNEL_QUALITY:
+            media_function = INTEL_MEDIA_STATE_ENC_QUALITY;
+            kernel_idx = MBENC_KERNEL_BASE + GEN9_AVC_KERNEL_MBENC_QUALITY_I;
+            break;
+        default:
+            assert(0);
+        }
+
+        /* Step from the I entry to the P (+1) or B (+2) entry. */
+        if (generic_state->frame_type == SLICE_TYPE_P) {
+            kernel_idx += 1;
+        } else if (generic_state->frame_type == SLICE_TYPE_B) {
+            kernel_idx += 2;
+        }
+
+        walk_width_in_mb = generic_state->frame_width_in_mbs;
+        walk_height_in_mb = generic_state->frame_height_in_mbs;
+        mad_enable = avc_state->mad_enable;
+        brc_enabled = generic_state->brc_enabled;
+
+        gpe_context = &(avc_ctx->context_mbenc.gpe_contexts[kernel_idx]);
+    } else {
+        /* I-frame distortion pass: BRC kernel on the 4x downscaled size. */
+        media_function = INTEL_MEDIA_STATE_ENC_I_FRAME_DIST;
+        kernel_idx = GEN9_AVC_KERNEL_BRC_I_FRAME_DIST;
+        walk_width_in_mb = generic_state->downscaled_width_4x_in_mb;
+        walk_height_in_mb = generic_state->downscaled_height_4x_in_mb;
+
+        gpe_context = &(avc_ctx->context_brc.gpe_contexts[kernel_idx]);
+    }
+
+    /* Flags handed to the CURBE and surface hooks. */
+    memset(&param, 0, sizeof(param));
+    param.mb_const_data_buffer_in_use = mb_const_data_buffer_in_use;
+    param.mb_qp_buffer_in_use = mb_qp_buffer_in_use;
+    param.mbenc_i_frame_dist_in_use = i_frame_dist_in_use;
+    param.mad_enable = mad_enable;
+    param.brc_enabled = brc_enabled;
+    param.roi_enabled = roi_enable;
+    if (avc_state->mb_status_supported) {
+        param.mb_vproc_stats_enable = avc_state->flatness_check_enable ||
+                                      avc_state->adaptive_transform_decision_enable;
+    }
+
+    /*
+     * When mbenc_curbe_set_in_brc_update is set, both the context init and
+     * the CURBE programming below are skipped.
+     */
+    if (!avc_state->mbenc_curbe_set_in_brc_update) {
+        gen8_gpe_context_init(ctx, gpe_context);
+    }
+
+    gen9_gpe_reset_binding_table(ctx, gpe_context);
+
+    if (!avc_state->mbenc_curbe_set_in_brc_update) {
+        generic_ctx->pfn_set_curbe_mbenc(ctx, encode_state, gpe_context, encoder_context, &param);
+    }
+
+    /* Load the MB BRC constant data when the flag computed above is set. */
+    if (mb_const_data_buffer_in_use) {
+        gen9_avc_load_mb_brc_const_data(ctx, encode_state, encoder_context);
+    }
+
+    /* Zero the MAD buffer before the kernel is launched. */
+    if (mad_enable) {
+        i965_zero_gpe_resource(&(avc_ctx->res_mad_data_buffer));
+    }
+
+    /* Bind the surfaces, then set up the interface data. */
+    generic_ctx->pfn_send_mbenc_surface(ctx, encode_state, gpe_context, encoder_context, &param);
+    gen8_gpe_setup_interface_data(ctx, gpe_context);
+
+    /* Walker setup: scoreboard on, resolution in MBs as chosen above. */
+    memset(&walker_cfg, 0, sizeof(walker_cfg));
+    walker_cfg.use_scoreboard = 1;
+    walker_cfg.resolution_x = walk_width_in_mb;
+    walker_cfg.resolution_y = walk_height_in_mb;
+    walker_cfg.no_dependency = i_frame_dist_in_use ? 1 : 0;
+
+    if (!i_frame_dist_in_use) {
+        /* The walker degree is chosen per frame type. */
+        switch (generic_state->frame_type) {
+        case SLICE_TYPE_I:
+            walker_cfg.walker_degree = WALKER_45_DEGREE;
+            break;
+        case SLICE_TYPE_P:
+            walker_cfg.walker_degree = WALKER_26_DEGREE;
+            break;
+        case SLICE_TYPE_B:
+            /* 26 degree with spatial direct prediction, 45 degree without. */
+            walker_cfg.walker_degree = slice_param->direct_spatial_mv_pred_flag ?
+                                       WALKER_26_DEGREE : WALKER_45_DEGREE;
+            break;
+        default:
+            assert(0);
+        }
+    }
+
+    i965_init_media_object_walker_parameter(&walker_cfg, &walker_cmd);
+    gen9_avc_run_kernel_media_object_walker(ctx, encoder_context,
+                                            gpe_context,
+                                            media_function,
+                                            &walker_cmd);
+    return VA_STATUS_SUCCESS;
+}
-- 
2.11.0




More information about the Libva mailing list