[Libva] [PATCH 25/31] ENC: add MFX command for AVC encoder
Zhao Yakui
yakui.zhao at intel.com
Wed Jan 11 05:23:17 UTC 2017
On 01/11/2017 07:38 AM, Sean V Kelley wrote:
> From: Pengfei Qu <Pengfei.Qu at intel.com>
>
The OUT_BCS_RELOC issue still exists.
Please use OUT_BCS_RELOC64 instead.
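For the direct MV buffers in gen9_mfc_avc_directmode_state() the change would look
roughly like the following (untested sketch). Since OUT_BCS_RELOC64 emits both halves
of the 48-bit graphics address, the zero DW that currently pads each relocation has
to be dropped:

    for (i = 0; i < NUM_MFC_AVC_DMV_BUFFERS - 2; i += 2) {
        if (avc_ctx->res_direct_mv_buffersr[i].bo != NULL) {
            /* one RELOC64 fills two DWs (low/high address) */
            OUT_BCS_RELOC64(batch, avc_ctx->res_direct_mv_buffersr[i].bo,
                            I915_GEM_DOMAIN_INSTRUCTION, 0,
                            0);
        } else {
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }
    }

    OUT_BCS_BATCH(batch, 0);

    /* the DW34-36 is the MV for the current reference: RELOC64 plus
     * one padding DW instead of two */
    OUT_BCS_RELOC64(batch, avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2].bo,
                    I915_GEM_DOMAIN_INSTRUCTION, 0,
                    0);
    OUT_BCS_BATCH(batch, 0);

The total command length (71 DWs) stays the same either way.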
> Signed-off-by: Pengfei Qu <Pengfei.Qu at intel.com>
> Reviewed-by: Sean V Kelley <seanvk at posteo.de>
> ---
> src/gen9_avc_encoder.c | 399 +++++++++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 399 insertions(+)
>
> diff --git a/src/gen9_avc_encoder.c b/src/gen9_avc_encoder.c
> index 629a0dab..8b11c5a3 100755
> --- a/src/gen9_avc_encoder.c
> +++ b/src/gen9_avc_encoder.c
> @@ -5625,3 +5625,402 @@ gen9_avc_kernel_init(VADriverContextP ctx,
>      generic_ctx->pfn_send_sfd_surface = gen9_avc_send_surface_sfd;
>      generic_ctx->pfn_send_wp_surface = gen9_avc_send_surface_wp;
>  }
> +
> +/*
> +PAK pipeline related functions
> +*/
> +extern int
> +intel_avc_enc_slice_type_fixup(int slice_type);
> +
> +static void
> +gen9_mfc_avc_pipe_mode_select(VADriverContextP ctx,
> +                              struct encode_state *encode_state,
> +                              struct intel_encoder_context *encoder_context)
> +{
> +    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
> +    struct gen9_avc_encoder_context * avc_ctx = (struct gen9_avc_encoder_context *)pak_context->private_enc_ctx;
> +    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
> +    struct intel_batchbuffer *batch = encoder_context->base.batch;
> +
> +    BEGIN_BCS_BATCH(batch, 5);
> +
> +    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
> +    OUT_BCS_BATCH(batch,
> +                  (0 << 29) |
> +                  (MFX_LONG_MODE << 17) |   /* Must be long format for encoder */
> +                  (MFD_MODE_VLD << 15) |
> +                  (0 << 13) |               /* VDEnc mode is 1 */
> +                  ((generic_state->curr_pak_pass != (generic_state->num_pak_passes - 1)) << 10) | /* Stream-Out Enable */
> +                  ((!!avc_ctx->res_post_deblocking_output.bo) << 9) |  /* Post Deblocking Output */
> +                  ((!!avc_ctx->res_pre_deblocking_output.bo) << 8) |   /* Pre Deblocking Output */
> +                  (0 << 7) |                /* Scaled surface enable */
> +                  (0 << 6) |                /* Frame statistics stream out enable, always '1' in VDEnc mode */
> +                  (0 << 5) |                /* not in stitch mode */
> +                  (1 << 4) |                /* encoding mode */
> +                  (MFX_FORMAT_AVC << 0));
> +    OUT_BCS_BATCH(batch,
> +                  (0 << 7) |  /* expand NOA bus flag */
> +                  (0 << 6) |  /* disable slice-level clock gating */
> +                  (0 << 5) |  /* disable clock gating for NOA */
> +                  (0 << 4) |  /* terminate if AVC motion and POC table error occurs */
> +                  (0 << 3) |  /* terminate if AVC mbdata error occurs */
> +                  (0 << 2) |  /* terminate if AVC CABAC/CAVLC decode error occurs */
> +                  (0 << 1) |
> +                  (0 << 0));
> +    OUT_BCS_BATCH(batch, 0);
> +    OUT_BCS_BATCH(batch, 0);
> +
> +    ADVANCE_BCS_BATCH(batch);
> +}
> +
> +static void
> +gen9_mfc_avc_surface_state(VADriverContextP ctx,
> +                           struct intel_encoder_context *encoder_context,
> +                           struct i965_gpe_resource *gpe_resource,
> +                           int id)
> +{
> +    struct intel_batchbuffer *batch = encoder_context->base.batch;
> +
> +    BEGIN_BCS_BATCH(batch, 6);
> +
> +    OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
> +    OUT_BCS_BATCH(batch, id);
> +    OUT_BCS_BATCH(batch,
> +                  ((gpe_resource->height - 1) << 18) |
> +                  ((gpe_resource->width - 1) << 4));
> +    OUT_BCS_BATCH(batch,
> +                  (MFX_SURFACE_PLANAR_420_8 << 28) |  /* 420 planar YUV surface */
> +                  (1 << 27) |                         /* must be 1 for interleave U/V, hardware requirement */
> +                  ((gpe_resource->pitch - 1) << 3) |  /* pitch */
> +                  (0 << 2) |                          /* must be 0 for interleave U/V */
> +                  (1 << 1) |                          /* must be tiled */
> +                  (I965_TILEWALK_YMAJOR << 0));       /* tile walk, TILEWALK_YMAJOR */
> +    OUT_BCS_BATCH(batch,
> +                  (0 << 16) |                         /* must be 0 for interleave U/V */
> +                  (gpe_resource->y_cb_offset));       /* y offset for U(cb) */
> +    OUT_BCS_BATCH(batch,
> +                  (0 << 16) |                         /* must be 0 for interleave U/V */
> +                  (gpe_resource->y_cb_offset));       /* y offset for U(cb) */
> +
> +    ADVANCE_BCS_BATCH(batch);
> +}
> +
> +static void
> +gen9_mfc_avc_pipe_buf_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
> +{
> +    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
> +    struct gen9_avc_encoder_context * avc_ctx = (struct gen9_avc_encoder_context *)pak_context->private_enc_ctx;
> +    struct intel_batchbuffer *batch = encoder_context->base.batch;
> +    int i;
> +
> +    BEGIN_BCS_BATCH(batch, 65);
> +
> +    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (65 - 2));
> +
> +    /* the DW1-3 is for pre_deblocking */
> +    OUT_BUFFER_3DW(batch, avc_ctx->res_pre_deblocking_output.bo, 1, 0, 0);
> +
> +    /* the DW4-6 is for the post_deblocking */
> +    OUT_BUFFER_3DW(batch, avc_ctx->res_post_deblocking_output.bo, 1, 0, 0);
> +
> +    /* the DW7-9 is for the uncompressed_picture */
> +    OUT_BUFFER_3DW(batch, avc_ctx->res_uncompressed_input_surface.bo, 1, 0, 0);
> +
> +    /* the DW10-12 is for PAK information (write) */
> +    OUT_BUFFER_3DW(batch, avc_ctx->res_pak_mb_status_buffer.bo, 1, 0, 0); //?
> +
> +    /* the DW13-15 is for the intra_row_store_scratch */
> +    OUT_BUFFER_3DW(batch, avc_ctx->res_intra_row_store_scratch_buffer.bo, 1, 0, 0);
> +
> +    /* the DW16-18 is for the deblocking filter */
> +    OUT_BUFFER_3DW(batch, avc_ctx->res_deblocking_filter_row_store_scratch_buffer.bo, 1, 0, 0);
> +
> +    /* the DW 19-50 is for Reference pictures */
> +    for (i = 0; i < ARRAY_ELEMS(avc_ctx->list_reference_res); i++) {
> +        OUT_BUFFER_2DW(batch, avc_ctx->list_reference_res[i].bo, 1, 0);
> +    }
> +
> +    /* DW 51, reference picture attributes */
> +    OUT_BCS_BATCH(batch, 0);
> +
> +    /* The DW 52-54 is for PAK information (read) */
> +    OUT_BUFFER_3DW(batch, avc_ctx->res_pak_mb_status_buffer.bo, 1, 0, 0);
> +
> +    /* the DW 55-57 is the ILDB buffer */
> +    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
> +
> +    /* the DW 58-60 is the second ILDB buffer */
> +    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
> +
> +    /* DW 61, memory compress enable & mode */
> +    OUT_BCS_BATCH(batch, 0);
> +
> +    /* the DW 62-64 is the buffer */
> +    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
> +
> +    ADVANCE_BCS_BATCH(batch);
> +}
> +
> +static void
> +gen9_mfc_avc_ind_obj_base_addr_state(VADriverContextP ctx,
> +                                     struct encode_state *encode_state,
> +                                     struct intel_encoder_context *encoder_context)
> +{
> +    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
> +    struct gen9_avc_encoder_context * avc_ctx = (struct gen9_avc_encoder_context *)pak_context->private_enc_ctx;
> +    struct generic_enc_codec_state * generic_state = (struct generic_enc_codec_state *)pak_context->generic_enc_state;
> +    struct intel_batchbuffer *batch = encoder_context->base.batch;
> +    struct object_surface *obj_surface;
> +    struct gen9_surface_avc *avc_priv_surface;
> +    unsigned int size = 0;
> +    unsigned int w_mb = generic_state->frame_width_in_mbs;
> +    unsigned int h_mb = generic_state->frame_height_in_mbs;
> +
> +    obj_surface = encode_state->reconstructed_object;
> +
> +    if (!obj_surface || !obj_surface->private_data)
> +        return;
> +    avc_priv_surface = obj_surface->private_data;
> +
> +    BEGIN_BCS_BATCH(batch, 26);
> +
> +    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
> +    /* The DW1-5 is for the MFX indirect bitstream offset, ignore for VDEnc mode */
> +    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
> +    OUT_BUFFER_2DW(batch, NULL, 0, 0);
> +
> +    /* the DW6-10 is for MFX Indirect MV Object Base Address, ignore for VDEnc mode */
> +    size = w_mb * h_mb * 32 * 4;
> +    OUT_BUFFER_3DW(batch,
> +                   avc_priv_surface->res_mv_data_surface.bo,
> +                   1,
> +                   0,
> +                   0);
> +    OUT_BUFFER_2DW(batch,
> +                   avc_priv_surface->res_mv_data_surface.bo,
> +                   1,
> +                   ALIGN(size, 0x1000));
> +
> +    /* The DW11-15 is for MFX IT-COFF. Not used on encoder */
> +    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
> +    OUT_BUFFER_2DW(batch, NULL, 0, 0);
> +
> +    /* The DW16-20 is for MFX indirect DBLK. Not used on encoder */
> +    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
> +    OUT_BUFFER_2DW(batch, NULL, 0, 0);
> +
> +    /* The DW21-25 is for MFC Indirect PAK-BSE Object Base Address for Encoder
> +     * Note: an offset is specified in MFX_AVC_SLICE_STATE
> +     */
> +    OUT_BUFFER_3DW(batch,
> +                   avc_ctx->compressed_bitstream.res.bo,
> +                   1,
> +                   0,
> +                   0);
> +    OUT_BUFFER_2DW(batch,
> +                   avc_ctx->compressed_bitstream.res.bo,
> +                   1,
> +                   avc_ctx->compressed_bitstream.end_offset);
> +
> +    ADVANCE_BCS_BATCH(batch);
> +}
> +
> +static void
> +gen9_mfc_avc_bsp_buf_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
> +{
> +    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
> +    struct gen9_avc_encoder_context * avc_ctx = (struct gen9_avc_encoder_context *)pak_context->private_enc_ctx;
> +    struct intel_batchbuffer *batch = encoder_context->base.batch;
> +
> +    BEGIN_BCS_BATCH(batch, 10);
> +
> +    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
> +
> +    /* The DW1-3 is for bsd/mpc row store scratch buffer */
> +    OUT_BUFFER_3DW(batch, avc_ctx->res_bsd_mpc_row_store_scratch_buffer.bo, 1, 0, 0);
> +
> +    /* The DW4-6 is for MPR Row Store Scratch Buffer Base Address, ignore for encoder */
> +    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
> +
> +    /* The DW7-9 is for Bitplane Read Buffer Base Address, ignore for encoder */
> +    OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
> +
> +    ADVANCE_BCS_BATCH(batch);
> +}
> +
> +static void
> +gen9_mfc_avc_directmode_state(VADriverContextP ctx,
> +                              struct intel_encoder_context *encoder_context)
> +{
> +    struct intel_batchbuffer *batch = encoder_context->base.batch;
> +    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
> +    struct gen9_avc_encoder_context * avc_ctx = (struct gen9_avc_encoder_context *)pak_context->private_enc_ctx;
> +    struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
> +
> +    int i;
> +
> +    BEGIN_BCS_BATCH(batch, 71);
> +
> +    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
> +
> +    /* Reference frames and Current frames */
> +    /* the DW1-32 is for the direct MV for reference */
> +    for (i = 0; i < NUM_MFC_AVC_DMV_BUFFERS - 2; i += 2) {
> +        if (avc_ctx->res_direct_mv_buffersr[i].bo != NULL) {
> +            OUT_BCS_RELOC(batch, avc_ctx->res_direct_mv_buffersr[i].bo,
> +                          I915_GEM_DOMAIN_INSTRUCTION, 0,
> +                          0);
> +            OUT_BCS_BATCH(batch, 0);
> +        } else {
> +            OUT_BCS_BATCH(batch, 0);
> +            OUT_BCS_BATCH(batch, 0);
> +        }
> +    }
> +
> +    OUT_BCS_BATCH(batch, 0);
> +
> +    /* the DW34-36 is the MV for the current reference */
> +    OUT_BCS_RELOC(batch, avc_ctx->res_direct_mv_buffersr[NUM_MFC_AVC_DMV_BUFFERS - 2].bo,
> +                  I915_GEM_DOMAIN_INSTRUCTION, 0,
> +                  0);
> +
> +    OUT_BCS_BATCH(batch, 0);
> +    OUT_BCS_BATCH(batch, 0);
> +
> +    /* POC list */
> +    for (i = 0; i < 32; i++) {
> +        OUT_BCS_BATCH(batch, avc_state->top_field_poc[i]);
> +    }
> +    OUT_BCS_BATCH(batch, avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 2]);
> +    OUT_BCS_BATCH(batch, avc_state->top_field_poc[NUM_MFC_AVC_DMV_BUFFERS - 1]);
> +
> +    ADVANCE_BCS_BATCH(batch);
> +}
> +
> +static void
> +gen9_mfc_qm_state(VADriverContextP ctx,
> +                  int qm_type,
> +                  const unsigned int *qm,
> +                  int qm_length,
> +                  struct intel_encoder_context *encoder_context)
> +{
> +    struct intel_batchbuffer *batch = encoder_context->base.batch;
> +    unsigned int qm_buffer[16];
> +
> +    assert(qm_length <= 16);
> +    assert(sizeof(*qm) == 4);
> +    memset(qm_buffer, 0, 16 * 4);
> +    memcpy(qm_buffer, qm, qm_length * 4);
> +
> +    BEGIN_BCS_BATCH(batch, 18);
> +    OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
> +    OUT_BCS_BATCH(batch, qm_type << 0);
> +    intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
> +    ADVANCE_BCS_BATCH(batch);
> +}
> +
> +static void
> +gen9_mfc_avc_qm_state(VADriverContextP ctx,
> +                      struct encode_state *encode_state,
> +                      struct intel_encoder_context *encoder_context)
> +{
> +    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
> +    struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
> +    VAEncSequenceParameterBufferH264 *seq_param = avc_state->seq_param;
> +    VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param;
> +
> +    /* TODO: add support for non flat matrix */
> +    const unsigned int *qm_4x4_intra;
> +    const unsigned int *qm_4x4_inter;
> +    const unsigned int *qm_8x8_intra;
> +    const unsigned int *qm_8x8_inter;
> +
> +    if (!seq_param->seq_fields.bits.seq_scaling_matrix_present_flag
> +        && !pic_param->pic_fields.bits.pic_scaling_matrix_present_flag) {
> +        qm_4x4_intra = qm_4x4_inter = qm_8x8_intra = qm_8x8_inter = qm_flat;
> +    } else {
> +        VAIQMatrixBufferH264 *qm;
> +        assert(encode_state->q_matrix && encode_state->q_matrix->buffer);
> +        qm = (VAIQMatrixBufferH264 *)encode_state->q_matrix->buffer;
> +        qm_4x4_intra = (unsigned int *)qm->ScalingList4x4[0];
> +        qm_4x4_inter = (unsigned int *)qm->ScalingList4x4[3];
> +        qm_8x8_intra = (unsigned int *)qm->ScalingList8x8[0];
> +        qm_8x8_inter = (unsigned int *)qm->ScalingList8x8[1];
> +    }
> +
> +    gen9_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm_4x4_intra, 12, encoder_context);
> +    gen9_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm_4x4_inter, 12, encoder_context);
> +    gen9_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm_8x8_intra, 16, encoder_context);
> +    gen9_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm_8x8_inter, 16, encoder_context);
> +}
> +
> +static void
> +gen9_mfc_fqm_state(VADriverContextP ctx,
> +                   int fqm_type,
> +                   const unsigned int *fqm,
> +                   int fqm_length,
> +                   struct intel_encoder_context *encoder_context)
> +{
> +    struct intel_batchbuffer *batch = encoder_context->base.batch;
> +    unsigned int fqm_buffer[32];
> +
> +    assert(fqm_length <= 32);
> +    assert(sizeof(*fqm) == 4);
> +    memset(fqm_buffer, 0, 32 * 4);
> +    memcpy(fqm_buffer, fqm, fqm_length * 4);
> +
> +    BEGIN_BCS_BATCH(batch, 34);
> +    OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
> +    OUT_BCS_BATCH(batch, fqm_type << 0);
> +    intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
> +    ADVANCE_BCS_BATCH(batch);
> +}
> +
> +static void
> +gen9_mfc_fill_fqm(uint8_t *qm, uint16_t *fqm, int len)
> +{
> +    int i, j;
> +    for (i = 0; i < len; i++)
> +        for (j = 0; j < len; j++)
> +            fqm[i * len + j] = (1 << 16) / qm[j * len + i];
> +}
> +
> +static void
> +gen9_mfc_avc_fqm_state(VADriverContextP ctx,
> +                       struct encode_state *encode_state,
> +                       struct intel_encoder_context *encoder_context)
> +{
> +    /* TODO: add support for non flat matrix */
> +    struct encoder_vme_mfc_context * pak_context = (struct encoder_vme_mfc_context *)encoder_context->vme_context;
> +    struct avc_enc_state * avc_state = (struct avc_enc_state *)pak_context->private_enc_state;
> +    VAEncSequenceParameterBufferH264 *seq_param = avc_state->seq_param;
> +    VAEncPictureParameterBufferH264 *pic_param = avc_state->pic_param;
> +
> +    if (!seq_param->seq_fields.bits.seq_scaling_matrix_present_flag
> +        && !pic_param->pic_fields.bits.pic_scaling_matrix_present_flag) {
> +        gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, fqm_flat, 24, encoder_context);
> +        gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, fqm_flat, 24, encoder_context);
> +        gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, fqm_flat, 32, encoder_context);
> +        gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, fqm_flat, 32, encoder_context);
> +    } else {
> +        int i;
> +        uint32_t fqm[32];
> +        VAIQMatrixBufferH264 *qm;
> +        assert(encode_state->q_matrix && encode_state->q_matrix->buffer);
> +        qm = (VAIQMatrixBufferH264 *)encode_state->q_matrix->buffer;
> +
> +        for (i = 0; i < 3; i++)
> +            gen9_mfc_fill_fqm(qm->ScalingList4x4[i], (uint16_t *)fqm + 16 * i, 4);
> +        gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, fqm, 24, encoder_context);
> +
> +        for (i = 3; i < 6; i++)
> +            gen9_mfc_fill_fqm(qm->ScalingList4x4[i], (uint16_t *)fqm + 16 * (i - 3), 4);
> +        gen9_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, fqm, 24, encoder_context);
> +
> +        gen9_mfc_fill_fqm(qm->ScalingList8x8[0], (uint16_t *)fqm, 8);
> +        gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, fqm, 32, encoder_context);
> +
> +        gen9_mfc_fill_fqm(qm->ScalingList8x8[1], (uint16_t *)fqm, 8);
> +        gen9_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, fqm, 32, encoder_context);
> +    }
> +}