[Libva] [PATCH V2 5/6] HEVC ENC:Added VME pipeline
Qu, Pengfei
pengfei.qu at intel.com
Tue Jan 6 17:54:34 PST 2015
Sure, I will put src/i965_encoder.c into an independent patch.
-----Original Message-----
From: Zhao, Yakui
Sent: Wednesday, January 7, 2015 9:16 AM
To: Qu, Pengfei
Cc: libva at lists.freedesktop.org
Subject: Re: [Libva] [PATCH V2 5/6] HEVC ENC:Added VME pipeline
On Tue, 2015-01-06 at 01:57 -0700, Qu,Pengfei wrote:
It seems that this patch mixes two separate things: setting up the VME pipeline and changing the initialization done in intel_enc_hw_context_init.
Can it be split into two patches?
> Signed-off-by: Qu,Pengfei <Pengfei.Qu at intel.com>
> ---
> src/gen6_mfc_common.c | 222 +++++++++++++++++++++++
> src/gen6_vme.h | 19 ++
> src/gen9_vme.c | 484 ++++++++++++++++++++++++++++++++++++++++++++++++++
> src/i965_encoder.c | 74 +++++++-
> 4 files changed, 798 insertions(+), 1 deletion(-)
>
> diff --git a/src/gen6_mfc_common.c b/src/gen6_mfc_common.c index
> fe41dac..6a5f720 100644
> --- a/src/gen6_mfc_common.c
> +++ b/src/gen6_mfc_common.c
> @@ -1652,3 +1652,225 @@ void intel_avc_slice_insert_packed_data(VADriverContextP ctx,
> return;
> }
>
> +/* HEVC */
> +/*
> + * Return the index of the entry in ref_list[0..num_pictures) whose picture
> + * order count is nearest to curr_pic on the requested side, or -1 if no
> + * entry qualifies.  With dir == 0 only entries with a smaller POC than
> + * curr_pic are accepted; with dir != 0 only entries with a larger POC.
> + * Scanning stops at the first entry that is flagged invalid or carries
> + * VA_INVALID_SURFACE.
> + */
> +static int
> +hevc_temporal_find_surface(VAPictureHEVC *curr_pic,
> +                           VAPictureHEVC *ref_list,
> +                           int num_pictures,
> +                           int dir)
> +{
> +    int best_idx = -1;
> +    int best_dist = 0x7FFFFFFF;
> +    int idx = 0;
> +
> +    while (idx < num_pictures) {
> +        const VAPictureHEVC *cand = &ref_list[idx];
> +        int dist;
> +
> +        /* the first unusable entry terminates the list */
> +        if ((cand->flags & VA_PICTURE_HEVC_INVALID) ||
> +            (cand->picture_id == VA_INVALID_SURFACE))
> +            break;
> +
> +        /* signed POC distance, flipped so that "wanted side" is positive */
> +        dist = curr_pic->pic_order_cnt - cand->pic_order_cnt;
> +        dist = dir ? -dist : dist;
> +
> +        if (dist > 0 && dist < best_dist) {
> +            best_dist = dist;
> +            best_idx = idx;
> +        }
> +
> +        idx++;
> +    }
> +
> +    return best_idx;
> +}
> +/*
> + * Choose the reference picture that VME will use for reference list
> + * `list_index` (0 selects ref_pic_list0, anything else ref_pic_list1 of the
> + * first slice) and bind it at `surface_index` through the supplied
> + * vme_source_surface_state callback.
> + *
> + * - With a single active reference, entry 0 of the list is used; if that
> + *   surface is missing or has no bo, the function falls back to
> + *   encode_state->reference_objects[list_index] and
> + *   pic_param->reference_frames[list_index].
> + * - With several active references, the temporally nearest entry (see
> + *   hevc_temporal_find_surface) is used.
> + *
> + * On success vme_context->used_reference_objects[], used_references[] and
> + * ref_index_in_mb[] are filled for list_index; if no usable surface exists
> + * they are reset to NULL/NULL/0 and the callback is not invoked.
> + *
> + * list_index is expected to be 0 or 1 (the only values the callers in this
> + * patch pass).
> + */
> +void
> +intel_hevc_vme_reference_state(VADriverContextP ctx,
> +                               struct encode_state *encode_state,
> +                               struct intel_encoder_context *encoder_context,
> +                               int list_index,
> +                               int surface_index,
> +                               void (* vme_source_surface_state)(
> +                                   VADriverContextP ctx,
> +                                   int index,
> +                                   struct object_surface *obj_surface,
> +                                   struct intel_encoder_context *encoder_context))
> +{
> +    struct gen6_vme_context *vme_context = encoder_context->vme_context;
> +    /* not referenced by name below; presumably needed by the SURFACE() macro */
> +    struct i965_driver_data *i965 = i965_driver_data(ctx);
> +    VAEncPictureParameterBufferHEVC *pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
> +    VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
> +    struct object_surface *obj_surface = NULL;
> +    VAPictureHEVC *ref_list;
> +    VAPictureHEVC *used_ref = NULL;
> +    VASurfaceID ref_surface_id = VA_INVALID_SURFACE;
> +    int max_num_references;
> +    int ref_idx;
> +
> +    if (list_index == 0) {
> +        max_num_references = pic_param->num_ref_idx_l0_default_active_minus1 + 1;
> +        ref_list = slice_param->ref_pic_list0;
> +    } else {
> +        max_num_references = pic_param->num_ref_idx_l1_default_active_minus1 + 1;
> +        ref_list = slice_param->ref_pic_list1;
> +    }
> +
> +    if (max_num_references == 1) {
> +        ref_idx = 0;
> +        used_ref = &ref_list[0];
> +        ref_surface_id = ref_list[0].picture_id;
> +
> +        if (ref_surface_id != VA_INVALID_SURFACE)
> +            obj_surface = SURFACE(ref_surface_id);
> +
> +        if (!obj_surface ||
> +            !obj_surface->bo) {
> +            /* list entry unusable: fall back to the picture-level reference */
> +            obj_surface = encode_state->reference_objects[list_index];
> +            used_ref = &pic_param->reference_frames[list_index];
> +        }
> +    } else {
> +        /* select the reference frame in temporal space */
> +        ref_idx = hevc_temporal_find_surface(&pic_param->decoded_curr_pic,
> +                                             ref_list,
> +                                             max_num_references,
> +                                             list_index == 1);
> +
> +        /*
> +         * hevc_temporal_find_surface() returns -1 when no entry qualifies;
> +         * never index ref_list with it.
> +         */
> +        if (ref_idx >= 0) {
> +            used_ref = &ref_list[ref_idx];
> +            ref_surface_id = used_ref->picture_id;
> +
> +            if (ref_surface_id != VA_INVALID_SURFACE)
> +                obj_surface = SURFACE(ref_surface_id);
> +        }
> +    }
> +
> +    if (obj_surface &&
> +        obj_surface->bo) {
> +        assert(ref_idx >= 0);
> +        vme_context->used_references[list_index] = used_ref;
> +        vme_context->used_reference_objects[list_index] = obj_surface;
> +        vme_source_surface_state(ctx, surface_index, obj_surface, encoder_context);
> +        /* replicate the chosen index into all four bytes */
> +        vme_context->ref_index_in_mb[list_index] = (ref_idx << 24 |
> +                                                    ref_idx << 16 |
> +                                                    ref_idx <<  8 |
> +                                                    ref_idx);
> +    } else {
> +        vme_context->used_reference_objects[list_index] = NULL;
> +        vme_context->used_references[list_index] = NULL;
> +        vme_context->ref_index_in_mb[list_index] = 0;
> +    }
> +}
> +
> +/*
> + * Fill the mode-cost and motion-vector-cost entries of
> + * vme_context->vme_state_message for the current HEVC picture.
> + *
> + * The QP is taken as pic_init_qp + slice_qp_delta of the first slice, the
> + * Lagrangian factor comes from intel_lambda_qp(), and every cost is encoded
> + * with intel_format_lutvalue().  Nothing is written when vme_state_message
> + * is NULL.  For non-I slices with qp <= 25 a fixed table is used instead of
> + * the lambda-scaled costs.
> + */
> +void intel_vme_hevc_update_mbmv_cost(VADriverContextP ctx,
> +                                     struct encode_state *encode_state,
> +                                     struct intel_encoder_context *encoder_context)
> +{
> +    struct gen6_vme_context *vme_context = encoder_context->vme_context;
> +    VAEncPictureParameterBufferHEVC *pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
> +    VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
> +    uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message);
> +    /* here no SI SP slice for HEVC, do not need slice fixup */
> +    int slice_type = slice_param->slice_type;
> +    int qp, m_cost, j, mv_count;
> +    float lambda, m_costf;
> +
> +    /*
> +     * TODO (CBR): only the CQP derivation is implemented.  For the other
> +     * rate-control modes the QP is meant to come from
> +     * mfc_context->bit_rate_control_context[slice_type].QpPrimeY.
> +     */
> +    qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
> +
> +    if (vme_state_message == NULL)
> +        return;
> +
> +    assert(qp <= QP_MAX);
> +    lambda = intel_lambda_qp(qp);
> +
> +    if (slice_type == SLICE_TYPE_I) {
> +        vme_state_message[MODE_INTRA_16X16] = 0;
> +        m_cost = lambda * 4;
> +        vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
> +        m_cost = lambda * 16;
> +        vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
> +        m_cost = lambda * 3;
> +        vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
> +        return;
> +    }
> +
> +    /* MV costs: entries for MV length 0, 1, 2, then 4, 8, ..., 64 */
> +    m_cost = 0;
> +    vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f);
> +    for (j = 1; j < 3; j++) {
> +        m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
> +        m_cost = (int)m_costf;
> +        vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
> +    }
> +    mv_count = 3;
> +    for (j = 4; j <= 64; j *= 2) {
> +        m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
> +        m_cost = (int)m_costf;
> +        vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f);
> +        mv_count++;
> +    }
> +
> +    if (qp <= 25) {
> +        /* low QP: fixed mode costs */
> +        vme_state_message[MODE_INTRA_16X16] = 0x4a;
> +        vme_state_message[MODE_INTRA_8X8] = 0x4a;
> +        vme_state_message[MODE_INTRA_4X4] = 0x4a;
> +        vme_state_message[MODE_INTRA_NONPRED] = 0x4a;
> +        vme_state_message[MODE_INTER_16X16] = 0x4a;
> +        vme_state_message[MODE_INTER_16X8] = 0x4a;
> +        vme_state_message[MODE_INTER_8X8] = 0x4a;
> +        vme_state_message[MODE_INTER_8X4] = 0x4a;
> +        vme_state_message[MODE_INTER_4X4] = 0x4a;
> +        vme_state_message[MODE_INTER_BWD] = 0x2a;
> +        return;
> +    }
> +
> +    /*
> +     * Assign m_cost itself here: the original stored lambda * 10 in m_costf
> +     * only, so INTRA_16X16 was encoded from the stale cost left over by the
> +     * MV loop above.
> +     */
> +    m_cost = lambda * 10;
> +    vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
> +    m_cost = lambda * 14;
> +    vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
> +    m_cost = lambda * 24;
> +    vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
> +    m_costf = lambda * 3.5;
> +    m_cost = m_costf;
> +    vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
> +
> +    if (slice_type == SLICE_TYPE_P) {
> +        m_costf = lambda * 2.5;
> +        m_cost = m_costf;
> +        vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
> +        m_costf = lambda * 4;
> +        m_cost = m_costf;
> +        vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
> +        m_costf = lambda * 1.5;
> +        m_cost = m_costf;
> +        vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
> +        m_costf = lambda * 3;
> +        m_cost = m_costf;
> +        vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
> +        m_costf = lambda * 5;
> +        m_cost = m_costf;
> +        vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
> +        /* BWD is not used in P-frame */
> +        vme_state_message[MODE_INTER_BWD] = 0;
> +    } else {
> +        m_costf = lambda * 2.5;
> +        m_cost = m_costf;
> +        vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
> +        m_costf = lambda * 5.5;
> +        m_cost = m_costf;
> +        vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
> +        m_costf = lambda * 3.5;
> +        m_cost = m_costf;
> +        vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
> +        m_costf = lambda * 5.0;
> +        m_cost = m_costf;
> +        vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
> +        m_costf = lambda * 6.5;
> +        m_cost = m_costf;
> +        vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
> +        m_costf = lambda * 1.5;
> +        m_cost = m_costf;
> +        vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);
> +    }
> +}
> diff --git a/src/gen6_vme.h b/src/gen6_vme.h index bc62c14..c9d6b48
> 100644
> --- a/src/gen6_vme.h
> +++ b/src/gen6_vme.h
> @@ -83,6 +83,7 @@ struct gen6_vme_context
> unsigned long surface_state_offset);
> void *vme_state_message;
> unsigned int h264_level;
> + unsigned int hevc_level;
> unsigned int video_coding_type;
> unsigned int vme_kernel_sum;
> unsigned int mpeg2_level;
> @@ -174,6 +175,24 @@ intel_avc_vme_reference_state(VADriverContextP ctx,
> struct object_surface *obj_surface,
> struct intel_encoder_context
> *encoder_context));
>
> +/* HEVC: pick the reference picture for list_index and bind it at surface_index through the vme_source_surface_state callback (implemented in gen6_mfc_common.c) */
> +void
> +intel_hevc_vme_reference_state(VADriverContextP ctx,
> + struct encode_state *encode_state,
> + struct intel_encoder_context *encoder_context,
> + int list_index,
> + int surface_index,
> + void (* vme_source_surface_state)(
> + VADriverContextP ctx,
> + int index,
> + struct object_surface *obj_surface,
> + struct intel_encoder_context
> +*encoder_context));
> +
> +void intel_vme_hevc_update_mbmv_cost(VADriverContextP ctx, /* fills the HEVC mode/MV cost entries of vme_context->vme_state_message (implemented in gen6_mfc_common.c) */
> + struct encode_state *encode_state,
> + struct intel_encoder_context
> +*encoder_context);
> +
> +
> extern Bool gen8_vme_context_init(VADriverContextP ctx, struct
> intel_encoder_context *encoder_context);
>
> extern Bool gen9_vme_context_init(VADriverContextP ctx, struct
> intel_encoder_context *encoder_context); diff --git a/src/gen9_vme.c
> b/src/gen9_vme.c index b4310f2..0e94581 100644
> --- a/src/gen9_vme.c
> +++ b/src/gen9_vme.c
> @@ -120,6 +120,43 @@ static struct i965_kernel gen9_vme_mpeg2_kernels[] = {
> },
> };
>
> +/* HEVC */
> +
> +/*
> + * Shader binaries used by the HEVC VME pipeline.  Each table is filled by
> + * including the corresponding .g9b file; the #include directive has to sit
> + * on a line of its own to be valid preprocessor input.
> + */
> +static const uint32_t gen9_vme_hevc_intra_frame[][4] = {
> +#include "shaders/vme/intra_frame_gen9.g9b"
> +};
> +
> +static const uint32_t gen9_vme_hevc_inter_frame[][4] = {
> +#include "shaders/vme/inter_frame_gen9.g9b"
> +};
> +
> +static const uint32_t gen9_vme_hevc_inter_bframe[][4] = {
> +#include "shaders/vme/inter_bframe_gen9.g9b"
> +};
> +
> +/*
> + * Kernel list handed to the VME context for CODEC_HEVC: one entry per
> + * shader (intra, inter/P, inter/B), each giving a name, the shader index,
> + * the binary and its size in bytes; the last member is left NULL.
> + */
> +static struct i965_kernel gen9_vme_hevc_kernels[] = {
> +    {
> +        "VME Intra Frame",
> +        VME_INTRA_SHADER, /*index*/
> +        gen9_vme_hevc_intra_frame,
> +        sizeof(gen9_vme_hevc_intra_frame),
> +        NULL
> +    },
> +    {
> +        "VME inter Frame",
> +        VME_INTER_SHADER,
> +        gen9_vme_hevc_inter_frame,
> +        sizeof(gen9_vme_hevc_inter_frame),
> +        NULL
> +    },
> +    {
> +        "VME inter BFrame",
> +        VME_BINTER_SHADER,
> +        gen9_vme_hevc_inter_bframe,
> +        sizeof(gen9_vme_hevc_inter_bframe),
> +        NULL
> +    }
> +};
> /* only used for VME source surface state */ static void
> gen9_vme_source_surface_state(VADriverContextP ctx, @@ -330,6 +367,13
> @@ static VAStatus gen9_vme_constant_setup(VADriverContextP ctx,
> }
> } else if (encoder_context->codec == CODEC_MPEG2) {
> mv_num = 2;
> + }else if (encoder_context->codec == CODEC_HEVC) {
> + if (vme_context->hevc_level >= 30*3) {
> + mv_num = 16;
> +
> + if (vme_context->hevc_level >= 31*3)
> + mv_num = 8;
> + }/* use the avc level setting */
> }
>
> vme_state_message[31] = mv_num;
> @@ -1130,6 +1174,440 @@ gen9_vme_mpeg2_pipeline(VADriverContextP ctx,
> return VA_STATUS_SUCCESS;
> }
>
> +/* HEVC */
> +
> +static void /* Size and allocate the VME output buffer for the whole picture and register it at binding-table slot `index`. */
> +gen9_vme_hevc_output_buffer_setup(VADriverContextP ctx,
> + struct encode_state *encode_state,
> + int index,
> + struct intel_encoder_context
> +*encoder_context)
> +
> +{
> + struct i965_driver_data *i965 = i965_driver_data(ctx);
> + struct gen6_vme_context *vme_context = encoder_context->vme_context;
> + VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
> + VAEncSliceParameterBufferHEVC *pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
> + int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I; /* decided from the first slice only */
> + int width_in_mbs = (pSequenceParameter->pic_width_in_luma_samples + 15)/16; /* picture size in 16x16 blocks, rounded up */
> + int height_in_mbs =
> +(pSequenceParameter->pic_height_in_luma_samples + 15)/16;
> +
> +
> + vme_context->vme_output.num_blocks = width_in_mbs * height_in_mbs; /* one output block per 16x16 block */
> + vme_context->vme_output.pitch = 16; /* in bytes, always 16 */
> +
> + if (is_intra)
> + vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES * 2;
> + else
> + vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES * 24;
> + /*
> + * Inter MV . 32-byte Intra search + 16 IME info + 128 IME MV + 32 IME Ref
> + * + 16 FBR Info + 128 FBR MV + 32 FBR Ref.
> + * 16 * (2 + 2 * (1 + 8 + 2))= 16 * 24.
> + */
> +
> + vme_context->vme_output.bo = dri_bo_alloc(i965->intel.bufmgr, /* total size = num_blocks * size_block */
> + "VME output buffer",
> + vme_context->vme_output.num_blocks * vme_context->vme_output.size_block,
> + 0x1000);
> + assert(vme_context->vme_output.bo); /* allocation failure is only caught by this assert */
> + vme_context->vme_buffer_suface_setup(ctx,
> + &vme_context->gpe_context,
> + &vme_context->vme_output,
> + BINDING_TABLE_OFFSET(index),
> +
> +SURFACE_STATE_OFFSET(index)); }
> +
> +static void /* Allocate the second-level batch buffer that the fill_vme_batchbuffer routines write the per-macroblock commands into. */
> +gen9_vme_hevc_output_vme_batchbuffer_setup(VADriverContextP ctx,
> + struct encode_state *encode_state,
> + int index, /* not used in this function */
> + struct intel_encoder_context
> +*encoder_context)
> +
> +{
> + struct i965_driver_data *i965 = i965_driver_data(ctx);
> + struct gen6_vme_context *vme_context = encoder_context->vme_context;
> + VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
> + int width_in_mbs = (pSequenceParameter->pic_width_in_luma_samples + 15)/16;
> + int height_in_mbs =
> +(pSequenceParameter->pic_height_in_luma_samples + 15)/16;
> +
> + vme_context->vme_batchbuffer.num_blocks = width_in_mbs * height_in_mbs + 1; /* one block per 16x16 block plus one extra */
> + vme_context->vme_batchbuffer.size_block = 64; /* 4 OWORDs */
> + vme_context->vme_batchbuffer.pitch = 16;
> + vme_context->vme_batchbuffer.bo = dri_bo_alloc(i965->intel.bufmgr, /* NOTE(review): result is not checked here, unlike the output buffer */
> + "VME batchbuffer",
> + vme_context->vme_batchbuffer.num_blocks * vme_context->vme_batchbuffer.size_block,
> + 0x1000); } static
> +VAStatus gen9_vme_hevc_surface_setup(VADriverContextP ctx, /* Bind every surface the HEVC VME kernels use: input at 0/4/6, references at 1/2, output at 3, batch buffer at 5. Always returns VA_STATUS_SUCCESS. */
> + struct encode_state *encode_state,
> + int is_intra,
> + struct intel_encoder_context *encoder_context)
> +{
> + struct object_surface *obj_surface;
> +
> + /*Setup surfaces state*/
> + /* current picture for encoding */
> + obj_surface = encode_state->input_yuv_object;
> + gen9_vme_source_surface_state(ctx, 0, obj_surface, encoder_context);
> + gen9_vme_media_source_surface_state(ctx, 4, obj_surface, encoder_context);
> + gen9_vme_media_chroma_source_surface_state(ctx, 6, obj_surface,
> + encoder_context);
> +
> + if (!is_intra) {
> + VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
> + int slice_type;
> +
> + slice_type = slice_param->slice_type;
> + assert(slice_type != SLICE_TYPE_I && slice_type !=
> + SLICE_TYPE_SI);
> +
> + /* to do HEVC */
> + intel_hevc_vme_reference_state(ctx, encode_state, /* list 0 reference goes to surface 1 */
> + encoder_context, 0, 1, gen9_vme_source_surface_state);
> +
> + if (slice_type == SLICE_TYPE_B) /* list 1 reference goes to surface 2, B slices only */
> + intel_hevc_vme_reference_state(ctx, encode_state, encoder_context, 1, 2, gen9_vme_source_surface_state);
> + }
> +
> + /* VME output */
> + gen9_vme_hevc_output_buffer_setup(ctx, encode_state, 3, encoder_context);
> + gen9_vme_hevc_output_vme_batchbuffer_setup(ctx, encode_state, 5,
> + encoder_context);
> +
> + return VA_STATUS_SUCCESS;
> +}
> +static void /* Fill vme_batchbuffer with one MEDIA_OBJECT + MEDIA_STATE_FLUSH pair per macroblock, visiting each slice along diagonals and recording scoreboard dependencies. */
> +gen9wa_vme_hevc_walker_fill_vme_batchbuffer(VADriverContextP ctx,
> + struct encode_state *encode_state,
> + int mb_width, int mb_height,
> + int kernel,
> + int transform_8x8_mode_flag,
> + struct intel_encoder_context
> +*encoder_context) {
> + struct gen6_vme_context *vme_context = encoder_context->vme_context;
> + int mb_row;
> + int s;
> + unsigned int *command_ptr;
> + VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
> + int log2_cu_size = pSequenceParameter->log2_min_luma_coding_block_size_minus3 + 3;
> + int log2_ctb_size = pSequenceParameter->log2_diff_max_min_luma_coding_block_size + log2_cu_size;
> + int ctb_size = 1 << log2_ctb_size;
> + int num_mb_in_ctb = (ctb_size + 15)/16; /* 16x16 blocks along one CTB edge */
> + num_mb_in_ctb = num_mb_in_ctb * num_mb_in_ctb; /* 16x16 blocks per CTB */
> +
> +#define USE_SCOREBOARD (1 << 21)
> +
> + dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
> + command_ptr = vme_context->vme_batchbuffer.bo->virtual;
> +
> + /* slice_segment_address must be aligned to picture_width_in_ctb */
> + for (s = 0; s < encode_state->num_slice_params_ext; s++) {
> + VAEncSliceParameterBufferHEVC *pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[s]->buffer;
> + int first_mb = pSliceParameter->slice_segment_address * num_mb_in_ctb; /* CTB units converted to 16x16-block units */
> + int num_mb = pSliceParameter->num_ctu_in_slice * num_mb_in_ctb;
> + unsigned int mb_intra_ub, score_dep;
> + int x_outer, y_outer, x_inner, y_inner;
> + int xtemp_outer = 0;
> +
> + x_outer = first_mb % mb_width;
> + y_outer = first_mb / mb_width;
> + mb_row = y_outer; /* first row of this slice; rows above it are not used as neighbours */
> +
> + for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) { /* first pass: diagonals starting on the slice's first row at columns below mb_width - 2; loop_in_bounds() is defined elsewhere and the loops run while it returns 0 */
> + x_inner = x_outer;
> + y_inner = y_outer;
> + for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
> + mb_intra_ub = 0;
> + score_dep = 0;
> + if (x_inner != 0) { /* a left neighbour exists */
> + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
> + score_dep |= MB_SCOREBOARD_A;
> + }
> + if (y_inner != mb_row) { /* a row above exists inside this slice */
> + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
> + score_dep |= MB_SCOREBOARD_B;
> + if (x_inner != 0)
> + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
> + if (x_inner != (mb_width -1)) {
> + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
> + score_dep |= MB_SCOREBOARD_C;
> + }
> + }
> +
> + *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
> + *command_ptr++ = kernel;
> + *command_ptr++ = USE_SCOREBOARD;
> + /* Indirect data */
> + *command_ptr++ = 0;
> + /* the (X, Y) term of scoreboard */
> + *command_ptr++ = ((y_inner << 16) | x_inner);
> + *command_ptr++ = score_dep;
> + /*inline data */
> + *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
> + *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
> + *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
> + *command_ptr++ = 0;
> +
> + x_inner -= 2; /* step down-left along the diagonal */
> + y_inner += 1;
> + }
> + x_outer += 1;
> + }
> +
> + xtemp_outer = mb_width - 2; /* second pass: diagonals starting at column mb_width - 2 (clamped to 0) */
> + if (xtemp_outer < 0)
> + xtemp_outer = 0;
> + x_outer = xtemp_outer;
> + y_outer = first_mb / mb_width;
> + for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
> + y_inner = y_outer;
> + x_inner = x_outer;
> + for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
> + mb_intra_ub = 0;
> + score_dep = 0;
> + if (x_inner != 0) {
> + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
> + score_dep |= MB_SCOREBOARD_A;
> + }
> + if (y_inner != mb_row) {
> + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
> + score_dep |= MB_SCOREBOARD_B;
> + if (x_inner != 0)
> + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
> +
> + if (x_inner != (mb_width -1)) {
> + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
> + score_dep |= MB_SCOREBOARD_C;
> + }
> + }
> +
> + *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
> + *command_ptr++ = kernel;
> + *command_ptr++ = USE_SCOREBOARD;
> + /* Indirect data */
> + *command_ptr++ = 0;
> + /* the (X, Y) term of scoreboard */
> + *command_ptr++ = ((y_inner << 16) | x_inner);
> + *command_ptr++ = score_dep;
> + /*inline data */
> + *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
> + *command_ptr++ = ((1 << 18) | (1 << 16) |
> + transform_8x8_mode_flag | (mb_intra_ub << 8));
> +
> + *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
> + *command_ptr++ = 0;
> + x_inner -= 2;
> + y_inner += 1;
> + }
> + x_outer++;
> + if (x_outer >= mb_width) { /* past the right edge: continue from the start column one row further down */
> + y_outer += 1;
> + x_outer = xtemp_outer;
> + }
> + }
> + }
> +
> + *command_ptr++ = MI_BATCH_BUFFER_END; /* terminate the second-level batch */
> + *command_ptr++ = 0;
> +
> + dri_bo_unmap(vme_context->vme_batchbuffer.bo);
> +}
> +
> +static void /* Fill vme_batchbuffer with one MEDIA_OBJECT + MEDIA_STATE_FLUSH pair per macroblock in raster order; the scoreboard DWORDs are written as 0. */
> +gen9_vme_hevc_fill_vme_batchbuffer(VADriverContextP ctx,
> + struct encode_state *encode_state,
> + int mb_width, int mb_height,
> + int kernel,
> + int transform_8x8_mode_flag,
> + struct intel_encoder_context
> +*encoder_context) {
> + struct gen6_vme_context *vme_context = encoder_context->vme_context;
> + int mb_x = 0, mb_y = 0;
> + int i, s;
> + unsigned int *command_ptr;
> + VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
> + int log2_cu_size = pSequenceParameter->log2_min_luma_coding_block_size_minus3 + 3;
> + int log2_ctb_size =
> +pSequenceParameter->log2_diff_max_min_luma_coding_block_size +
> +log2_cu_size;
> +
> + int ctb_size = 1 << log2_ctb_size;
> + int num_mb_in_ctb = (ctb_size + 15)/16; /* 16x16 blocks along one CTB edge */
> + num_mb_in_ctb = num_mb_in_ctb * num_mb_in_ctb; /* 16x16 blocks per CTB */
> +
> + dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
> + command_ptr = vme_context->vme_batchbuffer.bo->virtual;
> +
> + for (s = 0; s < encode_state->num_slice_params_ext; s++) {
> + VAEncSliceParameterBufferHEVC *pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[s]->buffer;
> + int slice_mb_begin = pSliceParameter->slice_segment_address * num_mb_in_ctb; /* CTB units converted to 16x16-block units */
> + int slice_mb_number = pSliceParameter->num_ctu_in_slice *
> + num_mb_in_ctb;
> +
> + unsigned int mb_intra_ub;
> + int slice_mb_x = slice_mb_begin % mb_width; /* non-zero when the slice starts in the middle of a row */
> + for (i = 0; i < slice_mb_number; ) {
> + int mb_count = i + slice_mb_begin;
> + mb_x = mb_count % mb_width;
> + mb_y = mb_count / mb_width;
> + mb_intra_ub = 0;
> +
> + if (mb_x != 0) {
> + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
> + }
> + if (mb_y != 0) {
> + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
> + if (mb_x != 0)
> + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
> + if (mb_x != (mb_width -1))
> + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
> + }
> + if (i < mb_width) { /* first mb_width blocks of the slice: presumably their upper neighbours belong to another slice, so B/C/D are cleared */
> + if (i == 0)
> + mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_AE);
> + mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_BCD_MASK);
> + if ((i == (mb_width - 1)) && slice_mb_x) {
> + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
> + }
> + }
> +
> + if ((i == mb_width) && slice_mb_x) {
> + mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_D);
> + }
> +
> + *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
> + *command_ptr++ = kernel;
> + *command_ptr++ = 0;
> + *command_ptr++ = 0;
> + *command_ptr++ = 0;
> + *command_ptr++ = 0;
> +
> + /*inline data */
> + *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x);
> + *command_ptr++ = ( (1 << 16) | transform_8x8_mode_flag |
> + (mb_intra_ub << 8));
> +
> + *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
> + *command_ptr++ = 0;
> + i += 1;
> + }
> + }
> +
> + *command_ptr++ = MI_BATCH_BUFFER_END; /* terminate the second-level batch */
> + *command_ptr++ = 0;
> +
> + dri_bo_unmap(vme_context->vme_batchbuffer.bo);
> +}
> +
> +static void gen9_vme_hevc_pipeline_programing(VADriverContextP ctx, /* Choose the VME kernel, fill the per-macroblock batch buffer and emit the batch that jumps into it. */
> + struct encode_state *encode_state,
> + struct intel_encoder_context
> +*encoder_context) {
> + struct gen6_vme_context *vme_context = encoder_context->vme_context;
> + struct intel_batchbuffer *batch = encoder_context->base.batch;
> + VAEncSliceParameterBufferHEVC *pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
> + VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
> + int width_in_mbs = (pSequenceParameter->pic_width_in_luma_samples + 15)/16;
> + int height_in_mbs = (pSequenceParameter->pic_height_in_luma_samples + 15)/16;
> + int kernel_shader;
> + bool allow_hwscore = true;
> + int s;
> +
> + int log2_cu_size = pSequenceParameter->log2_min_luma_coding_block_size_minus3 + 3;
> + int log2_ctb_size =
> + pSequenceParameter->log2_diff_max_min_luma_coding_block_size +
> + log2_cu_size;
> +
> + int ctb_size = 1 << log2_ctb_size;
> + int num_mb_in_ctb = (ctb_size + 15)/16;
> + int transform_8x8_mode_flag = 1;
> + num_mb_in_ctb = num_mb_in_ctb * num_mb_in_ctb; /* 16x16 blocks per CTB */
> +
> + for (s = 0; s < encode_state->num_slice_params_ext; s++) { /* the scoreboard walker is only used when every slice starts at the beginning of a macroblock row */
> + pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[s]->buffer;
> + int slice_mb_begin = pSliceParameter->slice_segment_address * num_mb_in_ctb;
> + if ((slice_mb_begin % width_in_mbs)) {
> + allow_hwscore = false;
> + break;
> + }
> + }
> +
> + if (pSliceParameter->slice_type == SLICE_TYPE_I) { /* NOTE(review): pSliceParameter is whatever the loop above left behind (the last slice, or the one that triggered the break), not necessarily slice 0 */
> + kernel_shader = VME_INTRA_SHADER;
> + } else if (pSliceParameter->slice_type == SLICE_TYPE_P) {
> + kernel_shader = VME_INTER_SHADER;
> + } else {
> + kernel_shader = VME_BINTER_SHADER;
> + if (!allow_hwscore) /* without the scoreboard walker, B slices fall back to the inter (P) kernel */
> + kernel_shader = VME_INTER_SHADER;
> + }
> + if (allow_hwscore)
> + gen9wa_vme_hevc_walker_fill_vme_batchbuffer(ctx,
> + encode_state,
> + width_in_mbs, height_in_mbs,
> + kernel_shader,
> + transform_8x8_mode_flag,
> + encoder_context);
> + else
> + gen9_vme_hevc_fill_vme_batchbuffer(ctx,
> + encode_state,
> + width_in_mbs, height_in_mbs,
> + kernel_shader,
> + transform_8x8_mode_flag,
> + encoder_context);
> +
> + intel_batchbuffer_start_atomic(batch, 0x1000);
> + gen9_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch);
> + BEGIN_BATCH(batch, 3); /* three DWORDs: MI_BATCH_BUFFER_START, relocated address of vme_batchbuffer, 0 */
> + OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
> + OUT_RELOC(batch,
> + vme_context->vme_batchbuffer.bo,
> + I915_GEM_DOMAIN_COMMAND, 0,
> + 0);
> + OUT_BATCH(batch, 0);
> + ADVANCE_BATCH(batch);
> +
> + gen9_gpe_pipeline_end(ctx, &vme_context->gpe_context, batch);
> +
> + intel_batchbuffer_end_atomic(batch);
> +}
> +
> +static VAStatus gen9_vme_hevc_prepare(VADriverContextP ctx, /* Per-picture preparation: record the level, update cost tables, set up surfaces/interface/constants and program the pipeline. Always returns VA_STATUS_SUCCESS. */
> + struct encode_state *encode_state,
> + struct intel_encoder_context
> +*encoder_context) {
> + VAStatus vaStatus = VA_STATUS_SUCCESS;
> + VAEncSliceParameterBufferHEVC *pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
> + int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I; /* decided from the first slice only */
> + VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
> + struct gen6_vme_context *vme_context =
> +encoder_context->vme_context;
> +
> + /* here use the avc level for hevc vme */
> + if (!vme_context->hevc_level || /* net effect: hevc_level always ends up equal to general_level_idc */
> + (vme_context->hevc_level != pSequenceParameter->general_level_idc)) {
> + vme_context->hevc_level = pSequenceParameter->general_level_idc;
> + }
> +
> + intel_vme_hevc_update_mbmv_cost(ctx, encode_state,
> + encoder_context);
> +
> + /*Setup all the memory object*/
> + gen9_vme_hevc_surface_setup(ctx, encode_state, is_intra, encoder_context); /* return value is ignored */
> + gen9_vme_interface_setup(ctx, encode_state, encoder_context);
> + //gen9_vme_vme_state_setup(ctx, encode_state, is_intra, encoder_context);
> + gen9_vme_constant_setup(ctx, encode_state, encoder_context);
> +
> + /*Programing media pipeline*/
> + gen9_vme_hevc_pipeline_programing(ctx, encode_state,
> + encoder_context);
> +
> + return vaStatus;
> +}
> +
> +
> +static VAStatus /* HEVC entry point installed as encoder_context->vme_pipeline: init, prepare, run, stop. Always returns VA_STATUS_SUCCESS. */
> +gen9_vme_hevc_pipeline(VADriverContextP ctx,
> + VAProfile profile, /* not used in this function */
> + struct encode_state *encode_state,
> + struct intel_encoder_context *encoder_context) {
> + gen9_vme_media_init(ctx, encoder_context);
> + gen9_vme_hevc_prepare(ctx, encode_state, encoder_context); /* return value is ignored */
> + gen9_vme_run(ctx, encode_state, encoder_context);
> + gen9_vme_stop(ctx, encode_state, encoder_context);
> +
> + return VA_STATUS_SUCCESS;
> +}
> +
> +
> static void
> gen9_vme_context_destroy(void *context) { @@ -1172,6 +1650,12 @@
> Bool gen9_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *e
> vme_kernel_list = gen9_vme_mpeg2_kernels;
> encoder_context->vme_pipeline = gen9_vme_mpeg2_pipeline;
> i965_kernel_num = sizeof(gen9_vme_mpeg2_kernels) /
> sizeof(struct i965_kernel);
> + break;
> +
> + case CODEC_HEVC:
> + vme_kernel_list = gen9_vme_hevc_kernels;
> + encoder_context->vme_pipeline = gen9_vme_hevc_pipeline;
> + i965_kernel_num = sizeof(gen9_vme_hevc_kernels) /
> + sizeof(struct i965_kernel);
>
> break;
>
> diff --git a/src/i965_encoder.c b/src/i965_encoder.c index
> d924f5a..c9ff2ec 100644
> --- a/src/i965_encoder.c
> +++ b/src/i965_encoder.c
> @@ -39,10 +39,12 @@
> #include "i965_encoder.h"
> #include "gen6_vme.h"
> #include "gen6_mfc.h"
> +#include "gen9_mfc.h"
>
> extern Bool gen6_mfc_context_init(VADriverContextP ctx, struct
> intel_encoder_context *encoder_context); extern Bool
> gen6_vme_context_init(VADriverContextP ctx, struct
> intel_encoder_context *encoder_context); extern Bool
> gen7_mfc_context_init(VADriverContextP ctx, struct
> intel_encoder_context *encoder_context);
> +extern Bool gen9_hcpe_context_init(VADriverContextP ctx, struct
> +intel_encoder_context *encoder_context);
>
> static VAStatus
> intel_encoder_check_yuv_surface(VADriverContextP ctx, @@ -422,6
> +424,63 @@ error:
> }
>
> static VAStatus
> +intel_encoder_check_hevc_parameter(VADriverContextP ctx,
> +                                   struct encode_state *encode_state,
> +                                   struct intel_encoder_context *encoder_context)
> +{
> +    /*
> +     * Validate the HEVC picture parameters and cache the objects they name
> +     * in encode_state: the reconstructed surface, the coded buffer and up
> +     * to 15 reference surfaces.  Returns VA_STATUS_SUCCESS, or
> +     * VA_STATUS_ERROR_INVALID_PARAMETER when the current picture is flagged
> +     * invalid or any referenced surface/buffer cannot be resolved.
> +     */
> +    struct i965_driver_data *i965 = i965_driver_data(ctx);
> +    VAEncPictureParameterBufferHEVC *pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
> +    struct object_surface *obj_surface;
> +    struct object_buffer *obj_buffer;
> +    int slot = 0;
> +
> +    assert(!(pic_param->decoded_curr_pic.flags & VA_PICTURE_HEVC_INVALID));
> +
> +    if (pic_param->decoded_curr_pic.flags & VA_PICTURE_HEVC_INVALID)
> +        return VA_STATUS_ERROR_INVALID_PARAMETER;
> +
> +    obj_surface = SURFACE(pic_param->decoded_curr_pic.picture_id);
> +    assert(obj_surface); /* It is possible the store buffer isn't allocated yet */
> +
> +    if (!obj_surface)
> +        return VA_STATUS_ERROR_INVALID_PARAMETER;
> +
> +    encode_state->reconstructed_object = obj_surface;
> +
> +    obj_buffer = BUFFER(pic_param->coded_buf);
> +    assert(obj_buffer && obj_buffer->buffer_store && obj_buffer->buffer_store->bo);
> +
> +    if (!obj_buffer || !obj_buffer->buffer_store || !obj_buffer->buffer_store->bo)
> +        return VA_STATUS_ERROR_INVALID_PARAMETER;
> +
> +    encode_state->coded_buf_object = obj_buffer;
> +
> +    /* resolve reference frames until the first invalid/unused entry */
> +    while (slot < 15) {
> +        VAPictureHEVC *ref = &pic_param->reference_frames[slot];
> +
> +        if (ref->flags & VA_PICTURE_HEVC_INVALID ||
> +            ref->picture_id == VA_INVALID_SURFACE)
> +            break;
> +
> +        obj_surface = SURFACE(ref->picture_id);
> +        assert(obj_surface);
> +
> +        if (!obj_surface)
> +            return VA_STATUS_ERROR_INVALID_PARAMETER;
> +
> +        /* FIXME: Warning or Error ??? when the surface has no bo */
> +        encode_state->reference_objects[slot] = obj_surface->bo ? obj_surface : NULL;
> +        slot++;
> +    }
> +
> +    /* clear the remaining slots */
> +    while (slot < 15)
> +        encode_state->reference_objects[slot++] = NULL;
> +
> +    return VA_STATUS_SUCCESS;
> +}
> +static VAStatus
> intel_encoder_sanity_check_input(VADriverContextP ctx,
> VAProfile profile,
> struct encode_state *encode_state,
> @@ -459,6 +518,13 @@ intel_encoder_sanity_check_input(VADriverContextP ctx,
> break;
> }
>
> + case VAProfileHEVCMain: {
> + vaStatus = intel_encoder_check_hevc_parameter(ctx, encode_state, encoder_context);
> + if (vaStatus != VA_STATUS_SUCCESS)
> + goto out;
> + vaStatus = intel_encoder_check_yuv_surface(ctx, profile, encode_state, encoder_context);
> + break;
> + }
> default:
> vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
> break;
> @@ -554,6 +620,10 @@ intel_enc_hw_context_init(VADriverContextP ctx,
> encoder_context->codec = CODEC_JPEG;
> break;
>
> + case VAProfileHEVCMain:
> + encoder_context->codec = CODEC_HEVC;
> + break;
> +
> default:
> /* Never get here */
> assert(0);
> @@ -617,7 +687,9 @@ gen8_enc_hw_context_init(VADriverContextP ctx,
> struct object_config *obj_config) struct hw_context *
> gen9_enc_hw_context_init(VADriverContextP ctx, struct object_config
> *obj_config) {
> - if (obj_config->profile == VAProfileJPEGBaseline)
> + if (obj_config->profile == VAProfileHEVCMain) {
> + return intel_enc_hw_context_init(ctx, obj_config, gen9_vme_context_init, gen9_hcpe_context_init);
> + } else if (obj_config->profile == VAProfileJPEGBaseline)
> return intel_enc_hw_context_init(ctx, obj_config, gen8_vme_context_init, gen8_mfc_context_init);
> else
> return intel_enc_hw_context_init(ctx, obj_config,
> gen9_vme_context_init, gen9_mfc_context_init);
> --
> 1.9.1
>
> _______________________________________________
> Libva mailing list
> Libva at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/libva
More information about the Libva
mailing list