[Libva] [PATCH 1/5] VME pipeline for HEVC
Zhao, Yakui
yakui.zhao at intel.com
Mon Jan 5 21:30:22 PST 2015
On Mon, 2015-01-05 at 22:20 -0700, Qu,Pengfei wrote:
> Signed-off-by: Qu,Pengfei <Pengfei.Qu at intel.com>
The patch includes too many coding-style changes in addition to setting up
the VME pipeline for HEVC.
(For example: the code changes in intel_mfc_avc_prepare,
intel_vme_update_mbmv_cost, and gen7_vme_walker_fill_vme_batchbuffer.)
In fact, these are mainly coding-style updates. They are not related
to setting up the VME pipeline for HEVC.
It would be better to split this into two patches instead of
combining them together.
Thanks
Yakui
> ---
> src/gen6_mfc_common.c | 688 ++++++++++++++++++++++++-------------
> src/gen6_vme.h | 19 ++
> src/gen9_vme.c | 928 ++++++++++++++++++++++++++++++++++++++------------
> 3 files changed, 1180 insertions(+), 455 deletions(-)
>
> diff --git a/src/gen6_mfc_common.c b/src/gen6_mfc_common.c
> index fe41dac..a69f00a 100644
> --- a/src/gen6_mfc_common.c
> +++ b/src/gen6_mfc_common.c
> @@ -631,8 +631,8 @@ VAStatus intel_mfc_avc_prepare(VADriverContextP ctx,
> int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
>
> if (IS_GEN6(i965->intel.device_info)) {
> - /* On the SNB it should be fixed to 128 for the DMV buffer */
> - width_in_mbs = 128;
> + /* On the SNB it should be fixed to 128 for the DMV buffer */
> + width_in_mbs = 128;
> }
>
> for (j = 0; j < encode_state->num_slice_params_ext && enable_avc_ildb == 0; j++) {
> @@ -735,7 +735,7 @@ VAStatus intel_mfc_avc_prepare(VADriverContextP ctx,
> break;
> }
> }
> -
> +
> mfc_context->uncompressed_picture_source.bo = encode_state->input_yuv_object->bo;
> dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
>
> @@ -807,7 +807,7 @@ int intel_format_lutvalue(int value, int max)
> if (temp1 > temp2)
> ret = max;
> return ret;
> -
> +
> }
>
>
> @@ -842,40 +842,40 @@ void intel_vme_update_mbmv_cost(VADriverContextP ctx,
>
>
> if (encoder_context->rate_control_mode == VA_RC_CQP)
> - qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
> + qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
> else
> - qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
> -
> + qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
> +
> if (vme_state_message == NULL)
> - return;
> -
> + return;
> +
> assert(qp <= QP_MAX);
> lambda = intel_lambda_qp(qp);
> if (slice_type == SLICE_TYPE_I) {
> - vme_state_message[MODE_INTRA_16X16] = 0;
> - m_cost = lambda * 4;
> - vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
> - m_cost = lambda * 16;
> - vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
> - m_cost = lambda * 3;
> - vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
> + vme_state_message[MODE_INTRA_16X16] = 0;
> + m_cost = lambda * 4;
> + vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
> + m_cost = lambda * 16;
> + vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
> + m_cost = lambda * 3;
> + vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
> } else {
> - m_cost = 0;
> - vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f);
> - for (j = 1; j < 3; j++) {
> + m_cost = 0;
> + vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f);
> + for (j = 1; j < 3; j++) {
> m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
> m_cost = (int)m_costf;
> vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
> - }
> - mv_count = 3;
> - for (j = 4; j <= 64; j *= 2) {
> + }
> + mv_count = 3;
> + for (j = 4; j <= 64; j *= 2) {
> m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
> m_cost = (int)m_costf;
> vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f);
> mv_count++;
> - }
> + }
>
> - if (qp <= 25) {
> + if (qp <= 25) {
> vme_state_message[MODE_INTRA_16X16] = 0x4a;
> vme_state_message[MODE_INTRA_8X8] = 0x4a;
> vme_state_message[MODE_INTRA_4X4] = 0x4a;
> @@ -887,17 +887,17 @@ void intel_vme_update_mbmv_cost(VADriverContextP ctx,
> vme_state_message[MODE_INTER_4X4] = 0x4a;
> vme_state_message[MODE_INTER_BWD] = 0x2a;
> return;
> - }
> - m_costf = lambda * 10;
> - vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
> - m_cost = lambda * 14;
> - vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
> - m_cost = lambda * 24;
> - vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
> - m_costf = lambda * 3.5;
> - m_cost = m_costf;
> - vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
> - if (slice_type == SLICE_TYPE_P) {
> + }
> + m_costf = lambda * 10;
> + vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
> + m_cost = lambda * 14;
> + vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
> + m_cost = lambda * 24;
> + vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
> + m_costf = lambda * 3.5;
> + m_cost = m_costf;
> + vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
> + if (slice_type == SLICE_TYPE_P) {
> m_costf = lambda * 2.5;
> m_cost = m_costf;
> vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
> @@ -915,7 +915,7 @@ void intel_vme_update_mbmv_cost(VADriverContextP ctx,
> vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
> /* BWD is not used in P-frame */
> vme_state_message[MODE_INTER_BWD] = 0;
> - } else {
> + } else {
> m_costf = lambda * 2.5;
> m_cost = m_costf;
> vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
> @@ -934,7 +934,7 @@ void intel_vme_update_mbmv_cost(VADriverContextP ctx,
> m_costf = lambda * 1.5;
> m_cost = m_costf;
> vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);
> - }
> + }
> }
> }
>
> @@ -961,7 +961,7 @@ gen7_vme_scoreboard_init(VADriverContextP ctx, struct gen6_vme_context *vme_cont
> vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y1 = -1;
> vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x2 = 1;
> vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y2 = -1;
> -
> +
> vme_context->gpe_context.vfe_desc7.dword = 0;
> return;
> }
> @@ -974,7 +974,7 @@ static inline int loop_in_bounds(int x_index, int y_index, int first_mb, int num
> return -1;
> if (y_index < 0 || y_index >= mb_height)
> return -1;
> -
> +
> mb_index = y_index * mb_width + x_index;
> if (mb_index < first_mb || mb_index > (first_mb + num_mb))
> return -1;
> @@ -1000,103 +1000,103 @@ gen7_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx,
> command_ptr = vme_context->vme_batchbuffer.bo->virtual;
>
> for (s = 0; s < encode_state->num_slice_params_ext; s++) {
> - VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
> - int first_mb = pSliceParameter->macroblock_address;
> - int num_mb = pSliceParameter->num_macroblocks;
> - unsigned int mb_intra_ub, score_dep;
> - int x_outer, y_outer, x_inner, y_inner;
> - int xtemp_outer = 0;
> -
> - x_outer = first_mb % mb_width;
> - y_outer = first_mb / mb_width;
> - mb_row = y_outer;
> -
> - for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
> - x_inner = x_outer;
> - y_inner = y_outer;
> - for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
> - mb_intra_ub = 0;
> - score_dep = 0;
> - if (x_inner != 0) {
> - mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
> - score_dep |= MB_SCOREBOARD_A;
> - }
> - if (y_inner != mb_row) {
> - mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
> - score_dep |= MB_SCOREBOARD_B;
> - if (x_inner != 0)
> - mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
> - if (x_inner != (mb_width -1)) {
> - mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
> - score_dep |= MB_SCOREBOARD_C;
> + VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
> + int first_mb = pSliceParameter->macroblock_address;
> + int num_mb = pSliceParameter->num_macroblocks;
> + unsigned int mb_intra_ub, score_dep;
> + int x_outer, y_outer, x_inner, y_inner;
> + int xtemp_outer = 0;
> +
> + x_outer = first_mb % mb_width;
> + y_outer = first_mb / mb_width;
> + mb_row = y_outer;
> +
> + for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
> + x_inner = x_outer;
> + y_inner = y_outer;
> + for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
> + mb_intra_ub = 0;
> + score_dep = 0;
> + if (x_inner != 0) {
> + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
> + score_dep |= MB_SCOREBOARD_A;
> + }
> + if (y_inner != mb_row) {
> + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
> + score_dep |= MB_SCOREBOARD_B;
> + if (x_inner != 0)
> + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
> + if (x_inner != (mb_width -1)) {
> + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
> + score_dep |= MB_SCOREBOARD_C;
> }
> - }
> -
> - *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
> - *command_ptr++ = kernel;
> - *command_ptr++ = USE_SCOREBOARD;
> - /* Indirect data */
> - *command_ptr++ = 0;
> - /* the (X, Y) term of scoreboard */
> - *command_ptr++ = ((y_inner << 16) | x_inner);
> - *command_ptr++ = score_dep;
> - /*inline data */
> - *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
> - *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
> - x_inner -= 2;
> - y_inner += 1;
> - }
> - x_outer += 1;
> - }
> -
> - xtemp_outer = mb_width - 2;
> - if (xtemp_outer < 0)
> + }
> +
> + *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
> + *command_ptr++ = kernel;
> + *command_ptr++ = USE_SCOREBOARD;
> + /* Indirect data */
> + *command_ptr++ = 0;
> + /* the (X, Y) term of scoreboard */
> + *command_ptr++ = ((y_inner << 16) | x_inner);
> + *command_ptr++ = score_dep;
> + /*inline data */
> + *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
> + *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
> + x_inner -= 2;
> + y_inner += 1;
> + }
> + x_outer += 1;
> + }
> +
> + xtemp_outer = mb_width - 2;
> + if (xtemp_outer < 0)
> xtemp_outer = 0;
> - x_outer = xtemp_outer;
> - y_outer = first_mb / mb_width;
> - for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
> - y_inner = y_outer;
> - x_inner = x_outer;
> - for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
> - mb_intra_ub = 0;
> - score_dep = 0;
> - if (x_inner != 0) {
> - mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
> - score_dep |= MB_SCOREBOARD_A;
> - }
> - if (y_inner != mb_row) {
> - mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
> - score_dep |= MB_SCOREBOARD_B;
> - if (x_inner != 0)
> - mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
> -
> - if (x_inner != (mb_width -1)) {
> - mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
> - score_dep |= MB_SCOREBOARD_C;
> + x_outer = xtemp_outer;
> + y_outer = first_mb / mb_width;
> + for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
> + y_inner = y_outer;
> + x_inner = x_outer;
> + for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
> + mb_intra_ub = 0;
> + score_dep = 0;
> + if (x_inner != 0) {
> + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
> + score_dep |= MB_SCOREBOARD_A;
> + }
> + if (y_inner != mb_row) {
> + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
> + score_dep |= MB_SCOREBOARD_B;
> + if (x_inner != 0)
> + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
> +
> + if (x_inner != (mb_width -1)) {
> + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
> + score_dep |= MB_SCOREBOARD_C;
> }
> - }
> -
> - *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
> - *command_ptr++ = kernel;
> - *command_ptr++ = USE_SCOREBOARD;
> - /* Indirect data */
> - *command_ptr++ = 0;
> - /* the (X, Y) term of scoreboard */
> - *command_ptr++ = ((y_inner << 16) | x_inner);
> - *command_ptr++ = score_dep;
> - /*inline data */
> - *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
> - *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
> -
> - x_inner -= 2;
> - y_inner += 1;
> - }
> - x_outer++;
> - if (x_outer >= mb_width) {
> - y_outer += 1;
> - x_outer = xtemp_outer;
> - }
> - }
> + }
> +
> + *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
> + *command_ptr++ = kernel;
> + *command_ptr++ = USE_SCOREBOARD;
> + /* Indirect data */
> + *command_ptr++ = 0;
> + /* the (X, Y) term of scoreboard */
> + *command_ptr++ = ((y_inner << 16) | x_inner);
> + *command_ptr++ = score_dep;
> + /*inline data */
> + *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
> + *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
> +
> + x_inner -= 2;
> + y_inner += 1;
> + }
> + x_outer++;
> + if (x_outer >= mb_width) {
> + y_outer += 1;
> + x_outer = xtemp_outer;
> + }
> + }
> }
>
> *command_ptr++ = 0;
> @@ -1270,7 +1270,7 @@ void intel_vme_mpeg2_state_setup(VADriverContextP ctx,
> */
> vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
> vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
> -
> +
> vme_state_message[MODE_INTER_16X8] = 0;
> vme_state_message[MODE_INTER_8X8] = 0;
> vme_state_message[MODE_INTER_8X4] = 0;
> @@ -1300,105 +1300,105 @@ gen7_vme_mpeg2_walker_fill_vme_batchbuffer(VADriverContextP ctx,
> command_ptr = vme_context->vme_batchbuffer.bo->virtual;
>
> {
> - unsigned int mb_intra_ub, score_dep;
> - int x_outer, y_outer, x_inner, y_inner;
> - int xtemp_outer = 0;
> - int first_mb = 0;
> - int num_mb = mb_width * mb_height;
> -
> - x_outer = 0;
> - y_outer = 0;
> -
> -
> - for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
> - x_inner = x_outer;
> - y_inner = y_outer;
> - for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
> - mb_intra_ub = 0;
> - score_dep = 0;
> - if (x_inner != 0) {
> - mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
> - score_dep |= MB_SCOREBOARD_A;
> - }
> - if (y_inner != 0) {
> - mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
> - score_dep |= MB_SCOREBOARD_B;
> -
> - if (x_inner != 0)
> - mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
> -
> - if (x_inner != (mb_width -1)) {
> - mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
> - score_dep |= MB_SCOREBOARD_C;
> - }
> - }
> -
> - *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
> - *command_ptr++ = kernel;
> - *command_ptr++ = MPEG2_SCOREBOARD;
> - /* Indirect data */
> - *command_ptr++ = 0;
> - /* the (X, Y) term of scoreboard */
> - *command_ptr++ = ((y_inner << 16) | x_inner);
> - *command_ptr++ = score_dep;
> - /*inline data */
> - *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
> - *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
> - x_inner -= 2;
> - y_inner += 1;
> - }
> - x_outer += 1;
> - }
> -
> - xtemp_outer = mb_width - 2;
> - if (xtemp_outer < 0)
> + unsigned int mb_intra_ub, score_dep;
> + int x_outer, y_outer, x_inner, y_inner;
> + int xtemp_outer = 0;
> + int first_mb = 0;
> + int num_mb = mb_width * mb_height;
> +
> + x_outer = 0;
> + y_outer = 0;
> +
> +
> + for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
> + x_inner = x_outer;
> + y_inner = y_outer;
> + for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
> + mb_intra_ub = 0;
> + score_dep = 0;
> + if (x_inner != 0) {
> + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
> + score_dep |= MB_SCOREBOARD_A;
> + }
> + if (y_inner != 0) {
> + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
> + score_dep |= MB_SCOREBOARD_B;
> +
> + if (x_inner != 0)
> + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
> +
> + if (x_inner != (mb_width -1)) {
> + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
> + score_dep |= MB_SCOREBOARD_C;
> + }
> + }
> +
> + *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
> + *command_ptr++ = kernel;
> + *command_ptr++ = MPEG2_SCOREBOARD;
> + /* Indirect data */
> + *command_ptr++ = 0;
> + /* the (X, Y) term of scoreboard */
> + *command_ptr++ = ((y_inner << 16) | x_inner);
> + *command_ptr++ = score_dep;
> + /*inline data */
> + *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
> + *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
> + x_inner -= 2;
> + y_inner += 1;
> + }
> + x_outer += 1;
> + }
> +
> + xtemp_outer = mb_width - 2;
> + if (xtemp_outer < 0)
> xtemp_outer = 0;
> - x_outer = xtemp_outer;
> - y_outer = 0;
> - for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
> - y_inner = y_outer;
> - x_inner = x_outer;
> - for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
> - mb_intra_ub = 0;
> - score_dep = 0;
> - if (x_inner != 0) {
> - mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
> - score_dep |= MB_SCOREBOARD_A;
> - }
> - if (y_inner != 0) {
> - mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
> - score_dep |= MB_SCOREBOARD_B;
> -
> - if (x_inner != 0)
> - mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
> -
> - if (x_inner != (mb_width -1)) {
> - mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
> - score_dep |= MB_SCOREBOARD_C;
> - }
> - }
> -
> - *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
> - *command_ptr++ = kernel;
> - *command_ptr++ = MPEG2_SCOREBOARD;
> - /* Indirect data */
> - *command_ptr++ = 0;
> - /* the (X, Y) term of scoreboard */
> - *command_ptr++ = ((y_inner << 16) | x_inner);
> - *command_ptr++ = score_dep;
> - /*inline data */
> - *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
> - *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
> -
> - x_inner -= 2;
> - y_inner += 1;
> - }
> - x_outer++;
> - if (x_outer >= mb_width) {
> - y_outer += 1;
> - x_outer = xtemp_outer;
> - }
> - }
> + x_outer = xtemp_outer;
> + y_outer = 0;
> + for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
> + y_inner = y_outer;
> + x_inner = x_outer;
> + for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
> + mb_intra_ub = 0;
> + score_dep = 0;
> + if (x_inner != 0) {
> + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
> + score_dep |= MB_SCOREBOARD_A;
> + }
> + if (y_inner != 0) {
> + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
> + score_dep |= MB_SCOREBOARD_B;
> +
> + if (x_inner != 0)
> + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
> +
> + if (x_inner != (mb_width -1)) {
> + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
> + score_dep |= MB_SCOREBOARD_C;
> + }
> + }
> +
> + *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
> + *command_ptr++ = kernel;
> + *command_ptr++ = MPEG2_SCOREBOARD;
> + /* Indirect data */
> + *command_ptr++ = 0;
> + /* the (X, Y) term of scoreboard */
> + *command_ptr++ = ((y_inner << 16) | x_inner);
> + *command_ptr++ = score_dep;
> + /*inline data */
> + *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
> + *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
> +
> + x_inner -= 2;
> + y_inner += 1;
> + }
> + x_outer++;
> + if (x_outer >= mb_width) {
> + y_outer += 1;
> + x_outer = xtemp_outer;
> + }
> + }
> }
>
> *command_ptr++ = 0;
> @@ -1406,7 +1406,7 @@ gen7_vme_mpeg2_walker_fill_vme_batchbuffer(VADriverContextP ctx,
>
> dri_bo_unmap(vme_context->vme_batchbuffer.bo);
> return;
> -}
> + }
>
> static int
> avc_temporal_find_surface(VAPictureH264 *curr_pic,
> @@ -1652,3 +1652,225 @@ void intel_avc_slice_insert_packed_data(VADriverContextP ctx,
> return;
> }
>
> +/* HEVC */
> +static int
> +hevc_temporal_find_surface(VAPictureHEVC *curr_pic,
> + VAPictureHEVC *ref_list,
> + int num_pictures,
> + int dir)
> +{
> + int i, found = -1, min = 0x7FFFFFFF;
> +
> + for (i = 0; i < num_pictures; i++) {
> + int tmp;
> +
> + if ((ref_list[i].flags & VA_PICTURE_HEVC_INVALID) ||
> + (ref_list[i].picture_id == VA_INVALID_SURFACE))
> + break;
> +
> + tmp = curr_pic->pic_order_cnt - ref_list[i].pic_order_cnt;
> +
> + if (dir)
> + tmp = -tmp;
> +
> + if (tmp > 0 && tmp < min) {
> + min = tmp;
> + found = i;
> + }
> + }
> +
> + return found;
> +}
> +void
> +intel_hevc_vme_reference_state(VADriverContextP ctx,
> + struct encode_state *encode_state,
> + struct intel_encoder_context *encoder_context,
> + int list_index,
> + int surface_index,
> + void (* vme_source_surface_state)(
> + VADriverContextP ctx,
> + int index,
> + struct object_surface *obj_surface,
> + struct intel_encoder_context *encoder_context))
> +{
> + struct gen6_vme_context *vme_context = encoder_context->vme_context;
> + struct object_surface *obj_surface = NULL;
> + struct i965_driver_data *i965 = i965_driver_data(ctx);
> + VASurfaceID ref_surface_id;
> + VAEncPictureParameterBufferHEVC *pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
> + VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
> + int max_num_references;
> + VAPictureHEVC *curr_pic;
> + VAPictureHEVC *ref_list;
> + int ref_idx;
> +
> + if (list_index == 0) {
> + max_num_references = pic_param->num_ref_idx_l0_default_active_minus1 + 1;
> + ref_list = slice_param->ref_pic_list0;
> + } else {
> + max_num_references = pic_param->num_ref_idx_l1_default_active_minus1 + 1;
> + ref_list = slice_param->ref_pic_list1;
> + }
> +
> + if (max_num_references == 1) {
> + if (list_index == 0) {
> + ref_surface_id = slice_param->ref_pic_list0[0].picture_id;
> + vme_context->used_references[0] = &slice_param->ref_pic_list0[0];
> + } else {
> + ref_surface_id = slice_param->ref_pic_list1[0].picture_id;
> + vme_context->used_references[1] = &slice_param->ref_pic_list1[0];
> + }
> +
> + if (ref_surface_id != VA_INVALID_SURFACE)
> + obj_surface = SURFACE(ref_surface_id);
> +
> + if (!obj_surface ||
> + !obj_surface->bo) {
> + obj_surface = encode_state->reference_objects[list_index];
> + vme_context->used_references[list_index] = &pic_param->reference_frames[list_index];
> + }
> +
> + ref_idx = 0;
> + } else {
> + curr_pic = &pic_param->decoded_curr_pic;
> +
> + /* select the reference frame in temporal space */
> + ref_idx = hevc_temporal_find_surface(curr_pic, ref_list, max_num_references, list_index == 1);
> + ref_surface_id = ref_list[ref_idx].picture_id;
> +
> + if (ref_surface_id != VA_INVALID_SURFACE) /* otherwise warning later */
> + obj_surface = SURFACE(ref_surface_id);
> +
> + vme_context->used_reference_objects[list_index] = obj_surface;
> + vme_context->used_references[list_index] = &ref_list[ref_idx];
> + }
> +
> + if (obj_surface &&
> + obj_surface->bo) {
> + assert(ref_idx >= 0);
> + vme_context->used_reference_objects[list_index] = obj_surface;
> + vme_source_surface_state(ctx, surface_index, obj_surface, encoder_context);
> + vme_context->ref_index_in_mb[list_index] = (ref_idx << 24 |
> + ref_idx << 16 |
> + ref_idx << 8 |
> + ref_idx);
> + } else {
> + vme_context->used_reference_objects[list_index] = NULL;
> + vme_context->used_references[list_index] = NULL;
> + vme_context->ref_index_in_mb[list_index] = 0;
> + }
> +}
> +
> +void intel_vme_hevc_update_mbmv_cost(VADriverContextP ctx,
> + struct encode_state *encode_state,
> + struct intel_encoder_context *encoder_context)
> +{
> + //struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
> + struct gen6_vme_context *vme_context = encoder_context->vme_context;
> + VAEncPictureParameterBufferHEVC *pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
> + VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
> + int qp, m_cost, j, mv_count;
> + uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message);
> + float lambda, m_costf;
> +
> + /* here no SI SP slice for HEVC, do not need slice fixup */
> + int slice_type = slice_param->slice_type;
> +
> +
> + /* to do for CBR*/
> + //if (encoder_context->rate_control_mode == VA_RC_CQP)
> + qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
> + //else
> + //qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
> +
> + if (vme_state_message == NULL)
> + return;
> +
> + assert(qp <= QP_MAX);
> + lambda = intel_lambda_qp(qp);
> + if (slice_type == SLICE_TYPE_I) {
> + vme_state_message[MODE_INTRA_16X16] = 0;
> + m_cost = lambda * 4;
> + vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
> + m_cost = lambda * 16;
> + vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
> + m_cost = lambda * 3;
> + vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
> + } else {
> + m_cost = 0;
> + vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f);
> + for (j = 1; j < 3; j++) {
> + m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
> + m_cost = (int)m_costf;
> + vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
> + }
> + mv_count = 3;
> + for (j = 4; j <= 64; j *= 2) {
> + m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
> + m_cost = (int)m_costf;
> + vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f);
> + mv_count++;
> + }
> +
> + if (qp <= 25) {
> + vme_state_message[MODE_INTRA_16X16] = 0x4a;
> + vme_state_message[MODE_INTRA_8X8] = 0x4a;
> + vme_state_message[MODE_INTRA_4X4] = 0x4a;
> + vme_state_message[MODE_INTRA_NONPRED] = 0x4a;
> + vme_state_message[MODE_INTER_16X16] = 0x4a;
> + vme_state_message[MODE_INTER_16X8] = 0x4a;
> + vme_state_message[MODE_INTER_8X8] = 0x4a;
> + vme_state_message[MODE_INTER_8X4] = 0x4a;
> + vme_state_message[MODE_INTER_4X4] = 0x4a;
> + vme_state_message[MODE_INTER_BWD] = 0x2a;
> + return;
> + }
> + m_costf = lambda * 10;
> + vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
> + m_cost = lambda * 14;
> + vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
> + m_cost = lambda * 24;
> + vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
> + m_costf = lambda * 3.5;
> + m_cost = m_costf;
> + vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
> + if (slice_type == SLICE_TYPE_P) {
> + m_costf = lambda * 2.5;
> + m_cost = m_costf;
> + vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
> + m_costf = lambda * 4;
> + m_cost = m_costf;
> + vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
> + m_costf = lambda * 1.5;
> + m_cost = m_costf;
> + vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
> + m_costf = lambda * 3;
> + m_cost = m_costf;
> + vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
> + m_costf = lambda * 5;
> + m_cost = m_costf;
> + vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
> + /* BWD is not used in P-frame */
> + vme_state_message[MODE_INTER_BWD] = 0;
> + } else {
> + m_costf = lambda * 2.5;
> + m_cost = m_costf;
> + vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
> + m_costf = lambda * 5.5;
> + m_cost = m_costf;
> + vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
> + m_costf = lambda * 3.5;
> + m_cost = m_costf;
> + vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
> + m_costf = lambda * 5.0;
> + m_cost = m_costf;
> + vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
> + m_costf = lambda * 6.5;
> + m_cost = m_costf;
> + vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
> + m_costf = lambda * 1.5;
> + m_cost = m_costf;
> + vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);
> + }
> + }
> +}
> diff --git a/src/gen6_vme.h b/src/gen6_vme.h
> index bc62c14..c9d6b48 100644
> --- a/src/gen6_vme.h
> +++ b/src/gen6_vme.h
> @@ -83,6 +83,7 @@ struct gen6_vme_context
> unsigned long surface_state_offset);
> void *vme_state_message;
> unsigned int h264_level;
> + unsigned int hevc_level;
> unsigned int video_coding_type;
> unsigned int vme_kernel_sum;
> unsigned int mpeg2_level;
> @@ -174,6 +175,24 @@ intel_avc_vme_reference_state(VADriverContextP ctx,
> struct object_surface *obj_surface,
> struct intel_encoder_context *encoder_context));
>
> +/* HEVC */
> +void
> +intel_hevc_vme_reference_state(VADriverContextP ctx,
> + struct encode_state *encode_state,
> + struct intel_encoder_context *encoder_context,
> + int list_index,
> + int surface_index,
> + void (* vme_source_surface_state)(
> + VADriverContextP ctx,
> + int index,
> + struct object_surface *obj_surface,
> + struct intel_encoder_context *encoder_context));
> +
> +void intel_vme_hevc_update_mbmv_cost(VADriverContextP ctx,
> + struct encode_state *encode_state,
> + struct intel_encoder_context *encoder_context);
> +
> +
> extern Bool gen8_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context);
>
> extern Bool gen9_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context);
> diff --git a/src/gen9_vme.c b/src/gen9_vme.c
> index b4310f2..9239645 100644
> --- a/src/gen9_vme.c
> +++ b/src/gen9_vme.c
> @@ -51,13 +51,13 @@
>
> #define VME_INTRA_SHADER 0
> #define VME_INTER_SHADER 1
> -#define VME_BINTER_SHADER 2
> +#define VME_BINTER_SHADER 2
>
> #define CURBE_ALLOCATION_SIZE 37 /* in 256-bit */
> #define CURBE_TOTAL_DATA_LENGTH (4 * 32) /* in byte, it should be less than or equal to CURBE_ALLOCATION_SIZE * 32 */
> #define CURBE_URB_ENTRY_LENGTH 4 /* in 256-bit, it should be less than or equal to CURBE_TOTAL_DATA_LENGTH / 32 */
>
> -#define VME_MSG_LENGTH 32
> +#define VME_MSG_LENGTH 32
>
> static const uint32_t gen9_vme_intra_frame[][4] = {
> #include "shaders/vme/intra_frame_gen9.g9b"
> @@ -120,6 +120,43 @@ static struct i965_kernel gen9_vme_mpeg2_kernels[] = {
> },
> };
>
> +/* HEVC */
> +
> +static const uint32_t gen9_vme_hevc_intra_frame[][4] = {
> +#include "shaders/vme/intra_frame_gen9.g9b"
> +};
> +
> +static const uint32_t gen9_vme_hevc_inter_frame[][4] = {
> +#include "shaders/vme/inter_frame_gen9.g9b"
> +};
> +
> +static const uint32_t gen9_vme_hevc_inter_bframe[][4] = {
> +#include "shaders/vme/inter_bframe_gen9.g9b"
> +};
> +
> +static struct i965_kernel gen9_vme_hevc_kernels[] = {
> + {
> + "VME Intra Frame",
> + VME_INTRA_SHADER, /*index*/
> + gen9_vme_hevc_intra_frame,
> + sizeof(gen9_vme_hevc_intra_frame),
> + NULL
> + },
> + {
> + "VME inter Frame",
> + VME_INTER_SHADER,
> + gen9_vme_hevc_inter_frame,
> + sizeof(gen9_vme_hevc_inter_frame),
> + NULL
> + },
> + {
> + "VME inter BFrame",
> + VME_BINTER_SHADER,
> + gen9_vme_hevc_inter_bframe,
> + sizeof(gen9_vme_hevc_inter_bframe),
> + NULL
> + }
> +};
> /* only used for VME source surface state */
> static void
> gen9_vme_source_surface_state(VADriverContextP ctx,
> @@ -226,13 +263,13 @@ gen9_vme_output_vme_batchbuffer_setup(VADriverContextP ctx,
> "VME batchbuffer",
> vme_context->vme_batchbuffer.num_blocks * vme_context->vme_batchbuffer.size_block,
> 0x1000);
> - /*
> + /*
> vme_context->vme_buffer_suface_setup(ctx,
> &vme_context->gpe_context,
> &vme_context->vme_batchbuffer,
> BINDING_TABLE_OFFSET(index),
> SURFACE_STATE_OFFSET(index));
> - */
> + */
> }
>
> static VAStatus
> @@ -251,15 +288,15 @@ gen9_vme_surface_setup(VADriverContextP ctx,
> gen9_vme_media_chroma_source_surface_state(ctx, 6, obj_surface, encoder_context);
>
> if (!is_intra) {
> - VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
> - int slice_type;
> + VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
> + int slice_type;
>
> - slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
> - assert(slice_type != SLICE_TYPE_I && slice_type != SLICE_TYPE_SI);
> + slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
> + assert(slice_type != SLICE_TYPE_I && slice_type != SLICE_TYPE_SI);
>
> - intel_avc_vme_reference_state(ctx, encode_state, encoder_context, 0, 1, gen9_vme_source_surface_state);
> + intel_avc_vme_reference_state(ctx, encode_state, encoder_context, 0, 1, gen9_vme_source_surface_state);
>
> - if (slice_type == SLICE_TYPE_B)
> + if (slice_type == SLICE_TYPE_B)
> intel_avc_vme_reference_state(ctx, encode_state, encoder_context, 1, 2, gen9_vme_source_surface_state);
> }
>
> @@ -330,6 +367,13 @@ static VAStatus gen9_vme_constant_setup(VADriverContextP ctx,
> }
> } else if (encoder_context->codec == CODEC_MPEG2) {
> mv_num = 2;
> + }else if (encoder_context->codec == CODEC_HEVC) {
> + if (vme_context->hevc_level >= 30*3) {
> + mv_num = 16;
> +
> + if (vme_context->hevc_level >= 31*3)
> + mv_num = 8;
> + }/* use the avc level setting */
> }
>
> vme_state_message[31] = mv_num;
> @@ -388,107 +432,107 @@ gen9wa_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx,
> command_ptr = vme_context->vme_batchbuffer.bo->virtual;
>
> for (s = 0; s < encode_state->num_slice_params_ext; s++) {
> - VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
> - int first_mb = pSliceParameter->macroblock_address;
> - int num_mb = pSliceParameter->num_macroblocks;
> - unsigned int mb_intra_ub, score_dep;
> - int x_outer, y_outer, x_inner, y_inner;
> - int xtemp_outer = 0;
> -
> - x_outer = first_mb % mb_width;
> - y_outer = first_mb / mb_width;
> - mb_row = y_outer;
> -
> - for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
> - x_inner = x_outer;
> - y_inner = y_outer;
> - for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
> - mb_intra_ub = 0;
> - score_dep = 0;
> - if (x_inner != 0) {
> - mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
> - score_dep |= MB_SCOREBOARD_A;
> + VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
> + int first_mb = pSliceParameter->macroblock_address;
> + int num_mb = pSliceParameter->num_macroblocks;
> + unsigned int mb_intra_ub, score_dep;
> + int x_outer, y_outer, x_inner, y_inner;
> + int xtemp_outer = 0;
> +
> + x_outer = first_mb % mb_width;
> + y_outer = first_mb / mb_width;
> + mb_row = y_outer;
> +
> + for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
> + x_inner = x_outer;
> + y_inner = y_outer;
> + for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
> + mb_intra_ub = 0;
> + score_dep = 0;
> + if (x_inner != 0) {
> + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
> + score_dep |= MB_SCOREBOARD_A;
> }
> - if (y_inner != mb_row) {
> - mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
> - score_dep |= MB_SCOREBOARD_B;
> - if (x_inner != 0)
> - mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
> - if (x_inner != (mb_width -1)) {
> - mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
> - score_dep |= MB_SCOREBOARD_C;
> + if (y_inner != mb_row) {
> + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
> + score_dep |= MB_SCOREBOARD_B;
> + if (x_inner != 0)
> + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
> + if (x_inner != (mb_width -1)) {
> + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
> + score_dep |= MB_SCOREBOARD_C;
> }
> - }
> -
> - *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
> - *command_ptr++ = kernel;
> - *command_ptr++ = USE_SCOREBOARD;
> - /* Indirect data */
> - *command_ptr++ = 0;
> - /* the (X, Y) term of scoreboard */
> - *command_ptr++ = ((y_inner << 16) | x_inner);
> - *command_ptr++ = score_dep;
> - /*inline data */
> - *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
> - *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
> + }
> +
> + *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
> + *command_ptr++ = kernel;
> + *command_ptr++ = USE_SCOREBOARD;
> + /* Indirect data */
> + *command_ptr++ = 0;
> + /* the (X, Y) term of scoreboard */
> + *command_ptr++ = ((y_inner << 16) | x_inner);
> + *command_ptr++ = score_dep;
> + /*inline data */
> + *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
> + *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
> *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
> *command_ptr++ = 0;
>
> - x_inner -= 2;
> - y_inner += 1;
> - }
> - x_outer += 1;
> - }
> + x_inner -= 2;
> + y_inner += 1;
> + }
> + x_outer += 1;
> + }
>
> - xtemp_outer = mb_width - 2;
> - if (xtemp_outer < 0)
> + xtemp_outer = mb_width - 2;
> + if (xtemp_outer < 0)
> xtemp_outer = 0;
> - x_outer = xtemp_outer;
> - y_outer = first_mb / mb_width;
> - for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
> + x_outer = xtemp_outer;
> + y_outer = first_mb / mb_width;
> + for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
> y_inner = y_outer;
> - x_inner = x_outer;
> - for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
> - mb_intra_ub = 0;
> - score_dep = 0;
> - if (x_inner != 0) {
> - mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
> - score_dep |= MB_SCOREBOARD_A;
> + x_inner = x_outer;
> + for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
> + mb_intra_ub = 0;
> + score_dep = 0;
> + if (x_inner != 0) {
> + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
> + score_dep |= MB_SCOREBOARD_A;
> }
> - if (y_inner != mb_row) {
> - mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
> - score_dep |= MB_SCOREBOARD_B;
> - if (x_inner != 0)
> - mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
> -
> - if (x_inner != (mb_width -1)) {
> - mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
> - score_dep |= MB_SCOREBOARD_C;
> + if (y_inner != mb_row) {
> + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
> + score_dep |= MB_SCOREBOARD_B;
> + if (x_inner != 0)
> + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
> +
> + if (x_inner != (mb_width -1)) {
> + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
> + score_dep |= MB_SCOREBOARD_C;
> }
> - }
> -
> - *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
> - *command_ptr++ = kernel;
> - *command_ptr++ = USE_SCOREBOARD;
> - /* Indirect data */
> - *command_ptr++ = 0;
> - /* the (X, Y) term of scoreboard */
> - *command_ptr++ = ((y_inner << 16) | x_inner);
> - *command_ptr++ = score_dep;
> - /*inline data */
> - *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
> - *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
> + }
> +
> + *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
> + *command_ptr++ = kernel;
> + *command_ptr++ = USE_SCOREBOARD;
> + /* Indirect data */
> + *command_ptr++ = 0;
> + /* the (X, Y) term of scoreboard */
> + *command_ptr++ = ((y_inner << 16) | x_inner);
> + *command_ptr++ = score_dep;
> + /*inline data */
> + *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
> + *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
>
> *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
> *command_ptr++ = 0;
> - x_inner -= 2;
> - y_inner += 1;
> - }
> - x_outer++;
> - if (x_outer >= mb_width) {
> - y_outer += 1;
> - x_outer = xtemp_outer;
> - }
> + x_inner -= 2;
> + y_inner += 1;
> + }
> + x_outer++;
> + if (x_outer >= mb_width) {
> + y_outer += 1;
> + x_outer = xtemp_outer;
> + }
> }
> }
>
> @@ -519,34 +563,34 @@ gen9_vme_fill_vme_batchbuffer(VADriverContextP ctx,
> int slice_mb_begin = pSliceParameter->macroblock_address;
> int slice_mb_number = pSliceParameter->num_macroblocks;
> unsigned int mb_intra_ub;
> - int slice_mb_x = pSliceParameter->macroblock_address % mb_width;
> + int slice_mb_x = pSliceParameter->macroblock_address % mb_width;
> for (i = 0; i < slice_mb_number; ) {
> int mb_count = i + slice_mb_begin;
> mb_x = mb_count % mb_width;
> mb_y = mb_count / mb_width;
> - mb_intra_ub = 0;
> - if (mb_x != 0) {
> - mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
> - }
> - if (mb_y != 0) {
> - mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
> - if (mb_x != 0)
> + mb_intra_ub = 0;
> + if (mb_x != 0) {
> + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
> + }
> + if (mb_y != 0) {
> + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
> + if (mb_x != 0)
> mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
> - if (mb_x != (mb_width -1))
> + if (mb_x != (mb_width -1))
> mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
> - }
> - if (i < mb_width) {
> - if (i == 0)
> + }
> + if (i < mb_width) {
> + if (i == 0)
> mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_AE);
> - mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_BCD_MASK);
> - if ((i == (mb_width - 1)) && slice_mb_x) {
> + mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_BCD_MASK);
> + if ((i == (mb_width - 1)) && slice_mb_x) {
> mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
> - }
> - }
> + }
> + }
>
> - if ((i == mb_width) && slice_mb_x) {
> - mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_D);
> - }
> + if ((i == mb_width) && slice_mb_x) {
> + mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_D);
> + }
> *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
> *command_ptr++ = kernel;
> *command_ptr++ = 0;
> @@ -670,7 +714,7 @@ static VAStatus gen9_vme_prepare(VADriverContextP ctx,
>
> if (!vme_context->h264_level ||
> (vme_context->h264_level != pSequenceParameter->level_idc)) {
> - vme_context->h264_level = pSequenceParameter->level_idc;
> + vme_context->h264_level = pSequenceParameter->level_idc;
> }
>
> intel_vme_update_mbmv_cost(ctx, encode_state, encoder_context);
> @@ -837,109 +881,109 @@ gen9wa_vme_mpeg2_walker_fill_vme_batchbuffer(VADriverContextP ctx,
> command_ptr = vme_context->vme_batchbuffer.bo->virtual;
>
> {
> - unsigned int mb_intra_ub, score_dep;
> - int x_outer, y_outer, x_inner, y_inner;
> - int xtemp_outer = 0;
> - int first_mb = 0;
> - int num_mb = mb_width * mb_height;
> -
> - x_outer = 0;
> - y_outer = 0;
> -
> - for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
> - x_inner = x_outer;
> - y_inner = y_outer;
> - for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
> - mb_intra_ub = 0;
> - score_dep = 0;
> - if (x_inner != 0) {
> - mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
> - score_dep |= MB_SCOREBOARD_A;
> + unsigned int mb_intra_ub, score_dep;
> + int x_outer, y_outer, x_inner, y_inner;
> + int xtemp_outer = 0;
> + int first_mb = 0;
> + int num_mb = mb_width * mb_height;
> +
> + x_outer = 0;
> + y_outer = 0;
> +
> + for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
> + x_inner = x_outer;
> + y_inner = y_outer;
> + for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
> + mb_intra_ub = 0;
> + score_dep = 0;
> + if (x_inner != 0) {
> + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
> + score_dep |= MB_SCOREBOARD_A;
> }
> - if (y_inner != 0) {
> - mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
> - score_dep |= MB_SCOREBOARD_B;
> -
> - if (x_inner != 0)
> - mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
> -
> - if (x_inner != (mb_width -1)) {
> - mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
> - score_dep |= MB_SCOREBOARD_C;
> - }
> - }
> -
> - *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
> - *command_ptr++ = kernel;
> - *command_ptr++ = MPEG2_SCOREBOARD;
> - /* Indirect data */
> - *command_ptr++ = 0;
> - /* the (X, Y) term of scoreboard */
> - *command_ptr++ = ((y_inner << 16) | x_inner);
> - *command_ptr++ = score_dep;
> - /*inline data */
> - *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
> - *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
> + if (y_inner != 0) {
> + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
> + score_dep |= MB_SCOREBOARD_B;
> +
> + if (x_inner != 0)
> + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
> +
> + if (x_inner != (mb_width -1)) {
> + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
> + score_dep |= MB_SCOREBOARD_C;
> + }
> + }
> +
> + *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
> + *command_ptr++ = kernel;
> + *command_ptr++ = MPEG2_SCOREBOARD;
> + /* Indirect data */
> + *command_ptr++ = 0;
> + /* the (X, Y) term of scoreboard */
> + *command_ptr++ = ((y_inner << 16) | x_inner);
> + *command_ptr++ = score_dep;
> + /*inline data */
> + *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
> + *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
> *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
> *command_ptr++ = 0;
>
> - x_inner -= 2;
> - y_inner += 1;
> - }
> - x_outer += 1;
> - }
> + x_inner -= 2;
> + y_inner += 1;
> + }
> + x_outer += 1;
> + }
>
> - xtemp_outer = mb_width - 2;
> - if (xtemp_outer < 0)
> + xtemp_outer = mb_width - 2;
> + if (xtemp_outer < 0)
> xtemp_outer = 0;
> - x_outer = xtemp_outer;
> - y_outer = 0;
> - for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
> + x_outer = xtemp_outer;
> + y_outer = 0;
> + for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
> y_inner = y_outer;
> - x_inner = x_outer;
> - for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
> - mb_intra_ub = 0;
> - score_dep = 0;
> - if (x_inner != 0) {
> - mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
> - score_dep |= MB_SCOREBOARD_A;
> + x_inner = x_outer;
> + for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
> + mb_intra_ub = 0;
> + score_dep = 0;
> + if (x_inner != 0) {
> + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
> + score_dep |= MB_SCOREBOARD_A;
> + }
> + if (y_inner != 0) {
> + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
> + score_dep |= MB_SCOREBOARD_B;
> +
> + if (x_inner != 0)
> + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
> +
> + if (x_inner != (mb_width -1)) {
> + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
> + score_dep |= MB_SCOREBOARD_C;
> + }
> }
> - if (y_inner != 0) {
> - mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
> - score_dep |= MB_SCOREBOARD_B;
> -
> - if (x_inner != 0)
> - mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
> -
> - if (x_inner != (mb_width -1)) {
> - mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
> - score_dep |= MB_SCOREBOARD_C;
> - }
> - }
> -
> - *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
> - *command_ptr++ = kernel;
> - *command_ptr++ = MPEG2_SCOREBOARD;
> - /* Indirect data */
> - *command_ptr++ = 0;
> - /* the (X, Y) term of scoreboard */
> - *command_ptr++ = ((y_inner << 16) | x_inner);
> - *command_ptr++ = score_dep;
> - /*inline data */
> - *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
> - *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
> +
> + *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
> + *command_ptr++ = kernel;
> + *command_ptr++ = MPEG2_SCOREBOARD;
> + /* Indirect data */
> + *command_ptr++ = 0;
> + /* the (X, Y) term of scoreboard */
> + *command_ptr++ = ((y_inner << 16) | x_inner);
> + *command_ptr++ = score_dep;
> + /*inline data */
> + *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
> + *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
>
> *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
> *command_ptr++ = 0;
> - x_inner -= 2;
> - y_inner += 1;
> - }
> - x_outer++;
> - if (x_outer >= mb_width) {
> - y_outer += 1;
> - x_outer = xtemp_outer;
> + x_inner -= 2;
> + y_inner += 1;
> + }
> + x_outer++;
> + if (x_outer >= mb_width) {
> + y_outer += 1;
> + x_outer = xtemp_outer;
> }
> - }
> + }
> }
>
> *command_ptr++ = MI_BATCH_BUFFER_END;
> @@ -1038,23 +1082,23 @@ gen9_vme_mpeg2_pipeline_programing(VADriverContextP ctx,
> VAEncPictureParameterBufferMPEG2 *pic_param = NULL;
>
> for (s = 0; s < encode_state->num_slice_params_ext; s++) {
> - int j;
> + int j;
> VAEncSliceParameterBufferMPEG2 *slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[s]->buffer;
>
> for (j = 0; j < encode_state->slice_params_ext[s]->num_elements; j++) {
> - if (slice_param->macroblock_address % width_in_mbs) {
> - allow_hwscore = false;
> - break;
> - }
> - }
> + if (slice_param->macroblock_address % width_in_mbs) {
> + allow_hwscore = false;
> + break;
> + }
> + }
> }
>
> pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
> if (pic_param->picture_type == VAEncPictureTypeIntra) {
> - allow_hwscore = false;
> - kernel_shader = VME_INTRA_SHADER;
> + allow_hwscore = false;
> + kernel_shader = VME_INTRA_SHADER;
> } else {
> - kernel_shader = VME_INTER_SHADER;
> + kernel_shader = VME_INTER_SHADER;
> }
>
> if (allow_hwscore)
> @@ -1064,7 +1108,7 @@ gen9_vme_mpeg2_pipeline_programing(VADriverContextP ctx,
> kernel_shader,
> encoder_context);
> else
> - gen9_vme_mpeg2_fill_vme_batchbuffer(ctx,
> + gen9_vme_mpeg2_fill_vme_batchbuffer(ctx,
> encode_state,
> width_in_mbs, height_in_mbs,
> is_intra ? VME_INTRA_SHADER : VME_INTER_SHADER,
> @@ -1100,7 +1144,7 @@ gen9_vme_mpeg2_prepare(VADriverContextP ctx,
>
> if ((!vme_context->mpeg2_level) ||
> (vme_context->mpeg2_level != (seq_param->sequence_extension.bits.profile_and_level_indication & MPEG2_LEVEL_MASK))) {
> - vme_context->mpeg2_level = seq_param->sequence_extension.bits.profile_and_level_indication & MPEG2_LEVEL_MASK;
> + vme_context->mpeg2_level = seq_param->sequence_extension.bits.profile_and_level_indication & MPEG2_LEVEL_MASK;
> }
>
> /*Setup all the memory object*/
> @@ -1130,6 +1174,440 @@ gen9_vme_mpeg2_pipeline(VADriverContextP ctx,
> return VA_STATUS_SUCCESS;
> }
>
> +/* HEVC */
> +/* Size and allocate the HEVC VME output buffer (one block per 16x16 unit of the picture, dimensions rounded up) and register it through vme_buffer_suface_setup at slot `index`. */
> +static void
> +gen9_vme_hevc_output_buffer_setup(VADriverContextP ctx,
> + struct encode_state *encode_state,
> + int index,
> + struct intel_encoder_context *encoder_context)
> +
> +{
> + struct i965_driver_data *i965 = i965_driver_data(ctx);
> + struct gen6_vme_context *vme_context = encoder_context->vme_context;
> + VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
> + VAEncSliceParameterBufferHEVC *pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
> + int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I;
> + int width_in_mbs = (pSequenceParameter->pic_width_in_luma_samples + 15)/16;
> + int height_in_mbs = (pSequenceParameter->pic_height_in_luma_samples + 15)/16;
> + /* Only the first slice parameter buffer is consulted to choose between the intra and inter block sizes below. */
> +
> + vme_context->vme_output.num_blocks = width_in_mbs * height_in_mbs;
> + vme_context->vme_output.pitch = 16; /* in bytes, always 16 */
> +
> + if (is_intra)
> + vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES * 2;
> + else
> + vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES * 24;
> + /*
> + * Inter MV . 32-byte Intra search + 16 IME info + 128 IME MV + 32 IME Ref
> + * + 16 FBR Info + 128 FBR MV + 32 FBR Ref.
> + * 16 * (2 + 2 * (1 + 8 + 2))= 16 * 24.
> + */
> + /* Allocate num_blocks * size_block bytes; the allocation is asserted before the buffer is registered. */
> + vme_context->vme_output.bo = dri_bo_alloc(i965->intel.bufmgr,
> + "VME output buffer",
> + vme_context->vme_output.num_blocks * vme_context->vme_output.size_block,
> + 0x1000); /* presumably the requested alignment -- confirm against dri_bo_alloc */
> + assert(vme_context->vme_output.bo);
> + vme_context->vme_buffer_suface_setup(ctx,
> + &vme_context->gpe_context,
> + &vme_context->vme_output,
> + BINDING_TABLE_OFFSET(index),
> + SURFACE_STATE_OFFSET(index));
> +}
> +/* Size and allocate the VME batchbuffer that the HEVC fill routines later write their MEDIA_OBJECT commands into: one 64-byte block per 16x16 unit of the picture, plus one. */
> +static void
> +gen9_vme_hevc_output_vme_batchbuffer_setup(VADriverContextP ctx,
> + struct encode_state *encode_state,
> + int index,
> + struct intel_encoder_context *encoder_context)
> +
> +{
> + struct i965_driver_data *i965 = i965_driver_data(ctx);
> + struct gen6_vme_context *vme_context = encoder_context->vme_context;
> + VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
> + int width_in_mbs = (pSequenceParameter->pic_width_in_luma_samples + 15)/16;
> + int height_in_mbs = (pSequenceParameter->pic_height_in_luma_samples + 15)/16;
> + /* NOTE(review): `index` is never used in this function, and the dri_bo_alloc result is not checked here. */
> + vme_context->vme_batchbuffer.num_blocks = width_in_mbs * height_in_mbs + 1; /* + 1: presumably room for the terminating MI_BATCH_BUFFER_END -- confirm */
> + vme_context->vme_batchbuffer.size_block = 64; /* 4 OWORDs */
> + vme_context->vme_batchbuffer.pitch = 16;
> + vme_context->vme_batchbuffer.bo = dri_bo_alloc(i965->intel.bufmgr,
> + "VME batchbuffer",
> + vme_context->vme_batchbuffer.num_blocks * vme_context->vme_batchbuffer.size_block,
> + 0x1000);
> +}
> +static VAStatus
> +gen9_vme_hevc_surface_setup(VADriverContextP ctx,
> + struct encode_state *encode_state,
> + int is_intra,
> + struct intel_encoder_context *encoder_context)
> +{
> + struct object_surface *obj_surface;
> + /* Sets up surface state for the input picture, for the reference pictures when !is_intra, and allocates the VME output and batch buffers. Always returns VA_STATUS_SUCCESS. */
> + /*Setup surfaces state*/
> + /* current picture for encoding */
> + obj_surface = encode_state->input_yuv_object;
> + gen9_vme_source_surface_state(ctx, 0, obj_surface, encoder_context);
> + gen9_vme_media_source_surface_state(ctx, 4, obj_surface, encoder_context);
> + gen9_vme_media_chroma_source_surface_state(ctx, 6, obj_surface, encoder_context);
> + /* Reference state is only set up for non-intra frames; slice 0 alone decides whether the second reference call is made. */
> + if (!is_intra) {
> + VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
> + int slice_type;
> + /* NOTE(review): slice_type is used unconverted (the AVC path passes it through intel_avc_enc_slice_type_fixup first); confirm SLICE_TYPE_* follow HEVC's numbering (H.265: B=0, P=1, I=2). */
> + slice_type = slice_param->slice_type;
> + assert(slice_type != SLICE_TYPE_I && slice_type != SLICE_TYPE_SI);
> +
> + /* to do HEVC */
> + intel_hevc_vme_reference_state(ctx, encode_state, encoder_context, 0, 1, gen9_vme_source_surface_state);
> +
> + if (slice_type == SLICE_TYPE_B)
> + intel_hevc_vme_reference_state(ctx, encode_state, encoder_context, 1, 2, gen9_vme_source_surface_state);
> + }
> +
> + /* VME output */
> + gen9_vme_hevc_output_buffer_setup(ctx, encode_state, 3, encoder_context);
> + gen9_vme_hevc_output_vme_batchbuffer_setup(ctx, encode_state, 5, encoder_context);
> +
> + return VA_STATUS_SUCCESS;
> +}
> +static void
> +gen9wa_vme_hevc_walker_fill_vme_batchbuffer(VADriverContextP ctx,
> + struct encode_state *encode_state,
> + int mb_width, int mb_height,
> + int kernel,
> + int transform_8x8_mode_flag,
> + struct intel_encoder_context *encoder_context)
> +{ /* Fills the VME batchbuffer with one scoreboarded MEDIA_OBJECT plus MEDIA_STATE_FLUSH per 16x16 block of every slice, walking diagonals (x - 2, y + 1), then ends it with MI_BATCH_BUFFER_END. */
> + struct gen6_vme_context *vme_context = encoder_context->vme_context;
> + int mb_row;
> + int s;
> + unsigned int *command_ptr;
> + VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
> + int log2_cu_size = pSequenceParameter->log2_min_luma_coding_block_size_minus3 + 3;
> + int log2_ctb_size = pSequenceParameter->log2_diff_max_min_luma_coding_block_size + log2_cu_size;
> + int ctb_size = 1 << log2_ctb_size;
> + int num_mb_in_ctb = (ctb_size + 15)/16;
> + num_mb_in_ctb = num_mb_in_ctb * num_mb_in_ctb; /* 16x16 blocks per CTB: side count squared */
> + /* Flag written into dword 2 of every MEDIA_OBJECT emitted below. */
> +#define USE_SCOREBOARD (1 << 21)
> +
> + dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
> + command_ptr = vme_context->vme_batchbuffer.bo->virtual;
> +
> + /* slice_segment_address must be aligned to picture_width_in_ctb */
> + for (s = 0; s < encode_state->num_slice_params_ext; s++) {
> + VAEncSliceParameterBufferHEVC *pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[s]->buffer;
> + int first_mb = pSliceParameter->slice_segment_address * num_mb_in_ctb; /* CTB address scaled to 16x16-block units */
> + int num_mb = pSliceParameter->num_ctu_in_slice * num_mb_in_ctb; /* CTU count scaled to 16x16-block units */
> + unsigned int mb_intra_ub, score_dep;
> + int x_outer, y_outer, x_inner, y_inner;
> + int xtemp_outer = 0;
> + /* Start at the slice's first block; mb_row remembers the slice's top row, whose blocks get no B/C/D neighbours. */
> + x_outer = first_mb % mb_width;
> + y_outer = first_mb / mb_width;
> + mb_row = y_outer;
> + /* Pass 1: diagonals starting on the slice's top row, for start columns below mb_width - 2. loop_in_bounds() is presumably nonzero once (x, y) leaves the slice -- confirm. */
> + for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
> + x_inner = x_outer;
> + y_inner = y_outer;
> + for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
> + mb_intra_ub = 0;
> + score_dep = 0;
> + if (x_inner != 0) {
> + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
> + score_dep |= MB_SCOREBOARD_A;
> + }
> + if (y_inner != mb_row) {
> + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
> + score_dep |= MB_SCOREBOARD_B;
> + if (x_inner != 0)
> + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
> + if (x_inner != (mb_width -1)) {
> + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
> + score_dep |= MB_SCOREBOARD_C;
> + }
> + }
> + /* Eight-dword MEDIA_OBJECT followed by a two-dword MEDIA_STATE_FLUSH. */
> + *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
> + *command_ptr++ = kernel;
> + *command_ptr++ = USE_SCOREBOARD;
> + /* Indirect data */
> + *command_ptr++ = 0;
> + /* the (X, Y) term of scoreboard */
> + *command_ptr++ = ((y_inner << 16) | x_inner);
> + *command_ptr++ = score_dep;
> + /*inline data */
> + *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
> + *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
> + *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
> + *command_ptr++ = 0;
> + /* Step along the diagonal: two columns left, one row down. */
> + x_inner -= 2;
> + y_inner += 1;
> + }
> + x_outer += 1;
> + }
> + /* Pass 2: diagonals starting in the last two columns (clamped to column 0), moving down a row each time the start column passes the right edge. */
> + xtemp_outer = mb_width - 2;
> + if (xtemp_outer < 0)
> + xtemp_outer = 0;
> + x_outer = xtemp_outer;
> + y_outer = first_mb / mb_width;
> + for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
> + y_inner = y_outer;
> + x_inner = x_outer;
> + for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
> + mb_intra_ub = 0;
> + score_dep = 0;
> + if (x_inner != 0) {
> + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
> + score_dep |= MB_SCOREBOARD_A;
> + }
> + if (y_inner != mb_row) {
> + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
> + score_dep |= MB_SCOREBOARD_B;
> + if (x_inner != 0)
> + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
> +
> + if (x_inner != (mb_width -1)) {
> + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
> + score_dep |= MB_SCOREBOARD_C;
> + }
> + }
> + /* Same ten-dword command pair as in pass 1. */
> + *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
> + *command_ptr++ = kernel;
> + *command_ptr++ = USE_SCOREBOARD;
> + /* Indirect data */
> + *command_ptr++ = 0;
> + /* the (X, Y) term of scoreboard */
> + *command_ptr++ = ((y_inner << 16) | x_inner);
> + *command_ptr++ = score_dep;
> + /*inline data */
> + *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
> + *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
> +
> + *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
> + *command_ptr++ = 0;
> + x_inner -= 2;
> + y_inner += 1;
> + }
> + x_outer++;
> + if (x_outer >= mb_width) {
> + y_outer += 1;
> + x_outer = xtemp_outer;
> + }
> + }
> + }
> + /* Terminate the batchbuffer. */
> + *command_ptr++ = MI_BATCH_BUFFER_END;
> + *command_ptr++ = 0;
> +
> + dri_bo_unmap(vme_context->vme_batchbuffer.bo);
> +}
> +/* Fills the VME batchbuffer in raster order with the scoreboard dwords left at zero: one MEDIA_OBJECT plus MEDIA_STATE_FLUSH per 16x16 block of every slice, ended by MI_BATCH_BUFFER_END. */
> +static void
> +gen9_vme_hevc_fill_vme_batchbuffer(VADriverContextP ctx,
> + struct encode_state *encode_state,
> + int mb_width, int mb_height,
> + int kernel,
> + int transform_8x8_mode_flag,
> + struct intel_encoder_context *encoder_context)
> +{
> + struct gen6_vme_context *vme_context = encoder_context->vme_context;
> + int mb_x = 0, mb_y = 0;
> + int i, s;
> + unsigned int *command_ptr;
> + VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
> + int log2_cu_size = pSequenceParameter->log2_min_luma_coding_block_size_minus3 + 3;
> + int log2_ctb_size = pSequenceParameter->log2_diff_max_min_luma_coding_block_size + log2_cu_size;
> + /* Number of 16x16 blocks in one CTB: blocks per side, then squared. */
> + int ctb_size = 1 << log2_ctb_size;
> + int num_mb_in_ctb = (ctb_size + 15)/16;
> + num_mb_in_ctb = num_mb_in_ctb * num_mb_in_ctb;
> +
> + dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
> + command_ptr = vme_context->vme_batchbuffer.bo->virtual;
> +
> + for (s = 0; s < encode_state->num_slice_params_ext; s++) {
> + VAEncSliceParameterBufferHEVC *pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[s]->buffer;
> + int slice_mb_begin = pSliceParameter->slice_segment_address * num_mb_in_ctb;
> + int slice_mb_number = pSliceParameter->num_ctu_in_slice * num_mb_in_ctb;
> + /* slice_mb_x is non-zero when the slice does not begin at the start of a block row. */
> + unsigned int mb_intra_ub;
> + int slice_mb_x = slice_mb_begin % mb_width;
> + for (i = 0; i < slice_mb_number; ) {
> + int mb_count = i + slice_mb_begin;
> + mb_x = mb_count % mb_width;
> + mb_y = mb_count / mb_width;
> + mb_intra_ub = 0;
> + /* First derive neighbour availability from the picture edges, then trim it at the slice boundary. */
> + if (mb_x != 0) {
> + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
> + }
> + if (mb_y != 0) {
> + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
> + if (mb_x != 0)
> + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
> + if (mb_x != (mb_width -1))
> + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
> + }
> + if (i < mb_width) { /* first mb_width blocks of the slice: B/C/D are cleared, and A/E too for the very first block */
> + if (i == 0)
> + mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_AE);
> + mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_BCD_MASK);
> + if ((i == (mb_width - 1)) && slice_mb_x) {
> + mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
> + }
> + }
> + /* Block number mb_width of a slice that starts mid-row loses D. */
> + if ((i == mb_width) && slice_mb_x) {
> + mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_D);
> + }
> + /* Eight-dword MEDIA_OBJECT; dwords 2-5 are written as zero here. */
> + *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
> + *command_ptr++ = kernel;
> + *command_ptr++ = 0;
> + *command_ptr++ = 0;
> + *command_ptr++ = 0;
> + *command_ptr++ = 0;
> +
> + /*inline data */
> + *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x);
> + *command_ptr++ = ( (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
> +
> + *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
> + *command_ptr++ = 0;
> + i += 1;
> + }
> + }
> + /* Terminate the batchbuffer. */
> + *command_ptr++ = MI_BATCH_BUFFER_END;
> + *command_ptr++ = 0;
> +
> + dri_bo_unmap(vme_context->vme_batchbuffer.bo);
> +}
> +/* Chooses the VME kernel and the batch-fill routine, fills the VME batchbuffer, then emits pipeline setup plus an MI_BATCH_BUFFER_START that points at that buffer. */
> +static void gen9_vme_hevc_pipeline_programing(VADriverContextP ctx,
> + struct encode_state *encode_state,
> + struct intel_encoder_context *encoder_context)
> +{
> + struct gen6_vme_context *vme_context = encoder_context->vme_context;
> + struct intel_batchbuffer *batch = encoder_context->base.batch;
> + VAEncSliceParameterBufferHEVC *pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
> + VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
> + int width_in_mbs = (pSequenceParameter->pic_width_in_luma_samples + 15)/16;
> + int height_in_mbs = (pSequenceParameter->pic_height_in_luma_samples + 15)/16;
> + int kernel_shader;
> + bool allow_hwscore = true;
> + int s;
> + /* 16x16 blocks per CTB, used below to convert slice_segment_address into block units. */
> + int log2_cu_size = pSequenceParameter->log2_min_luma_coding_block_size_minus3 + 3;
> + int log2_ctb_size = pSequenceParameter->log2_diff_max_min_luma_coding_block_size + log2_cu_size;
> +
> + int ctb_size = 1 << log2_ctb_size;
> + int num_mb_in_ctb = (ctb_size + 15)/16;
> + int transform_8x8_mode_flag = 1;
> + num_mb_in_ctb = num_mb_in_ctb * num_mb_in_ctb;
> + /* The scoreboard walker is only allowed when every slice begins at the start of a block row. */
> + for (s = 0; s < encode_state->num_slice_params_ext; s++) {
> + pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[s]->buffer;
> + int slice_mb_begin = pSliceParameter->slice_segment_address * num_mb_in_ctb;
> + if ((slice_mb_begin % width_in_mbs)) {
> + allow_hwscore = false;
> + break;
> + }
> + }
> + /* NOTE(review): pSliceParameter now refers to the last slice the loop examined, not slice 0; the raw HEVC slice_type is compared with SLICE_TYPE_* -- confirm these follow HEVC's numbering (H.265: B=0, P=1, I=2). */
> + if (pSliceParameter->slice_type == SLICE_TYPE_I) {
> + kernel_shader = VME_INTRA_SHADER;
> + } else if (pSliceParameter->slice_type == SLICE_TYPE_P) {
> + kernel_shader = VME_INTER_SHADER;
> + } else {
> + kernel_shader = VME_BINTER_SHADER;
> + if (!allow_hwscore) /* without the scoreboard walker, the remaining slice types fall back to the plain inter kernel */
> + kernel_shader = VME_INTER_SHADER;
> + }
> + if (allow_hwscore)
> + gen9wa_vme_hevc_walker_fill_vme_batchbuffer(ctx,
> + encode_state,
> + width_in_mbs, height_in_mbs,
> + kernel_shader,
> + transform_8x8_mode_flag,
> + encoder_context);
> + else
> + gen9_vme_hevc_fill_vme_batchbuffer(ctx,
> + encode_state,
> + width_in_mbs, height_in_mbs,
> + kernel_shader,
> + transform_8x8_mode_flag,
> + encoder_context);
> + /* Between pipeline setup and pipeline end, emit a three-dword MI_BATCH_BUFFER_START relocated to the filled VME batchbuffer. */
> + intel_batchbuffer_start_atomic(batch, 0x1000);
> + gen9_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch);
> + BEGIN_BATCH(batch, 3);
> + OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
> + OUT_RELOC(batch,
> + vme_context->vme_batchbuffer.bo,
> + I915_GEM_DOMAIN_COMMAND, 0,
> + 0);
> + OUT_BATCH(batch, 0);
> + ADVANCE_BATCH(batch);
> +
> + gen9_gpe_pipeline_end(ctx, &vme_context->gpe_context, batch);
> +
> + intel_batchbuffer_end_atomic(batch);
> +}
> +
> +static VAStatus gen9_vme_hevc_prepare(VADriverContextP ctx,
> + struct encode_state *encode_state,
> + struct intel_encoder_context *encoder_context)
> +{
> + VAStatus vaStatus = VA_STATUS_SUCCESS;
> + VAEncSliceParameterBufferHEVC *pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
> + int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I;
> + VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
> + struct gen6_vme_context *vme_context = encoder_context->vme_context;
> +
> + /* here use the avc level for hevc vme */
> + if (!vme_context->hevc_level ||
> + (vme_context->hevc_level != pSequenceParameter->general_level_idc)) {
> + vme_context->hevc_level = pSequenceParameter->general_level_idc;
> + }
> +
> + intel_vme_hevc_update_mbmv_cost(ctx, encode_state, encoder_context);
> +
> + /* Set up all the memory objects */
> + gen9_vme_hevc_surface_setup(ctx, encode_state, is_intra, encoder_context);
> + gen9_vme_interface_setup(ctx, encode_state, encoder_context);
> + //gen9_vme_vme_state_setup(ctx, encode_state, is_intra, encoder_context);
> + gen9_vme_constant_setup(ctx, encode_state, encoder_context);
> +
> + /* Programming the media pipeline */
> + gen9_vme_hevc_pipeline_programing(ctx, encode_state, encoder_context);
> +
> + return vaStatus;
> +}
> +
> +
> +static VAStatus
> +gen9_vme_hevc_pipeline(VADriverContextP ctx,
> + VAProfile profile,
> + struct encode_state *encode_state,
> + struct intel_encoder_context *encoder_context)
> +{
> + gen9_vme_media_init(ctx, encoder_context);
> + gen9_vme_hevc_prepare(ctx, encode_state, encoder_context);
> + gen9_vme_run(ctx, encode_state, encoder_context);
> + gen9_vme_stop(ctx, encode_state, encoder_context);
> +
> + return VA_STATUS_SUCCESS;
> +}
> +
> +
> static void
> gen9_vme_context_destroy(void *context)
> {
> @@ -1147,8 +1625,8 @@ gen9_vme_context_destroy(void *context)
> vme_context->vme_batchbuffer.bo = NULL;
>
> if (vme_context->vme_state_message) {
> - free(vme_context->vme_state_message);
> - vme_context->vme_state_message = NULL;
> + free(vme_context->vme_state_message);
> + vme_context->vme_state_message = NULL;
> }
>
> free(vme_context);
> @@ -1172,6 +1650,12 @@ Bool gen9_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *e
> vme_kernel_list = gen9_vme_mpeg2_kernels;
> encoder_context->vme_pipeline = gen9_vme_mpeg2_pipeline;
> i965_kernel_num = sizeof(gen9_vme_mpeg2_kernels) / sizeof(struct i965_kernel);
> + break;
> +
> + case CODEC_HEVC:
> + vme_kernel_list = gen9_vme_hevc_kernels;
> + encoder_context->vme_pipeline = gen9_vme_hevc_pipeline;
> + i965_kernel_num = sizeof(gen9_vme_hevc_kernels) / sizeof(struct i965_kernel);
>
> break;
>
> --
> 1.9.1
>
> _______________________________________________
> Libva mailing list
> Libva at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/libva
More information about the Libva
mailing list