[Libva] [PATCH 1/5] VME pipeline for HEVC

Zhao, Yakui yakui.zhao at intel.com
Mon Jan 5 21:30:22 PST 2015


On Mon, 2015-01-05 at 22:20 -0700, Qu,Pengfei wrote:
> Signed-off-by: Qu,Pengfei <Pengfei.Qu at intel.com>

The patch includes too many coding-style changes besides setting up the
VME pipeline for HEVC
(for example, the code changes in intel_mfc_avc_prepare,
intel_vme_update_mbmv_cost and gen7_vme_walker_fill_vme_batchbuffer).

In fact, these are mainly coding-style updates. They are not related
to setting up the VME pipeline for HEVC.

It would be better to split this into two patches instead of
combining the changes together.

Thanks
    Yakui
> ---
>  src/gen6_mfc_common.c | 688 ++++++++++++++++++++++++-------------
>  src/gen6_vme.h        |  19 ++
>  src/gen9_vme.c        | 928 ++++++++++++++++++++++++++++++++++++++------------
>  3 files changed, 1180 insertions(+), 455 deletions(-)
> 
> diff --git a/src/gen6_mfc_common.c b/src/gen6_mfc_common.c
> index fe41dac..a69f00a 100644
> --- a/src/gen6_mfc_common.c
> +++ b/src/gen6_mfc_common.c
> @@ -631,8 +631,8 @@ VAStatus intel_mfc_avc_prepare(VADriverContextP ctx,
>      int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
>  
>      if (IS_GEN6(i965->intel.device_info)) {
> -	/* On the SNB it should be fixed to 128 for the DMV buffer */
> -	width_in_mbs = 128;
> +        /* On the SNB it should be fixed to 128 for the DMV buffer */
> +        width_in_mbs = 128;
>      }
>  
>      for (j = 0; j < encode_state->num_slice_params_ext && enable_avc_ildb == 0; j++) {
> @@ -735,7 +735,7 @@ VAStatus intel_mfc_avc_prepare(VADriverContextP ctx,
>              break;
>          }
>      }
> -	
> +
>      mfc_context->uncompressed_picture_source.bo = encode_state->input_yuv_object->bo;
>      dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
>  
> @@ -807,7 +807,7 @@ int intel_format_lutvalue(int value, int max)
>      if (temp1 > temp2)
>          ret = max;
>      return ret;
> -	
> +
>  }
>  
> 
> @@ -842,40 +842,40 @@ void intel_vme_update_mbmv_cost(VADriverContextP ctx,
>  
>      
>      if (encoder_context->rate_control_mode == VA_RC_CQP)
> -	qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
> +        qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
>      else
> -	qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
> -  
> +        qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
> +
>      if (vme_state_message == NULL)
> -	return;
> - 
> +        return;
> +
>      assert(qp <= QP_MAX); 
>      lambda = intel_lambda_qp(qp);
>      if (slice_type == SLICE_TYPE_I) {
> -	vme_state_message[MODE_INTRA_16X16] = 0;
> -	m_cost = lambda * 4;
> -	vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
> -	m_cost = lambda * 16; 
> -	vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
> -	m_cost = lambda * 3;
> -	vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
> +        vme_state_message[MODE_INTRA_16X16] = 0;
> +        m_cost = lambda * 4;
> +        vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
> +        m_cost = lambda * 16; 
> +        vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
> +        m_cost = lambda * 3;
> +        vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
>      } else {
> -    	m_cost = 0;
> -	vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f);
> -	for (j = 1; j < 3; j++) {
> +        m_cost = 0;
> +        vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f);
> +        for (j = 1; j < 3; j++) {
>              m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
>              m_cost = (int)m_costf;
>              vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
> -   	}
> -    	mv_count = 3;
> -    	for (j = 4; j <= 64; j *= 2) {
> +        }
> +        mv_count = 3;
> +        for (j = 4; j <= 64; j *= 2) {
>              m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
>              m_cost = (int)m_costf;
>              vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f);
>              mv_count++;
> -	}
> +        }
>  
> -	if (qp <= 25) {
> +        if (qp <= 25) {
>              vme_state_message[MODE_INTRA_16X16] = 0x4a;
>              vme_state_message[MODE_INTRA_8X8] = 0x4a;
>              vme_state_message[MODE_INTRA_4X4] = 0x4a;
> @@ -887,17 +887,17 @@ void intel_vme_update_mbmv_cost(VADriverContextP ctx,
>              vme_state_message[MODE_INTER_4X4] = 0x4a;
>              vme_state_message[MODE_INTER_BWD] = 0x2a;
>              return;
> -	}
> -	m_costf = lambda * 10;
> -	vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
> -	m_cost = lambda * 14;
> -	vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
> -	m_cost = lambda * 24; 
> -	vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
> -	m_costf = lambda * 3.5;
> -	m_cost = m_costf;
> -	vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
> -    	if (slice_type == SLICE_TYPE_P) {
> +        }
> +        m_costf = lambda * 10;
> +        vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
> +        m_cost = lambda * 14;
> +        vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
> +        m_cost = lambda * 24; 
> +        vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
> +        m_costf = lambda * 3.5;
> +        m_cost = m_costf;
> +        vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
> +        if (slice_type == SLICE_TYPE_P) {
>              m_costf = lambda * 2.5;
>              m_cost = m_costf;
>              vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
> @@ -915,7 +915,7 @@ void intel_vme_update_mbmv_cost(VADriverContextP ctx,
>              vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
>              /* BWD is not used in P-frame */
>              vme_state_message[MODE_INTER_BWD] = 0;
> -	} else {
> +        } else {
>              m_costf = lambda * 2.5;
>              m_cost = m_costf;
>              vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
> @@ -934,7 +934,7 @@ void intel_vme_update_mbmv_cost(VADriverContextP ctx,
>              m_costf = lambda * 1.5;
>              m_cost = m_costf;
>              vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);
> -	}
> +        }
>      }
>  }
>  
> @@ -961,7 +961,7 @@ gen7_vme_scoreboard_init(VADriverContextP ctx, struct gen6_vme_context *vme_cont
>      vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y1 = -1;
>      vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x2 = 1;
>      vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y2 = -1;
> -	
> +
>      vme_context->gpe_context.vfe_desc7.dword = 0;
>      return;
>  }
> @@ -974,7 +974,7 @@ static inline int loop_in_bounds(int x_index, int y_index, int first_mb, int num
>          return -1;
>      if (y_index < 0 || y_index >= mb_height)
>          return -1;
> -	
> +
>      mb_index = y_index * mb_width + x_index;
>      if (mb_index < first_mb || mb_index > (first_mb + num_mb))
>          return -1;
> @@ -1000,103 +1000,103 @@ gen7_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx,
>      command_ptr = vme_context->vme_batchbuffer.bo->virtual;
>  
>      for (s = 0; s < encode_state->num_slice_params_ext; s++) {
> -	VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
> -	int first_mb = pSliceParameter->macroblock_address;
> -	int num_mb = pSliceParameter->num_macroblocks;
> -	unsigned int mb_intra_ub, score_dep;
> -	int x_outer, y_outer, x_inner, y_inner;
> -	int xtemp_outer = 0;
> -
> -	x_outer = first_mb % mb_width;
> -	y_outer = first_mb / mb_width;
> -	mb_row = y_outer;
> -				 
> -	for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
> -	    x_inner = x_outer;
> -	    y_inner = y_outer;
> -	    for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
> -		mb_intra_ub = 0;
> -		score_dep = 0;
> -		if (x_inner != 0) {
> -		    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
> -		    score_dep |= MB_SCOREBOARD_A; 
> -		}
> -		if (y_inner != mb_row) {
> -		    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
> -		    score_dep |= MB_SCOREBOARD_B;
> -		    if (x_inner != 0)
> -			mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
> -		    if (x_inner != (mb_width -1)) {
> -			mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
> -			score_dep |= MB_SCOREBOARD_C;
> +        VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
> +        int first_mb = pSliceParameter->macroblock_address;
> +        int num_mb = pSliceParameter->num_macroblocks;
> +        unsigned int mb_intra_ub, score_dep;
> +        int x_outer, y_outer, x_inner, y_inner;
> +        int xtemp_outer = 0;
> +
> +        x_outer = first_mb % mb_width;
> +        y_outer = first_mb / mb_width;
> +        mb_row = y_outer;
> +
> +        for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
> +            x_inner = x_outer;
> +            y_inner = y_outer;
> +            for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
> +                mb_intra_ub = 0;
> +                score_dep = 0;
> +                if (x_inner != 0) {
> +                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
> +                    score_dep |= MB_SCOREBOARD_A; 
> +                }
> +                if (y_inner != mb_row) {
> +                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
> +                    score_dep |= MB_SCOREBOARD_B;
> +                    if (x_inner != 0)
> +                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
> +                    if (x_inner != (mb_width -1)) {
> +                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
> +                        score_dep |= MB_SCOREBOARD_C;
>                      }
> -		}
> -							
> -            	*command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
> -		*command_ptr++ = kernel;
> -		*command_ptr++ = USE_SCOREBOARD;
> -		/* Indirect data */
> -		*command_ptr++ = 0;
> -		/* the (X, Y) term of scoreboard */
> -		*command_ptr++ = ((y_inner << 16) | x_inner);
> -		*command_ptr++ = score_dep;
> -		/*inline data */
> -		*command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
> -		*command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
> -		x_inner -= 2;
> -		y_inner += 1;
> -	    }
> -	    x_outer += 1;
> -	}
> -
> -	xtemp_outer = mb_width - 2;
> -	if (xtemp_outer < 0)
> +                }
> +
> +                *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
> +                *command_ptr++ = kernel;
> +                *command_ptr++ = USE_SCOREBOARD;
> +                /* Indirect data */
> +                *command_ptr++ = 0;
> +                /* the (X, Y) term of scoreboard */
> +                *command_ptr++ = ((y_inner << 16) | x_inner);
> +                *command_ptr++ = score_dep;
> +                /*inline data */
> +                *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
> +                *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
> +                x_inner -= 2;
> +                y_inner += 1;
> +            }
> +            x_outer += 1;
> +        }
> +
> +        xtemp_outer = mb_width - 2;
> +        if (xtemp_outer < 0)
>              xtemp_outer = 0;
> -	x_outer = xtemp_outer;
> -	y_outer = first_mb / mb_width;
> -	for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) { 
> -	    y_inner = y_outer;
> -	    x_inner = x_outer;
> -	    for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
> -	    	mb_intra_ub = 0;
> -		score_dep = 0;
> -		if (x_inner != 0) {
> -		    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
> -		    score_dep |= MB_SCOREBOARD_A; 
> -		}
> -		if (y_inner != mb_row) {
> -		    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
> -		    score_dep |= MB_SCOREBOARD_B;
> -		    if (x_inner != 0)
> -			mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
> -
> -		    if (x_inner != (mb_width -1)) {
> -			mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
> -			score_dep |= MB_SCOREBOARD_C;
> +        x_outer = xtemp_outer;
> +        y_outer = first_mb / mb_width;
> +        for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) { 
> +            y_inner = y_outer;
> +            x_inner = x_outer;
> +            for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
> +                mb_intra_ub = 0;
> +                score_dep = 0;
> +                if (x_inner != 0) {
> +                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
> +                    score_dep |= MB_SCOREBOARD_A; 
> +                }
> +                if (y_inner != mb_row) {
> +                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
> +                    score_dep |= MB_SCOREBOARD_B;
> +                    if (x_inner != 0)
> +                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
> +
> +                    if (x_inner != (mb_width -1)) {
> +                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
> +                        score_dep |= MB_SCOREBOARD_C;
>                      }
> -		}
> -
> -            	*command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
> -		*command_ptr++ = kernel;
> -		*command_ptr++ = USE_SCOREBOARD;
> -		/* Indirect data */
> -		*command_ptr++ = 0;
> -		/* the (X, Y) term of scoreboard */
> -		*command_ptr++ = ((y_inner << 16) | x_inner);
> -		*command_ptr++ = score_dep;
> -		/*inline data */
> -		*command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
> -		*command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
> -
> -		x_inner -= 2;
> -		y_inner += 1;
> -	    }
> -	    x_outer++;
> -	    if (x_outer >= mb_width) {
> -		y_outer += 1;
> -		x_outer = xtemp_outer;
> -	    }		
> -	}
> +                }
> +
> +                *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
> +                *command_ptr++ = kernel;
> +                *command_ptr++ = USE_SCOREBOARD;
> +                /* Indirect data */
> +                *command_ptr++ = 0;
> +                /* the (X, Y) term of scoreboard */
> +                *command_ptr++ = ((y_inner << 16) | x_inner);
> +                *command_ptr++ = score_dep;
> +                /*inline data */
> +                *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
> +                *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
> +
> +                x_inner -= 2;
> +                y_inner += 1;
> +            }
> +            x_outer++;
> +            if (x_outer >= mb_width) {
> +                y_outer += 1;
> +                x_outer = xtemp_outer;
> +            }		
> +        }
>      }
>  
>      *command_ptr++ = 0;
> @@ -1270,7 +1270,7 @@ void intel_vme_mpeg2_state_setup(VADriverContextP ctx,
>           */
>          vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
>          vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
> -			
> +
>          vme_state_message[MODE_INTER_16X8] = 0;
>          vme_state_message[MODE_INTER_8X8] = 0;
>          vme_state_message[MODE_INTER_8X4] = 0;
> @@ -1300,105 +1300,105 @@ gen7_vme_mpeg2_walker_fill_vme_batchbuffer(VADriverContextP ctx,
>      command_ptr = vme_context->vme_batchbuffer.bo->virtual;
>  
>      {
> -	unsigned int mb_intra_ub, score_dep;
> -	int x_outer, y_outer, x_inner, y_inner;
> -	int xtemp_outer = 0;
> -	int first_mb = 0;
> -	int num_mb = mb_width * mb_height;
> -
> -	x_outer = 0;
> -	y_outer = 0;
> -	
> -				 
> -	for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
> -	    x_inner = x_outer;
> -	    y_inner = y_outer;
> -	    for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
> -		mb_intra_ub = 0;
> -		score_dep = 0;
> -		if (x_inner != 0) {
> -		    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
> -		    score_dep |= MB_SCOREBOARD_A; 
> -		}
> -		if (y_inner != 0) {
> -		    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
> -		    score_dep |= MB_SCOREBOARD_B;
> -
> -		    if (x_inner != 0)
> -			mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
> -
> -		    if (x_inner != (mb_width -1)) {
> -			mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
> -			score_dep |= MB_SCOREBOARD_C;
> -		    }
> -		}
> -							
> -            	*command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
> -		*command_ptr++ = kernel;
> -		*command_ptr++ = MPEG2_SCOREBOARD;
> -		/* Indirect data */
> -		*command_ptr++ = 0;
> -		/* the (X, Y) term of scoreboard */
> -		*command_ptr++ = ((y_inner << 16) | x_inner);
> -		*command_ptr++ = score_dep;
> -		/*inline data */
> -		*command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
> -		*command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
> -		x_inner -= 2;
> -		y_inner += 1;
> -	    }
> -	    x_outer += 1;
> -	}
> -
> -	xtemp_outer = mb_width - 2;
> -	if (xtemp_outer < 0)
> +        unsigned int mb_intra_ub, score_dep;
> +        int x_outer, y_outer, x_inner, y_inner;
> +        int xtemp_outer = 0;
> +        int first_mb = 0;
> +        int num_mb = mb_width * mb_height;
> +
> +        x_outer = 0;
> +        y_outer = 0;
> +
> +
> +        for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
> +            x_inner = x_outer;
> +            y_inner = y_outer;
> +            for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
> +                mb_intra_ub = 0;
> +                score_dep = 0;
> +                if (x_inner != 0) {
> +                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
> +                    score_dep |= MB_SCOREBOARD_A; 
> +                }
> +                if (y_inner != 0) {
> +                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
> +                    score_dep |= MB_SCOREBOARD_B;
> +
> +                    if (x_inner != 0)
> +                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
> +
> +                    if (x_inner != (mb_width -1)) {
> +                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
> +                        score_dep |= MB_SCOREBOARD_C;
> +                    }
> +                }
> +
> +                *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
> +                *command_ptr++ = kernel;
> +                *command_ptr++ = MPEG2_SCOREBOARD;
> +                /* Indirect data */
> +                *command_ptr++ = 0;
> +                /* the (X, Y) term of scoreboard */
> +                *command_ptr++ = ((y_inner << 16) | x_inner);
> +                *command_ptr++ = score_dep;
> +                /*inline data */
> +                *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
> +                *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
> +                x_inner -= 2;
> +                y_inner += 1;
> +            }
> +            x_outer += 1;
> +        }
> +
> +        xtemp_outer = mb_width - 2;
> +        if (xtemp_outer < 0)
>              xtemp_outer = 0;
> -	x_outer = xtemp_outer;
> -	y_outer = 0;
> -	for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) { 
> -	    y_inner = y_outer;
> -	    x_inner = x_outer;
> -	    for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
> -	    	mb_intra_ub = 0;
> -		score_dep = 0;
> -		if (x_inner != 0) {
> -		    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
> -		    score_dep |= MB_SCOREBOARD_A; 
> -		}
> -		if (y_inner != 0) {
> -		    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
> -		    score_dep |= MB_SCOREBOARD_B;
> -
> -		    if (x_inner != 0)
> -			mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
> -
> -		    if (x_inner != (mb_width -1)) {
> -			mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
> -			score_dep |= MB_SCOREBOARD_C;
> -		    }
> -		}
> -
> -            	*command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
> -		*command_ptr++ = kernel;
> -		*command_ptr++ = MPEG2_SCOREBOARD;
> -		/* Indirect data */
> -		*command_ptr++ = 0;
> -		/* the (X, Y) term of scoreboard */
> -		*command_ptr++ = ((y_inner << 16) | x_inner);
> -		*command_ptr++ = score_dep;
> -		/*inline data */
> -		*command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
> -		*command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
> -
> -		x_inner -= 2;
> -		y_inner += 1;
> -	    }
> -	    x_outer++;
> -	    if (x_outer >= mb_width) {
> -		y_outer += 1;
> -		x_outer = xtemp_outer;
> -	    }		
> -	}
> +        x_outer = xtemp_outer;
> +        y_outer = 0;
> +        for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) { 
> +            y_inner = y_outer;
> +            x_inner = x_outer;
> +            for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
> +                mb_intra_ub = 0;
> +                score_dep = 0;
> +                if (x_inner != 0) {
> +                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
> +                    score_dep |= MB_SCOREBOARD_A; 
> +                }
> +                if (y_inner != 0) {
> +                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
> +                    score_dep |= MB_SCOREBOARD_B;
> +
> +                    if (x_inner != 0)
> +                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
> +
> +                    if (x_inner != (mb_width -1)) {
> +                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
> +                        score_dep |= MB_SCOREBOARD_C;
> +                    }
> +                }
> +
> +                *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
> +                *command_ptr++ = kernel;
> +                *command_ptr++ = MPEG2_SCOREBOARD;
> +                /* Indirect data */
> +                *command_ptr++ = 0;
> +                /* the (X, Y) term of scoreboard */
> +                *command_ptr++ = ((y_inner << 16) | x_inner);
> +                *command_ptr++ = score_dep;
> +                /*inline data */
> +                *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
> +                *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
> +
> +                x_inner -= 2;
> +                y_inner += 1;
> +            }
> +            x_outer++;
> +            if (x_outer >= mb_width) {
> +                y_outer += 1;
> +                x_outer = xtemp_outer;
> +            }		
> +        }
>      }
>  
>      *command_ptr++ = 0;
> @@ -1406,7 +1406,7 @@ gen7_vme_mpeg2_walker_fill_vme_batchbuffer(VADriverContextP ctx,
>  
>      dri_bo_unmap(vme_context->vme_batchbuffer.bo);
>      return;
> -}
> +                                           }
>  
>  static int
>  avc_temporal_find_surface(VAPictureH264 *curr_pic,
> @@ -1652,3 +1652,225 @@ void intel_avc_slice_insert_packed_data(VADriverContextP ctx,
>      return;
>  }
>  
> +/* HEVC */
> +static int
> +hevc_temporal_find_surface(VAPictureHEVC *curr_pic,
> +                           VAPictureHEVC *ref_list,
> +                           int num_pictures,
> +                           int dir)
> +{
> +    int i, found = -1, min = 0x7FFFFFFF;
> +
> +    for (i = 0; i < num_pictures; i++) {
> +        int tmp;
> +
> +        if ((ref_list[i].flags & VA_PICTURE_HEVC_INVALID) ||
> +            (ref_list[i].picture_id == VA_INVALID_SURFACE))
> +            break;
> +
> +        tmp = curr_pic->pic_order_cnt - ref_list[i].pic_order_cnt;
> +
> +        if (dir)
> +            tmp = -tmp;
> +
> +        if (tmp > 0 && tmp < min) {
> +            min = tmp;
> +            found = i;
> +        }
> +    }
> +
> +    return found;
> +}
> +void
> +intel_hevc_vme_reference_state(VADriverContextP ctx,
> +                               struct encode_state *encode_state,
> +                               struct intel_encoder_context *encoder_context,
> +                               int list_index,
> +                               int surface_index,
> +                               void (* vme_source_surface_state)(
> +                                   VADriverContextP ctx,
> +                                   int index,
> +                                   struct object_surface *obj_surface,
> +                                   struct intel_encoder_context *encoder_context))
> +{
> +    struct gen6_vme_context *vme_context = encoder_context->vme_context;
> +    struct object_surface *obj_surface = NULL;
> +    struct i965_driver_data *i965 = i965_driver_data(ctx);
> +    VASurfaceID ref_surface_id;
> +    VAEncPictureParameterBufferHEVC *pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
> +    VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
> +    int max_num_references;
> +    VAPictureHEVC *curr_pic;
> +    VAPictureHEVC *ref_list;
> +    int ref_idx;
> +
> +    if (list_index == 0) {
> +        max_num_references = pic_param->num_ref_idx_l0_default_active_minus1 + 1;
> +        ref_list = slice_param->ref_pic_list0;
> +    } else {
> +        max_num_references = pic_param->num_ref_idx_l1_default_active_minus1 + 1;
> +        ref_list = slice_param->ref_pic_list1;
> +    }
> +
> +    if (max_num_references == 1) {
> +        if (list_index == 0) {
> +            ref_surface_id = slice_param->ref_pic_list0[0].picture_id;
> +            vme_context->used_references[0] = &slice_param->ref_pic_list0[0];
> +        } else {
> +            ref_surface_id = slice_param->ref_pic_list1[0].picture_id;
> +            vme_context->used_references[1] = &slice_param->ref_pic_list1[0];
> +        }
> +
> +        if (ref_surface_id != VA_INVALID_SURFACE)
> +            obj_surface = SURFACE(ref_surface_id);
> +
> +        if (!obj_surface ||
> +            !obj_surface->bo) {
> +            obj_surface = encode_state->reference_objects[list_index];
> +            vme_context->used_references[list_index] = &pic_param->reference_frames[list_index];
> +        }
> +
> +        ref_idx = 0;
> +    } else {
> +        curr_pic = &pic_param->decoded_curr_pic;
> +
> +        /* select the reference frame in temporal space */
> +        ref_idx = hevc_temporal_find_surface(curr_pic, ref_list, max_num_references, list_index == 1);
> +        ref_surface_id = ref_list[ref_idx].picture_id;
> +
> +        if (ref_surface_id != VA_INVALID_SURFACE) /* otherwise warning later */
> +            obj_surface = SURFACE(ref_surface_id);
> +
> +        vme_context->used_reference_objects[list_index] = obj_surface;
> +        vme_context->used_references[list_index] = &ref_list[ref_idx];
> +    }
> +
> +    if (obj_surface &&
> +        obj_surface->bo) {
> +        assert(ref_idx >= 0);
> +        vme_context->used_reference_objects[list_index] = obj_surface;
> +        vme_source_surface_state(ctx, surface_index, obj_surface, encoder_context);
> +        vme_context->ref_index_in_mb[list_index] = (ref_idx << 24 |
> +                ref_idx << 16 |
> +                ref_idx <<  8 |
> +                ref_idx);
> +    } else {
> +        vme_context->used_reference_objects[list_index] = NULL;
> +        vme_context->used_references[list_index] = NULL;
> +        vme_context->ref_index_in_mb[list_index] = 0;
> +    }
> +}
> +
> +void intel_vme_hevc_update_mbmv_cost(VADriverContextP ctx,
> +                                     struct encode_state *encode_state,
> +                                     struct intel_encoder_context *encoder_context)
> +{
> +    //struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
> +    struct gen6_vme_context *vme_context = encoder_context->vme_context;
> +    VAEncPictureParameterBufferHEVC *pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
> +    VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
> +    int qp, m_cost, j, mv_count;
> +    uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message);
> +    float   lambda, m_costf;
> +
> +    /* here no SI SP slice for HEVC, do not need slice fixup */
> +    int slice_type = slice_param->slice_type;
> +
> +
> +    /* to do for CBR*/
> +    //if (encoder_context->rate_control_mode == VA_RC_CQP)
> +    qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
> +    //else
> +    //qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
> +
> +    if (vme_state_message == NULL)
> +        return;
> +
> +    assert(qp <= QP_MAX);
> +    lambda = intel_lambda_qp(qp);
> +    if (slice_type == SLICE_TYPE_I) {
> +        vme_state_message[MODE_INTRA_16X16] = 0;
> +        m_cost = lambda * 4;
> +        vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
> +        m_cost = lambda * 16;
> +        vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
> +        m_cost = lambda * 3;
> +        vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
> +    } else {
> +        m_cost = 0;
> +        vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f);
> +        for (j = 1; j < 3; j++) {
> +            m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
> +            m_cost = (int)m_costf;
> +            vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
> +        }
> +        mv_count = 3;
> +        for (j = 4; j <= 64; j *= 2) {
> +            m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
> +            m_cost = (int)m_costf;
> +            vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f);
> +            mv_count++;
> +        }
> +
> +        if (qp <= 25) {
> +            vme_state_message[MODE_INTRA_16X16] = 0x4a;
> +            vme_state_message[MODE_INTRA_8X8] = 0x4a;
> +            vme_state_message[MODE_INTRA_4X4] = 0x4a;
> +            vme_state_message[MODE_INTRA_NONPRED] = 0x4a;
> +            vme_state_message[MODE_INTER_16X16] = 0x4a;
> +            vme_state_message[MODE_INTER_16X8] = 0x4a;
> +            vme_state_message[MODE_INTER_8X8] = 0x4a;
> +            vme_state_message[MODE_INTER_8X4] = 0x4a;
> +            vme_state_message[MODE_INTER_4X4] = 0x4a;
> +            vme_state_message[MODE_INTER_BWD] = 0x2a;
> +            return;
> +        }
> +        m_costf = lambda * 10;
> +        vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
> +        m_cost = lambda * 14;
> +        vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
> +        m_cost = lambda * 24;
> +        vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
> +        m_costf = lambda * 3.5;
> +        m_cost = m_costf;
> +        vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
> +        if (slice_type == SLICE_TYPE_P) {
> +            m_costf = lambda * 2.5;
> +            m_cost = m_costf;
> +            vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
> +            m_costf = lambda * 4;
> +            m_cost = m_costf;
> +            vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
> +            m_costf = lambda * 1.5;
> +            m_cost = m_costf;
> +            vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
> +            m_costf = lambda * 3;
> +            m_cost = m_costf;
> +            vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
> +            m_costf = lambda * 5;
> +            m_cost = m_costf;
> +            vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
> +            /* BWD is not used in P-frame */
> +            vme_state_message[MODE_INTER_BWD] = 0;
> +        } else {
> +            m_costf = lambda * 2.5;
> +            m_cost = m_costf;
> +            vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
> +            m_costf = lambda * 5.5;
> +            m_cost = m_costf;
> +            vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
> +            m_costf = lambda * 3.5;
> +            m_cost = m_costf;
> +            vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
> +            m_costf = lambda * 5.0;
> +            m_cost = m_costf;
> +            vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
> +            m_costf = lambda * 6.5;
> +            m_cost = m_costf;
> +            vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
> +            m_costf = lambda * 1.5;
> +            m_cost = m_costf;
> +            vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);
> +        }
> +    }
> +}
> diff --git a/src/gen6_vme.h b/src/gen6_vme.h
> index bc62c14..c9d6b48 100644
> --- a/src/gen6_vme.h
> +++ b/src/gen6_vme.h
> @@ -83,6 +83,7 @@ struct gen6_vme_context
>                                             unsigned long surface_state_offset);
>      void *vme_state_message;
>      unsigned int h264_level;
> +    unsigned int hevc_level;
>      unsigned int video_coding_type;
>      unsigned int vme_kernel_sum;
>      unsigned int mpeg2_level;
> @@ -174,6 +175,24 @@ intel_avc_vme_reference_state(VADriverContextP ctx,
>                                    struct object_surface *obj_surface,
>                                    struct intel_encoder_context *encoder_context));
>  
> +/* HEVC counterpart of intel_avc_vme_reference_state declared above. NOTE(review): the definition is not part of this header hunk; confirm the semantics of list_index/surface_index there. */
> +void
> +intel_hevc_vme_reference_state(VADriverContextP ctx,
> +                              struct encode_state *encode_state,
> +                              struct intel_encoder_context *encoder_context,
> +                              int list_index,
> +                              int surface_index,
> +                              void (* vme_source_surface_state)(
> +                                  VADriverContextP ctx,
> +                                  int index,
> +                                  struct object_surface *obj_surface,
> +                                  struct intel_encoder_context *encoder_context));
> +/* HEVC counterpart of intel_vme_update_mbmv_cost. NOTE(review): presumably fills the mode/MV cost entries of vme_state_message -- confirm against the definition. */
> +void intel_vme_hevc_update_mbmv_cost(VADriverContextP ctx,
> +                                struct encode_state *encode_state,
> +                                struct intel_encoder_context *encoder_context);
> +
> +
>  extern Bool gen8_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context);
>  
>  extern Bool gen9_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context);
> diff --git a/src/gen9_vme.c b/src/gen9_vme.c
> index b4310f2..9239645 100644
> --- a/src/gen9_vme.c
> +++ b/src/gen9_vme.c
> @@ -51,13 +51,13 @@
>  
>  #define VME_INTRA_SHADER        0
>  #define VME_INTER_SHADER        1
> -#define VME_BINTER_SHADER	2
> +#define VME_BINTER_SHADER       2
>  
>  #define CURBE_ALLOCATION_SIZE   37              /* in 256-bit */
>  #define CURBE_TOTAL_DATA_LENGTH (4 * 32)        /* in byte, it should be less than or equal to CURBE_ALLOCATION_SIZE * 32 */
>  #define CURBE_URB_ENTRY_LENGTH  4               /* in 256-bit, it should be less than or equal to CURBE_TOTAL_DATA_LENGTH / 32 */
>  
> -#define VME_MSG_LENGTH		32
> +#define VME_MSG_LENGTH          32
>  
>  static const uint32_t gen9_vme_intra_frame[][4] = {
>  #include "shaders/vme/intra_frame_gen9.g9b"
> @@ -120,6 +120,43 @@ static struct i965_kernel gen9_vme_mpeg2_kernels[] = {
>      },
>  };
>  
> +/* HEVC VME kernels: one table entry per VME_INTRA_SHADER / VME_INTER_SHADER / VME_BINTER_SHADER index. */
> +/* NOTE(review): intra_frame_gen9.g9b is the same file gen9_vme_intra_frame includes; confirm the HEVC tables are meant to reuse the existing shader binaries. */
> +static const uint32_t gen9_vme_hevc_intra_frame[][4] = {
> +#include "shaders/vme/intra_frame_gen9.g9b"
> +};
> +
> +static const uint32_t gen9_vme_hevc_inter_frame[][4] = {
> +#include "shaders/vme/inter_frame_gen9.g9b"
> +};
> +
> +static const uint32_t gen9_vme_hevc_inter_bframe[][4] = {
> +#include "shaders/vme/inter_bframe_gen9.g9b"
> +};
> +
> +static struct i965_kernel gen9_vme_hevc_kernels[] = {
> +    {
> +        "VME Intra Frame",
> +        VME_INTRA_SHADER, /*index*/
> +        gen9_vme_hevc_intra_frame,
> +        sizeof(gen9_vme_hevc_intra_frame),
> +        NULL
> +    },
> +    {
> +        "VME inter Frame",
> +        VME_INTER_SHADER,
> +        gen9_vme_hevc_inter_frame,
> +        sizeof(gen9_vme_hevc_inter_frame),
> +        NULL
> +    },
> +    {
> +        "VME inter BFrame",
> +        VME_BINTER_SHADER,
> +        gen9_vme_hevc_inter_bframe,
> +        sizeof(gen9_vme_hevc_inter_bframe),
> +        NULL
> +    }
> +};
>  /* only used for VME source surface state */
>  static void
>  gen9_vme_source_surface_state(VADriverContextP ctx,
> @@ -226,13 +263,13 @@ gen9_vme_output_vme_batchbuffer_setup(VADriverContextP ctx,
>                                                     "VME batchbuffer",
>                                                     vme_context->vme_batchbuffer.num_blocks * vme_context->vme_batchbuffer.size_block,
>                                                     0x1000);
> -	/*
> +    /*
>      vme_context->vme_buffer_suface_setup(ctx,
>                                           &vme_context->gpe_context,
>                                           &vme_context->vme_batchbuffer,
>                                           BINDING_TABLE_OFFSET(index),
>                                           SURFACE_STATE_OFFSET(index));
> -	*/
> +    */
>  }
>  
>  static VAStatus
> @@ -251,15 +288,15 @@ gen9_vme_surface_setup(VADriverContextP ctx,
>      gen9_vme_media_chroma_source_surface_state(ctx, 6, obj_surface, encoder_context);
>  
>      if (!is_intra) {
> -	VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
> -	int slice_type;
> +        VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
> +        int slice_type;
>  
> -	slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
> -	assert(slice_type != SLICE_TYPE_I && slice_type != SLICE_TYPE_SI);
> +        slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
> +        assert(slice_type != SLICE_TYPE_I && slice_type != SLICE_TYPE_SI);
>  
> -	intel_avc_vme_reference_state(ctx, encode_state, encoder_context, 0, 1, gen9_vme_source_surface_state);
> +        intel_avc_vme_reference_state(ctx, encode_state, encoder_context, 0, 1, gen9_vme_source_surface_state);
>  
> -	if (slice_type == SLICE_TYPE_B)
> +        if (slice_type == SLICE_TYPE_B)
>              intel_avc_vme_reference_state(ctx, encode_state, encoder_context, 1, 2, gen9_vme_source_surface_state);
>      }
>  
> @@ -330,6 +367,13 @@ static VAStatus gen9_vme_constant_setup(VADriverContextP ctx,
>          }
>      } else if (encoder_context->codec == CODEC_MPEG2) {
>          mv_num = 2;
> +    }else if (encoder_context->codec == CODEC_HEVC) {
> +        if (vme_context->hevc_level >= 30*3) {
> +            mv_num = 16;
> +
> +            if (vme_context->hevc_level >= 31*3)
> +                mv_num = 8;
> +        }/* use the avc level setting */
>      }
>  
>      vme_state_message[31] = mv_num;
> @@ -388,107 +432,107 @@ gen9wa_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx,
>      command_ptr = vme_context->vme_batchbuffer.bo->virtual;
>  
>      for (s = 0; s < encode_state->num_slice_params_ext; s++) {
> -	VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
> -	int first_mb = pSliceParameter->macroblock_address;
> -	int num_mb = pSliceParameter->num_macroblocks;
> -	unsigned int mb_intra_ub, score_dep;
> -	int x_outer, y_outer, x_inner, y_inner;
> -	int xtemp_outer = 0;
> -
> -	x_outer = first_mb % mb_width;
> -	y_outer = first_mb / mb_width;
> -	mb_row = y_outer;
> -
> -	for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
> -	    x_inner = x_outer;
> -	    y_inner = y_outer;
> -	    for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
> -		mb_intra_ub = 0;
> -		score_dep = 0;
> -		if (x_inner != 0) {
> -		    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
> -		    score_dep |= MB_SCOREBOARD_A;
> +        VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
> +        int first_mb = pSliceParameter->macroblock_address;
> +        int num_mb = pSliceParameter->num_macroblocks;
> +        unsigned int mb_intra_ub, score_dep;
> +        int x_outer, y_outer, x_inner, y_inner;
> +        int xtemp_outer = 0;
> +
> +        x_outer = first_mb % mb_width;
> +        y_outer = first_mb / mb_width;
> +        mb_row = y_outer;
> +
> +        for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
> +            x_inner = x_outer;
> +            y_inner = y_outer;
> +            for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
> +                mb_intra_ub = 0;
> +                score_dep = 0;
> +                if (x_inner != 0) {
> +                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
> +                    score_dep |= MB_SCOREBOARD_A;
>                  }
> -		if (y_inner != mb_row) {
> -		    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
> -		    score_dep |= MB_SCOREBOARD_B;
> -		    if (x_inner != 0)
> -			mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
> -		    if (x_inner != (mb_width -1)) {
> -			mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
> -			score_dep |= MB_SCOREBOARD_C;
> +                if (y_inner != mb_row) {
> +                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
> +                    score_dep |= MB_SCOREBOARD_B;
> +                    if (x_inner != 0)
> +                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
> +                    if (x_inner != (mb_width -1)) {
> +                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
> +                        score_dep |= MB_SCOREBOARD_C;
>                      }
> -		}
> -
> -		*command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
> -		*command_ptr++ = kernel;
> -		*command_ptr++ = USE_SCOREBOARD;
> -		/* Indirect data */
> -		*command_ptr++ = 0;
> -		/* the (X, Y) term of scoreboard */
> -		*command_ptr++ = ((y_inner << 16) | x_inner);
> -		*command_ptr++ = score_dep;
> -		/*inline data */
> -		*command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
> -		*command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
> +                }
> +
> +                *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
> +                *command_ptr++ = kernel;
> +                *command_ptr++ = USE_SCOREBOARD;
> +                /* Indirect data */
> +                *command_ptr++ = 0;
> +                /* the (X, Y) term of scoreboard */
> +                *command_ptr++ = ((y_inner << 16) | x_inner);
> +                *command_ptr++ = score_dep;
> +                /*inline data */
> +                *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
> +                *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
>                  *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
>                  *command_ptr++ = 0;
>  
> -		x_inner -= 2;
> -		y_inner += 1;
> -	    }
> -	    x_outer += 1;
> -	}
> +                x_inner -= 2;
> +                y_inner += 1;
> +            }
> +            x_outer += 1;
> +        }
>  
> -	xtemp_outer = mb_width - 2;
> -	if (xtemp_outer < 0)
> +        xtemp_outer = mb_width - 2;
> +        if (xtemp_outer < 0)
>              xtemp_outer = 0;
> -	x_outer = xtemp_outer;
> -	y_outer = first_mb / mb_width;
> -	for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
> +        x_outer = xtemp_outer;
> +        y_outer = first_mb / mb_width;
> +        for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
>              y_inner = y_outer;
> -	    x_inner = x_outer;
> -	    for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
> -		mb_intra_ub = 0;
> -		score_dep = 0;
> -		if (x_inner != 0) {
> -		    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
> -		    score_dep |= MB_SCOREBOARD_A;
> +            x_inner = x_outer;
> +            for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
> +                mb_intra_ub = 0;
> +                score_dep = 0;
> +                if (x_inner != 0) {
> +                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
> +                    score_dep |= MB_SCOREBOARD_A;
>                  }
> -		if (y_inner != mb_row) {
> -		    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
> -		    score_dep |= MB_SCOREBOARD_B;
> -		    if (x_inner != 0)
> -			mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
> -
> -		    if (x_inner != (mb_width -1)) {
> -			mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
> -			score_dep |= MB_SCOREBOARD_C;
> +                if (y_inner != mb_row) {
> +                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
> +                    score_dep |= MB_SCOREBOARD_B;
> +                    if (x_inner != 0)
> +                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
> +
> +                    if (x_inner != (mb_width -1)) {
> +                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
> +                        score_dep |= MB_SCOREBOARD_C;
>                      }
> -		}
> -
> -		*command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
> -		*command_ptr++ = kernel;
> -		*command_ptr++ = USE_SCOREBOARD;
> -		/* Indirect data */
> -		*command_ptr++ = 0;
> -		/* the (X, Y) term of scoreboard */
> -		*command_ptr++ = ((y_inner << 16) | x_inner);
> -		*command_ptr++ = score_dep;
> -		/*inline data */
> -		*command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
> -		*command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
> +                }
> +
> +                *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
> +                *command_ptr++ = kernel;
> +                *command_ptr++ = USE_SCOREBOARD;
> +                /* Indirect data */
> +                *command_ptr++ = 0;
> +                /* the (X, Y) term of scoreboard */
> +                *command_ptr++ = ((y_inner << 16) | x_inner);
> +                *command_ptr++ = score_dep;
> +                /*inline data */
> +                *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
> +                *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
>  
>                  *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
>                  *command_ptr++ = 0;
> -		x_inner -= 2;
> -		y_inner += 1;
> -	    }
> -	    x_outer++;
> -	    if (x_outer >= mb_width) {
> -		y_outer += 1;
> -		x_outer = xtemp_outer;
> -	    }
> +                x_inner -= 2;
> +                y_inner += 1;
> +            }
> +            x_outer++;
> +            if (x_outer >= mb_width) {
> +                y_outer += 1;
> +                x_outer = xtemp_outer;
> +            }
>          }
>      }
>  
> @@ -519,34 +563,34 @@ gen9_vme_fill_vme_batchbuffer(VADriverContextP ctx,
>          int slice_mb_begin = pSliceParameter->macroblock_address;
>          int slice_mb_number = pSliceParameter->num_macroblocks;
>          unsigned int mb_intra_ub;
> -	int slice_mb_x = pSliceParameter->macroblock_address % mb_width;
> +        int slice_mb_x = pSliceParameter->macroblock_address % mb_width;
>          for (i = 0; i < slice_mb_number;  ) {
>              int mb_count = i + slice_mb_begin;
>              mb_x = mb_count % mb_width;
>              mb_y = mb_count / mb_width;
> -	    mb_intra_ub = 0;
> -	    if (mb_x != 0) {
> -		mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
> -	    }
> -	    if (mb_y != 0) {
> -		mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
> -		if (mb_x != 0)
> +            mb_intra_ub = 0;
> +            if (mb_x != 0) {
> +                mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
> +            }
> +            if (mb_y != 0) {
> +                mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
> +                if (mb_x != 0)
>                      mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
> -		if (mb_x != (mb_width -1))
> +                if (mb_x != (mb_width -1))
>                      mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
> -	    }
> -	    if (i < mb_width) {
> -		if (i == 0)
> +            }
> +            if (i < mb_width) {
> +                if (i == 0)
>                      mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_AE);
> -		mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_BCD_MASK);
> -		if ((i == (mb_width - 1)) && slice_mb_x) {
> +                mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_BCD_MASK);
> +                if ((i == (mb_width - 1)) && slice_mb_x) {
>                      mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
> -		}
> -	    }
> +                }
> +            }
>  
> -	    if ((i == mb_width) && slice_mb_x) {
> -		mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_D);
> -	    }
> +            if ((i == mb_width) && slice_mb_x) {
> +                mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_D);
> +            }
>              *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
>              *command_ptr++ = kernel;
>              *command_ptr++ = 0;
> @@ -670,7 +714,7 @@ static VAStatus gen9_vme_prepare(VADriverContextP ctx,
>  
>      if (!vme_context->h264_level ||
>          (vme_context->h264_level != pSequenceParameter->level_idc)) {
> -	vme_context->h264_level = pSequenceParameter->level_idc;
> +            vme_context->h264_level = pSequenceParameter->level_idc;
>      }
>  
>      intel_vme_update_mbmv_cost(ctx, encode_state, encoder_context);
> @@ -837,109 +881,109 @@ gen9wa_vme_mpeg2_walker_fill_vme_batchbuffer(VADriverContextP ctx,
>      command_ptr = vme_context->vme_batchbuffer.bo->virtual;
>  
>      {
> -	unsigned int mb_intra_ub, score_dep;
> -	int x_outer, y_outer, x_inner, y_inner;
> -	int xtemp_outer = 0;
> -	int first_mb = 0;
> -	int num_mb = mb_width * mb_height;
> -
> -	x_outer = 0;
> -	y_outer = 0;
> -
> -	for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
> -	    x_inner = x_outer;
> -	    y_inner = y_outer;
> -	    for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
> -		mb_intra_ub = 0;
> -		score_dep = 0;
> -		if (x_inner != 0) {
> -		    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
> -		    score_dep |= MB_SCOREBOARD_A;
> +        unsigned int mb_intra_ub, score_dep;
> +        int x_outer, y_outer, x_inner, y_inner;
> +        int xtemp_outer = 0;
> +        int first_mb = 0;
> +        int num_mb = mb_width * mb_height;
> +
> +        x_outer = 0;
> +        y_outer = 0;
> +
> +        for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
> +            x_inner = x_outer;
> +            y_inner = y_outer;
> +            for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
> +                mb_intra_ub = 0;
> +                score_dep = 0;
> +                if (x_inner != 0) {
> +                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
> +                    score_dep |= MB_SCOREBOARD_A;
>                  }
> -		if (y_inner != 0) {
> -		    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
> -		    score_dep |= MB_SCOREBOARD_B;
> -
> -		    if (x_inner != 0)
> -			mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
> -
> -		    if (x_inner != (mb_width -1)) {
> -			mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
> -			score_dep |= MB_SCOREBOARD_C;
> -		    }
> -		}
> -
> -		*command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
> -		*command_ptr++ = kernel;
> -		*command_ptr++ = MPEG2_SCOREBOARD;
> -		/* Indirect data */
> -		*command_ptr++ = 0;
> -		/* the (X, Y) term of scoreboard */
> -		*command_ptr++ = ((y_inner << 16) | x_inner);
> -		*command_ptr++ = score_dep;
> -		/*inline data */
> -		*command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
> -		*command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
> +                if (y_inner != 0) {
> +                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
> +                    score_dep |= MB_SCOREBOARD_B;
> +
> +                    if (x_inner != 0)
> +                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
> +
> +                    if (x_inner != (mb_width -1)) {
> +                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
> +                        score_dep |= MB_SCOREBOARD_C;
> +                    }
> +                }
> +
> +                *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
> +                *command_ptr++ = kernel;
> +                *command_ptr++ = MPEG2_SCOREBOARD;
> +                /* Indirect data */
> +                *command_ptr++ = 0;
> +                /* the (X, Y) term of scoreboard */
> +                *command_ptr++ = ((y_inner << 16) | x_inner);
> +                *command_ptr++ = score_dep;
> +                /*inline data */
> +                *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
> +                *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
>                  *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
>                  *command_ptr++ = 0;
>  
> -		x_inner -= 2;
> -		y_inner += 1;
> -	    }
> -	    x_outer += 1;
> -	}
> +                x_inner -= 2;
> +                y_inner += 1;
> +            }
> +            x_outer += 1;
> +        }
>  
> -	xtemp_outer = mb_width - 2;
> -	if (xtemp_outer < 0)
> +        xtemp_outer = mb_width - 2;
> +        if (xtemp_outer < 0)
>              xtemp_outer = 0;
> -	x_outer = xtemp_outer;
> -	y_outer = 0;
> -	for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
> +        x_outer = xtemp_outer;
> +        y_outer = 0;
> +        for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
>              y_inner = y_outer;
> -	    x_inner = x_outer;
> -	    for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
> -		mb_intra_ub = 0;
> -		score_dep = 0;
> -		if (x_inner != 0) {
> -		    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
> -		    score_dep |= MB_SCOREBOARD_A;
> +            x_inner = x_outer;
> +            for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
> +                mb_intra_ub = 0;
> +                score_dep = 0;
> +                if (x_inner != 0) {
> +                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
> +                    score_dep |= MB_SCOREBOARD_A;
> +                }
> +                if (y_inner != 0) {
> +                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
> +                    score_dep |= MB_SCOREBOARD_B;
> +
> +                    if (x_inner != 0)
> +                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
> +
> +                    if (x_inner != (mb_width -1)) {
> +                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
> +                        score_dep |= MB_SCOREBOARD_C;
> +                    }
>                  }
> -		if (y_inner != 0) {
> -		    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
> -		    score_dep |= MB_SCOREBOARD_B;
> -
> -		    if (x_inner != 0)
> -			mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
> -
> -		    if (x_inner != (mb_width -1)) {
> -			mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
> -			score_dep |= MB_SCOREBOARD_C;
> -		    }
> -		}
> -
> -		*command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
> -		*command_ptr++ = kernel;
> -		*command_ptr++ = MPEG2_SCOREBOARD;
> -		/* Indirect data */
> -		*command_ptr++ = 0;
> -		/* the (X, Y) term of scoreboard */
> -		*command_ptr++ = ((y_inner << 16) | x_inner);
> -		*command_ptr++ = score_dep;
> -		/*inline data */
> -		*command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
> -		*command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
> +
> +                *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
> +                *command_ptr++ = kernel;
> +                *command_ptr++ = MPEG2_SCOREBOARD;
> +                /* Indirect data */
> +                *command_ptr++ = 0;
> +                /* the (X, Y) term of scoreboard */
> +                *command_ptr++ = ((y_inner << 16) | x_inner);
> +                *command_ptr++ = score_dep;
> +                /*inline data */
> +                *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
> +                *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
>  
>                  *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
>                  *command_ptr++ = 0;
> -		x_inner -= 2;
> -		y_inner += 1;
> -	    }
> -	    x_outer++;
> -	    if (x_outer >= mb_width) {
> -		y_outer += 1;
> -		x_outer = xtemp_outer;
> +                x_inner -= 2;
> +                y_inner += 1;
> +            }
> +            x_outer++;
> +            if (x_outer >= mb_width) {
> +                y_outer += 1;
> +                x_outer = xtemp_outer;
>              }
> -	}
> +        }
>      }
>  
>      *command_ptr++ = MI_BATCH_BUFFER_END;
> @@ -1038,23 +1082,23 @@ gen9_vme_mpeg2_pipeline_programing(VADriverContextP ctx,
>      VAEncPictureParameterBufferMPEG2 *pic_param = NULL;
>  
>      for (s = 0; s < encode_state->num_slice_params_ext; s++) {
> -	int j;
> +        int j;
>          VAEncSliceParameterBufferMPEG2 *slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[s]->buffer;
>  
>          for (j = 0; j < encode_state->slice_params_ext[s]->num_elements; j++) {
> -	    if (slice_param->macroblock_address % width_in_mbs) {
> -		allow_hwscore = false;
> -		break;
> -	    }
> -	}
> +            if (slice_param->macroblock_address % width_in_mbs) {
> +                allow_hwscore = false;
> +                break;
> +            }
> +        }
>      }
>  
>      pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
>      if (pic_param->picture_type == VAEncPictureTypeIntra) {
> -	allow_hwscore = false;
> -	kernel_shader = VME_INTRA_SHADER;
> +        allow_hwscore = false;
> +        kernel_shader = VME_INTRA_SHADER;
>      } else {
> -	kernel_shader = VME_INTER_SHADER;
> +        kernel_shader = VME_INTER_SHADER;
>      }
>  
>      if (allow_hwscore)
> @@ -1064,7 +1108,7 @@ gen9_vme_mpeg2_pipeline_programing(VADriverContextP ctx,
>                                                     kernel_shader,
>                                                     encoder_context);
>      else
> -	gen9_vme_mpeg2_fill_vme_batchbuffer(ctx,
> +        gen9_vme_mpeg2_fill_vme_batchbuffer(ctx,
>                                              encode_state,
>                                              width_in_mbs, height_in_mbs,
>                                              is_intra ? VME_INTRA_SHADER : VME_INTER_SHADER,
> @@ -1100,7 +1144,7 @@ gen9_vme_mpeg2_prepare(VADriverContextP ctx,
>  
>      if ((!vme_context->mpeg2_level) ||
>          (vme_context->mpeg2_level != (seq_param->sequence_extension.bits.profile_and_level_indication & MPEG2_LEVEL_MASK))) {
> -	vme_context->mpeg2_level = seq_param->sequence_extension.bits.profile_and_level_indication & MPEG2_LEVEL_MASK;
> +            vme_context->mpeg2_level = seq_param->sequence_extension.bits.profile_and_level_indication & MPEG2_LEVEL_MASK;
>      }
>  
>      /*Setup all the memory object*/
> @@ -1130,6 +1174,440 @@ gen9_vme_mpeg2_pipeline(VADriverContextP ctx,
>      return VA_STATUS_SUCCESS;
>  }
>  
> +/* HEVC: allocate the VME output buffer and bind it at binding-table/surface-state slot `index`. */
> +/* Block count is the picture size in 16x16 units; block size depends on whether slice 0 is an I slice. */
> +static void
> +gen9_vme_hevc_output_buffer_setup(VADriverContextP ctx,
> +                             struct encode_state *encode_state,
> +                             int index,
> +                             struct intel_encoder_context *encoder_context)
> +
> +{
> +    struct i965_driver_data *i965 = i965_driver_data(ctx);
> +    struct gen6_vme_context *vme_context = encoder_context->vme_context;
> +    VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
> +    VAEncSliceParameterBufferHEVC *pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
> +    int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I;
> +    int width_in_mbs = (pSequenceParameter->pic_width_in_luma_samples + 15)/16;
> +    int height_in_mbs = (pSequenceParameter->pic_height_in_luma_samples + 15)/16;
> +    /* The two dimensions above are rounded up to whole 16x16 units. */
> +
> +    vme_context->vme_output.num_blocks = width_in_mbs * height_in_mbs;
> +    vme_context->vme_output.pitch = 16; /* in bytes, always 16 */
> +    /* Only the first slice's type is consulted to pick the per-block output size. */
> +    if (is_intra)
> +        vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES * 2;
> +    else
> +        vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES * 24;
> +    /*
> +     * Inter MV . 32-byte Intra search + 16 IME info + 128 IME MV + 32 IME Ref
> +     * + 16 FBR Info + 128 FBR MV + 32 FBR Ref.
> +     * 16 * (2 + 2 * (1 + 8 + 2))= 16 * 24.
> +     */
> +    /* NOTE(review): any bo already held in vme_output.bo is overwritten without an unreference in this function; confirm the caller released it. */
> +    vme_context->vme_output.bo = dri_bo_alloc(i965->intel.bufmgr,
> +                                              "VME output buffer",
> +                                              vme_context->vme_output.num_blocks * vme_context->vme_output.size_block,
> +                                              0x1000);
> +    assert(vme_context->vme_output.bo); /* the only allocation-failure check; compiled out under NDEBUG */
> +    vme_context->vme_buffer_suface_setup(ctx,
> +                                         &vme_context->gpe_context,
> +                                         &vme_context->vme_output,
> +                                         BINDING_TABLE_OFFSET(index),
> +                                         SURFACE_STATE_OFFSET(index));
> +}
> +/* HEVC: size and allocate the bo that holds the generated VME batch commands. */
> +static void
> +gen9_vme_hevc_output_vme_batchbuffer_setup(VADriverContextP ctx,
> +                                      struct encode_state *encode_state,
> +                                      int index,
> +                                      struct intel_encoder_context *encoder_context)
> +/* `index` is not used in the body: no binding-table/surface-state entry is set up here. */
> +{
> +    struct i965_driver_data *i965 = i965_driver_data(ctx);
> +    struct gen6_vme_context *vme_context = encoder_context->vme_context;
> +    VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
> +    int width_in_mbs = (pSequenceParameter->pic_width_in_luma_samples + 15)/16;
> +    int height_in_mbs = (pSequenceParameter->pic_height_in_luma_samples + 15)/16;
> +    /* One block per 16x16 unit plus one extra -- presumably room for the closing MI_BATCH_BUFFER_END; TODO confirm. */
> +    vme_context->vme_batchbuffer.num_blocks = width_in_mbs * height_in_mbs + 1;
> +    vme_context->vme_batchbuffer.size_block = 64; /* 4 OWORDs */
> +    vme_context->vme_batchbuffer.pitch = 16;
> +    vme_context->vme_batchbuffer.bo = dri_bo_alloc(i965->intel.bufmgr,
> +                                                   "VME batchbuffer",
> +                                                   vme_context->vme_batchbuffer.num_blocks * vme_context->vme_batchbuffer.size_block,
> +                                                   0x1000); /* NOTE(review): the allocation result is not checked in this function */
> +}
> +static VAStatus
> +gen9_vme_hevc_surface_setup(VADriverContextP ctx,
> +                       struct encode_state *encode_state,
> +                       int is_intra,
> +                       struct intel_encoder_context *encoder_context)
> +{
> +    struct object_surface *obj_surface;
> +    /* Sets up every surface used by the HEVC VME pass (source, references, outputs); always returns VA_STATUS_SUCCESS. */
> +    /* Setup surfaces state */
> +    /* current picture for encoding: bound at surface indices 0, 4 and 6 */
> +    obj_surface = encode_state->input_yuv_object;
> +    gen9_vme_source_surface_state(ctx, 0, obj_surface, encoder_context);
> +    gen9_vme_media_source_surface_state(ctx, 4, obj_surface, encoder_context);
> +    gen9_vme_media_chroma_source_surface_state(ctx, 6, obj_surface, encoder_context);
> +    /* Non-intra frames also need reference surfaces; only slice 0's type is examined. */
> +    if (!is_intra) {
> +        VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
> +        int slice_type;
> +        /* NOTE(review): the raw HEVC slice_type is compared with the SLICE_TYPE_* constants that the AVC path uses only after a fixup; confirm the HEVC values match. */
> +        slice_type = slice_param->slice_type;
> +        assert(slice_type != SLICE_TYPE_I && slice_type != SLICE_TYPE_SI);
> +
> +        /* TODO: HEVC-specific handling. List 0 reference is set up at surface index 1. */
> +        intel_hevc_vme_reference_state(ctx, encode_state, encoder_context, 0, 1, gen9_vme_source_surface_state);
> +        /* B slices additionally set up list 1 at surface index 2. */
> +        if (slice_type == SLICE_TYPE_B)
> +            intel_hevc_vme_reference_state(ctx, encode_state, encoder_context, 1, 2, gen9_vme_source_surface_state);
> +    }
> +
> +    /* VME output buffer at index 3, VME batch buffer at index 5 */
> +    gen9_vme_hevc_output_buffer_setup(ctx, encode_state, 3, encoder_context);
> +    gen9_vme_hevc_output_vme_batchbuffer_setup(ctx, encode_state, 5, encoder_context);
> +
> +    return VA_STATUS_SUCCESS;
> +}
> +static void
> +gen9wa_vme_hevc_walker_fill_vme_batchbuffer(VADriverContextP ctx,
> +                                     struct encode_state *encode_state,
> +                                     int mb_width, int mb_height,
> +                                     int kernel,
> +                                     int transform_8x8_mode_flag,
> +                                     struct intel_encoder_context *encoder_context)
> +{
> +    struct gen6_vme_context *vme_context = encoder_context->vme_context;
> +    int mb_row;
> +    int s;
> +    unsigned int *command_ptr;
> +    VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
> +    int log2_cu_size = pSequenceParameter->log2_min_luma_coding_block_size_minus3 + 3;
> +    int log2_ctb_size = pSequenceParameter->log2_diff_max_min_luma_coding_block_size + log2_cu_size;
> +    int ctb_size = 1 << log2_ctb_size;
> +    int num_mb_in_ctb = (ctb_size + 15)/16;
> +    num_mb_in_ctb = num_mb_in_ctb * num_mb_in_ctb;
> +
> +#define		USE_SCOREBOARD		(1 << 21)
> +
> +    dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
> +    command_ptr = vme_context->vme_batchbuffer.bo->virtual;
> +
> +    /* slice_segment_address must be aligned to picture_width_in_ctb */
> +    for (s = 0; s < encode_state->num_slice_params_ext; s++) {
> +        VAEncSliceParameterBufferHEVC *pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[s]->buffer;
> +        int first_mb = pSliceParameter->slice_segment_address * num_mb_in_ctb;
> +        int num_mb = pSliceParameter->num_ctu_in_slice * num_mb_in_ctb;
> +        unsigned int mb_intra_ub, score_dep;
> +        int x_outer, y_outer, x_inner, y_inner;
> +        int xtemp_outer = 0;
> +
> +        x_outer = first_mb % mb_width;
> +        y_outer = first_mb / mb_width;
> +        mb_row = y_outer;
> +
> +        for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
> +            x_inner = x_outer;
> +            y_inner = y_outer;
> +            for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
> +                mb_intra_ub = 0;
> +                score_dep = 0;
> +                if (x_inner != 0) {
> +                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
> +                    score_dep |= MB_SCOREBOARD_A;
> +                }
> +                if (y_inner != mb_row) {
> +                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
> +                    score_dep |= MB_SCOREBOARD_B;
> +                    if (x_inner != 0)
> +                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
> +                    if (x_inner != (mb_width -1)) {
> +                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
> +                        score_dep |= MB_SCOREBOARD_C;
> +                    }
> +                }
> +
> +                *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
> +                *command_ptr++ = kernel;
> +                *command_ptr++ = USE_SCOREBOARD;
> +                /* Indirect data */
> +                *command_ptr++ = 0;
> +                /* the (X, Y) term of scoreboard */
> +                *command_ptr++ = ((y_inner << 16) | x_inner);
> +                *command_ptr++ = score_dep;
> +                /*inline data */
> +                *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
> +                *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
> +                *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
> +                *command_ptr++ = 0;
> +
> +                x_inner -= 2;
> +                y_inner += 1;
> +            }
> +            x_outer += 1;
> +        }
> +
> +        xtemp_outer = mb_width - 2;
> +        if (xtemp_outer < 0)
> +            xtemp_outer = 0;
> +        x_outer = xtemp_outer;
> +        y_outer = first_mb / mb_width;
> +        for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
> +            y_inner = y_outer;
> +            x_inner = x_outer;
> +            for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
> +                mb_intra_ub = 0;
> +                score_dep = 0;
> +                if (x_inner != 0) {
> +                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
> +                    score_dep |= MB_SCOREBOARD_A;
> +                }
> +                if (y_inner != mb_row) {
> +                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
> +                    score_dep |= MB_SCOREBOARD_B;
> +                    if (x_inner != 0)
> +                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
> +
> +                    if (x_inner != (mb_width -1)) {
> +                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
> +                        score_dep |= MB_SCOREBOARD_C;
> +                    }
> +                }
> +
> +                *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
> +                *command_ptr++ = kernel;
> +                *command_ptr++ = USE_SCOREBOARD;
> +                /* Indirect data */
> +                *command_ptr++ = 0;
> +                /* the (X, Y) term of scoreboard */
> +                *command_ptr++ = ((y_inner << 16) | x_inner);
> +                *command_ptr++ = score_dep;
> +                /*inline data */
> +                *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
> +                *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
> +
> +                *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
> +                *command_ptr++ = 0;
> +                x_inner -= 2;
> +                y_inner += 1;
> +            }
> +            x_outer++;
> +            if (x_outer >= mb_width) {
> +                y_outer += 1;
> +                x_outer = xtemp_outer;
> +            }
> +        }
> +    }
> +
> +    *command_ptr++ = MI_BATCH_BUFFER_END;
> +    *command_ptr++ = 0;
> +
> +    dri_bo_unmap(vme_context->vme_batchbuffer.bo);
> +}
> +
> +static void
> +gen9_vme_hevc_fill_vme_batchbuffer(VADriverContextP ctx,
> +                              struct encode_state *encode_state,
> +                              int mb_width, int mb_height,
> +                              int kernel,
> +                              int transform_8x8_mode_flag,
> +                              struct intel_encoder_context *encoder_context)
> +{
> +    struct gen6_vme_context *vme_context = encoder_context->vme_context;
> +    int mb_x = 0, mb_y = 0;
> +    int i, s;
> +    unsigned int *command_ptr;
> +    VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
> +    int log2_cu_size = pSequenceParameter->log2_min_luma_coding_block_size_minus3 + 3;
> +    int log2_ctb_size = pSequenceParameter->log2_diff_max_min_luma_coding_block_size + log2_cu_size;
> +
> +    int ctb_size = 1 << log2_ctb_size;
> +    int num_mb_in_ctb = (ctb_size + 15)/16;
> +    num_mb_in_ctb = num_mb_in_ctb * num_mb_in_ctb;
> +
> +    dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
> +    command_ptr = vme_context->vme_batchbuffer.bo->virtual;
> +
> +    for (s = 0; s < encode_state->num_slice_params_ext; s++) {
> +        VAEncSliceParameterBufferHEVC *pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[s]->buffer;
> +        int slice_mb_begin = pSliceParameter->slice_segment_address * num_mb_in_ctb;
> +        int slice_mb_number = pSliceParameter->num_ctu_in_slice * num_mb_in_ctb;
> +
> +        unsigned int mb_intra_ub;
> +        int slice_mb_x = slice_mb_begin % mb_width;
> +        for (i = 0; i < slice_mb_number;  ) {
> +            int mb_count = i + slice_mb_begin;
> +            mb_x = mb_count % mb_width;
> +            mb_y = mb_count / mb_width;
> +            mb_intra_ub = 0;
> +
> +            if (mb_x != 0) {
> +                mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
> +            }
> +            if (mb_y != 0) {
> +                mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
> +                if (mb_x != 0)
> +                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
> +                if (mb_x != (mb_width -1))
> +                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
> +            }
> +            if (i < mb_width) {
> +                if (i == 0)
> +                    mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_AE);
> +                mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_BCD_MASK);
> +                if ((i == (mb_width - 1)) && slice_mb_x) {
> +                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
> +                }
> +            }
> +
> +            if ((i == mb_width) && slice_mb_x) {
> +                mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_D);
> +            }
> +
> +            *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
> +            *command_ptr++ = kernel;
> +            *command_ptr++ = 0;
> +            *command_ptr++ = 0;
> +            *command_ptr++ = 0;
> +            *command_ptr++ = 0;
> +
> +            /*inline data */
> +            *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x);
> +            *command_ptr++ = ( (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
> +
> +            *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
> +            *command_ptr++ = 0;
> +            i += 1;
> +        }
> +    }
> +
> +    *command_ptr++ = MI_BATCH_BUFFER_END;
> +    *command_ptr++ = 0;
> +
> +    dri_bo_unmap(vme_context->vme_batchbuffer.bo);
> +}
> +
> +static void gen9_vme_hevc_pipeline_programing(VADriverContextP ctx,
> +                                         struct encode_state *encode_state,
> +                                         struct intel_encoder_context *encoder_context)
> +{
> +    struct gen6_vme_context *vme_context = encoder_context->vme_context;
> +    struct intel_batchbuffer *batch = encoder_context->base.batch;
> +    VAEncSliceParameterBufferHEVC *pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
> +    VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
> +    int width_in_mbs = (pSequenceParameter->pic_width_in_luma_samples + 15)/16;
> +    int height_in_mbs = (pSequenceParameter->pic_height_in_luma_samples + 15)/16;
> +    int kernel_shader;
> +    bool allow_hwscore = true;
> +    int s;
> +
> +    int log2_cu_size = pSequenceParameter->log2_min_luma_coding_block_size_minus3 + 3;
> +    int log2_ctb_size = pSequenceParameter->log2_diff_max_min_luma_coding_block_size + log2_cu_size;
> +
> +    int ctb_size = 1 << log2_ctb_size;
> +    int num_mb_in_ctb = (ctb_size + 15)/16;
> +    int transform_8x8_mode_flag = 1;
> +    num_mb_in_ctb = num_mb_in_ctb * num_mb_in_ctb;
> +
> +    for (s = 0; s < encode_state->num_slice_params_ext; s++) {
> +        pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[s]->buffer;
> +        int slice_mb_begin = pSliceParameter->slice_segment_address * num_mb_in_ctb;
> +        if ((slice_mb_begin % width_in_mbs)) {
> +            allow_hwscore = false;
> +            break;
> +        }
> +    }
> +
> +    if (pSliceParameter->slice_type == SLICE_TYPE_I) {
> +        kernel_shader = VME_INTRA_SHADER;
> +    } else if (pSliceParameter->slice_type == SLICE_TYPE_P) {
> +        kernel_shader = VME_INTER_SHADER;
> +    } else {
> +        kernel_shader = VME_BINTER_SHADER;
> +        if (!allow_hwscore)
> +            kernel_shader = VME_INTER_SHADER;
> +    }
> +    if (allow_hwscore)
> +        gen9wa_vme_hevc_walker_fill_vme_batchbuffer(ctx,
> +                                               encode_state,
> +                                               width_in_mbs, height_in_mbs,
> +                                               kernel_shader,
> +                                               transform_8x8_mode_flag,
> +                                               encoder_context);
> +    else
> +        gen9_vme_hevc_fill_vme_batchbuffer(ctx,
> +                                      encode_state,
> +                                      width_in_mbs, height_in_mbs,
> +                                      kernel_shader,
> +                                      transform_8x8_mode_flag,
> +                                      encoder_context);
> +
> +    intel_batchbuffer_start_atomic(batch, 0x1000);
> +    gen9_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch);
> +    BEGIN_BATCH(batch, 3);
> +    OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
> +    OUT_RELOC(batch,
> +              vme_context->vme_batchbuffer.bo,
> +              I915_GEM_DOMAIN_COMMAND, 0,
> +              0);
> +    OUT_BATCH(batch, 0);
> +    ADVANCE_BATCH(batch);
> +
> +    gen9_gpe_pipeline_end(ctx, &vme_context->gpe_context, batch);
> +
> +    intel_batchbuffer_end_atomic(batch);
> +}
> +
> +static VAStatus gen9_vme_hevc_prepare(VADriverContextP ctx,
> +                                 struct encode_state *encode_state,
> +                                 struct intel_encoder_context *encoder_context)
> +{
> +    VAStatus vaStatus = VA_STATUS_SUCCESS;
> +    VAEncSliceParameterBufferHEVC *pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
> +    int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I;
> +    VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
> +    struct gen6_vme_context *vme_context = encoder_context->vme_context;
> +
> +    /* here use the avc level for hevc vme */
> +    if (!vme_context->hevc_level ||
> +        (vme_context->hevc_level != pSequenceParameter->general_level_idc)) {
> +        vme_context->hevc_level = pSequenceParameter->general_level_idc;
> +    }
> +
> +    intel_vme_hevc_update_mbmv_cost(ctx, encode_state, encoder_context);
> +
> +    /* Set up all the memory objects */
> +    gen9_vme_hevc_surface_setup(ctx, encode_state, is_intra, encoder_context);
> +    gen9_vme_interface_setup(ctx, encode_state, encoder_context);
> +    //gen9_vme_vme_state_setup(ctx, encode_state, is_intra, encoder_context);
> +    gen9_vme_constant_setup(ctx, encode_state, encoder_context);
> +
> +    /* Program the media pipeline */
> +    gen9_vme_hevc_pipeline_programing(ctx, encode_state, encoder_context);
> +
> +    return vaStatus;
> +}
> +
> +
> +static VAStatus
> +gen9_vme_hevc_pipeline(VADriverContextP ctx,
> +                  VAProfile profile,
> +                  struct encode_state *encode_state,
> +                  struct intel_encoder_context *encoder_context)
> +{
> +    gen9_vme_media_init(ctx, encoder_context);
> +    gen9_vme_hevc_prepare(ctx, encode_state, encoder_context);
> +    gen9_vme_run(ctx, encode_state, encoder_context);
> +    gen9_vme_stop(ctx, encode_state, encoder_context);
> +
> +    return VA_STATUS_SUCCESS;
> +}
> +
> +
>  static void
>  gen9_vme_context_destroy(void *context)
>  {
> @@ -1147,8 +1625,8 @@ gen9_vme_context_destroy(void *context)
>      vme_context->vme_batchbuffer.bo = NULL;
>  
>      if (vme_context->vme_state_message) {
> -	free(vme_context->vme_state_message);
> -	vme_context->vme_state_message = NULL;
> +        free(vme_context->vme_state_message);
> +        vme_context->vme_state_message = NULL;
>      }
>  
>      free(vme_context);
> @@ -1172,6 +1650,12 @@ Bool gen9_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *e
>          vme_kernel_list = gen9_vme_mpeg2_kernels;
>          encoder_context->vme_pipeline = gen9_vme_mpeg2_pipeline;
>          i965_kernel_num = sizeof(gen9_vme_mpeg2_kernels) / sizeof(struct i965_kernel);
> +        break;
> +
> +   case CODEC_HEVC:
> +        vme_kernel_list = gen9_vme_hevc_kernels;
> +        encoder_context->vme_pipeline = gen9_vme_hevc_pipeline;
> +        i965_kernel_num = sizeof(gen9_vme_hevc_kernels) / sizeof(struct i965_kernel);
>  
>          break;
>  
> -- 
> 1.9.1
> 
> _______________________________________________
> Libva mailing list
> Libva at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/libva




More information about the Libva mailing list