[Libva] [PATCH V3 2/2] Add multi quality levels encoding support for GEN7

Zhao, Yakui yakui.zhao at intel.com
Tue Jun 10 22:30:55 PDT 2014


On Tue, 2014-06-10 at 20:49 -0600, Zhong Li wrote:
> Two encoding quality levels are supported on GEN7.
> The default quality level is 1, which gives better quality
> but higher GPU usage and worse performance.
> The second quality level is 2, which gives lower GPU usage and
> better performance, but worse quality.
> Support for multiple quality levels on other platforms will be added later.
> 
> v1->v2: 1. follow haihao's comments to init and check quality_level.
>         2. remove CBR limitation for low quality level.
> 

This will be pushed.

Thanks.
    Yakui

> Signed-off-by: Zhong Li <zhong.li at intel.com>
> ---
>  src/gen6_mfc.c       |  130 ++++++++++++++++++++++++++++++++++++++------------
>  src/gen7_vme.c       |   84 +++++++++++++++++++++++++++-----
>  src/i965_drv_video.c |   10 ++++
>  src/i965_drv_video.h |    5 ++
>  src/i965_encoder.c   |   38 ++++++++++++++-
>  src/i965_encoder.h   |    2 +
>  6 files changed, 226 insertions(+), 43 deletions(-)
> 
> diff --git a/src/gen6_mfc.c b/src/gen6_mfc.c
> index 0a10054..30cdb66 100644
> --- a/src/gen6_mfc.c
> +++ b/src/gen6_mfc.c
> @@ -676,8 +676,6 @@ gen6_mfc_stop(VADriverContextP ctx,
>      return VA_STATUS_SUCCESS;
>  }
>  
> -#if __SOFTWARE__
> -
>  static int
>  gen6_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb, int qp,unsigned int *msg,
>                                struct intel_encoder_context *encoder_context,
> @@ -768,6 +766,58 @@ gen6_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, in
>  
>      return len_in_dwords;
>  }
> + 
> +static int
> +gen6_mfc_avc_pak_object_inter2(VADriverContextP ctx, int x, int y, int end_mb, int qp,
> +                              unsigned int offset,
> +                              struct intel_encoder_context *encoder_context,
> +                              struct intel_batchbuffer *batch)
> +{
> +    struct gen6_vme_context *vme_context = encoder_context->vme_context;
> +    int len_in_dwords = 11;
> +
> +    if (batch == NULL)
> +        batch = encoder_context->base.batch;
> +
> +    BEGIN_BCS_BATCH(batch, len_in_dwords);
> +
> +    OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
> +
> +    OUT_BCS_BATCH(batch, 32);         /* 32 MV*/
> +    OUT_BCS_BATCH(batch, offset);
> +
> +    OUT_BCS_BATCH(batch, 
> +                  (1 << 24) |     /* PackedMvNum, Debug*/
> +                  (4 << 20) |     /* 8 MV, SNB don't use it*/
> +                  (1 << 19) |     /* CbpDcY */
> +                  (1 << 18) |     /* CbpDcU */
> +                  (1 << 17) |     /* CbpDcV */
> +                  (0 << 15) |     /* Transform8x8Flag = 0*/
> +                  (0 << 14) |     /* Frame based*/
> +                  (0 << 13) |     /* Inter MB */
> +                  (1 << 8)  |     /* MbType = P_L0_16x16 */   
> +                  (0 << 7)  |     /* MBZ for frame */
> +                  (0 << 6)  |     /* MBZ */
> +                  (2 << 4)  |     /* MBZ for inter*/
> +                  (0 << 3)  |     /* MBZ */
> +                  (0 << 2)  |     /* SkipMbFlag */
> +                  (0 << 0));      /* InterMbMode */
> +
> +    OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x);        /* Code Block Pattern for Y*/
> +    OUT_BCS_BATCH(batch, 0x000F000F);                         /* Code Block Pattern */    
> +    OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp);    /* Last MB */
> +
> +    /*Stuff for Inter MB*/
> +    OUT_BCS_BATCH(batch, 0x0);        
> +    OUT_BCS_BATCH(batch, 0x0);    
> +    OUT_BCS_BATCH(batch, 0x0);        
> +
> +    OUT_BCS_BATCH(batch, 0xF0020000); /*MaxSizeInWord and TargetSzieInWord*/
> +
> +    ADVANCE_BCS_BATCH(batch);
> +
> +    return len_in_dwords;
> +}
>  
>  static void 
>  gen6_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
> @@ -791,6 +841,7 @@ gen6_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
>      unsigned int tail_data[] = { 0x0, 0x0 };
>      int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
>      int is_intra = slice_type == SLICE_TYPE_I;
> +    int is_low_quality = (encoder_context->quality_level == ENCODER_LOW_QUALITY);
>  
>      if (rate_control_mode == VA_RC_CBR) {
>          qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
> @@ -818,36 +869,54 @@ gen6_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
>      dri_bo_map(vme_context->vme_output.bo , 1);
>      msg = (unsigned int *)vme_context->vme_output.bo->virtual;
>  
> -    if (is_intra) {
> -        msg += pSliceParameter->macroblock_address * INTRA_VME_OUTPUT_IN_DWS;
> -    } else {
> -        msg += pSliceParameter->macroblock_address * INTER_VME_OUTPUT_IN_DWS;
> -        msg += 32; /* the first 32 DWs are MVs */
> -        offset = pSliceParameter->macroblock_address * INTER_VME_OUTPUT_IN_BYTES;
> -    }
> -   
> -    for (i = pSliceParameter->macroblock_address; 
> -         i < pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks; i++) {
> -        int last_mb = (i == (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks - 1) );
> -        x = i % width_in_mbs;
> -        y = i / width_in_mbs;
> +    if (is_low_quality) {
> +        for (i = pSliceParameter->macroblock_address; 
> +                i < pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks; i++) {
> +            int last_mb = (i == (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks - 1) );
> +            x = i % width_in_mbs;
> +            y = i / width_in_mbs;
>  
> +            if (is_intra) {
> +                assert(msg);
> +                gen6_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
> +                msg += 4;
> +            } else {
> +                gen6_mfc_avc_pak_object_inter2(ctx, x, y, last_mb, qp, offset, encoder_context, slice_batch);
> +                offset += 64;
> +            }
> +        }
> +    } else {
>          if (is_intra) {
> -            assert(msg);
> -            gen6_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
> -            msg += INTRA_VME_OUTPUT_IN_DWS;
> +            msg += pSliceParameter->macroblock_address * INTRA_VME_OUTPUT_IN_DWS;
>          } else {
> -            if (msg[0] & INTRA_MB_FLAG_MASK) {
> +            msg += pSliceParameter->macroblock_address * INTER_VME_OUTPUT_IN_DWS;
> +            msg += 32; /* the first 32 DWs are MVs */
> +            offset = pSliceParameter->macroblock_address * INTER_VME_OUTPUT_IN_BYTES;
> +        }
> +
> +        for (i = pSliceParameter->macroblock_address; 
> +                i < pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks; i++) {
> +            int last_mb = (i == (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks - 1) );
> +            x = i % width_in_mbs;
> +            y = i / width_in_mbs;
> +
> +            if (is_intra) {
> +                assert(msg);
>                  gen6_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
> +                msg += INTRA_VME_OUTPUT_IN_DWS;
>              } else {
> -                gen6_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, msg, offset, encoder_context, 0, 0, slice_type, slice_batch);
> -            }
> +                if (msg[0] & INTRA_MB_FLAG_MASK) {
> +                    gen6_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
> +                } else {
> +                    gen6_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, msg, offset, encoder_context, 0, 0, slice_type, slice_batch);
> +                }
>  
> -            msg += INTER_VME_OUTPUT_IN_DWS;
> -            offset += INTER_VME_OUTPUT_IN_BYTES;
> +                msg += INTER_VME_OUTPUT_IN_DWS;
> +                offset += INTER_VME_OUTPUT_IN_BYTES;
> +            }
>          }
>      }
> -   
> +
>      dri_bo_unmap(vme_context->vme_output.bo);
>  
>      if ( last_slice ) {    
> @@ -896,8 +965,6 @@ gen6_mfc_avc_software_batchbuffer(VADriverContextP ctx,
>      return batch_bo;
>  }
>  
> -#else
> -
>  static void
>  gen6_mfc_batchbuffer_surfaces_input(VADriverContextP ctx,
>                                      struct encode_state *encode_state,
> @@ -1291,9 +1358,6 @@ gen6_mfc_avc_hardware_batchbuffer(VADriverContextP ctx,
>      return mfc_context->mfc_batchbuffer_surface.bo;
>  }
>  
> -#endif
> -
> -
>  static void
>  gen6_mfc_avc_pipeline_programing(VADriverContextP ctx,
>                                   struct encode_state *encode_state,
> @@ -1308,11 +1372,15 @@ gen6_mfc_avc_pipeline_programing(VADriverContextP ctx,
>          return; 
>      }
>  
> +    if (encoder_context->quality_level == ENCODER_LOW_QUALITY )
> +        slice_batch_bo = gen6_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context);
> +    else {
>  #if __SOFTWARE__
> -    slice_batch_bo = gen6_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context);
> +        slice_batch_bo = gen6_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context);
>  #else
> -    slice_batch_bo = gen6_mfc_avc_hardware_batchbuffer(ctx, encode_state, encoder_context);
> +        slice_batch_bo = gen6_mfc_avc_hardware_batchbuffer(ctx, encode_state, encoder_context);
>  #endif
> +    }
>  
>      // begin programing
>      intel_batchbuffer_start_atomic_bcs(batch, 0x4000); 
> diff --git a/src/gen7_vme.c b/src/gen7_vme.c
> index 042fe5d..135a5ad 100644
> --- a/src/gen7_vme.c
> +++ b/src/gen7_vme.c
> @@ -63,6 +63,8 @@ enum VIDEO_CODING_TYPE{
>  enum AVC_VME_KERNEL_TYPE{ 
>      AVC_VME_INTRA_SHADER = 0,
>      AVC_VME_INTER_SHADER,
> +    AVC_VME_OLD_INTRA_SHADER,
> +    AVC_VME_OLD_INTER_SHADER,
>      AVC_VME_BATCHBUFFER,
>      AVC_VME_BINTER_SHADER,
>      AVC_VME_KERNEL_SUM
> @@ -83,6 +85,14 @@ static const uint32_t gen7_vme_inter_frame[][4] = {
>  #include "shaders/vme/inter_frame_ivb.g7b"
>  };
>  
> +static const uint32_t gen7_vme_old_intra_frame[][4] = {
> +#include "shaders/vme_old/intra_frame.g7b"
> +};
> +
> +static const uint32_t gen7_vme_old_inter_frame[][4] = {
> +#include "shaders/vme_old/inter_frame.g7b"
> +};
> +
>  static const uint32_t gen7_vme_batchbuffer[][4] = {
>  #include "shaders/vme/batchbuffer.g7b"
>  };
> @@ -107,6 +117,20 @@ static struct i965_kernel gen7_vme_kernels[] = {
>          NULL
>      },
>      {
> +        "AVC VME Old Intra Frame",
> +        AVC_VME_OLD_INTRA_SHADER,
> +        gen7_vme_old_intra_frame, 			
> +        sizeof(gen7_vme_old_intra_frame),		
> +        NULL
> +    },
> +    {
> +        "AVC VME Old Inter Frame",
> +        AVC_VME_OLD_INTER_SHADER,
> +        gen7_vme_old_inter_frame,
> +        sizeof(gen7_vme_old_inter_frame),
> +        NULL
> +    },
> +    {
>          "AVC VME BATCHBUFFER",
>          AVC_VME_BATCHBUFFER,
>          gen7_vme_batchbuffer,
> @@ -359,6 +383,36 @@ static VAStatus gen7_vme_constant_setup(VADriverContextP ctx,
>      return VA_STATUS_SUCCESS;
>  }
>  
> +static VAStatus
> +gen7_vme_vme_state_setup(VADriverContextP ctx,
> +        struct encode_state *encode_state,
> +        int is_intra,
> +        struct intel_encoder_context *encoder_context)
> +{
> +    struct gen6_vme_context *vme_context = encoder_context->vme_context;
> +    unsigned int *vme_state_message;
> +    int i;
> +
> +    //building VME state message
> +    dri_bo_map(vme_context->vme_state.bo, 1);
> +    assert(vme_context->vme_state.bo->virtual);
> +    vme_state_message = (unsigned int *)vme_context->vme_state.bo->virtual;
> +
> +    vme_state_message[0] = 0x10010101;
> +    vme_state_message[1] = 0x100F0F0F;
> +    vme_state_message[2] = 0x10010101;
> +    vme_state_message[3] = 0x000F0F0F;
> +    for(i = 4; i < 14; i++) {
> +        vme_state_message[i] = 0x00000000;
> +    }	
> +
> +    for(i = 14; i < 32; i++) {
> +        vme_state_message[i] = 0x00000000;
> +    }
> +
> +    dri_bo_unmap( vme_context->vme_state.bo);
> +    return VA_STATUS_SUCCESS;
> +}
>  
>  static VAStatus gen7_vme_avc_state_setup(VADriverContextP ctx,
>                                           struct encode_state *encode_state,
> @@ -598,25 +652,30 @@ static void gen7_vme_pipeline_programing(VADriverContextP ctx,
>      int s;
>      bool allow_hwscore = true;
>      int kernel_shader;
> -
> -    for (s = 0; s < encode_state->num_slice_params_ext; s++) {
> -        pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer; 
> -        if ((pSliceParameter->macroblock_address % width_in_mbs)) {
> -            allow_hwscore = false;
> -            break;
> -	}
> +    unsigned int is_low_quality = (encoder_context->quality_level == ENCODER_LOW_QUALITY);
> +
> +    if (is_low_quality)
> +        allow_hwscore = false;
> +    else {
> +        for (s = 0; s < encode_state->num_slice_params_ext; s++) {
> +            pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer; 
> +            if ((pSliceParameter->macroblock_address % width_in_mbs)) {
> +                allow_hwscore = false;
> +                break;
> +            }
> +        }
>      }
>  
>      if ((pSliceParameter->slice_type == SLICE_TYPE_I) ||
>  	(pSliceParameter->slice_type == SLICE_TYPE_I)) {
> -	kernel_shader = AVC_VME_INTRA_SHADER;
> +	kernel_shader = (is_low_quality ? AVC_VME_OLD_INTRA_SHADER : AVC_VME_INTRA_SHADER);
>      } else if ((pSliceParameter->slice_type == SLICE_TYPE_P) ||
>                 (pSliceParameter->slice_type == SLICE_TYPE_SP)) {
> -	kernel_shader = AVC_VME_INTER_SHADER;
> +	kernel_shader = (is_low_quality ? AVC_VME_OLD_INTER_SHADER : AVC_VME_INTER_SHADER);
>      } else {
>  	kernel_shader = AVC_VME_BINTER_SHADER;
>  	if (!allow_hwscore)
> -            kernel_shader = AVC_VME_INTER_SHADER;
> +            kernel_shader = (is_low_quality ? AVC_VME_OLD_INTER_SHADER : AVC_VME_INTER_SHADER);
>      }
>  
>      if (allow_hwscore)
> @@ -668,7 +727,10 @@ static VAStatus gen7_vme_prepare(VADriverContextP ctx,
>      gen7_vme_surface_setup(ctx, encode_state, is_intra, encoder_context);
>      gen7_vme_interface_setup(ctx, encode_state, encoder_context);
>      gen7_vme_constant_setup(ctx, encode_state, encoder_context);
> -    gen7_vme_avc_state_setup(ctx, encode_state, is_intra, encoder_context);
> +    if (encoder_context->quality_level == ENCODER_LOW_QUALITY)
> +        gen7_vme_vme_state_setup(ctx, encode_state, is_intra, encoder_context);
> +    else
> +        gen7_vme_avc_state_setup(ctx, encode_state, is_intra, encoder_context);
>  
>      /*Programing media pipeline*/
>      gen7_vme_pipeline_programing(ctx, encode_state, encoder_context);
> diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c
> index df625bb..54e657d 100755
> --- a/src/i965_drv_video.c
> +++ b/src/i965_drv_video.c
> @@ -629,6 +629,7 @@ i965_GetConfigAttributes(VADriverContextP ctx,
>                           int num_attribs)
>  {
>      VAStatus va_status;
> +    struct i965_driver_data *i965 = i965_driver_data(ctx);
>      int i;
>  
>      va_status = i965_validate_config(ctx, profile, entrypoint);
> @@ -674,6 +675,15 @@ i965_GetConfigAttributes(VADriverContextP ctx,
>  		break;
>  	    }
>  
> +	case VAConfigAttribEncQualityRange:
> +	    if (entrypoint == VAEntrypointEncSlice) {
> +		attrib_list[i].value = 1;
> +                if (profile == VAProfileH264ConstrainedBaseline &&
> +                    IS_GEN7(i965->intel.device_info))
> +                    attrib_list[i].value = ENCODER_QUALITY_RANGE;
> +		break;
> +	    }
> +
>          default:
>              /* Do nothing */
>              attrib_list[i].value = VA_ATTRIB_NOT_SUPPORTED;
> diff --git a/src/i965_drv_video.h b/src/i965_drv_video.h
> index 63366cb..6d7d0fb 100644
> --- a/src/i965_drv_video.h
> +++ b/src/i965_drv_video.h
> @@ -65,6 +65,11 @@
>  #define DEFAULT_HUE             0
>  #define DEFAULT_SATURATION      10
>  
> +#define ENCODER_QUALITY_RANGE     2
> +#define ENCODER_DEFAULT_QUALITY   1
> +#define ENCODER_HIGH_QUALITY      ENCODER_DEFAULT_QUALITY
> +#define ENCODER_LOW_QUALITY       2
> +
>  struct i965_surface
>  {
>      struct object_base *base;
> diff --git a/src/i965_encoder.c b/src/i965_encoder.c
> index 14c37bb..126dcfc 100644
> --- a/src/i965_encoder.c
> +++ b/src/i965_encoder.c
> @@ -123,6 +123,30 @@ intel_encoder_check_yuv_surface(VADriverContextP ctx,
>  }
>  
>  static VAStatus
> +intel_encoder_check_misc_parameter(VADriverContextP ctx,
> +                                  struct encode_state *encode_state,
> +                                  struct intel_encoder_context *encoder_context)
> +{
> +
> +    if (encode_state->misc_param[VAEncMiscParameterTypeQualityLevel] &&
> +        encode_state->misc_param[VAEncMiscParameterTypeQualityLevel]->buffer) {
> +        VAEncMiscParameterBuffer* pMiscParam = (VAEncMiscParameterBuffer*)encode_state->misc_param[VAEncMiscParameterTypeQualityLevel]->buffer;
> +        VAEncMiscParameterBufferQualityLevel* param_quality_level = (VAEncMiscParameterBufferQualityLevel*)pMiscParam->data;
> +        encoder_context->quality_level = param_quality_level->quality_level;
> +
> +        if (encoder_context->quality_level == 0)
> +            encoder_context->quality_level = ENCODER_DEFAULT_QUALITY;
> +        else if (encoder_context->quality_level > encoder_context->quality_range)
> +            goto error;
> +   }
> +
> +    return VA_STATUS_SUCCESS;
> +
> +error:
> +    return VA_STATUS_ERROR_INVALID_PARAMETER;
> +}
> +
> +static VAStatus
>  intel_encoder_check_avc_parameter(VADriverContextP ctx,
>                                    struct encode_state *encode_state,
>                                    struct intel_encoder_context *encoder_context)
> @@ -278,6 +302,9 @@ intel_encoder_sanity_check_input(VADriverContextP ctx,
>  
>      vaStatus = intel_encoder_check_yuv_surface(ctx, profile, encode_state, encoder_context);
>  
> +    if (vaStatus == VA_STATUS_SUCCESS)
> +        vaStatus = intel_encoder_check_misc_parameter(ctx, encode_state, encoder_context);
> +
>  out:    
>      return vaStatus;
>  }
> @@ -335,6 +362,8 @@ intel_enc_hw_context_init(VADriverContextP ctx,
>      encoder_context->input_yuv_surface = VA_INVALID_SURFACE;
>      encoder_context->is_tmp_id = 0;
>      encoder_context->rate_control_mode = VA_RC_NONE;
> +    encoder_context->quality_level = ENCODER_DEFAULT_QUALITY;
> +    encoder_context->quality_range = 1;
>  
>      switch (obj_config->profile) {
>      case VAProfileMPEG2Simple:
> @@ -395,7 +424,14 @@ gen6_enc_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
>  struct hw_context *
>  gen7_enc_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
>  {
> -    return intel_enc_hw_context_init(ctx, obj_config, gen7_vme_context_init, gen7_mfc_context_init);
> +    struct intel_encoder_context *encoder_context;
> +
> +    encoder_context = (struct intel_encoder_context *)intel_enc_hw_context_init(ctx, obj_config, gen7_vme_context_init, gen7_mfc_context_init);
> +
> +    if (obj_config->profile == VAProfileH264ConstrainedBaseline)
> +        encoder_context->quality_range = ENCODER_QUALITY_RANGE;
> +
> +    return (struct hw_context *)encoder_context;
>  }
>  
>  struct hw_context *
> diff --git a/src/i965_encoder.h b/src/i965_encoder.h
> index 71396d6..20d49fc 100644
> --- a/src/i965_encoder.h
> +++ b/src/i965_encoder.h
> @@ -43,6 +43,8 @@ struct intel_encoder_context
>      VASurfaceID input_yuv_surface;
>      int is_tmp_id;
>      unsigned int rate_control_mode;
> +    unsigned int quality_level;
> +    unsigned int quality_range;
>      void *vme_context;
>      void *mfc_context;
>      void (*vme_context_destroy)(void *vme_context);




More information about the Libva mailing list