[Libva] [PATCH V2 2/2] Add multi quality levels encoding support for GEN7
Xiang, Haihao
haihao.xiang at intel.com
Thu Jun 5 08:23:43 PDT 2014
> -----Original Message-----
> From: Libva [mailto:libva-bounces at lists.freedesktop.org] On Behalf Of Zhong
> Li
> Sent: Thursday, June 05, 2014 3:08 PM
> To: libva at lists.freedesktop.org
> Subject: [Libva] [PATCH V2 2/2] Add multi quality levels encoding support for
> GEN7
>
> Two encoding quality levels are supported on GEN7.
> Default quality level is set to be 1, which has better quality, but higher gpu
> usage and worse performance.
> The second quality level is set to be 2, which has lower gpu usage and better
> performance, but worse quality.
> Other platforms support for multi-quality-level will be added later.
>
> Signed-off-by: Zhong Li <zhong.li at intel.com>
> ---
> src/gen6_mfc.c | 130
> ++++++++++++++++++++++++++++++++++++++------------
> src/gen7_vme.c | 84 +++++++++++++++++++++++++++-----
> src/i965_drv_video.c | 12 +++++
> src/i965_drv_video.h | 7 +++
> src/i965_encoder.c | 21 +++++++-
> 5 files changed, 211 insertions(+), 43 deletions(-)
>
> diff --git a/src/gen6_mfc.c b/src/gen6_mfc.c index 0a10054..f1cd7ea 100644
> --- a/src/gen6_mfc.c
> +++ b/src/gen6_mfc.c
> @@ -676,8 +676,6 @@ gen6_mfc_stop(VADriverContextP ctx,
> return VA_STATUS_SUCCESS;
> }
>
> -#if __SOFTWARE__
> -
> static int
> gen6_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int
> end_mb, int qp,unsigned int *msg,
> struct intel_encoder_context
> *encoder_context, @@ -768,6 +766,58 @@
> gen6_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb,
> in
>
> return len_in_dwords;
> }
> +
> +static int
> +gen6_mfc_avc_pak_object_inter2(VADriverContextP ctx, int x, int y, int
> end_mb, int qp,
> + unsigned int offset,
> + struct intel_encoder_context
> *encoder_context,
> + struct intel_batchbuffer *batch) {
> + struct gen6_vme_context *vme_context =
> encoder_context->vme_context;
> + int len_in_dwords = 11;
> +
> + if (batch == NULL)
> + batch = encoder_context->base.batch;
> +
> + BEGIN_BCS_BATCH(batch, len_in_dwords);
> +
> + OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
> +
> + OUT_BCS_BATCH(batch, 32); /* 32 MV*/
> + OUT_BCS_BATCH(batch, offset);
> +
> + OUT_BCS_BATCH(batch,
> + (1 << 24) | /* PackedMvNum, Debug*/
> + (4 << 20) | /* 8 MV, SNB don't use it*/
> + (1 << 19) | /* CbpDcY */
> + (1 << 18) | /* CbpDcU */
> + (1 << 17) | /* CbpDcV */
> + (0 << 15) | /* Transform8x8Flag = 0*/
> + (0 << 14) | /* Frame based*/
> + (0 << 13) | /* Inter MB */
> + (1 << 8) | /* MbType = P_L0_16x16 */
> + (0 << 7) | /* MBZ for frame */
> + (0 << 6) | /* MBZ */
> + (2 << 4) | /* MBZ for inter*/
> + (0 << 3) | /* MBZ */
> + (0 << 2) | /* SkipMbFlag */
> + (0 << 0)); /* InterMbMode */
> +
> + OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x); /* Code
> Block Pattern for Y*/
> + OUT_BCS_BATCH(batch, 0x000F000F); /*
> Code Block Pattern */
> + OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp); /* Last MB
> */
> +
> + /*Stuff for Inter MB*/
> + OUT_BCS_BATCH(batch, 0x0);
> + OUT_BCS_BATCH(batch, 0x0);
> + OUT_BCS_BATCH(batch, 0x0);
> +
> + OUT_BCS_BATCH(batch, 0xF0020000); /*MaxSizeInWord and
> + TargetSzieInWord*/
> +
> + ADVANCE_BCS_BATCH(batch);
> +
> + return len_in_dwords;
> +}
>
> static void
> gen6_mfc_avc_pipeline_slice_programing(VADriverContextP ctx, @@ -791,6
> +841,7 @@ gen6_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
> unsigned int tail_data[] = { 0x0, 0x0 };
> int slice_type =
> intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
> int is_intra = slice_type == SLICE_TYPE_I;
> + int is_low_quality = (encode_state->quality_level ==
> + ENCODER_LOW_QUALITY);
>
> if (rate_control_mode == VA_RC_CBR) {
> qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
> @@ -818,36 +869,54 @@
> gen6_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
> dri_bo_map(vme_context->vme_output.bo , 1);
> msg = (unsigned int *)vme_context->vme_output.bo->virtual;
>
> - if (is_intra) {
> - msg += pSliceParameter->macroblock_address *
> INTRA_VME_OUTPUT_IN_DWS;
> - } else {
> - msg += pSliceParameter->macroblock_address *
> INTER_VME_OUTPUT_IN_DWS;
> - msg += 32; /* the first 32 DWs are MVs */
> - offset = pSliceParameter->macroblock_address *
> INTER_VME_OUTPUT_IN_BYTES;
> - }
> -
> - for (i = pSliceParameter->macroblock_address;
> - i < pSliceParameter->macroblock_address +
> pSliceParameter->num_macroblocks; i++) {
> - int last_mb = (i == (pSliceParameter->macroblock_address +
> pSliceParameter->num_macroblocks - 1) );
> - x = i % width_in_mbs;
> - y = i / width_in_mbs;
> + if (is_low_quality) {
> + for (i = pSliceParameter->macroblock_address;
> + i < pSliceParameter->macroblock_address +
> pSliceParameter->num_macroblocks; i++) {
> + int last_mb = (i == (pSliceParameter->macroblock_address +
> pSliceParameter->num_macroblocks - 1) );
> + x = i % width_in_mbs;
> + y = i / width_in_mbs;
>
> + if (is_intra) {
> + assert(msg);
> + gen6_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg,
> encoder_context, 0, 0, slice_batch);
> + msg += 4;
> + } else {
> + gen6_mfc_avc_pak_object_inter2(ctx, x, y, last_mb, qp,
> offset, encoder_context, slice_batch);
> + offset += 64;
> + }
> + }
> + } else {
> if (is_intra) {
> - assert(msg);
> - gen6_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg,
> encoder_context, 0, 0, slice_batch);
> - msg += INTRA_VME_OUTPUT_IN_DWS;
> + msg += pSliceParameter->macroblock_address *
> + INTRA_VME_OUTPUT_IN_DWS;
> } else {
> - if (msg[0] & INTRA_MB_FLAG_MASK) {
> + msg += pSliceParameter->macroblock_address *
> INTER_VME_OUTPUT_IN_DWS;
> + msg += 32; /* the first 32 DWs are MVs */
> + offset = pSliceParameter->macroblock_address *
> INTER_VME_OUTPUT_IN_BYTES;
> + }
> +
> + for (i = pSliceParameter->macroblock_address;
> + i < pSliceParameter->macroblock_address +
> pSliceParameter->num_macroblocks; i++) {
> + int last_mb = (i == (pSliceParameter->macroblock_address +
> pSliceParameter->num_macroblocks - 1) );
> + x = i % width_in_mbs;
> + y = i / width_in_mbs;
> +
> + if (is_intra) {
> + assert(msg);
> gen6_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg,
> encoder_context, 0, 0, slice_batch);
> + msg += INTRA_VME_OUTPUT_IN_DWS;
> } else {
> - gen6_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, msg,
> offset, encoder_context, 0, 0, slice_type, slice_batch);
> - }
> + if (msg[0] & INTRA_MB_FLAG_MASK) {
> + gen6_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp,
> msg, encoder_context, 0, 0, slice_batch);
> + } else {
> + gen6_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp,
> msg, offset, encoder_context, 0, 0, slice_type, slice_batch);
> + }
>
> - msg += INTER_VME_OUTPUT_IN_DWS;
> - offset += INTER_VME_OUTPUT_IN_BYTES;
> + msg += INTER_VME_OUTPUT_IN_DWS;
> + offset += INTER_VME_OUTPUT_IN_BYTES;
> + }
> }
> }
> -
> +
> dri_bo_unmap(vme_context->vme_output.bo);
>
> if ( last_slice ) {
> @@ -896,8 +965,6 @@
> gen6_mfc_avc_software_batchbuffer(VADriverContextP ctx,
> return batch_bo;
> }
>
> -#else
> -
> static void
> gen6_mfc_batchbuffer_surfaces_input(VADriverContextP ctx,
> struct encode_state *encode_state,
> @@ -1291,9 +1358,6 @@
> gen6_mfc_avc_hardware_batchbuffer(VADriverContextP ctx,
> return mfc_context->mfc_batchbuffer_surface.bo;
> }
>
> -#endif
> -
> -
> static void
> gen6_mfc_avc_pipeline_programing(VADriverContextP ctx,
> struct encode_state *encode_state,
> @@ -1308,11 +1372,15 @@
> gen6_mfc_avc_pipeline_programing(VADriverContextP ctx,
> return;
> }
>
> + if (encode_state->quality_level == ENCODER_LOW_QUALITY )
> + slice_batch_bo = gen6_mfc_avc_software_batchbuffer(ctx,
> encode_state, encoder_context);
> + else {
> #if __SOFTWARE__
> - slice_batch_bo = gen6_mfc_avc_software_batchbuffer(ctx, encode_state,
> encoder_context);
> + slice_batch_bo = gen6_mfc_avc_software_batchbuffer(ctx,
> + encode_state, encoder_context);
> #else
> - slice_batch_bo = gen6_mfc_avc_hardware_batchbuffer(ctx, encode_state,
> encoder_context);
> + slice_batch_bo = gen6_mfc_avc_hardware_batchbuffer(ctx,
> + encode_state, encoder_context);
> #endif
> + }
>
> // begin programing
> intel_batchbuffer_start_atomic_bcs(batch, 0x4000); diff --git
> a/src/gen7_vme.c b/src/gen7_vme.c index 042fe5d..b45f558 100644
> --- a/src/gen7_vme.c
> +++ b/src/gen7_vme.c
> @@ -63,6 +63,8 @@ enum VIDEO_CODING_TYPE{ enum
> AVC_VME_KERNEL_TYPE{
> AVC_VME_INTRA_SHADER = 0,
> AVC_VME_INTER_SHADER,
> + AVC_VME_OLD_INTRA_SHADER,
> + AVC_VME_OLD_INTER_SHADER,
> AVC_VME_BATCHBUFFER,
> AVC_VME_BINTER_SHADER,
> AVC_VME_KERNEL_SUM
> @@ -83,6 +85,14 @@ static const uint32_t gen7_vme_inter_frame[][4] =
> { #include "shaders/vme/inter_frame_ivb.g7b"
> };
>
> +static const uint32_t gen7_vme_old_intra_frame[][4] = { #include
> +"shaders/vme_old/intra_frame.g7b"
> +};
> +
> +static const uint32_t gen7_vme_old_inter_frame[][4] = { #include
> +"shaders/vme_old/inter_frame.g7b"
> +};
> +
> static const uint32_t gen7_vme_batchbuffer[][4] = { #include
> "shaders/vme/batchbuffer.g7b"
> };
> @@ -107,6 +117,20 @@ static struct i965_kernel gen7_vme_kernels[] = {
> NULL
> },
> {
> + "AVC VME Old Intra Frame",
> + AVC_VME_OLD_INTRA_SHADER,
> + gen7_vme_old_intra_frame,
> + sizeof(gen7_vme_old_intra_frame),
> + NULL
> + },
> + {
> + "AVC VME Old Inter Frame",
> + AVC_VME_OLD_INTER_SHADER,
> + gen7_vme_old_inter_frame,
> + sizeof(gen7_vme_old_inter_frame),
> + NULL
> + },
> + {
> "AVC VME BATCHBUFFER",
> AVC_VME_BATCHBUFFER,
> gen7_vme_batchbuffer,
> @@ -359,6 +383,36 @@ static VAStatus
> gen7_vme_constant_setup(VADriverContextP ctx,
> return VA_STATUS_SUCCESS;
> }
>
> +static VAStatus
> +gen7_vme_vme_state_setup(VADriverContextP ctx,
> + struct encode_state *encode_state,
> + int is_intra,
> + struct intel_encoder_context *encoder_context) {
> + struct gen6_vme_context *vme_context =
> encoder_context->vme_context;
> + unsigned int *vme_state_message;
> + int i;
> +
> + //building VME state message
> + dri_bo_map(vme_context->vme_state.bo, 1);
> + assert(vme_context->vme_state.bo->virtual);
> + vme_state_message = (unsigned int
> + *)vme_context->vme_state.bo->virtual;
> +
> + vme_state_message[0] = 0x10010101;
> + vme_state_message[1] = 0x100F0F0F;
> + vme_state_message[2] = 0x10010101;
> + vme_state_message[3] = 0x000F0F0F;
> + for(i = 4; i < 14; i++) {
> + vme_state_message[i] = 0x00000000;
> + }
> +
> + for(i = 14; i < 32; i++) {
> + vme_state_message[i] = 0x00000000;
> + }
> +
> + dri_bo_unmap( vme_context->vme_state.bo);
> + return VA_STATUS_SUCCESS;
> +}
>
> static VAStatus gen7_vme_avc_state_setup(VADriverContextP ctx,
> struct encode_state
> *encode_state, @@ -598,25 +652,30 @@ static void
> gen7_vme_pipeline_programing(VADriverContextP ctx,
> int s;
> bool allow_hwscore = true;
> int kernel_shader;
> -
> - for (s = 0; s < encode_state->num_slice_params_ext; s++) {
> - pSliceParameter = (VAEncSliceParameterBufferH264
> *)encode_state->slice_params_ext[s]->buffer;
> - if ((pSliceParameter->macroblock_address % width_in_mbs)) {
> - allow_hwscore = false;
> - break;
> - }
> + unsigned int is_low_quality = (encode_state->quality_level ==
> + ENCODER_LOW_QUALITY);
> +
> + if (is_low_quality)
> + allow_hwscore = false;
> + else {
> + for (s = 0; s < encode_state->num_slice_params_ext; s++) {
> + pSliceParameter = (VAEncSliceParameterBufferH264
> *)encode_state->slice_params_ext[s]->buffer;
> + if ((pSliceParameter->macroblock_address % width_in_mbs)) {
> + allow_hwscore = false;
> + break;
> + }
> + }
> }
>
> if ((pSliceParameter->slice_type == SLICE_TYPE_I) ||
> (pSliceParameter->slice_type == SLICE_TYPE_I)) {
> - kernel_shader = AVC_VME_INTRA_SHADER;
> + kernel_shader = (is_low_quality ? AVC_VME_OLD_INTRA_SHADER :
> +AVC_VME_INTRA_SHADER);
> } else if ((pSliceParameter->slice_type == SLICE_TYPE_P) ||
> (pSliceParameter->slice_type == SLICE_TYPE_SP)) {
> - kernel_shader = AVC_VME_INTER_SHADER;
> + kernel_shader = (is_low_quality ? AVC_VME_OLD_INTER_SHADER :
> +AVC_VME_INTER_SHADER);
> } else {
> kernel_shader = AVC_VME_BINTER_SHADER;
> if (!allow_hwscore)
> - kernel_shader = AVC_VME_INTER_SHADER;
> + kernel_shader = (is_low_quality ?
> AVC_VME_OLD_INTER_SHADER
> + : AVC_VME_INTER_SHADER);
> }
>
> if (allow_hwscore)
> @@ -668,7 +727,10 @@ static VAStatus
> gen7_vme_prepare(VADriverContextP ctx,
> gen7_vme_surface_setup(ctx, encode_state, is_intra, encoder_context);
> gen7_vme_interface_setup(ctx, encode_state, encoder_context);
> gen7_vme_constant_setup(ctx, encode_state, encoder_context);
> - gen7_vme_avc_state_setup(ctx, encode_state, is_intra,
> encoder_context);
> + if (encode_state->quality_level == ENCODER_LOW_QUALITY)
> + gen7_vme_vme_state_setup(ctx, encode_state, is_intra,
> encoder_context);
> + else
> + gen7_vme_avc_state_setup(ctx, encode_state, is_intra,
> + encoder_context);
>
> /*Programing media pipeline*/
> gen7_vme_pipeline_programing(ctx, encode_state, encoder_context);
> diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c index
> fcbab79..6eaf6ca 100755
> --- a/src/i965_drv_video.c
> +++ b/src/i965_drv_video.c
> @@ -621,6 +621,7 @@ i965_GetConfigAttributes(VADriverContextP ctx,
> int num_attribs) {
> VAStatus va_status;
> + struct i965_driver_data *i965 = i965_driver_data(ctx);
> int i;
>
> va_status = i965_validate_config(ctx, profile, entrypoint); @@ -665,6
> +666,15 @@ i965_GetConfigAttributes(VADriverContextP ctx,
> break;
> }
>
> + case VAConfigAttribEncQualityRange:
> + if (entrypoint == VAEntrypointEncSlice) {
> + attrib_list[i].value = 1;
> + if (profile == VAProfileH264ConstrainedBaseline &&
> + IS_GEN7(i965->intel.device_info))
> + attrib_list[i].value = ENCODER_QUALITY_RANGE;
> + break;
> + }
> +
> default:
> /* Do nothing */
> attrib_list[i].value = VA_ATTRIB_NOT_SUPPORTED; @@ -2214,6
> +2224,8 @@ i965_BeginPicture(VADriverContextP ctx,
> obj_context->codec_state.encode.num_slice_params_ext = 0;
> obj_context->codec_state.encode.current_render_target =
> render_target; /*This is input new frame*/
> obj_context->codec_state.encode.last_packed_header_type = 0;
> + obj_context->codec_state.encode.quality_level =
> + ENCODER_DEFAULT_QUALITY;
It is reasonable for a user to set the quality level only once and never change it afterwards.
In that case, if obj_context->codec_state.encode.quality_level is re-initialized here on every
i965_BeginPicture() call, the other frames will always use the default setting.
> +
> memset(obj_context->codec_state.encode.slice_rawdata_index, 0,
> sizeof(int) * obj_context->codec_state.encode.slice_num);
> memset(obj_context->codec_state.encode.slice_rawdata_count, 0,
> diff --git a/src/i965_drv_video.h b/src/i965_drv_video.h index
> 418c277..29aa10b 100644
> --- a/src/i965_drv_video.h
> +++ b/src/i965_drv_video.h
> @@ -65,6 +65,11 @@
> #define DEFAULT_HUE 0
> #define DEFAULT_SATURATION 10
>
> +#define ENCODER_QUALITY_RANGE 2
> +#define ENCODER_DEFAULT_QUALITY 1
> +#define ENCODER_HIGH_QUALITY ENCODER_DEFAULT_QUALITY
> +#define ENCODER_LOW_QUALITY 2
> +
> struct i965_surface
> {
> struct object_base *base;
> @@ -172,6 +177,8 @@ struct encode_state
>
> struct buffer_store *misc_param[16];
>
> + unsigned int quality_level;
> +
> VASurfaceID current_render_target;
> struct object_surface *input_yuv_object;
> struct object_surface *reconstructed_object; diff --git
> a/src/i965_encoder.c b/src/i965_encoder.c index 14c37bb..bcaa61c 100644
> --- a/src/i965_encoder.c
> +++ b/src/i965_encoder.c
> @@ -124,6 +124,7 @@ intel_encoder_check_yuv_surface(VADriverContextP
> ctx,
>
> static VAStatus
> intel_encoder_check_avc_parameter(VADriverContextP ctx,
> + VAProfile profile,
> struct encode_state *encode_state,
> struct intel_encoder_context
> *encoder_context) { @@ -151,6 +152,24 @@
> intel_encoder_check_avc_parameter(VADriverContextP ctx,
> if (!obj_buffer || !obj_buffer->buffer_store
> || !obj_buffer->buffer_store->bo)
> goto error;
>
> + if (encode_state->misc_param[VAEncMiscParameterTypeQualityLevel]
> &&
> +
> encode_state->misc_param[VAEncMiscParameterTypeQualityLevel]->buffer) {
> + VAEncMiscParameterBuffer* pMiscParam =
> (VAEncMiscParameterBuffer*)encode_state->misc_param[VAEncMiscParamete
> rTypeQualityLevel]->buffer;
> + VAEncMiscParameterBufferQualityLevel* param_quality_level =
> (VAEncMiscParameterBufferQualityLevel*)pMiscParam->data;
> + encode_state->quality_level =
> + param_quality_level->quality_level;
> +
> + if (encode_state->quality_level == 0)
> + encode_state->quality_level = ENCODER_DEFAULT_QUALITY;
> + else if (encode_state->quality_level == ENCODER_LOW_QUALITY &&
> + (profile != VAProfileH264ConstrainedBaseline ||
> + encoder_context->rate_control_mode != VA_RC_CQP)) {
> + /* low quality level mode only support baseline profile and CQP
> rate control */
This check doesn't match what vaGetConfigAttributes() reports:
1. On GEN7, vaGetConfigAttributes() with VAConfigAttribEncQualityRange returns 2 for VAProfileH264ConstrainedBaseline,
which means the driver supports 2 levels regardless of the rate control mode, yet this check rejects level 2 unless the mode is CQP.
2. On GEN6, vaGetConfigAttributes() with VAConfigAttribEncQualityRange returns 1. However, this check will pass if the user
sets the level to 2 for VAProfileH264ConstrainedBaseline in CQP mode on GEN6.
It would be better to track the range per context and only check the level against that range; then a single
function can be used for MPEG-2, H.264 and any new codec added later.
> + goto error;
> + }
> + else if (encode_state->quality_level > ENCODER_QUALITY_RANGE)
> + goto error;
> + }
> +
> encode_state->coded_buf_object = obj_buffer;
>
> for (i = 0; i < 16; i++) {
> @@ -260,7 +279,7 @@ intel_encoder_sanity_check_input(VADriverContextP
> ctx,
> case VAProfileH264High:
> case VAProfileH264MultiviewHigh:
> case VAProfileH264StereoHigh:
> - vaStatus = intel_encoder_check_avc_parameter(ctx, encode_state,
> encoder_context);
> + vaStatus = intel_encoder_check_avc_parameter(ctx, profile,
> + encode_state, encoder_context);
> break;
>
> case VAProfileMPEG2Simple:
> --
> 1.7.9.5
>
> _______________________________________________
> Libva mailing list
> Libva at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/libva
More information about the Libva
mailing list