[Libva] [PATCH 2/3] Add multi quality levels encoding support for GEN7
Xiang, Haihao
haihao.xiang at intel.com
Wed Jun 4 21:12:05 PDT 2014
On Wed, 2014-06-04 at 21:37 +0800, Zhong Li wrote:
> Two encoding quality levels are supported on GEN7.
> Default quality level is set to be 1, which has better quality,
> but higher gpu usage and worse performance.
> The second quality level is set to be 2, which has lower gpu usage and
> better performance,but worse quality.
> Other platforms support for multi-quality-level will be added later.
>
> Signed-off-by: Zhong Li <zhong.li at intel.com>
> ---
> src/gen6_mfc.c | 130 ++++++++++++++++++++++++++++++++++++++------------
> src/gen7_vme.c | 84 +++++++++++++++++++++++++++-----
> src/i965_drv_video.c | 12 +++++
> src/i965_drv_video.h | 7 +++
> src/i965_encoder.c | 21 +++++++-
> 5 files changed, 211 insertions(+), 43 deletions(-)
>
> diff --git a/src/gen6_mfc.c b/src/gen6_mfc.c
> index 0a10054..f1cd7ea 100644
> --- a/src/gen6_mfc.c
> +++ b/src/gen6_mfc.c
> @@ -676,8 +676,6 @@ gen6_mfc_stop(VADriverContextP ctx,
> return VA_STATUS_SUCCESS;
> }
>
> -#if __SOFTWARE__
> -
> static int
> gen6_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb, int qp,unsigned int *msg,
> struct intel_encoder_context *encoder_context,
> @@ -768,6 +766,58 @@ gen6_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, in
>
> return len_in_dwords;
> }
> +
> +static int
> +gen6_mfc_avc_pak_object_inter2(VADriverContextP ctx, int x, int y, int end_mb, int qp,
> + unsigned int offset,
> + struct intel_encoder_context *encoder_context,
> + struct intel_batchbuffer *batch)
> +{
> + struct gen6_vme_context *vme_context = encoder_context->vme_context;
> + int len_in_dwords = 11;
> +
> + if (batch == NULL)
> + batch = encoder_context->base.batch;
> +
> + BEGIN_BCS_BATCH(batch, len_in_dwords);
> +
> + OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
> +
> + OUT_BCS_BATCH(batch, 32); /* 32 MV*/
> + OUT_BCS_BATCH(batch, offset);
> +
> + OUT_BCS_BATCH(batch,
> + (1 << 24) | /* PackedMvNum, Debug*/
> + (4 << 20) | /* 8 MV, SNB don't use it*/
> + (1 << 19) | /* CbpDcY */
> + (1 << 18) | /* CbpDcU */
> + (1 << 17) | /* CbpDcV */
> + (0 << 15) | /* Transform8x8Flag = 0*/
> + (0 << 14) | /* Frame based*/
> + (0 << 13) | /* Inter MB */
> + (1 << 8) | /* MbType = P_L0_16x16 */
> + (0 << 7) | /* MBZ for frame */
> + (0 << 6) | /* MBZ */
> + (2 << 4) | /* MBZ for inter*/
> + (0 << 3) | /* MBZ */
> + (0 << 2) | /* SkipMbFlag */
> + (0 << 0)); /* InterMbMode */
> +
> + OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x); /* Code Block Pattern for Y*/
> + OUT_BCS_BATCH(batch, 0x000F000F); /* Code Block Pattern */
> + OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp); /* Last MB */
> +
> + /*Stuff for Inter MB*/
> + OUT_BCS_BATCH(batch, 0x0);
> + OUT_BCS_BATCH(batch, 0x0);
> + OUT_BCS_BATCH(batch, 0x0);
> +
> + OUT_BCS_BATCH(batch, 0xF0020000); /*MaxSizeInWord and TargetSzieInWord*/
> +
> + ADVANCE_BCS_BATCH(batch);
> +
> + return len_in_dwords;
> +}
>
> static void
> gen6_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
> @@ -791,6 +841,7 @@ gen6_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
> unsigned int tail_data[] = { 0x0, 0x0 };
> int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
> int is_intra = slice_type == SLICE_TYPE_I;
> + int is_low_quality = (encode_state->quality_level == ENCODER_LOW_QUALITY);
>
> if (rate_control_mode == VA_RC_CBR) {
> qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
> @@ -818,36 +869,54 @@ gen6_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
> dri_bo_map(vme_context->vme_output.bo , 1);
> msg = (unsigned int *)vme_context->vme_output.bo->virtual;
>
> - if (is_intra) {
> - msg += pSliceParameter->macroblock_address * INTRA_VME_OUTPUT_IN_DWS;
> - } else {
> - msg += pSliceParameter->macroblock_address * INTER_VME_OUTPUT_IN_DWS;
> - msg += 32; /* the first 32 DWs are MVs */
> - offset = pSliceParameter->macroblock_address * INTER_VME_OUTPUT_IN_BYTES;
> - }
> -
> - for (i = pSliceParameter->macroblock_address;
> - i < pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks; i++) {
> - int last_mb = (i == (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks - 1) );
> - x = i % width_in_mbs;
> - y = i / width_in_mbs;
> + if (is_low_quality) {
> + for (i = pSliceParameter->macroblock_address;
> + i < pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks; i++) {
> + int last_mb = (i == (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks - 1) );
> + x = i % width_in_mbs;
> + y = i / width_in_mbs;
>
> + if (is_intra) {
> + assert(msg);
> + gen6_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
> + msg += 4;
> + } else {
> + gen6_mfc_avc_pak_object_inter2(ctx, x, y, last_mb, qp, offset, encoder_context, slice_batch);
> + offset += 64;
> + }
> + }
> + } else {
> if (is_intra) {
> - assert(msg);
> - gen6_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
> - msg += INTRA_VME_OUTPUT_IN_DWS;
> + msg += pSliceParameter->macroblock_address * INTRA_VME_OUTPUT_IN_DWS;
> } else {
> - if (msg[0] & INTRA_MB_FLAG_MASK) {
> + msg += pSliceParameter->macroblock_address * INTER_VME_OUTPUT_IN_DWS;
> + msg += 32; /* the first 32 DWs are MVs */
> + offset = pSliceParameter->macroblock_address * INTER_VME_OUTPUT_IN_BYTES;
> + }
> +
> + for (i = pSliceParameter->macroblock_address;
> + i < pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks; i++) {
> + int last_mb = (i == (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks - 1) );
> + x = i % width_in_mbs;
> + y = i / width_in_mbs;
> +
> + if (is_intra) {
> + assert(msg);
> gen6_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
> + msg += INTRA_VME_OUTPUT_IN_DWS;
> } else {
> - gen6_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, msg, offset, encoder_context, 0, 0, slice_type, slice_batch);
> - }
> + if (msg[0] & INTRA_MB_FLAG_MASK) {
> + gen6_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
> + } else {
> + gen6_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, msg, offset, encoder_context, 0, 0, slice_type, slice_batch);
> + }
>
> - msg += INTER_VME_OUTPUT_IN_DWS;
> - offset += INTER_VME_OUTPUT_IN_BYTES;
> + msg += INTER_VME_OUTPUT_IN_DWS;
> + offset += INTER_VME_OUTPUT_IN_BYTES;
> + }
> }
> }
> -
> +
> dri_bo_unmap(vme_context->vme_output.bo);
>
> if ( last_slice ) {
> @@ -896,8 +965,6 @@ gen6_mfc_avc_software_batchbuffer(VADriverContextP ctx,
> return batch_bo;
> }
>
> -#else
> -
> static void
> gen6_mfc_batchbuffer_surfaces_input(VADriverContextP ctx,
> struct encode_state *encode_state,
> @@ -1291,9 +1358,6 @@ gen6_mfc_avc_hardware_batchbuffer(VADriverContextP ctx,
> return mfc_context->mfc_batchbuffer_surface.bo;
> }
>
> -#endif
> -
> -
> static void
> gen6_mfc_avc_pipeline_programing(VADriverContextP ctx,
> struct encode_state *encode_state,
> @@ -1308,11 +1372,15 @@ gen6_mfc_avc_pipeline_programing(VADriverContextP ctx,
> return;
> }
>
> + if (encode_state->quality_level == ENCODER_LOW_QUALITY )
> + slice_batch_bo = gen6_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context);
> + else {
> #if __SOFTWARE__
> - slice_batch_bo = gen6_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context);
> + slice_batch_bo = gen6_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context);
> #else
> - slice_batch_bo = gen6_mfc_avc_hardware_batchbuffer(ctx, encode_state, encoder_context);
> + slice_batch_bo = gen6_mfc_avc_hardware_batchbuffer(ctx, encode_state, encoder_context);
> #endif
> + }
>
> // begin programing
> intel_batchbuffer_start_atomic_bcs(batch, 0x4000);
> diff --git a/src/gen7_vme.c b/src/gen7_vme.c
> index 042fe5d..b45f558 100644
> --- a/src/gen7_vme.c
> +++ b/src/gen7_vme.c
> @@ -63,6 +63,8 @@ enum VIDEO_CODING_TYPE{
> enum AVC_VME_KERNEL_TYPE{
> AVC_VME_INTRA_SHADER = 0,
> AVC_VME_INTER_SHADER,
> + AVC_VME_OLD_INTRA_SHADER,
> + AVC_VME_OLD_INTER_SHADER,
> AVC_VME_BATCHBUFFER,
> AVC_VME_BINTER_SHADER,
> AVC_VME_KERNEL_SUM
> @@ -83,6 +85,14 @@ static const uint32_t gen7_vme_inter_frame[][4] = {
> #include "shaders/vme/inter_frame_ivb.g7b"
> };
>
> +static const uint32_t gen7_vme_old_intra_frame[][4] = {
> +#include "shaders/vme_old/intra_frame.g7b"
> +};
> +
> +static const uint32_t gen7_vme_old_inter_frame[][4] = {
> +#include "shaders/vme_old/inter_frame.g7b"
> +};
> +
> static const uint32_t gen7_vme_batchbuffer[][4] = {
> #include "shaders/vme/batchbuffer.g7b"
> };
> @@ -107,6 +117,20 @@ static struct i965_kernel gen7_vme_kernels[] = {
> NULL
> },
> {
> + "AVC VME Old Intra Frame",
> + AVC_VME_OLD_INTRA_SHADER,
> + gen7_vme_old_intra_frame,
> + sizeof(gen7_vme_old_intra_frame),
> + NULL
> + },
> + {
> + "AVC VME Old Inter Frame",
> + AVC_VME_OLD_INTER_SHADER,
> + gen7_vme_old_inter_frame,
> + sizeof(gen7_vme_old_inter_frame),
> + NULL
> + },
> + {
> "AVC VME BATCHBUFFER",
> AVC_VME_BATCHBUFFER,
> gen7_vme_batchbuffer,
> @@ -359,6 +383,36 @@ static VAStatus gen7_vme_constant_setup(VADriverContextP ctx,
> return VA_STATUS_SUCCESS;
> }
>
> +static VAStatus
> +gen7_vme_vme_state_setup(VADriverContextP ctx,
> + struct encode_state *encode_state,
> + int is_intra,
> + struct intel_encoder_context *encoder_context)
> +{
> + struct gen6_vme_context *vme_context = encoder_context->vme_context;
> + unsigned int *vme_state_message;
> + int i;
> +
> + //building VME state message
> + dri_bo_map(vme_context->vme_state.bo, 1);
> + assert(vme_context->vme_state.bo->virtual);
> + vme_state_message = (unsigned int *)vme_context->vme_state.bo->virtual;
> +
> + vme_state_message[0] = 0x10010101;
> + vme_state_message[1] = 0x100F0F0F;
> + vme_state_message[2] = 0x10010101;
> + vme_state_message[3] = 0x000F0F0F;
> + for(i = 4; i < 14; i++) {
> + vme_state_message[i] = 0x00000000;
> + }
> +
> + for(i = 14; i < 32; i++) {
> + vme_state_message[i] = 0x00000000;
> + }
> +
> + dri_bo_unmap( vme_context->vme_state.bo);
> + return VA_STATUS_SUCCESS;
> +}
>
> static VAStatus gen7_vme_avc_state_setup(VADriverContextP ctx,
> struct encode_state *encode_state,
> @@ -598,25 +652,30 @@ static void gen7_vme_pipeline_programing(VADriverContextP ctx,
> int s;
> bool allow_hwscore = true;
> int kernel_shader;
> -
> - for (s = 0; s < encode_state->num_slice_params_ext; s++) {
> - pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
> - if ((pSliceParameter->macroblock_address % width_in_mbs)) {
> - allow_hwscore = false;
> - break;
> - }
> + unsigned int is_low_quality = (encode_state->quality_level == ENCODER_LOW_QUALITY);
> +
> + if (is_low_quality)
> + allow_hwscore = false;
> + else {
> + for (s = 0; s < encode_state->num_slice_params_ext; s++) {
> + pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
> + if ((pSliceParameter->macroblock_address % width_in_mbs)) {
> + allow_hwscore = false;
> + break;
> + }
> + }
> }
>
> if ((pSliceParameter->slice_type == SLICE_TYPE_I) ||
> (pSliceParameter->slice_type == SLICE_TYPE_I)) {
> - kernel_shader = AVC_VME_INTRA_SHADER;
> + kernel_shader = (is_low_quality ? AVC_VME_OLD_INTRA_SHADER : AVC_VME_INTRA_SHADER);
> } else if ((pSliceParameter->slice_type == SLICE_TYPE_P) ||
> (pSliceParameter->slice_type == SLICE_TYPE_SP)) {
> - kernel_shader = AVC_VME_INTER_SHADER;
> + kernel_shader = (is_low_quality ? AVC_VME_OLD_INTER_SHADER : AVC_VME_INTER_SHADER);
> } else {
> kernel_shader = AVC_VME_BINTER_SHADER;
> if (!allow_hwscore)
> - kernel_shader = AVC_VME_INTER_SHADER;
> + kernel_shader = (is_low_quality ? AVC_VME_OLD_INTER_SHADER : AVC_VME_INTER_SHADER);
> }
>
> if (allow_hwscore)
> @@ -668,7 +727,10 @@ static VAStatus gen7_vme_prepare(VADriverContextP ctx,
> gen7_vme_surface_setup(ctx, encode_state, is_intra, encoder_context);
> gen7_vme_interface_setup(ctx, encode_state, encoder_context);
> gen7_vme_constant_setup(ctx, encode_state, encoder_context);
> - gen7_vme_avc_state_setup(ctx, encode_state, is_intra, encoder_context);
> + if (encode_state->quality_level == ENCODER_LOW_QUALITY)
> + gen7_vme_vme_state_setup(ctx, encode_state, is_intra, encoder_context);
> + else
> + gen7_vme_avc_state_setup(ctx, encode_state, is_intra, encoder_context);
>
> /*Programing media pipeline*/
> gen7_vme_pipeline_programing(ctx, encode_state, encoder_context);
> diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c
> index fcbab79..6eaf6ca 100755
> --- a/src/i965_drv_video.c
> +++ b/src/i965_drv_video.c
> @@ -621,6 +621,7 @@ i965_GetConfigAttributes(VADriverContextP ctx,
> int num_attribs)
> {
> VAStatus va_status;
> + struct i965_driver_data *i965 = i965_driver_data(ctx);
> int i;
>
> va_status = i965_validate_config(ctx, profile, entrypoint);
> @@ -665,6 +666,15 @@ i965_GetConfigAttributes(VADriverContextP ctx,
> break;
> }
>
> + case VAConfigAttribEncQualityRange:
> + if (entrypoint == VAEntrypointEncSlice) {
> + attrib_list[i].value = 1;
> + if (profile == VAProfileH264ConstrainedBaseline &&
> + IS_GEN7(i965->intel.device_info))
Does the low quality shader support Main Profile?
> + attrib_list[i].value = ENCODER_QUALITY_RANGE;
> + break;
> + }
> +
> default:
> /* Do nothing */
> attrib_list[i].value = VA_ATTRIB_NOT_SUPPORTED;
> @@ -2214,6 +2224,8 @@ i965_BeginPicture(VADriverContextP ctx,
> obj_context->codec_state.encode.num_slice_params_ext = 0;
> obj_context->codec_state.encode.current_render_target = render_target; /*This is input new frame*/
> obj_context->codec_state.encode.last_packed_header_type = 0;
> + obj_context->codec_state.encode.quality_level = ENCODER_DEFAULT_QUALITY;
> +
> memset(obj_context->codec_state.encode.slice_rawdata_index, 0,
> sizeof(int) * obj_context->codec_state.encode.slice_num);
> memset(obj_context->codec_state.encode.slice_rawdata_count, 0,
> diff --git a/src/i965_drv_video.h b/src/i965_drv_video.h
> index 418c277..29aa10b 100644
> --- a/src/i965_drv_video.h
> +++ b/src/i965_drv_video.h
> @@ -65,6 +65,11 @@
> #define DEFAULT_HUE 0
> #define DEFAULT_SATURATION 10
>
> +#define ENCODER_QUALITY_RANGE 2
> +#define ENCODER_DEFAULT_QUALITY 1
> +#define ENCODER_HIGH_QUALITY ENCODER_DEFAULT_QUALITY
> +#define ENCODER_LOW_QUALITY 2
> +
> struct i965_surface
> {
> struct object_base *base;
> @@ -172,6 +177,8 @@ struct encode_state
>
> struct buffer_store *misc_param[16];
>
> + unsigned int quality_level;
> +
> VASurfaceID current_render_target;
> struct object_surface *input_yuv_object;
> struct object_surface *reconstructed_object;
> diff --git a/src/i965_encoder.c b/src/i965_encoder.c
> index 14c37bb..bcaa61c 100644
> --- a/src/i965_encoder.c
> +++ b/src/i965_encoder.c
> @@ -124,6 +124,7 @@ intel_encoder_check_yuv_surface(VADriverContextP ctx,
>
> static VAStatus
> intel_encoder_check_avc_parameter(VADriverContextP ctx,
> + VAProfile profile,
> struct encode_state *encode_state,
> struct intel_encoder_context *encoder_context)
This function is used to validate the sequence, picture and slice parameters
from the user, so it is not a suitable place to check misc parameters.
> {
> @@ -151,6 +152,24 @@ intel_encoder_check_avc_parameter(VADriverContextP ctx,
> if (!obj_buffer || !obj_buffer->buffer_store || !obj_buffer->buffer_store->bo)
> goto error;
>
> + if (encode_state->misc_param[VAEncMiscParameterTypeQualityLevel] &&
> + encode_state->misc_param[VAEncMiscParameterTypeQualityLevel]->buffer) {
> + VAEncMiscParameterBuffer* pMiscParam = (VAEncMiscParameterBuffer*)encode_state->misc_param[VAEncMiscParameterTypeQualityLevel]->buffer;
> + VAEncMiscParameterBufferQualityLevel* param_quality_level = (VAEncMiscParameterBufferQualityLevel*)pMiscParam->data;
> + encode_state->quality_level = param_quality_level->quality_level;
> +
> + if (encode_state->quality_level == 0)
> + encode_state->quality_level = ENCODER_DEFAULT_QUALITY;
> + else if (encode_state->quality_level == ENCODER_LOW_QUALITY &&
> + (profile != VAProfileH264ConstrainedBaseline ||
> + encoder_context->rate_control_mode != VA_RC_CQP)) {
> + /* low quality level mode only support baseline profile and CQP rate control */
> + goto error;
This is a limitation of the low quality shader on GEN7, so it would be
better to move the code to GEN7 related file(s).
> + }
> + else if (encode_state->quality_level > ENCODER_QUALITY_RANGE)
> + goto error;
> + }
> +
> encode_state->coded_buf_object = obj_buffer;
>
> for (i = 0; i < 16; i++) {
> @@ -260,7 +279,7 @@ intel_encoder_sanity_check_input(VADriverContextP ctx,
> case VAProfileH264High:
> case VAProfileH264MultiviewHigh:
> case VAProfileH264StereoHigh:
> - vaStatus = intel_encoder_check_avc_parameter(ctx, encode_state, encoder_context);
> + vaStatus = intel_encoder_check_avc_parameter(ctx, profile, encode_state, encoder_context);
> break;
>
> case VAProfileMPEG2Simple:
More information about the Libva
mailing list