[Libva] [PATCH V3 2/2] Add multi quality levels encoding support for GEN7
lizhong
zhong.li at intel.com
Tue Jun 10 22:32:52 PDT 2014
Thanks.
Zhong
On 06/11/2014 01:30 PM, Zhao, Yakui wrote:
> On Tue, 2014-06-10 at 20:49 -0600, Zhong Li wrote:
>> Two encoding quality levels are supported on GEN7.
>> The default quality level is 1, which gives better quality
>> but higher GPU usage and worse performance.
>> The second quality level is 2, which gives lower GPU usage and
>> better performance, but worse quality.
>> Support for multiple quality levels on other platforms will be added later.
>>
>> v1->v2: 1. follow haihao's comments to init and check quality_level.
>> 2. remove CBR limitation for low quality level.
>>
> This will be pushed.
>
> Thanks.
> Yakui
>
>> Signed-off-by: Zhong Li <zhong.li at intel.com>
>> ---
>> src/gen6_mfc.c | 130 ++++++++++++++++++++++++++++++++++++++------------
>> src/gen7_vme.c | 84 +++++++++++++++++++++++++++-----
>> src/i965_drv_video.c | 10 ++++
>> src/i965_drv_video.h | 5 ++
>> src/i965_encoder.c | 38 ++++++++++++++-
>> src/i965_encoder.h | 2 +
>> 6 files changed, 226 insertions(+), 43 deletions(-)
>>
>> diff --git a/src/gen6_mfc.c b/src/gen6_mfc.c
>> index 0a10054..30cdb66 100644
>> --- a/src/gen6_mfc.c
>> +++ b/src/gen6_mfc.c
>> @@ -676,8 +676,6 @@ gen6_mfc_stop(VADriverContextP ctx,
>> return VA_STATUS_SUCCESS;
>> }
>>
>> -#if __SOFTWARE__
>> -
>> static int
>> gen6_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb, int qp,unsigned int *msg,
>> struct intel_encoder_context *encoder_context,
>> @@ -768,6 +766,58 @@ gen6_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, in
>>
>> return len_in_dwords;
>> }
>> +
>> +static int
>> +gen6_mfc_avc_pak_object_inter2(VADriverContextP ctx, int x, int y, int end_mb, int qp,
>> + unsigned int offset,
>> + struct intel_encoder_context *encoder_context,
>> + struct intel_batchbuffer *batch)
>> +{
>> + struct gen6_vme_context *vme_context = encoder_context->vme_context;
>> + int len_in_dwords = 11;
>> +
>> + if (batch == NULL)
>> + batch = encoder_context->base.batch;
>> +
>> + BEGIN_BCS_BATCH(batch, len_in_dwords);
>> +
>> + OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
>> +
>> + OUT_BCS_BATCH(batch, 32); /* 32 MV*/
>> + OUT_BCS_BATCH(batch, offset);
>> +
>> + OUT_BCS_BATCH(batch,
>> + (1 << 24) | /* PackedMvNum, Debug*/
>> + (4 << 20) | /* 8 MV, SNB don't use it*/
>> + (1 << 19) | /* CbpDcY */
>> + (1 << 18) | /* CbpDcU */
>> + (1 << 17) | /* CbpDcV */
>> + (0 << 15) | /* Transform8x8Flag = 0*/
>> + (0 << 14) | /* Frame based*/
>> + (0 << 13) | /* Inter MB */
>> + (1 << 8) | /* MbType = P_L0_16x16 */
>> + (0 << 7) | /* MBZ for frame */
>> + (0 << 6) | /* MBZ */
>> + (2 << 4) | /* MBZ for inter*/
>> + (0 << 3) | /* MBZ */
>> + (0 << 2) | /* SkipMbFlag */
>> + (0 << 0)); /* InterMbMode */
>> +
>> + OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x); /* Code Block Pattern for Y*/
>> + OUT_BCS_BATCH(batch, 0x000F000F); /* Code Block Pattern */
>> + OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp); /* Last MB */
>> +
>> + /*Stuff for Inter MB*/
>> + OUT_BCS_BATCH(batch, 0x0);
>> + OUT_BCS_BATCH(batch, 0x0);
>> + OUT_BCS_BATCH(batch, 0x0);
>> +
>> + OUT_BCS_BATCH(batch, 0xF0020000); /*MaxSizeInWord and TargetSzieInWord*/
>> +
>> + ADVANCE_BCS_BATCH(batch);
>> +
>> + return len_in_dwords;
>> +}
>>
>> static void
>> gen6_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
>> @@ -791,6 +841,7 @@ gen6_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
>> unsigned int tail_data[] = { 0x0, 0x0 };
>> int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
>> int is_intra = slice_type == SLICE_TYPE_I;
>> + int is_low_quality = (encoder_context->quality_level == ENCODER_LOW_QUALITY);
>>
>> if (rate_control_mode == VA_RC_CBR) {
>> qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
>> @@ -818,36 +869,54 @@ gen6_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
>> dri_bo_map(vme_context->vme_output.bo , 1);
>> msg = (unsigned int *)vme_context->vme_output.bo->virtual;
>>
>> - if (is_intra) {
>> - msg += pSliceParameter->macroblock_address * INTRA_VME_OUTPUT_IN_DWS;
>> - } else {
>> - msg += pSliceParameter->macroblock_address * INTER_VME_OUTPUT_IN_DWS;
>> - msg += 32; /* the first 32 DWs are MVs */
>> - offset = pSliceParameter->macroblock_address * INTER_VME_OUTPUT_IN_BYTES;
>> - }
>> -
>> - for (i = pSliceParameter->macroblock_address;
>> - i < pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks; i++) {
>> - int last_mb = (i == (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks - 1) );
>> - x = i % width_in_mbs;
>> - y = i / width_in_mbs;
>> + if (is_low_quality) {
>> + for (i = pSliceParameter->macroblock_address;
>> + i < pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks; i++) {
>> + int last_mb = (i == (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks - 1) );
>> + x = i % width_in_mbs;
>> + y = i / width_in_mbs;
>>
>> + if (is_intra) {
>> + assert(msg);
>> + gen6_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
>> + msg += 4;
>> + } else {
>> + gen6_mfc_avc_pak_object_inter2(ctx, x, y, last_mb, qp, offset, encoder_context, slice_batch);
>> + offset += 64;
>> + }
>> + }
>> + } else {
>> if (is_intra) {
>> - assert(msg);
>> - gen6_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
>> - msg += INTRA_VME_OUTPUT_IN_DWS;
>> + msg += pSliceParameter->macroblock_address * INTRA_VME_OUTPUT_IN_DWS;
>> } else {
>> - if (msg[0] & INTRA_MB_FLAG_MASK) {
>> + msg += pSliceParameter->macroblock_address * INTER_VME_OUTPUT_IN_DWS;
>> + msg += 32; /* the first 32 DWs are MVs */
>> + offset = pSliceParameter->macroblock_address * INTER_VME_OUTPUT_IN_BYTES;
>> + }
>> +
>> + for (i = pSliceParameter->macroblock_address;
>> + i < pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks; i++) {
>> + int last_mb = (i == (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks - 1) );
>> + x = i % width_in_mbs;
>> + y = i / width_in_mbs;
>> +
>> + if (is_intra) {
>> + assert(msg);
>> gen6_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
>> + msg += INTRA_VME_OUTPUT_IN_DWS;
>> } else {
>> - gen6_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, msg, offset, encoder_context, 0, 0, slice_type, slice_batch);
>> - }
>> + if (msg[0] & INTRA_MB_FLAG_MASK) {
>> + gen6_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
>> + } else {
>> + gen6_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, msg, offset, encoder_context, 0, 0, slice_type, slice_batch);
>> + }
>>
>> - msg += INTER_VME_OUTPUT_IN_DWS;
>> - offset += INTER_VME_OUTPUT_IN_BYTES;
>> + msg += INTER_VME_OUTPUT_IN_DWS;
>> + offset += INTER_VME_OUTPUT_IN_BYTES;
>> + }
>> }
>> }
>> -
>> +
>> dri_bo_unmap(vme_context->vme_output.bo);
>>
>> if ( last_slice ) {
>> @@ -896,8 +965,6 @@ gen6_mfc_avc_software_batchbuffer(VADriverContextP ctx,
>> return batch_bo;
>> }
>>
>> -#else
>> -
>> static void
>> gen6_mfc_batchbuffer_surfaces_input(VADriverContextP ctx,
>> struct encode_state *encode_state,
>> @@ -1291,9 +1358,6 @@ gen6_mfc_avc_hardware_batchbuffer(VADriverContextP ctx,
>> return mfc_context->mfc_batchbuffer_surface.bo;
>> }
>>
>> -#endif
>> -
>> -
>> static void
>> gen6_mfc_avc_pipeline_programing(VADriverContextP ctx,
>> struct encode_state *encode_state,
>> @@ -1308,11 +1372,15 @@ gen6_mfc_avc_pipeline_programing(VADriverContextP ctx,
>> return;
>> }
>>
>> + if (encoder_context->quality_level == ENCODER_LOW_QUALITY )
>> + slice_batch_bo = gen6_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context);
>> + else {
>> #if __SOFTWARE__
>> - slice_batch_bo = gen6_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context);
>> + slice_batch_bo = gen6_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context);
>> #else
>> - slice_batch_bo = gen6_mfc_avc_hardware_batchbuffer(ctx, encode_state, encoder_context);
>> + slice_batch_bo = gen6_mfc_avc_hardware_batchbuffer(ctx, encode_state, encoder_context);
>> #endif
>> + }
>>
>> // begin programing
>> intel_batchbuffer_start_atomic_bcs(batch, 0x4000);
>> diff --git a/src/gen7_vme.c b/src/gen7_vme.c
>> index 042fe5d..135a5ad 100644
>> --- a/src/gen7_vme.c
>> +++ b/src/gen7_vme.c
>> @@ -63,6 +63,8 @@ enum VIDEO_CODING_TYPE{
>> enum AVC_VME_KERNEL_TYPE{
>> AVC_VME_INTRA_SHADER = 0,
>> AVC_VME_INTER_SHADER,
>> + AVC_VME_OLD_INTRA_SHADER,
>> + AVC_VME_OLD_INTER_SHADER,
>> AVC_VME_BATCHBUFFER,
>> AVC_VME_BINTER_SHADER,
>> AVC_VME_KERNEL_SUM
>> @@ -83,6 +85,14 @@ static const uint32_t gen7_vme_inter_frame[][4] = {
>> #include "shaders/vme/inter_frame_ivb.g7b"
>> };
>>
>> +static const uint32_t gen7_vme_old_intra_frame[][4] = {
>> +#include "shaders/vme_old/intra_frame.g7b"
>> +};
>> +
>> +static const uint32_t gen7_vme_old_inter_frame[][4] = {
>> +#include "shaders/vme_old/inter_frame.g7b"
>> +};
>> +
>> static const uint32_t gen7_vme_batchbuffer[][4] = {
>> #include "shaders/vme/batchbuffer.g7b"
>> };
>> @@ -107,6 +117,20 @@ static struct i965_kernel gen7_vme_kernels[] = {
>> NULL
>> },
>> {
>> + "AVC VME Old Intra Frame",
>> + AVC_VME_OLD_INTRA_SHADER,
>> + gen7_vme_old_intra_frame,
>> + sizeof(gen7_vme_old_intra_frame),
>> + NULL
>> + },
>> + {
>> + "AVC VME Old Inter Frame",
>> + AVC_VME_OLD_INTER_SHADER,
>> + gen7_vme_old_inter_frame,
>> + sizeof(gen7_vme_old_inter_frame),
>> + NULL
>> + },
>> + {
>> "AVC VME BATCHBUFFER",
>> AVC_VME_BATCHBUFFER,
>> gen7_vme_batchbuffer,
>> @@ -359,6 +383,36 @@ static VAStatus gen7_vme_constant_setup(VADriverContextP ctx,
>> return VA_STATUS_SUCCESS;
>> }
>>
>> +static VAStatus
>> +gen7_vme_vme_state_setup(VADriverContextP ctx,
>> + struct encode_state *encode_state,
>> + int is_intra,
>> + struct intel_encoder_context *encoder_context)
>> +{
>> + struct gen6_vme_context *vme_context = encoder_context->vme_context;
>> + unsigned int *vme_state_message;
>> + int i;
>> +
>> + //building VME state message
>> + dri_bo_map(vme_context->vme_state.bo, 1);
>> + assert(vme_context->vme_state.bo->virtual);
>> + vme_state_message = (unsigned int *)vme_context->vme_state.bo->virtual;
>> +
>> + vme_state_message[0] = 0x10010101;
>> + vme_state_message[1] = 0x100F0F0F;
>> + vme_state_message[2] = 0x10010101;
>> + vme_state_message[3] = 0x000F0F0F;
>> + for(i = 4; i < 14; i++) {
>> + vme_state_message[i] = 0x00000000;
>> + }
>> +
>> + for(i = 14; i < 32; i++) {
>> + vme_state_message[i] = 0x00000000;
>> + }
>> +
>> + dri_bo_unmap( vme_context->vme_state.bo);
>> + return VA_STATUS_SUCCESS;
>> +}
>>
>> static VAStatus gen7_vme_avc_state_setup(VADriverContextP ctx,
>> struct encode_state *encode_state,
>> @@ -598,25 +652,30 @@ static void gen7_vme_pipeline_programing(VADriverContextP ctx,
>> int s;
>> bool allow_hwscore = true;
>> int kernel_shader;
>> -
>> - for (s = 0; s < encode_state->num_slice_params_ext; s++) {
>> - pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
>> - if ((pSliceParameter->macroblock_address % width_in_mbs)) {
>> - allow_hwscore = false;
>> - break;
>> - }
>> + unsigned int is_low_quality = (encoder_context->quality_level == ENCODER_LOW_QUALITY);
>> +
>> + if (is_low_quality)
>> + allow_hwscore = false;
>> + else {
>> + for (s = 0; s < encode_state->num_slice_params_ext; s++) {
>> + pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
>> + if ((pSliceParameter->macroblock_address % width_in_mbs)) {
>> + allow_hwscore = false;
>> + break;
>> + }
>> + }
>> }
>>
>> if ((pSliceParameter->slice_type == SLICE_TYPE_I) ||
>> (pSliceParameter->slice_type == SLICE_TYPE_I)) {
>> - kernel_shader = AVC_VME_INTRA_SHADER;
>> + kernel_shader = (is_low_quality ? AVC_VME_OLD_INTRA_SHADER : AVC_VME_INTRA_SHADER);
>> } else if ((pSliceParameter->slice_type == SLICE_TYPE_P) ||
>> (pSliceParameter->slice_type == SLICE_TYPE_SP)) {
>> - kernel_shader = AVC_VME_INTER_SHADER;
>> + kernel_shader = (is_low_quality ? AVC_VME_OLD_INTER_SHADER : AVC_VME_INTER_SHADER);
>> } else {
>> kernel_shader = AVC_VME_BINTER_SHADER;
>> if (!allow_hwscore)
>> - kernel_shader = AVC_VME_INTER_SHADER;
>> + kernel_shader = (is_low_quality ? AVC_VME_OLD_INTER_SHADER : AVC_VME_INTER_SHADER);
>> }
>>
>> if (allow_hwscore)
>> @@ -668,7 +727,10 @@ static VAStatus gen7_vme_prepare(VADriverContextP ctx,
>> gen7_vme_surface_setup(ctx, encode_state, is_intra, encoder_context);
>> gen7_vme_interface_setup(ctx, encode_state, encoder_context);
>> gen7_vme_constant_setup(ctx, encode_state, encoder_context);
>> - gen7_vme_avc_state_setup(ctx, encode_state, is_intra, encoder_context);
>> + if (encoder_context->quality_level == ENCODER_LOW_QUALITY)
>> + gen7_vme_vme_state_setup(ctx, encode_state, is_intra, encoder_context);
>> + else
>> + gen7_vme_avc_state_setup(ctx, encode_state, is_intra, encoder_context);
>>
>> /*Programing media pipeline*/
>> gen7_vme_pipeline_programing(ctx, encode_state, encoder_context);
>> diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c
>> index df625bb..54e657d 100755
>> --- a/src/i965_drv_video.c
>> +++ b/src/i965_drv_video.c
>> @@ -629,6 +629,7 @@ i965_GetConfigAttributes(VADriverContextP ctx,
>> int num_attribs)
>> {
>> VAStatus va_status;
>> + struct i965_driver_data *i965 = i965_driver_data(ctx);
>> int i;
>>
>> va_status = i965_validate_config(ctx, profile, entrypoint);
>> @@ -674,6 +675,15 @@ i965_GetConfigAttributes(VADriverContextP ctx,
>> break;
>> }
>>
>> + case VAConfigAttribEncQualityRange:
>> + if (entrypoint == VAEntrypointEncSlice) {
>> + attrib_list[i].value = 1;
>> + if (profile == VAProfileH264ConstrainedBaseline &&
>> + IS_GEN7(i965->intel.device_info))
>> + attrib_list[i].value = ENCODER_QUALITY_RANGE;
>> + break;
>> + }
>> +
>> default:
>> /* Do nothing */
>> attrib_list[i].value = VA_ATTRIB_NOT_SUPPORTED;
>> diff --git a/src/i965_drv_video.h b/src/i965_drv_video.h
>> index 63366cb..6d7d0fb 100644
>> --- a/src/i965_drv_video.h
>> +++ b/src/i965_drv_video.h
>> @@ -65,6 +65,11 @@
>> #define DEFAULT_HUE 0
>> #define DEFAULT_SATURATION 10
>>
>> +#define ENCODER_QUALITY_RANGE 2
>> +#define ENCODER_DEFAULT_QUALITY 1
>> +#define ENCODER_HIGH_QUALITY ENCODER_DEFAULT_QUALITY
>> +#define ENCODER_LOW_QUALITY 2
>> +
>> struct i965_surface
>> {
>> struct object_base *base;
>> diff --git a/src/i965_encoder.c b/src/i965_encoder.c
>> index 14c37bb..126dcfc 100644
>> --- a/src/i965_encoder.c
>> +++ b/src/i965_encoder.c
>> @@ -123,6 +123,30 @@ intel_encoder_check_yuv_surface(VADriverContextP ctx,
>> }
>>
>> static VAStatus
>> +intel_encoder_check_misc_parameter(VADriverContextP ctx,
>> + struct encode_state *encode_state,
>> + struct intel_encoder_context *encoder_context)
>> +{
>> +
>> + if (encode_state->misc_param[VAEncMiscParameterTypeQualityLevel] &&
>> + encode_state->misc_param[VAEncMiscParameterTypeQualityLevel]->buffer) {
>> + VAEncMiscParameterBuffer* pMiscParam = (VAEncMiscParameterBuffer*)encode_state->misc_param[VAEncMiscParameterTypeQualityLevel]->buffer;
>> + VAEncMiscParameterBufferQualityLevel* param_quality_level = (VAEncMiscParameterBufferQualityLevel*)pMiscParam->data;
>> + encoder_context->quality_level = param_quality_level->quality_level;
>> +
>> + if (encoder_context->quality_level == 0)
>> + encoder_context->quality_level = ENCODER_DEFAULT_QUALITY;
>> + else if (encoder_context->quality_level > encoder_context->quality_range)
>> + goto error;
>> + }
>> +
>> + return VA_STATUS_SUCCESS;
>> +
>> +error:
>> + return VA_STATUS_ERROR_INVALID_PARAMETER;
>> +}
>> +
>> +static VAStatus
>> intel_encoder_check_avc_parameter(VADriverContextP ctx,
>> struct encode_state *encode_state,
>> struct intel_encoder_context *encoder_context)
>> @@ -278,6 +302,9 @@ intel_encoder_sanity_check_input(VADriverContextP ctx,
>>
>> vaStatus = intel_encoder_check_yuv_surface(ctx, profile, encode_state, encoder_context);
>>
>> + if (vaStatus == VA_STATUS_SUCCESS)
>> + vaStatus = intel_encoder_check_misc_parameter(ctx, encode_state, encoder_context);
>> +
>> out:
>> return vaStatus;
>> }
>> @@ -335,6 +362,8 @@ intel_enc_hw_context_init(VADriverContextP ctx,
>> encoder_context->input_yuv_surface = VA_INVALID_SURFACE;
>> encoder_context->is_tmp_id = 0;
>> encoder_context->rate_control_mode = VA_RC_NONE;
>> + encoder_context->quality_level = ENCODER_DEFAULT_QUALITY;
>> + encoder_context->quality_range = 1;
>>
>> switch (obj_config->profile) {
>> case VAProfileMPEG2Simple:
>> @@ -395,7 +424,14 @@ gen6_enc_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
>> struct hw_context *
>> gen7_enc_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
>> {
>> - return intel_enc_hw_context_init(ctx, obj_config, gen7_vme_context_init, gen7_mfc_context_init);
>> + struct intel_encoder_context *encoder_context;
>> +
>> + encoder_context = (struct intel_encoder_context *)intel_enc_hw_context_init(ctx, obj_config, gen7_vme_context_init, gen7_mfc_context_init);
>> +
>> + if (obj_config->profile == VAProfileH264ConstrainedBaseline)
>> + encoder_context->quality_range = ENCODER_QUALITY_RANGE;
>> +
>> + return (struct hw_context *)encoder_context;
>> }
>>
>> struct hw_context *
>> diff --git a/src/i965_encoder.h b/src/i965_encoder.h
>> index 71396d6..20d49fc 100644
>> --- a/src/i965_encoder.h
>> +++ b/src/i965_encoder.h
>> @@ -43,6 +43,8 @@ struct intel_encoder_context
>> VASurfaceID input_yuv_surface;
>> int is_tmp_id;
>> unsigned int rate_control_mode;
>> + unsigned int quality_level;
>> + unsigned int quality_range;
>> void *vme_context;
>> void *mfc_context;
>> void (*vme_context_destroy)(void *vme_context);
>
>
>
More information about the Libva
mailing list