[Libva] [PATCH V3 2/2] Add multi quality levels encoding support for GEN7
Zhong Li
zhong.li at intel.com
Tue Jun 10 19:49:21 PDT 2014
Two encoding quality levels are supported on GEN7.
Default quality level is set to be 1, which has better quality,
but higher gpu usage and worse performance.
The second quality level is 2, which has lower gpu usage and
better performance, but worse quality.
Support for multiple quality levels on other platforms will be added later.
v1->v2: 1. follow haihao's comments to init and check quality_level.
2. remove CBR limitation for low quality level.
Signed-off-by: Zhong Li <zhong.li at intel.com>
---
src/gen6_mfc.c | 130 ++++++++++++++++++++++++++++++++++++++------------
src/gen7_vme.c | 84 +++++++++++++++++++++++++++-----
src/i965_drv_video.c | 10 ++++
src/i965_drv_video.h | 5 ++
src/i965_encoder.c | 38 ++++++++++++++-
src/i965_encoder.h | 2 +
6 files changed, 226 insertions(+), 43 deletions(-)
diff --git a/src/gen6_mfc.c b/src/gen6_mfc.c
index 0a10054..30cdb66 100644
--- a/src/gen6_mfc.c
+++ b/src/gen6_mfc.c
@@ -676,8 +676,6 @@ gen6_mfc_stop(VADriverContextP ctx,
return VA_STATUS_SUCCESS;
}
-#if __SOFTWARE__
-
static int
gen6_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb, int qp,unsigned int *msg,
struct intel_encoder_context *encoder_context,
@@ -768,6 +766,58 @@ gen6_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, in
return len_in_dwords;
}
+
+static int
+gen6_mfc_avc_pak_object_inter2(VADriverContextP ctx, int x, int y, int end_mb, int qp,
+ unsigned int offset,
+ struct intel_encoder_context *encoder_context,
+ struct intel_batchbuffer *batch)
+{
+ struct gen6_vme_context *vme_context = encoder_context->vme_context;
+ int len_in_dwords = 11;
+
+ if (batch == NULL)
+ batch = encoder_context->base.batch;
+
+ BEGIN_BCS_BATCH(batch, len_in_dwords);
+
+ OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
+
+ OUT_BCS_BATCH(batch, 32); /* 32 MV*/
+ OUT_BCS_BATCH(batch, offset);
+
+ OUT_BCS_BATCH(batch,
+ (1 << 24) | /* PackedMvNum, Debug*/
+ (4 << 20) | /* 8 MV, SNB don't use it*/
+ (1 << 19) | /* CbpDcY */
+ (1 << 18) | /* CbpDcU */
+ (1 << 17) | /* CbpDcV */
+ (0 << 15) | /* Transform8x8Flag = 0*/
+ (0 << 14) | /* Frame based*/
+ (0 << 13) | /* Inter MB */
+ (1 << 8) | /* MbType = P_L0_16x16 */
+ (0 << 7) | /* MBZ for frame */
+ (0 << 6) | /* MBZ */
+ (2 << 4) | /* MBZ for inter*/
+ (0 << 3) | /* MBZ */
+ (0 << 2) | /* SkipMbFlag */
+ (0 << 0)); /* InterMbMode */
+
+ OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x); /* Code Block Pattern for Y*/
+ OUT_BCS_BATCH(batch, 0x000F000F); /* Code Block Pattern */
+ OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp); /* Last MB */
+
+ /*Stuff for Inter MB*/
+ OUT_BCS_BATCH(batch, 0x0);
+ OUT_BCS_BATCH(batch, 0x0);
+ OUT_BCS_BATCH(batch, 0x0);
+
+ OUT_BCS_BATCH(batch, 0xF0020000); /*MaxSizeInWord and TargetSzieInWord*/
+
+ ADVANCE_BCS_BATCH(batch);
+
+ return len_in_dwords;
+}
static void
gen6_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
@@ -791,6 +841,7 @@ gen6_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
unsigned int tail_data[] = { 0x0, 0x0 };
int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
int is_intra = slice_type == SLICE_TYPE_I;
+ int is_low_quality = (encoder_context->quality_level == ENCODER_LOW_QUALITY);
if (rate_control_mode == VA_RC_CBR) {
qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
@@ -818,36 +869,54 @@ gen6_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
dri_bo_map(vme_context->vme_output.bo , 1);
msg = (unsigned int *)vme_context->vme_output.bo->virtual;
- if (is_intra) {
- msg += pSliceParameter->macroblock_address * INTRA_VME_OUTPUT_IN_DWS;
- } else {
- msg += pSliceParameter->macroblock_address * INTER_VME_OUTPUT_IN_DWS;
- msg += 32; /* the first 32 DWs are MVs */
- offset = pSliceParameter->macroblock_address * INTER_VME_OUTPUT_IN_BYTES;
- }
-
- for (i = pSliceParameter->macroblock_address;
- i < pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks; i++) {
- int last_mb = (i == (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks - 1) );
- x = i % width_in_mbs;
- y = i / width_in_mbs;
+ if (is_low_quality) {
+ for (i = pSliceParameter->macroblock_address;
+ i < pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks; i++) {
+ int last_mb = (i == (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks - 1) );
+ x = i % width_in_mbs;
+ y = i / width_in_mbs;
+ if (is_intra) {
+ assert(msg);
+ gen6_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
+ msg += 4;
+ } else {
+ gen6_mfc_avc_pak_object_inter2(ctx, x, y, last_mb, qp, offset, encoder_context, slice_batch);
+ offset += 64;
+ }
+ }
+ } else {
if (is_intra) {
- assert(msg);
- gen6_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
- msg += INTRA_VME_OUTPUT_IN_DWS;
+ msg += pSliceParameter->macroblock_address * INTRA_VME_OUTPUT_IN_DWS;
} else {
- if (msg[0] & INTRA_MB_FLAG_MASK) {
+ msg += pSliceParameter->macroblock_address * INTER_VME_OUTPUT_IN_DWS;
+ msg += 32; /* the first 32 DWs are MVs */
+ offset = pSliceParameter->macroblock_address * INTER_VME_OUTPUT_IN_BYTES;
+ }
+
+ for (i = pSliceParameter->macroblock_address;
+ i < pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks; i++) {
+ int last_mb = (i == (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks - 1) );
+ x = i % width_in_mbs;
+ y = i / width_in_mbs;
+
+ if (is_intra) {
+ assert(msg);
gen6_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
+ msg += INTRA_VME_OUTPUT_IN_DWS;
} else {
- gen6_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, msg, offset, encoder_context, 0, 0, slice_type, slice_batch);
- }
+ if (msg[0] & INTRA_MB_FLAG_MASK) {
+ gen6_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
+ } else {
+ gen6_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, msg, offset, encoder_context, 0, 0, slice_type, slice_batch);
+ }
- msg += INTER_VME_OUTPUT_IN_DWS;
- offset += INTER_VME_OUTPUT_IN_BYTES;
+ msg += INTER_VME_OUTPUT_IN_DWS;
+ offset += INTER_VME_OUTPUT_IN_BYTES;
+ }
}
}
-
+
dri_bo_unmap(vme_context->vme_output.bo);
if ( last_slice ) {
@@ -896,8 +965,6 @@ gen6_mfc_avc_software_batchbuffer(VADriverContextP ctx,
return batch_bo;
}
-#else
-
static void
gen6_mfc_batchbuffer_surfaces_input(VADriverContextP ctx,
struct encode_state *encode_state,
@@ -1291,9 +1358,6 @@ gen6_mfc_avc_hardware_batchbuffer(VADriverContextP ctx,
return mfc_context->mfc_batchbuffer_surface.bo;
}
-#endif
-
-
static void
gen6_mfc_avc_pipeline_programing(VADriverContextP ctx,
struct encode_state *encode_state,
@@ -1308,11 +1372,15 @@ gen6_mfc_avc_pipeline_programing(VADriverContextP ctx,
return;
}
+ if (encoder_context->quality_level == ENCODER_LOW_QUALITY )
+ slice_batch_bo = gen6_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context);
+ else {
#if __SOFTWARE__
- slice_batch_bo = gen6_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context);
+ slice_batch_bo = gen6_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context);
#else
- slice_batch_bo = gen6_mfc_avc_hardware_batchbuffer(ctx, encode_state, encoder_context);
+ slice_batch_bo = gen6_mfc_avc_hardware_batchbuffer(ctx, encode_state, encoder_context);
#endif
+ }
// begin programing
intel_batchbuffer_start_atomic_bcs(batch, 0x4000);
diff --git a/src/gen7_vme.c b/src/gen7_vme.c
index 042fe5d..135a5ad 100644
--- a/src/gen7_vme.c
+++ b/src/gen7_vme.c
@@ -63,6 +63,8 @@ enum VIDEO_CODING_TYPE{
enum AVC_VME_KERNEL_TYPE{
AVC_VME_INTRA_SHADER = 0,
AVC_VME_INTER_SHADER,
+ AVC_VME_OLD_INTRA_SHADER,
+ AVC_VME_OLD_INTER_SHADER,
AVC_VME_BATCHBUFFER,
AVC_VME_BINTER_SHADER,
AVC_VME_KERNEL_SUM
@@ -83,6 +85,14 @@ static const uint32_t gen7_vme_inter_frame[][4] = {
#include "shaders/vme/inter_frame_ivb.g7b"
};
+static const uint32_t gen7_vme_old_intra_frame[][4] = {
+#include "shaders/vme_old/intra_frame.g7b"
+};
+
+static const uint32_t gen7_vme_old_inter_frame[][4] = {
+#include "shaders/vme_old/inter_frame.g7b"
+};
+
static const uint32_t gen7_vme_batchbuffer[][4] = {
#include "shaders/vme/batchbuffer.g7b"
};
@@ -107,6 +117,20 @@ static struct i965_kernel gen7_vme_kernels[] = {
NULL
},
{
+ "AVC VME Old Intra Frame",
+ AVC_VME_OLD_INTRA_SHADER,
+ gen7_vme_old_intra_frame,
+ sizeof(gen7_vme_old_intra_frame),
+ NULL
+ },
+ {
+ "AVC VME Old Inter Frame",
+ AVC_VME_OLD_INTER_SHADER,
+ gen7_vme_old_inter_frame,
+ sizeof(gen7_vme_old_inter_frame),
+ NULL
+ },
+ {
"AVC VME BATCHBUFFER",
AVC_VME_BATCHBUFFER,
gen7_vme_batchbuffer,
@@ -359,6 +383,36 @@ static VAStatus gen7_vme_constant_setup(VADriverContextP ctx,
return VA_STATUS_SUCCESS;
}
+static VAStatus
+gen7_vme_vme_state_setup(VADriverContextP ctx,
+ struct encode_state *encode_state,
+ int is_intra,
+ struct intel_encoder_context *encoder_context)
+{
+ struct gen6_vme_context *vme_context = encoder_context->vme_context;
+ unsigned int *vme_state_message;
+ int i;
+
+ //building VME state message
+ dri_bo_map(vme_context->vme_state.bo, 1);
+ assert(vme_context->vme_state.bo->virtual);
+ vme_state_message = (unsigned int *)vme_context->vme_state.bo->virtual;
+
+ vme_state_message[0] = 0x10010101;
+ vme_state_message[1] = 0x100F0F0F;
+ vme_state_message[2] = 0x10010101;
+ vme_state_message[3] = 0x000F0F0F;
+ for(i = 4; i < 14; i++) {
+ vme_state_message[i] = 0x00000000;
+ }
+
+ for(i = 14; i < 32; i++) {
+ vme_state_message[i] = 0x00000000;
+ }
+
+ dri_bo_unmap( vme_context->vme_state.bo);
+ return VA_STATUS_SUCCESS;
+}
static VAStatus gen7_vme_avc_state_setup(VADriverContextP ctx,
struct encode_state *encode_state,
@@ -598,25 +652,30 @@ static void gen7_vme_pipeline_programing(VADriverContextP ctx,
int s;
bool allow_hwscore = true;
int kernel_shader;
-
- for (s = 0; s < encode_state->num_slice_params_ext; s++) {
- pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
- if ((pSliceParameter->macroblock_address % width_in_mbs)) {
- allow_hwscore = false;
- break;
- }
+ unsigned int is_low_quality = (encoder_context->quality_level == ENCODER_LOW_QUALITY);
+
+ if (is_low_quality)
+ allow_hwscore = false;
+ else {
+ for (s = 0; s < encode_state->num_slice_params_ext; s++) {
+ pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
+ if ((pSliceParameter->macroblock_address % width_in_mbs)) {
+ allow_hwscore = false;
+ break;
+ }
+ }
}
if ((pSliceParameter->slice_type == SLICE_TYPE_I) ||
(pSliceParameter->slice_type == SLICE_TYPE_I)) {
- kernel_shader = AVC_VME_INTRA_SHADER;
+ kernel_shader = (is_low_quality ? AVC_VME_OLD_INTRA_SHADER : AVC_VME_INTRA_SHADER);
} else if ((pSliceParameter->slice_type == SLICE_TYPE_P) ||
(pSliceParameter->slice_type == SLICE_TYPE_SP)) {
- kernel_shader = AVC_VME_INTER_SHADER;
+ kernel_shader = (is_low_quality ? AVC_VME_OLD_INTER_SHADER : AVC_VME_INTER_SHADER);
} else {
kernel_shader = AVC_VME_BINTER_SHADER;
if (!allow_hwscore)
- kernel_shader = AVC_VME_INTER_SHADER;
+ kernel_shader = (is_low_quality ? AVC_VME_OLD_INTER_SHADER : AVC_VME_INTER_SHADER);
}
if (allow_hwscore)
@@ -668,7 +727,10 @@ static VAStatus gen7_vme_prepare(VADriverContextP ctx,
gen7_vme_surface_setup(ctx, encode_state, is_intra, encoder_context);
gen7_vme_interface_setup(ctx, encode_state, encoder_context);
gen7_vme_constant_setup(ctx, encode_state, encoder_context);
- gen7_vme_avc_state_setup(ctx, encode_state, is_intra, encoder_context);
+ if (encoder_context->quality_level == ENCODER_LOW_QUALITY)
+ gen7_vme_vme_state_setup(ctx, encode_state, is_intra, encoder_context);
+ else
+ gen7_vme_avc_state_setup(ctx, encode_state, is_intra, encoder_context);
/*Programing media pipeline*/
gen7_vme_pipeline_programing(ctx, encode_state, encoder_context);
diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c
index df625bb..54e657d 100755
--- a/src/i965_drv_video.c
+++ b/src/i965_drv_video.c
@@ -629,6 +629,7 @@ i965_GetConfigAttributes(VADriverContextP ctx,
int num_attribs)
{
VAStatus va_status;
+ struct i965_driver_data *i965 = i965_driver_data(ctx);
int i;
va_status = i965_validate_config(ctx, profile, entrypoint);
@@ -674,6 +675,15 @@ i965_GetConfigAttributes(VADriverContextP ctx,
break;
}
+ case VAConfigAttribEncQualityRange:
+ if (entrypoint == VAEntrypointEncSlice) {
+ attrib_list[i].value = 1;
+ if (profile == VAProfileH264ConstrainedBaseline &&
+ IS_GEN7(i965->intel.device_info))
+ attrib_list[i].value = ENCODER_QUALITY_RANGE;
+ break;
+ }
+
default:
/* Do nothing */
attrib_list[i].value = VA_ATTRIB_NOT_SUPPORTED;
diff --git a/src/i965_drv_video.h b/src/i965_drv_video.h
index 63366cb..6d7d0fb 100644
--- a/src/i965_drv_video.h
+++ b/src/i965_drv_video.h
@@ -65,6 +65,11 @@
#define DEFAULT_HUE 0
#define DEFAULT_SATURATION 10
+#define ENCODER_QUALITY_RANGE 2
+#define ENCODER_DEFAULT_QUALITY 1
+#define ENCODER_HIGH_QUALITY ENCODER_DEFAULT_QUALITY
+#define ENCODER_LOW_QUALITY 2
+
struct i965_surface
{
struct object_base *base;
diff --git a/src/i965_encoder.c b/src/i965_encoder.c
index 14c37bb..126dcfc 100644
--- a/src/i965_encoder.c
+++ b/src/i965_encoder.c
@@ -123,6 +123,30 @@ intel_encoder_check_yuv_surface(VADriverContextP ctx,
}
static VAStatus
+intel_encoder_check_misc_parameter(VADriverContextP ctx,
+ struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context)
+{
+
+ if (encode_state->misc_param[VAEncMiscParameterTypeQualityLevel] &&
+ encode_state->misc_param[VAEncMiscParameterTypeQualityLevel]->buffer) {
+ VAEncMiscParameterBuffer* pMiscParam = (VAEncMiscParameterBuffer*)encode_state->misc_param[VAEncMiscParameterTypeQualityLevel]->buffer;
+ VAEncMiscParameterBufferQualityLevel* param_quality_level = (VAEncMiscParameterBufferQualityLevel*)pMiscParam->data;
+ encoder_context->quality_level = param_quality_level->quality_level;
+
+ if (encoder_context->quality_level == 0)
+ encoder_context->quality_level = ENCODER_DEFAULT_QUALITY;
+ else if (encoder_context->quality_level > encoder_context->quality_range)
+ goto error;
+ }
+
+ return VA_STATUS_SUCCESS;
+
+error:
+ return VA_STATUS_ERROR_INVALID_PARAMETER;
+}
+
+static VAStatus
intel_encoder_check_avc_parameter(VADriverContextP ctx,
struct encode_state *encode_state,
struct intel_encoder_context *encoder_context)
@@ -278,6 +302,9 @@ intel_encoder_sanity_check_input(VADriverContextP ctx,
vaStatus = intel_encoder_check_yuv_surface(ctx, profile, encode_state, encoder_context);
+ if (vaStatus == VA_STATUS_SUCCESS)
+ vaStatus = intel_encoder_check_misc_parameter(ctx, encode_state, encoder_context);
+
out:
return vaStatus;
}
@@ -335,6 +362,8 @@ intel_enc_hw_context_init(VADriverContextP ctx,
encoder_context->input_yuv_surface = VA_INVALID_SURFACE;
encoder_context->is_tmp_id = 0;
encoder_context->rate_control_mode = VA_RC_NONE;
+ encoder_context->quality_level = ENCODER_DEFAULT_QUALITY;
+ encoder_context->quality_range = 1;
switch (obj_config->profile) {
case VAProfileMPEG2Simple:
@@ -395,7 +424,14 @@ gen6_enc_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
struct hw_context *
gen7_enc_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
{
- return intel_enc_hw_context_init(ctx, obj_config, gen7_vme_context_init, gen7_mfc_context_init);
+ struct intel_encoder_context *encoder_context;
+
+ encoder_context = (struct intel_encoder_context *)intel_enc_hw_context_init(ctx, obj_config, gen7_vme_context_init, gen7_mfc_context_init);
+
+ if (obj_config->profile == VAProfileH264ConstrainedBaseline)
+ encoder_context->quality_range = ENCODER_QUALITY_RANGE;
+
+ return (struct hw_context *)encoder_context;
}
struct hw_context *
diff --git a/src/i965_encoder.h b/src/i965_encoder.h
index 71396d6..20d49fc 100644
--- a/src/i965_encoder.h
+++ b/src/i965_encoder.h
@@ -43,6 +43,8 @@ struct intel_encoder_context
VASurfaceID input_yuv_surface;
int is_tmp_id;
unsigned int rate_control_mode;
+ unsigned int quality_level;
+ unsigned int quality_range;
void *vme_context;
void *mfc_context;
void (*vme_context_destroy)(void *vme_context);
--
1.7.9.5
More information about the Libva
mailing list