[Libva] [PATCH 6/7] VP8 HWEnc: vp8 encode quality optimization
Zhong Li
zhong.li at intel.com
Mon Dec 22 23:46:23 PST 2014
1. Add mv and mode cost calc
2. Support 16x16 and 4x4 intra prediction
3. Support multi-mode intra prediction
4. Support 16x16 New_MV mode inter prediction
Signed-off-by: Zhong Li <zhong.li at intel.com>
---
src/gen6_mfc.h | 60 +++++++++++++
src/gen6_mfc_common.c | 69 ++++++++++++++
src/gen6_vme.h | 4 +
src/gen9_mfc.c | 150 +++++++++++++++++++++++--------
src/gen9_vme.c | 4 +-
src/shaders/vme/vme8.inc | 5 +-
src/shaders/vme/vp8_inter_frame_gen9.asm | 3 +-
src/shaders/vme/vp8_inter_frame_gen9.g9b | 7 +-
src/shaders/vme/vp8_intra_frame_gen9.g9b | 2 +-
9 files changed, 258 insertions(+), 46 deletions(-)
diff --git a/src/gen6_mfc.h b/src/gen6_mfc.h
index c941267..9afeb7b 100644
--- a/src/gen6_mfc.h
+++ b/src/gen6_mfc.h
@@ -62,6 +62,66 @@ struct encode_state;
#define CMD_LEN_IN_OWORD 4
+/*
+ * Macroblock-level intra prediction modes in VME numbering.
+ * These values index vp8_intra_mb_mode_map[] in gen9_mfc.c, which
+ * translates them to VP8_PAK_MB_INTRA_PRED_MODE (note that the two
+ * enums order V/H/DC differently, and VME_PL_PRED maps to PAK_TM_PRED).
+ */
+typedef enum {
+ VME_V_PRED = 0,
+ VME_H_PRED = 1,
+ VME_DC_PRED = 2,
+ VME_PL_PRED = 3,
+
+ VME_MB_INTRA_MODE_COUNT /* number of modes; sizes vp8_intra_mb_mode_map[] */
+} VME_MB_INTRA_PRED_MODE;
+
+/*
+ * Macroblock-level intra prediction modes in VP8 PAK numbering.
+ * These are the values stored in vp8_intra_mb_mode_map[] (gen9_mfc.c),
+ * i.e. the result of translating a VME_MB_INTRA_PRED_MODE.
+ */
+typedef enum {
+ PAK_DC_PRED = 0,
+ PAK_V_PRED = 1,
+ PAK_H_PRED = 2,
+ PAK_TM_PRED = 3,
+
+ PAK_MB_INTRA_MODE_COUNT /* number of PAK macroblock modes */
+} VP8_PAK_MB_INTRA_PRED_MODE;
+
+/*
+ * Per-block (4x4) intra prediction modes in VME numbering.
+ * These values index vp8_intra_block_mode_map[] in gen9_mfc.c, which
+ * translates each one to a VP8_PAK_BLOCK_INTRA_PRED_MODE.
+ */
+typedef enum
+{
+ VME_B_V_PRED = 0,
+ VME_B_H_PRED = 1,
+ VME_B_DC_PRED = 2,
+ VME_B_DL_PRED = 3,
+ VME_B_DR_PRED = 4,
+ VME_B_VR_PRED = 5,
+ VME_B_HD_PRED = 6,
+ VME_B_VL_PRED = 7,
+ VME_B_HU_PRED = 8,
+
+ VME_B_INTRA_MODE_COUNT /* number of modes (9); sizes vp8_intra_block_mode_map[] */
+} VME_BLOCK_INTRA_PRED_MODE;
+
+/*
+ * Per-block (4x4) intra prediction modes in VP8 PAK numbering.
+ * This enum has one more entry than VME_BLOCK_INTRA_PRED_MODE:
+ * PAK_B_TM_PRED has no VME counterpart and is never produced by
+ * vp8_intra_block_mode_map[] in gen9_mfc.c.
+ */
+typedef enum
+{
+ PAK_B_DC_PRED = 0,
+ PAK_B_TM_PRED = 1,
+ PAK_B_VE_PRED = 2,
+ PAK_B_HE_PRED = 3,
+ PAK_B_LD_PRED = 4,
+ PAK_B_RD_PRED = 5,
+ PAK_B_VR_PRED = 6,
+ PAK_B_VL_PRED = 7,
+ PAK_B_HD_PRED = 8,
+ PAK_B_HU_PRED = 9,
+
+ PAK_B_INTRA_MODE_COUNT /* number of PAK block modes (10) */
+} VP8_PAK_BLOCK_INTRA_PRED_MODE;
+
+/*
+ * Pairs a VME macroblock intra mode with its VP8 PAK equivalent.
+ * NOTE(review): nothing in this patch references this type; the mapping
+ * table in gen9_mfc.c is a plain unsigned char array. Confirm it is needed.
+ */
+typedef struct
+{
+ int vme_intra_mb_mode;
+ int vp8_pak_intra_mb_mode;
+} vp8_intra_mb_mode_map_t;
+
+/*
+ * Pairs a VME 4x4 block intra mode with its VP8 PAK equivalent.
+ * NOTE(review): nothing in this patch references this type; the mapping
+ * table in gen9_mfc.c is a plain unsigned char array. Confirm it is needed.
+ */
+typedef struct
+{
+ int vme_intra_block_mode;
+ int vp8_pak_intra_block_mode;
+} vp8_intra_block_mode_map_t;
typedef enum _gen6_brc_status
{
diff --git a/src/gen6_mfc_common.c b/src/gen6_mfc_common.c
index fe41dac..517eb98 100644
--- a/src/gen6_mfc_common.c
+++ b/src/gen6_mfc_common.c
@@ -938,6 +938,75 @@ void intel_vme_update_mbmv_cost(VADriverContextP ctx,
}
}
+/*
+ * Fill the VME state message with the VP8 mode and motion-vector cost
+ * entries for the current picture.
+ *
+ * All costs are scaled by lambda = intel_lambda_qp(quantization_index[0] / 2)
+ * and encoded with intel_format_lutvalue().
+ *
+ *  - Key frame: only the INTRA_16X16 (0) and INTRA_4X4 (lambda * 16)
+ *    entries are written.
+ *  - Other frames: the eight MV cost entries starting at MODE_INTER_MV0
+ *    are written first; then either a flat 0x4a for every mode when
+ *    quantization_index[0] < 32, or lambda-scaled intra/inter mode costs.
+ *
+ * Does nothing when the VME context has no state message buffer.
+ */
+void intel_vme_vp8_update_mbmv_cost(VADriverContextP ctx,
+                                    struct encode_state *encode_state,
+                                    struct intel_encoder_context *encoder_context)
+{
+    struct gen6_vme_context *vme_context = encoder_context->vme_context;
+    VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
+    VAQMatrixBufferVP8 *q_matrix = (VAQMatrixBufferVP8 *)encode_state->q_matrix->buffer;
+    int m_cost, j, mv_count;
+    uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message);
+    float lambda, m_costf;
+
+    /* frame_type == 0 is treated as a key frame */
+    int is_key_frame = !pic_param->pic_flags.bits.frame_type;
+
+    if (vme_state_message == NULL)
+        return;
+
+    lambda = intel_lambda_qp(q_matrix->quantization_index[0] >> 1);
+    if (is_key_frame) {
+        vme_state_message[MODE_INTRA_16X16] = 0;
+        m_cost = lambda * 16;
+        vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
+    } else {
+        /* MV cost table: entry 0 is free, entries 1-2 cover j = 1, 2 ... */
+        m_cost = 0;
+        vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f);
+        for (j = 1; j < 3; j++) {
+            m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
+            m_cost = (int)m_costf;
+            vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
+        }
+        /* ... and entries 3-7 cover the powers of two j = 4, 8, 16, 32, 64 */
+        mv_count = 3;
+        for (j = 4; j <= 64; j *= 2) {
+            m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
+            m_cost = (int)m_costf;
+            vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f);
+            mv_count++;
+        }
+
+        /* Low quantizer index: use one flat cost for every mode and stop */
+        if (q_matrix->quantization_index[0] < 32 ) {
+            vme_state_message[MODE_INTRA_16X16] = 0x4a;
+            vme_state_message[MODE_INTRA_4X4] = 0x4a;
+            vme_state_message[MODE_INTRA_NONPRED] = 0x4a;
+            vme_state_message[MODE_INTER_16X16] = 0x4a;
+            vme_state_message[MODE_INTER_16X8] = 0x4a;
+            vme_state_message[MODE_INTER_8X8] = 0x4a;
+            vme_state_message[MODE_INTER_4X4] = 0x4a;
+            return;
+        }
+        /*
+         * Fix: this product used to be stored in m_costf only, so the
+         * stale m_cost left over from the MV loop above was encoded as
+         * the INTRA_16X16 cost.
+         */
+        m_cost = lambda * 10;
+        vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
+        m_cost = lambda * 24;
+        vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
+
+        m_costf = lambda * 2.5;
+        m_cost = m_costf;
+        vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
+        m_costf = lambda * 4;
+        m_cost = m_costf;
+        vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
+        m_costf = lambda * 1.5;
+        m_cost = m_costf;
+        vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
+        m_costf = lambda * 5;
+        m_cost = m_costf;
+        vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
+        /* BWD is not used in P-frame */
+        vme_state_message[MODE_INTER_BWD] = 0;
+    }
+}
#define MB_SCOREBOARD_A (1 << 0)
#define MB_SCOREBOARD_B (1 << 1)
diff --git a/src/gen6_vme.h b/src/gen6_vme.h
index bc62c14..259c40f 100644
--- a/src/gen6_vme.h
+++ b/src/gen6_vme.h
@@ -106,6 +106,10 @@ extern void intel_vme_update_mbmv_cost(VADriverContextP ctx,
struct encode_state *encode_state,
struct intel_encoder_context *encoder_context);
+/*
+ * Write the VP8 mode and motion-vector cost entries into the VME state
+ * message of encoder_context; defined in gen6_mfc_common.c.
+ */
+void intel_vme_vp8_update_mbmv_cost(VADriverContextP ctx,
+ struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context);
+
Bool gen7_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context);
#define MODE_INTRA_NONPRED 0
diff --git a/src/gen9_mfc.c b/src/gen9_mfc.c
index 28be67b..ee40c84 100644
--- a/src/gen9_mfc.c
+++ b/src/gen9_mfc.c
@@ -84,6 +84,7 @@ static struct i965_kernel gen9_mfc_kernels[] = {
#define INTER_16X8 0x01
#define INTER_8X16 0x02
#define SUBMB_SHAPE_MASK 0x00FF00
+#define INTER_16X16 0x00
#define INTER_MV8 (4 << 20)
#define INTER_MV32 (6 << 20)
@@ -183,23 +184,18 @@ gen9_mfc_ind_obj_base_addr_state(VADriverContextP ctx,
I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
mfc_context->mfc_indirect_pak_bse_object.end_offset);
OUT_BCS_BATCH(batch, 0);
- /* the DW6-10 is for MFX Indirect MV Object Base Address */
- OUT_BCS_BATCH(batch, 0);
- OUT_BCS_BATCH(batch, 0);
- OUT_BCS_BATCH(batch, 0);
- OUT_BCS_BATCH(batch, 0);
- OUT_BCS_BATCH(batch, 0);
} else {
OUT_BCS_BATCH(batch, 0);
OUT_BCS_BATCH(batch, 0);
- /* the DW6-10 is for MFX Indirect MV Object Base Address */
- OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
- OUT_BCS_BATCH(batch, 0);
- OUT_BCS_BATCH(batch, 0);
- OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, vme_size);
- OUT_BCS_BATCH(batch, 0);
}
+ /* the DW6-10 is for MFX Indirect MV Object Base Address */
+ OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, vme_size);
+ OUT_BCS_BATCH(batch, 0);
+
/* the DW11-15 is for MFX IT-COFF. Not used on encoder */
OUT_BCS_BATCH(batch, 0);
OUT_BCS_BATCH(batch, 0);
@@ -2684,7 +2680,7 @@ gen9_mfc_vp8_encoder_cfg(VADriverContextP ctx,
OUT_BCS_BATCH(batch,
0 << 9 | /* compressed bitstream output disable */
1 << 7 | /* disable per-segment delta qindex and loop filter in RC */
- 0 << 6 | /* RC initial pass */
+ 1 << 6 | /* RC initial pass */
0 << 4 | /* upate segment feature date flag */
1 << 3 | /* bitstream statistics output enable */
1 << 2 | /* token statistics output enable */
@@ -2936,6 +2932,46 @@ gen9_mfc_vp8_pipeline_picture_programing(VADriverContextP ctx,
gen9_mfc_vp8_encoder_cfg(ctx, encode_state, encoder_context);
}
+/*
+ * Macroblock intra mode translation, indexed by VME_MB_INTRA_PRED_MODE and
+ * yielding the matching VP8_PAK_MB_INTRA_PRED_MODE value.
+ */
+static const unsigned char
+vp8_intra_mb_mode_map[VME_MB_INTRA_MODE_COUNT] = {
+    [VME_V_PRED]  = PAK_V_PRED,
+    [VME_H_PRED]  = PAK_H_PRED,
+    [VME_DC_PRED] = PAK_DC_PRED,
+    [VME_PL_PRED] = PAK_TM_PRED,
+};
+
+/*
+ * 4x4 block intra mode translation, indexed by VME_BLOCK_INTRA_PRED_MODE and
+ * yielding the matching VP8_PAK_BLOCK_INTRA_PRED_MODE value.
+ */
+static const unsigned char
+vp8_intra_block_mode_map[VME_B_INTRA_MODE_COUNT] = {
+    [VME_B_V_PRED]  = PAK_B_VE_PRED,
+    [VME_B_H_PRED]  = PAK_B_HE_PRED,
+    [VME_B_DC_PRED] = PAK_B_DC_PRED,
+    [VME_B_DL_PRED] = PAK_B_LD_PRED,
+    [VME_B_DR_PRED] = PAK_B_RD_PRED,
+    [VME_B_VR_PRED] = PAK_B_VR_PRED,
+    [VME_B_HD_PRED] = PAK_B_HD_PRED,
+    [VME_B_VL_PRED] = PAK_B_VL_PRED,
+    [VME_B_HU_PRED] = PAK_B_HU_PRED,
+};
+
+/*
+ * Translate intra prediction mode(s) from VME numbering to VP8 PAK numbering.
+ *
+ * vme_pred_mode: mode word to translate.
+ * is_luma_4x4:   zero     - only the low two bits of vme_pred_mode are used,
+ *                           looked up in vp8_intra_mb_mode_map[];
+ *                non-zero - vme_pred_mode is treated as eight 4-bit block
+ *                           modes, each looked up in
+ *                           vp8_intra_block_mode_map[] and written back at
+ *                           the same nibble position.
+ *
+ * Returns the translated mode word.
+ */
+static inline int gen9_mfc_vp8_intra_mb_mode_map(unsigned int vme_pred_mode, int is_luma_4x4)
+{
+    unsigned int i, vme_block_mode, pak_block_mode, pak_pred_mode = 0;
+
+    if (!is_luma_4x4)
+        return vp8_intra_mb_mode_map[vme_pred_mode & 0x3];
+
+    for (i = 0; i < 8; i++) {
+        vme_block_mode = (vme_pred_mode >> (4 * i)) & 0xf;
+        assert(vme_block_mode < VME_B_INTRA_MODE_COUNT);
+
+        /*
+         * assert() compiles away under NDEBUG, so guard the table lookup
+         * explicitly: a nibble of 9..15 would otherwise read past the end
+         * of vp8_intra_block_mode_map[]. Fall back to DC prediction.
+         */
+        if (vme_block_mode < VME_B_INTRA_MODE_COUNT)
+            pak_block_mode = vp8_intra_block_mode_map[vme_block_mode];
+        else
+            pak_block_mode = PAK_B_DC_PRED;
+
+        pak_pred_mode |= (pak_block_mode << (4 * i));
+    }
+
+    return pak_pred_mode;
+}
static void
gen9_mfc_vp8_pak_object_intra(VADriverContextP ctx,
struct intel_encoder_context *encoder_context,
@@ -2943,28 +2979,45 @@ gen9_mfc_vp8_pak_object_intra(VADriverContextP ctx,
int x, int y,
struct intel_batchbuffer *batch)
{
+ int i;
+ unsigned int vme_intra_mb_mode, vme_chroma_pred_mode;
+ unsigned int pak_intra_mb_mode, pak_chroma_pred_mode;
+ unsigned int vme_luma_pred_mode[2], pak_luma_pred_mode[2];
+
if (batch == NULL)
batch = encoder_context->base.batch;
+ vme_intra_mb_mode = ((msg[0] & 0x30) >> 4);
+ assert((vme_intra_mb_mode == 0) || (vme_intra_mb_mode == 2)); //vp8 only support intra_16x16 and intra_4x4
+ pak_intra_mb_mode = (vme_intra_mb_mode >> 1);
+
+ vme_luma_pred_mode[0] = msg[1];
+ vme_luma_pred_mode[1] = msg[2];
+ vme_chroma_pred_mode = msg[3] & 0x3;
+
+ pak_luma_pred_mode[0] = gen9_mfc_vp8_intra_mb_mode_map(vme_luma_pred_mode[0], pak_intra_mb_mode);
+ pak_luma_pred_mode[1] = gen9_mfc_vp8_intra_mb_mode_map(vme_luma_pred_mode[1], pak_intra_mb_mode);
+ pak_chroma_pred_mode = gen9_mfc_vp8_intra_mb_mode_map(vme_chroma_pred_mode, 0);
+
BEGIN_BCS_BATCH(batch, 7);
OUT_BCS_BATCH(batch, MFX_VP8_PAK_OBJECT | (7 - 2));
OUT_BCS_BATCH(batch, 0);
OUT_BCS_BATCH(batch, 0);
OUT_BCS_BATCH(batch,
- (0 << 20) | /* mv format: intra mb */
- (0 << 18) | /* Segment ID */
- (0 << 17) | /* disable coeff clamp */
- (1 << 13) | /* intra mb flag */
- (0 << 11) | /* refer picture select: last frame */
- (0 << 8) | /* mb type: 16x16 intra mb */
- (0 << 4) | /* mb uv mode: dc_pred */
- (0 << 2) | /* skip mb flag: disable */
+ (0 << 20) | /* mv format: intra mb */
+ (0 << 18) | /* Segment ID */
+ (0 << 17) | /* disable coeff clamp */
+ (1 << 13) | /* intra mb flag */
+ (0 << 11) | /* refer picture select: last frame */
+ (pak_intra_mb_mode << 8) | /* mb type */
+ (pak_chroma_pred_mode << 4) | /* mb uv mode */
+ (0 << 2) | /* skip mb flag: disable */
0);
OUT_BCS_BATCH(batch, (y << 16) | x);
- OUT_BCS_BATCH(batch, 0); /* y_mode: dc_pred */
- OUT_BCS_BATCH(batch, 0);
+ OUT_BCS_BATCH(batch, pak_luma_pred_mode[0]);
+ OUT_BCS_BATCH(batch, pak_luma_pred_mode[1]);
ADVANCE_BCS_BATCH(batch);
}
@@ -2973,14 +3026,25 @@ static void
gen9_mfc_vp8_pak_object_inter(VADriverContextP ctx,
struct intel_encoder_context *encoder_context,
unsigned int *msg,
+ int offset,
int x, int y,
struct intel_batchbuffer *batch)
{
struct gen6_vme_context *vme_context = encoder_context->vme_context;
+ int i;
if (batch == NULL)
batch = encoder_context->base.batch;
+ /* only support inter_16x16 now */
+ assert((msg[AVC_INTER_MSG_OFFSET] & INTER_MODE_MASK) == INTER_16X16);
+ /* for inter_16x16, all 16 MVs should be same,
+ * and move mv to the vme mb start address to make sure offset is 64 bytes aligned */
+ msg[0] = (msg[AVC_INTER_MV_OFFSET/4] & 0xfffefffe);
+ for (i = 1; i < 16; i++) {
+ msg[i] = msg[0];
+ }
+
BEGIN_BCS_BATCH(batch, 7);
OUT_BCS_BATCH(batch, MFX_VP8_PAK_OBJECT | (7 - 2));
@@ -2988,7 +3052,7 @@ gen9_mfc_vp8_pak_object_inter(VADriverContextP ctx,
(0 << 29) | /* enable inline mv data: disable */
64);
OUT_BCS_BATCH(batch,
- 0);
+ offset);
OUT_BCS_BATCH(batch,
(4 << 20) | /* mv format: inter */
(0 << 18) | /* Segment ID */
@@ -3002,9 +3066,9 @@ gen9_mfc_vp8_pak_object_inter(VADriverContextP ctx,
OUT_BCS_BATCH(batch, (y << 16) | x);
- /*zero mv*/
- OUT_BCS_BATCH(batch, 0x88888888);
- OUT_BCS_BATCH(batch, 0x88888888);
+ /*new mv*/
+ OUT_BCS_BATCH(batch, 0x8);
+ OUT_BCS_BATCH(batch, 0x8);
ADVANCE_BCS_BATCH(batch);
}
@@ -3023,17 +3087,18 @@ gen9_mfc_vp8_pak_pipeline(VADriverContextP ctx,
int height_in_mbs = ALIGN(seq_param->frame_height, 16) / 16;
unsigned int *msg = NULL;
unsigned char *msg_ptr = NULL;
- unsigned int i, is_intra_frame;
+ unsigned int i, offset, is_intra_frame;
is_intra_frame = !pic_param->pic_flags.bits.frame_type;
dri_bo_map(vme_context->vme_output.bo , 0);
- msg = msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
+ msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
for( i = 0; i < width_in_mbs * height_in_mbs; i++) {
int h_pos = i % width_in_mbs;
int v_pos = i / width_in_mbs;
-
+ msg = (unsigned int *) (msg_ptr + i * vme_context->vme_output.size_block);
+
if (is_intra_frame) {
gen9_mfc_vp8_pak_object_intra(ctx,
encoder_context,
@@ -3041,11 +3106,25 @@ gen9_mfc_vp8_pak_pipeline(VADriverContextP ctx,
h_pos, v_pos,
slice_batch);
} else {
- gen9_mfc_vp8_pak_object_inter(ctx,
- encoder_context,
- msg,
- h_pos, v_pos,
- slice_batch);
+ int inter_rdo, intra_rdo;
+ inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
+ intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;
+
+ if (intra_rdo < inter_rdo) {
+ gen9_mfc_vp8_pak_object_intra(ctx,
+ encoder_context,
+ msg,
+ h_pos, v_pos,
+ slice_batch);
+ } else {
+ offset = i * vme_context->vme_output.size_block;
+ gen9_mfc_vp8_pak_object_inter(ctx,
+ encoder_context,
+ msg,
+ offset,
+ h_pos, v_pos,
+ slice_batch);
+ }
}
}
@@ -3149,6 +3228,7 @@ gen9_mfc_vp8_encode_picture(VADriverContextP ctx,
struct intel_encoder_context *encoder_context)
{
struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
+ struct gen6_vme_context *vme_context = encoder_context->vme_context;
gen9_mfc_vp8_init(ctx, encode_state, encoder_context);
intel_mfc_vp8_prepare(ctx, encode_state, encoder_context);
diff --git a/src/gen9_vme.c b/src/gen9_vme.c
index 17f91bd..71bd049 100644
--- a/src/gen9_vme.c
+++ b/src/gen9_vme.c
@@ -1251,8 +1251,8 @@ static VAStatus gen9_vme_vp8_prepare(VADriverContextP ctx,
VAEncPictureParameterBufferVP8 *pPicParameter = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
int is_intra = !pPicParameter->pic_flags.bits.frame_type;
- /* add update vp8 mbmv cost later */
- //intel_vme_update_mbmv_cost(ctx, encode_state, encoder_context);
+ /* update vp8 mbmv cost */
+ intel_vme_vp8_update_mbmv_cost(ctx, encode_state, encoder_context);
/*Setup all the memory object*/
gen9_vme_vp8_surface_setup(ctx, encode_state, is_intra, encoder_context);
diff --git a/src/shaders/vme/vme8.inc b/src/shaders/vme/vme8.inc
index 46cefe4..1769cc3 100644
--- a/src/shaders/vme/vme8.inc
+++ b/src/shaders/vme/vme8.inc
@@ -30,8 +30,8 @@ define(`LUMA_INTRA_16x16_DISABLE', `0x1')
define(`LUMA_INTRA_8x8_DISABLE', `0x2')
define(`LUMA_INTRA_4x4_DISABLE', `0x4')
-define(`SUB_PART_8x4_DISABLE', `0x20')
-define(`SUB_PART_4x8_DISABLE', `0x40')
+define(`SUB_PART_8x4_DISABLE', `0x10')
+define(`SUB_PART_4x8_DISABLE', `0x20')
define(`INTRA_PRED_AVAIL_FLAG_AE', `0x60')
define(`INTRA_PRED_AVAIL_FLAG_B', `0x10')
@@ -55,6 +55,7 @@ define(`INTRA_SAD_NONE', `0x00000000')
define(`INTRA_SAD_HAAR', `0x00800000')
define(`INTER_PART_MASK', `0x00000000')
+define(`VP8_INTER_PART_MASK', `0x7e000000')
define(`SEARCH_CTRL_SINGLE', `0x00000000')
define(`SEARCH_CTRL_DUAL_START', `0x00000100')
diff --git a/src/shaders/vme/vp8_inter_frame_gen9.asm b/src/shaders/vme/vp8_inter_frame_gen9.asm
index c2cd0ec..d080fd0 100644
--- a/src/shaders/vme/vp8_inter_frame_gen9.asm
+++ b/src/shaders/vme/vp8_inter_frame_gen9.asm
@@ -427,7 +427,6 @@ mov (8) vme_msg_1<1>:UD vme_m1.0<8,8,1>:UD {align1};
/* m0 */
mov (1) vme_m0.12<1>:UD INTRA_SAD_HAAR:UD {align1}; /* 16x16 Source, Intra_harr */
-mov (1) vme_m0.15<1>:UB SUB_PART_8x4_DISABLE + SUB_PART_4x8_DISABLE {align1}; /* vp8 don't support 8x4 and 4x8 partion */
mov (8) vme_msg_0.0<1>:UD vme_m0.0<8,8,1>:UD {align1};
/* after verification it will be passed by using payload */
@@ -479,7 +478,7 @@ send (16)
{align1};
/* IME search */
-mov (1) vme_m0.12<1>:UD SEARCH_CTRL_SINGLE + INTER_PART_MASK + INTER_SAD_HAAR:UD {align1}; /* 16x16 Source, harr */
+mov (1) vme_m0.12<1>:UD SEARCH_CTRL_SINGLE + VP8_INTER_PART_MASK + INTER_SAD_HAAR:UD {align1}; /* 16x16 Source, harr */
mov (1) vme_m0.22<1>:UW REF_REGION_SIZE {align1}; /* Reference Width&Height, 48x40 */
mov (1) vme_m0.0<1>:UD vme_m0.8<0,1,0>:UD {align1};
diff --git a/src/shaders/vme/vp8_inter_frame_gen9.g9b b/src/shaders/vme/vp8_inter_frame_gen9.g9b
index 7845083..bf8ad1c 100644
--- a/src/shaders/vme/vp8_inter_frame_gen9.g9b
+++ b/src/shaders/vme/vp8_inter_frame_gen9.g9b
@@ -146,13 +146,13 @@
{ 0x00000001, 0x2fa41a68, 0x00000b04, 0x00000000 },
{ 0x00000001, 0x2fa81a68, 0x00000b24, 0x00000000 },
{ 0x00000040, 0x2fe00008, 0x06001400, 0x00000020 },
- { 0x00000020, 0x34000000, 0x0e001400, 0x00000870 },
+ { 0x00000020, 0x34000000, 0x0e001400, 0x00000860 },
{ 0x00000001, 0x2ac01a68, 0x00000fe4, 0x00000000 },
{ 0x00000001, 0x2fa01a68, 0x00000ae6, 0x00000000 },
{ 0x00000001, 0x2fa41a68, 0x00000b06, 0x00000000 },
{ 0x00000001, 0x2fa81a68, 0x00000b26, 0x00000000 },
{ 0x00000040, 0x2fe00008, 0x06001400, 0x00000020 },
- { 0x00000020, 0x34000000, 0x0e001400, 0x00000810 },
+ { 0x00000020, 0x34000000, 0x0e001400, 0x00000800 },
{ 0x00000001, 0x2ac21a68, 0x00000fe4, 0x00000000 },
{ 0x0020000c, 0x2a801a68, 0x1e450ac0, 0x00020002 },
{ 0x00200040, 0x2a881a68, 0x1e450a80, 0x00030003 },
@@ -180,7 +180,6 @@
{ 0x00000001, 0x247e2288, 0x00000400, 0x00000000 },
{ 0x00600001, 0x28200208, 0x008d0460, 0x00000000 },
{ 0x00000001, 0x244c0608, 0x00000000, 0x00800000 },
- { 0x00000001, 0x244f0e88, 0x08000000, 0x00000060 },
{ 0x00600001, 0x28000208, 0x008d0440, 0x00000000 },
{ 0x0d600031, 0x21800a08, 0x06000800, 0x10782000 },
{ 0x00600001, 0x28000208, 0x008d0480, 0x00000000 },
@@ -193,7 +192,7 @@
{ 0x00000001, 0x28380208, 0x0000019c, 0x00000000 },
{ 0x00000001, 0x283c0208, 0x00000488, 0x00000000 },
{ 0x0a800031, 0x20000a60, 0x06000800, 0x040a0203 },
- { 0x00000001, 0x244c0608, 0x00000000, 0x00200000 },
+ { 0x00000001, 0x244c0608, 0x00000000, 0x7e200000 },
{ 0x00000001, 0x24561648, 0x10000000, 0x28302830 },
{ 0x00000001, 0x24400208, 0x00000448, 0x00000000 },
{ 0x00000040, 0x24401a68, 0x1e000440, 0xfff0fff0 },
diff --git a/src/shaders/vme/vp8_intra_frame_gen9.g9b b/src/shaders/vme/vp8_intra_frame_gen9.g9b
index c4f9baf..f056f4f 100644
--- a/src/shaders/vme/vp8_intra_frame_gen9.g9b
+++ b/src/shaders/vme/vp8_intra_frame_gen9.g9b
@@ -56,7 +56,7 @@
{ 0x00000001, 0x247e2288, 0x00000400, 0x00000000 },
{ 0x00600001, 0x28200208, 0x008d0460, 0x00000000 },
{ 0x00000040, 0x244c0208, 0x0600044c, 0x00800000 },
- { 0x00000001, 0x244f0e88, 0x08000000, 0x00000060 },
+ { 0x00000001, 0x244f0e88, 0x08000000, 0x00000030 },
{ 0x00600001, 0x28000208, 0x008d0440, 0x00000000 },
{ 0x0d600031, 0x21800a08, 0x06000800, 0x10782000 },
{ 0x00600001, 0x28000208, 0x008d0480, 0x00000000 },
--
1.9.1
More information about the Libva
mailing list