[Libva] [PATCH V4: 04/11] Encoding: Pass the QP parameter into the VME shader and have the VME shader select a different cost table based on the input QP on Ivy
Pengfei Qu
Pengfei.Qu at intel.com
Fri Aug 12 08:41:36 UTC 2016
From: Zhao Yakui <yakui.zhao at intel.com>
v1: add assert after bo map
In order to support macroblocks that use different QPs for motion
prediction, different cost tables are provided so that the VME engine can
select the appropriate mode/motion-vector cost table based on the input QP.
Signed-off-by: Zhao Yakui <yakui.zhao at intel.com>
Signed-off-by: pjl <cecilia.peng at intel.com>
Signed-off-by: Pengfei Qu <Pengfei.Qu at intel.com>
---
src/gen6_mfc_common.c | 1 +
src/gen7_vme.c | 155 +++++++++++++++++++++++++----------
src/shaders/vme/inter_bframe_ivb.asm | 13 ++-
src/shaders/vme/inter_bframe_ivb.g7b | 24 ++++--
src/shaders/vme/inter_frame_ivb.asm | 12 ++-
src/shaders/vme/inter_frame_ivb.g7b | 12 ++-
src/shaders/vme/intra_frame_ivb.asm | 13 ++-
src/shaders/vme/intra_frame_ivb.g7b | 8 +-
src/shaders/vme/vme7.inc | 1 +
9 files changed, 180 insertions(+), 59 deletions(-)
diff --git a/src/gen6_mfc_common.c b/src/gen6_mfc_common.c
index 30cf7e5..5e2e1b6 100644
--- a/src/gen6_mfc_common.c
+++ b/src/gen6_mfc_common.c
@@ -1706,6 +1706,7 @@ intel_h264_initialize_mbmv_cost(VADriverContextP ctx,
dri_bo_map(bo, 1);
+ assert(bo->virtual);
cost_table = (uint8_t *)(bo->virtual);
for (qp = 0; qp < QP_MAX; qp++) {
intel_h264_calc_mbmvcost_qp(qp, slice_type, cost_table);
diff --git a/src/gen7_vme.c b/src/gen7_vme.c
index fb6358f..d9898a7 100644
--- a/src/gen7_vme.c
+++ b/src/gen7_vme.c
@@ -374,58 +374,117 @@ static VAStatus gen7_vme_avc_state_setup(VADriverContextP ctx,
int i;
VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
unsigned int is_low_quality = (encoder_context->quality_level == ENCODER_LOW_QUALITY);
+ dri_bo *cost_bo;
+ int slice_type;
+ uint8_t *cost_ptr;
+ int qp;
+
+ slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
+
+ if (slice_type == SLICE_TYPE_I) {
+ cost_bo = vme_context->i_qp_cost_table;
+ } else if (slice_type == SLICE_TYPE_P) {
+ cost_bo = vme_context->p_qp_cost_table;
+ } else {
+ cost_bo = vme_context->b_qp_cost_table;
+ }
mb_cost_table = (unsigned int *)vme_context->vme_state_message;
- //building VME state message
dri_bo_map(vme_context->vme_state.bo, 1);
+ dri_bo_map(cost_bo, 0);
assert(vme_context->vme_state.bo->virtual);
+ assert(cost_bo->virtual);
vme_state_message = (unsigned int *)vme_context->vme_state.bo->virtual;
- if (((slice_param->slice_type == SLICE_TYPE_P) ||
- (slice_param->slice_type == SLICE_TYPE_SP)) &&
- !is_low_quality) {
- vme_state_message[0] = 0x01010101;
- vme_state_message[1] = 0x10010101;
- vme_state_message[2] = 0x0F0F0F0F;
- vme_state_message[3] = 0x100F0F0F;
- vme_state_message[4] = 0x01010101;
- vme_state_message[5] = 0x10010101;
- vme_state_message[6] = 0x0F0F0F0F;
- vme_state_message[7] = 0x100F0F0F;
- vme_state_message[8] = 0x01010101;
- vme_state_message[9] = 0x10010101;
- vme_state_message[10] = 0x0F0F0F0F;
- vme_state_message[11] = 0x000F0F0F;
- vme_state_message[12] = 0x00;
- vme_state_message[13] = 0x00;
- } else {
- vme_state_message[0] = 0x10010101;
- vme_state_message[1] = 0x100F0F0F;
- vme_state_message[2] = 0x10010101;
- vme_state_message[3] = 0x000F0F0F;
- vme_state_message[4] = 0;
- vme_state_message[5] = 0;
- vme_state_message[6] = 0;
- vme_state_message[7] = 0;
- vme_state_message[8] = 0;
- vme_state_message[9] = 0;
- vme_state_message[10] = 0;
- vme_state_message[11] = 0;
- vme_state_message[12] = 0;
- vme_state_message[13] = 0;
- }
+ cost_ptr = (uint8_t *)cost_bo->virtual;
+
+ /* up to 8 VME_SEARCH_PATH_LUT is supported */
+ /* Two subsequent qp will share the same mode/motion-vector cost table */
+ /* the range is from 0-51 */
+ for (i = 0; i < 8; i++) {
+
+ vme_state_message = (unsigned int *)vme_context->vme_state.bo->virtual +
+ i * 32;
+ if ((slice_type == SLICE_TYPE_P) && !is_low_quality) {
+ vme_state_message[0] = 0x01010101;
+ vme_state_message[1] = 0x10010101;
+ vme_state_message[2] = 0x0F0F0F0F;
+ vme_state_message[3] = 0x100F0F0F;
+ vme_state_message[4] = 0x01010101;
+ vme_state_message[5] = 0x10010101;
+ vme_state_message[6] = 0x0F0F0F0F;
+ vme_state_message[7] = 0x100F0F0F;
+ vme_state_message[8] = 0x01010101;
+ vme_state_message[9] = 0x10010101;
+ vme_state_message[10] = 0x0F0F0F0F;
+ vme_state_message[11] = 0x000F0F0F;
+ vme_state_message[12] = 0x00;
+ vme_state_message[13] = 0x00;
+ } else {
+ vme_state_message[0] = 0x10010101;
+ vme_state_message[1] = 0x100F0F0F;
+ vme_state_message[2] = 0x10010101;
+ vme_state_message[3] = 0x000F0F0F;
+ vme_state_message[4] = 0;
+ vme_state_message[5] = 0;
+ vme_state_message[6] = 0;
+ vme_state_message[7] = 0;
+ vme_state_message[8] = 0;
+ vme_state_message[9] = 0;
+ vme_state_message[10] = 0;
+ vme_state_message[11] = 0;
+ vme_state_message[12] = 0;
+ vme_state_message[13] = 0;
+ }
- vme_state_message[14] = (mb_cost_table[2] & 0xFFFF);
- vme_state_message[15] = 0;
- vme_state_message[16] = mb_cost_table[0];
- vme_state_message[17] = mb_cost_table[1];
- vme_state_message[18] = mb_cost_table[3];
- vme_state_message[19] = mb_cost_table[4];
+ qp = 8 * i;
- for(i = 20; i < 32; i++) {
- vme_state_message[i] = 0;
+ /* when qp is greater than 51, use the cost_table of qp=51 to fulfill */
+ if (qp > 51) {
+ qp = 51;
+ }
+ /* Setup the four LUT sets for MbMV cost */
+ mb_cost_table = (unsigned int *)(cost_ptr + qp * 32);
+ vme_state_message[14] = (mb_cost_table[2] & 0xFFFF);
+ vme_state_message[16] = mb_cost_table[0];
+ vme_state_message[17] = mb_cost_table[1];
+ vme_state_message[18] = mb_cost_table[3];
+ vme_state_message[19] = mb_cost_table[4];
+
+ qp += 2;
+ if (qp > 51) {
+ qp = 51;
+ }
+ mb_cost_table = (unsigned int *)(cost_ptr + qp * 32);
+ vme_state_message[14] |= ((mb_cost_table[2] & 0xFFFF) << 16);
+ vme_state_message[20] = mb_cost_table[0];
+ vme_state_message[21] = mb_cost_table[1];
+ vme_state_message[22] = mb_cost_table[3];
+ vme_state_message[23] = mb_cost_table[4];
+
+ qp += 2;
+ if (qp > 51) {
+ qp = 51;
+ }
+ vme_state_message[15] = (mb_cost_table[2] & 0xFFFF);
+ vme_state_message[24] = mb_cost_table[0];
+ vme_state_message[25] = mb_cost_table[1];
+ vme_state_message[26] = mb_cost_table[3];
+ vme_state_message[27] = mb_cost_table[4];
+
+ qp += 2;
+ if (qp > 51) {
+ qp = 51;
+ }
+ mb_cost_table = (unsigned int *)(cost_ptr + qp * 32);
+ vme_state_message[15] |= ((mb_cost_table[2] & 0xFFFF) << 16);
+ vme_state_message[28] = mb_cost_table[0];
+ vme_state_message[29] = mb_cost_table[1];
+ vme_state_message[30] = mb_cost_table[3];
+ vme_state_message[31] = mb_cost_table[4];
}
+ dri_bo_unmap(cost_bo);
dri_bo_unmap( vme_context->vme_state.bo);
return VA_STATUS_SUCCESS;
}
@@ -490,7 +549,16 @@ gen7_vme_fill_vme_batchbuffer(VADriverContextP ctx,
int mb_x = 0, mb_y = 0;
int i, s, j;
unsigned int *command_ptr;
+ struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
+ VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
+ VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
+ int qp;
+ int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
+ if (encoder_context->rate_control_mode == VA_RC_CQP)
+ qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
+ else
+ qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
command_ptr = vme_context->vme_batchbuffer.bo->virtual;
@@ -540,7 +608,7 @@ gen7_vme_fill_vme_batchbuffer(VADriverContextP ctx,
mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_D);
}
- *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
+ *command_ptr++ = (CMD_MEDIA_OBJECT | (9 - 2));
*command_ptr++ = kernel;
*command_ptr++ = 0;
*command_ptr++ = 0;
@@ -551,6 +619,7 @@ gen7_vme_fill_vme_batchbuffer(VADriverContextP ctx,
*command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x);
*command_ptr++ = ((encoder_context->quality_level << 24) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
+ *command_ptr++ = qp;
i += 1;
}
diff --git a/src/shaders/vme/inter_bframe_ivb.asm b/src/shaders/vme/inter_bframe_ivb.asm
index 499e426..1cba8c6 100644
--- a/src/shaders/vme/inter_bframe_ivb.asm
+++ b/src/shaders/vme/inter_bframe_ivb.asm
@@ -542,8 +542,19 @@ mov (1) vme_m1.16<1>:ud mb_mvp_ref.0<0,1,0>:ud {align1};
mov (1) vme_m1.20<1>:ud mb_mvp_ref.4<0,1,0>:ud {align1};
mov (8) vme_msg_1.0<1>:UD vme_m1.0<8,8,1>:UD {align1};
+mov (1) tmp_reg0.0<1>:ud qp_ub<0,1,0>:ub {align1};
+/* lut_subindex */
+and (1) tmp_reg1.0<1>:ud tmp_reg0.0<0,1,0>:ud 0x06:ud {align1};
+shl (1) tmp_reg0.4<1>:ud tmp_reg1.0<0,1,0>:ud 10:ud {align1};
+
+/* lut_index */
+and (1) tmp_reg1.0<1>:ud tmp_reg0.0<0,1,0>:ud 0x038:ud {align1};
+shl (1) tmp_reg1.4<1>:ud tmp_reg1.0<0,1,0>:ud 5:ud {align1};
+
+add (1) tmp_reg0.0<1>:ud tmp_reg0.4<0,1,0>:ud tmp_reg1.4<0,1,0>:ud {align1};
/* Use one register as the descriptor of send instruction instead of hardcode*/
-mov (1) a0.0<1>:ud 0x0a686000:ud {align1};
+
+add (1) a0.0<1>:ud tmp_reg0.0<0,1,0>:ud 0x0a686000:ud {align1};
send (1) vme_wb.0<1>:ud vme_msg_0 0x08 a0.0<0,1,0>:ud {align1};
and.z.f0.0 (1) null<1>:ud vme_wb0.0<0,1,0>:ud INTRAMBFLAG_MASK:ud {align1} ;
diff --git a/src/shaders/vme/inter_bframe_ivb.g7b b/src/shaders/vme/inter_bframe_ivb.g7b
index 7f24b63..79eb292 100644
--- a/src/shaders/vme/inter_bframe_ivb.g7b
+++ b/src/shaders/vme/inter_bframe_ivb.g7b
@@ -53,7 +53,7 @@
{ 0x00000001, 0x2fa40021, 0x00000b80, 0x00000000 },
{ 0x00000001, 0x2fa80061, 0x00000000, 0x00000001 },
{ 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 },
- { 0x00000020, 0x34001c00, 0x00001400, 0x0000024a },
+ { 0x00000020, 0x34001c00, 0x00001400, 0x00000256 },
{ 0x00000001, 0x2aa00129, 0x00000fe4, 0x00000000 },
{ 0x01000010, 0x20002d28, 0x00000aa0, 0x00000000 },
{ 0x00010001, 0x2af001e9, 0x00000000, 0x00010001 },
@@ -91,7 +91,7 @@
{ 0x00000001, 0x2fa40021, 0x00000b80, 0x00000000 },
{ 0x00000001, 0x2fa80061, 0x00000000, 0x00000002 },
{ 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 },
- { 0x00000020, 0x34001c00, 0x00001400, 0x000001fe },
+ { 0x00000020, 0x34001c00, 0x00001400, 0x0000020a },
{ 0x00000001, 0x2aa00129, 0x00000fe4, 0x00000000 },
{ 0x01000010, 0x20002d28, 0x00000aa0, 0x00000000 },
{ 0x00010001, 0x2b1001e9, 0x00000000, 0x00010001 },
@@ -127,7 +127,7 @@
{ 0x00000001, 0x2fa40021, 0x00000b80, 0x00000000 },
{ 0x00000001, 0x2fa80061, 0x00000000, 0x00000002 },
{ 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 },
- { 0x00000020, 0x34001c00, 0x00001400, 0x000001b6 },
+ { 0x00000020, 0x34001c00, 0x00001400, 0x000001c2 },
{ 0x00000001, 0x2aa00129, 0x00000fe4, 0x00000000 },
{ 0x01000010, 0x20002d28, 0x00000aa0, 0x00000000 },
{ 0x00010001, 0x2b3001e9, 0x00000000, 0x00010001 },
@@ -164,7 +164,7 @@
{ 0x00000001, 0x2fa40021, 0x00000b80, 0x00000000 },
{ 0x00000001, 0x2fa80061, 0x00000000, 0x00000003 },
{ 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 },
- { 0x00000020, 0x34001c00, 0x00001400, 0x0000016c },
+ { 0x00000020, 0x34001c00, 0x00001400, 0x00000178 },
{ 0x00000001, 0x2aa00129, 0x00000fe4, 0x00000000 },
{ 0x00000001, 0x2b3201ed, 0x00000000, 0x00010001 },
{ 0x01000010, 0x20002d28, 0x00000aa0, 0x00000000 },
@@ -205,13 +205,13 @@
{ 0x00000001, 0x2fa401ad, 0x00000b04, 0x00000000 },
{ 0x00000001, 0x2fa801ad, 0x00000b24, 0x00000000 },
{ 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 },
- { 0x00000020, 0x34001c00, 0x00001400, 0x000000fa },
+ { 0x00000020, 0x34001c00, 0x00001400, 0x00000106 },
{ 0x00000001, 0x2ac001ad, 0x00000fe4, 0x00000000 },
{ 0x00000001, 0x2fa001ad, 0x00000ae6, 0x00000000 },
{ 0x00000001, 0x2fa401ad, 0x00000b06, 0x00000000 },
{ 0x00000001, 0x2fa801ad, 0x00000b26, 0x00000000 },
{ 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 },
- { 0x00000020, 0x34001c00, 0x00001400, 0x000000ee },
+ { 0x00000020, 0x34001c00, 0x00001400, 0x000000fa },
{ 0x00000001, 0x2ac201ad, 0x00000fe4, 0x00000000 },
{ 0x00600001, 0x24000061, 0x00000000, 0x00000000 },
{ 0x01000010, 0x20003da4, 0x00200af6, 0x00000000 },
@@ -230,13 +230,13 @@
{ 0x00000001, 0x2fa401ad, 0x00000b08, 0x00000000 },
{ 0x00000001, 0x2fa801ad, 0x00000b28, 0x00000000 },
{ 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 },
- { 0x00000020, 0x34001c00, 0x00001400, 0x000000c8 },
+ { 0x00000020, 0x34001c00, 0x00001400, 0x000000d4 },
{ 0x00000001, 0x2ac401ad, 0x00000fe4, 0x00000000 },
{ 0x00000001, 0x2fa001ad, 0x00000aea, 0x00000000 },
{ 0x00000001, 0x2fa401ad, 0x00000b0a, 0x00000000 },
{ 0x00000001, 0x2fa801ad, 0x00000b2a, 0x00000000 },
{ 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 },
- { 0x00000020, 0x34001c00, 0x00001400, 0x000000bc },
+ { 0x00000020, 0x34001c00, 0x00001400, 0x000000c8 },
{ 0x00000001, 0x2ac601ad, 0x00000fe4, 0x00000000 },
{ 0x0040000c, 0x2a803dad, 0x00690ac0, 0x00020002 },
{ 0x00400040, 0x2a883dad, 0x00690a80, 0x00030003 },
@@ -276,7 +276,13 @@
{ 0x00000001, 0x24700021, 0x00000ac0, 0x00000000 },
{ 0x00000001, 0x24740021, 0x00000ac4, 0x00000000 },
{ 0x00600001, 0x28200021, 0x008d0460, 0x00000000 },
- { 0x00000001, 0x22000060, 0x00000000, 0x0a686000 },
+ { 0x00000001, 0x24000221, 0x000000a8, 0x00000000 },
+ { 0x00000005, 0x24200c21, 0x00000400, 0x00000006 },
+ { 0x00000009, 0x24040c21, 0x00000420, 0x0000000a },
+ { 0x00000005, 0x24200c21, 0x00000400, 0x00000038 },
+ { 0x00000009, 0x24240c21, 0x00000420, 0x00000005 },
+ { 0x00000040, 0x24000421, 0x00000404, 0x00000424 },
+ { 0x00000040, 0x22000c20, 0x00000400, 0x0a686000 },
{ 0x08000031, 0x21800221, 0x00000800, 0x00000200 },
{ 0x01000005, 0x20000c20, 0x00000180, 0x00002000 },
{ 0x00110020, 0x34001c00, 0x00001400, 0x0000002c },
diff --git a/src/shaders/vme/inter_frame_ivb.asm b/src/shaders/vme/inter_frame_ivb.asm
index 1d67c50..8124edf 100644
--- a/src/shaders/vme/inter_frame_ivb.asm
+++ b/src/shaders/vme/inter_frame_ivb.asm
@@ -457,9 +457,19 @@ mov (1) vme_m1.16<1>:ud mb_mvp_ref.0<0,1,0>:ud {align1};
mov (1) vme_m1.20<1>:ud mb_mvp_ref.0<0,1,0>:ud {align1};
mov (8) vme_msg_1.0<1>:UD vme_m1.0<8,8,1>:UD {align1};
+mov (1) tmp_reg0.0<1>:ud qp_ub<0,1,0>:ub {align1};
+/* lut_subindex */
+and (1) tmp_reg1.0<1>:ud tmp_reg0.0<0,1,0>:ud 0x06:ud {align1};
+shl (1) tmp_reg0.4<1>:ud tmp_reg1.0<0,1,0>:ud 10:ud {align1};
+/* lut_index */
+and (1) tmp_reg1.0<1>:ud tmp_reg0.0<0,1,0>:ud 0x038:ud {align1};
+shl (1) tmp_reg1.4<1>:ud tmp_reg1.0<0,1,0>:ud 5:ud {align1};
+
+add (1) tmp_reg0.0<1>:ud tmp_reg0.4<0,1,0>:ud tmp_reg1.4<0,1,0>:ud {align1};
/* Use one register as the descriptor of send instruction instead of hardcode*/
-mov (1) a0.0<1>:ud 0x0a686000:ud {align1};
+
+add (1) a0.0<1>:ud tmp_reg0.0<0,1,0>:ud 0x0a686000:ud {align1};
send (1) vme_wb.0<1>:ud vme_msg_0 0x08 a0.0<0,1,0>:ud {align1};
and.z.f0.0 (1) null<1>:ud vme_wb0.0<0,1,0>:ud INTRAMBFLAG_MASK:ud {align1} ;
diff --git a/src/shaders/vme/inter_frame_ivb.g7b b/src/shaders/vme/inter_frame_ivb.g7b
index df9572f..9f0dfae 100644
--- a/src/shaders/vme/inter_frame_ivb.g7b
+++ b/src/shaders/vme/inter_frame_ivb.g7b
@@ -141,13 +141,13 @@
{ 0x00000001, 0x2fa401ad, 0x00000b04, 0x00000000 },
{ 0x00000001, 0x2fa801ad, 0x00000b24, 0x00000000 },
{ 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 },
- { 0x00000020, 0x34001c00, 0x00001400, 0x000000d2 },
+ { 0x00000020, 0x34001c00, 0x00001400, 0x000000de },
{ 0x00000001, 0x2ac001ad, 0x00000fe4, 0x00000000 },
{ 0x00000001, 0x2fa001ad, 0x00000ae6, 0x00000000 },
{ 0x00000001, 0x2fa401ad, 0x00000b06, 0x00000000 },
{ 0x00000001, 0x2fa801ad, 0x00000b26, 0x00000000 },
{ 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 },
- { 0x00000020, 0x34001c00, 0x00001400, 0x000000c6 },
+ { 0x00000020, 0x34001c00, 0x00001400, 0x000000d2 },
{ 0x00000001, 0x2ac201ad, 0x00000fe4, 0x00000000 },
{ 0x0020000c, 0x2a803dad, 0x00450ac0, 0x00020002 },
{ 0x00200040, 0x2a883dad, 0x00450a80, 0x00030003 },
@@ -193,7 +193,13 @@
{ 0x00000001, 0x24700021, 0x00000ac0, 0x00000000 },
{ 0x00000001, 0x24740021, 0x00000ac0, 0x00000000 },
{ 0x00600001, 0x28200021, 0x008d0460, 0x00000000 },
- { 0x00000001, 0x22000060, 0x00000000, 0x0a686000 },
+ { 0x00000001, 0x24000221, 0x000000a8, 0x00000000 },
+ { 0x00000005, 0x24200c21, 0x00000400, 0x00000006 },
+ { 0x00000009, 0x24040c21, 0x00000420, 0x0000000a },
+ { 0x00000005, 0x24200c21, 0x00000400, 0x00000038 },
+ { 0x00000009, 0x24240c21, 0x00000420, 0x00000005 },
+ { 0x00000040, 0x24000421, 0x00000404, 0x00000424 },
+ { 0x00000040, 0x22000c20, 0x00000400, 0x0a686000 },
{ 0x08000031, 0x21800221, 0x00000800, 0x00000200 },
{ 0x01000005, 0x20000c20, 0x00000180, 0x00002000 },
{ 0x00110020, 0x34001c00, 0x00001400, 0x0000002c },
diff --git a/src/shaders/vme/intra_frame_ivb.asm b/src/shaders/vme/intra_frame_ivb.asm
index 9efbfdc..97a20eb 100644
--- a/src/shaders/vme/intra_frame_ivb.asm
+++ b/src/shaders/vme/intra_frame_ivb.asm
@@ -104,8 +104,19 @@ mov (8) vme_msg_4<1>:UD 0x0 {align1};
mov (16) vme_msg_4.0<1>:UB INEP_COL0.3<32,8,4>:UB {align1};
mov (1) vme_msg_4.16<1>:UD INTRA_PREDICTORE_MODE {align1};
+mov (1) tmp_reg0.0<1>:ud qp_ub<0,1,0>:ub {align1};
+/* lut_subindex */
+and (1) tmp_reg1.0<1>:ud tmp_reg0.0<0,1,0>:ud 0x06:ud {align1};
+shl (1) tmp_reg0.4<1>:ud tmp_reg1.0<0,1,0>:ud 10:ud {align1};
+
+/* lut_index */
+and (1) tmp_reg1.0<1>:ud tmp_reg0.0<0,1,0>:ud 0x038:ud {align1};
+shl (1) tmp_reg1.4<1>:ud tmp_reg1.0<0,1,0>:ud 5:ud {align1};
+
+add (1) tmp_reg0.0<1>:ud tmp_reg0.4<0,1,0>:ud tmp_reg1.4<0,1,0>:ud {align1};
/* Use one register as the descriptor of send instruction instead of hardcode*/
-mov (1) a0.0<1>:ud 0x0a184000:ud {align1};
+
+add (1) a0.0<1>:ud tmp_reg0.0<0,1,0>:ud 0x0a184000:ud {align1};
send (1) vme_wb.0<1>:ud vme_msg_0 0x08 a0.0<0,1,0>:ud {align1};
/*
diff --git a/src/shaders/vme/intra_frame_ivb.g7b b/src/shaders/vme/intra_frame_ivb.g7b
index 7dd16fc..d2aa1eb 100644
--- a/src/shaders/vme/intra_frame_ivb.g7b
+++ b/src/shaders/vme/intra_frame_ivb.g7b
@@ -35,7 +35,13 @@
{ 0x00600001, 0x288000e1, 0x00000000, 0x00000000 },
{ 0x00800001, 0x28800231, 0x00cf03a3, 0x00000000 },
{ 0x00000001, 0x28900061, 0x00000000, 0x11111111 },
- { 0x00000001, 0x22000060, 0x00000000, 0x0a184000 },
+ { 0x00000001, 0x24000221, 0x000000a8, 0x00000000 },
+ { 0x00000005, 0x24200c21, 0x00000400, 0x00000006 },
+ { 0x00000009, 0x24040c21, 0x00000420, 0x0000000a },
+ { 0x00000005, 0x24200c21, 0x00000400, 0x00000038 },
+ { 0x00000009, 0x24240c21, 0x00000420, 0x00000005 },
+ { 0x00000040, 0x24000421, 0x00000404, 0x00000424 },
+ { 0x00000040, 0x22000c20, 0x00000400, 0x0a184000 },
{ 0x08000031, 0x21800221, 0x00000800, 0x00000200 },
{ 0x00600001, 0x28000021, 0x008d0480, 0x00000000 },
{ 0x00000001, 0x28200021, 0x00000180, 0x00000000 },
diff --git a/src/shaders/vme/vme7.inc b/src/shaders/vme/vme7.inc
index acff81f..8c1731c 100644
--- a/src/shaders/vme/vme7.inc
+++ b/src/shaders/vme/vme7.inc
@@ -152,6 +152,7 @@ define(`input_mb_intra_ub', `inline_reg0.5')
define(`num_macroblocks', `inline_reg0.6')
define(`quality_level_ub', `inline_reg0.7')
+define(`qp_ub', `inline_reg0.8')
/*
* GRF 6~11 -- reserved
*/
--
2.7.4
More information about the Libva
mailing list