[Libva] [PATCH V4: 04/11] Encoding: Pass the QP parameter into the VME shader so that it selects the cost table based on the input QP on Ivybridge

Pengfei Qu Pengfei.Qu at intel.com
Fri Aug 12 08:41:36 UTC 2016


From: Zhao Yakui <yakui.zhao at intel.com>

v1: add assert after bo map

In order to support macroblocks that use different QPs for motion
prediction, multiple cost tables are provided so that the VME engine can
select the mode/motion-vector cost table that matches the input QP.

Signed-off-by: Zhao Yakui <yakui.zhao at intel.com>
Signed-off-by: pjl <cecilia.peng at intel.com>
Signed-off-by: Pengfei Qu <Pengfei.Qu at intel.com>
---
 src/gen6_mfc_common.c                |   1 +
 src/gen7_vme.c                       | 155 +++++++++++++++++++++++++----------
 src/shaders/vme/inter_bframe_ivb.asm |  13 ++-
 src/shaders/vme/inter_bframe_ivb.g7b |  24 ++++--
 src/shaders/vme/inter_frame_ivb.asm  |  12 ++-
 src/shaders/vme/inter_frame_ivb.g7b  |  12 ++-
 src/shaders/vme/intra_frame_ivb.asm  |  13 ++-
 src/shaders/vme/intra_frame_ivb.g7b  |   8 +-
 src/shaders/vme/vme7.inc             |   1 +
 9 files changed, 180 insertions(+), 59 deletions(-)

diff --git a/src/gen6_mfc_common.c b/src/gen6_mfc_common.c
index 30cf7e5..5e2e1b6 100644
--- a/src/gen6_mfc_common.c
+++ b/src/gen6_mfc_common.c
@@ -1706,6 +1706,7 @@ intel_h264_initialize_mbmv_cost(VADriverContextP ctx,
 
     dri_bo_map(bo, 1);
 
+    assert(bo->virtual);
     cost_table = (uint8_t *)(bo->virtual);
     for (qp = 0; qp < QP_MAX; qp++) {
         intel_h264_calc_mbmvcost_qp(qp, slice_type, cost_table);
diff --git a/src/gen7_vme.c b/src/gen7_vme.c
index fb6358f..d9898a7 100644
--- a/src/gen7_vme.c
+++ b/src/gen7_vme.c
@@ -374,58 +374,117 @@ static VAStatus gen7_vme_avc_state_setup(VADriverContextP ctx,
     int i;
     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
     unsigned int is_low_quality = (encoder_context->quality_level == ENCODER_LOW_QUALITY);
+    dri_bo *cost_bo;
+    int slice_type;
+    uint8_t *cost_ptr;
+    int qp;
+
+    slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
+
+    if (slice_type == SLICE_TYPE_I) {
+        cost_bo = vme_context->i_qp_cost_table;
+    } else if (slice_type == SLICE_TYPE_P) {
+        cost_bo = vme_context->p_qp_cost_table;
+    } else {
+        cost_bo = vme_context->b_qp_cost_table;
+    }
 
     mb_cost_table = (unsigned int *)vme_context->vme_state_message;
-    //building VME state message
     dri_bo_map(vme_context->vme_state.bo, 1);
+    dri_bo_map(cost_bo, 0);
     assert(vme_context->vme_state.bo->virtual);
+    assert(cost_bo->virtual);
     vme_state_message = (unsigned int *)vme_context->vme_state.bo->virtual;
 
-    if (((slice_param->slice_type == SLICE_TYPE_P) ||
-         (slice_param->slice_type == SLICE_TYPE_SP)) &&
-        !is_low_quality) {
-        vme_state_message[0] = 0x01010101;
-        vme_state_message[1] = 0x10010101;
-        vme_state_message[2] = 0x0F0F0F0F;
-        vme_state_message[3] = 0x100F0F0F;
-        vme_state_message[4] = 0x01010101;
-        vme_state_message[5] = 0x10010101;
-        vme_state_message[6] = 0x0F0F0F0F;
-        vme_state_message[7] = 0x100F0F0F;
-        vme_state_message[8] = 0x01010101;
-        vme_state_message[9] = 0x10010101;
-        vme_state_message[10] = 0x0F0F0F0F;
-        vme_state_message[11] = 0x000F0F0F;
-        vme_state_message[12] = 0x00;
-        vme_state_message[13] = 0x00;
-    } else {
-        vme_state_message[0] = 0x10010101;
-        vme_state_message[1] = 0x100F0F0F;
-        vme_state_message[2] = 0x10010101;
-        vme_state_message[3] = 0x000F0F0F;
-        vme_state_message[4] = 0;
-        vme_state_message[5] = 0;
-        vme_state_message[6] = 0;
-        vme_state_message[7] = 0;
-        vme_state_message[8] = 0;
-        vme_state_message[9] = 0;
-        vme_state_message[10] = 0;
-        vme_state_message[11] = 0;
-        vme_state_message[12] = 0;
-        vme_state_message[13] = 0;
-    }
+    cost_ptr = (uint8_t *)cost_bo->virtual;
+
+    /* Up to 8 VME_SEARCH_PATH_LUT entries are supported. */
+    /* Two consecutive QP values share the same mode/motion-vector cost table. */
+    /* The valid QP range is 0-51. */
+    for (i = 0; i < 8; i++)  {
+
+        vme_state_message = (unsigned int *)vme_context->vme_state.bo->virtual +
+                             i * 32;
+        if ((slice_type == SLICE_TYPE_P) && !is_low_quality) {
+            vme_state_message[0] = 0x01010101;
+            vme_state_message[1] = 0x10010101;
+            vme_state_message[2] = 0x0F0F0F0F;
+            vme_state_message[3] = 0x100F0F0F;
+            vme_state_message[4] = 0x01010101;
+            vme_state_message[5] = 0x10010101;
+            vme_state_message[6] = 0x0F0F0F0F;
+            vme_state_message[7] = 0x100F0F0F;
+            vme_state_message[8] = 0x01010101;
+            vme_state_message[9] = 0x10010101;
+            vme_state_message[10] = 0x0F0F0F0F;
+            vme_state_message[11] = 0x000F0F0F;
+            vme_state_message[12] = 0x00;
+            vme_state_message[13] = 0x00;
+        } else {
+            vme_state_message[0] = 0x10010101;
+            vme_state_message[1] = 0x100F0F0F;
+            vme_state_message[2] = 0x10010101;
+            vme_state_message[3] = 0x000F0F0F;
+            vme_state_message[4] = 0;
+            vme_state_message[5] = 0;
+            vme_state_message[6] = 0;
+            vme_state_message[7] = 0;
+            vme_state_message[8] = 0;
+            vme_state_message[9] = 0;
+            vme_state_message[10] = 0;
+            vme_state_message[11] = 0;
+            vme_state_message[12] = 0;
+            vme_state_message[13] = 0;
+        }
 
-    vme_state_message[14] = (mb_cost_table[2] & 0xFFFF);
-    vme_state_message[15] = 0;
-    vme_state_message[16] = mb_cost_table[0];
-    vme_state_message[17] = mb_cost_table[1];
-    vme_state_message[18] = mb_cost_table[3];
-    vme_state_message[19] = mb_cost_table[4];
+        qp = 8 * i;
 
-    for(i = 20; i < 32; i++) {
-        vme_state_message[i] = 0;
+        /* when qp is greater than 51, clamp it and use the cost table of qp=51 */
+        if (qp > 51) {
+            qp = 51;
+        }
+        /* Setup the four LUT sets for MbMV cost */
+        mb_cost_table = (unsigned int *)(cost_ptr + qp * 32);
+        vme_state_message[14] = (mb_cost_table[2] & 0xFFFF);
+        vme_state_message[16] = mb_cost_table[0];
+        vme_state_message[17] = mb_cost_table[1];
+        vme_state_message[18] = mb_cost_table[3];
+        vme_state_message[19] = mb_cost_table[4];
+
+        qp += 2;
+        if (qp > 51) {
+            qp = 51;
+        }
+        mb_cost_table = (unsigned int *)(cost_ptr + qp * 32);
+        vme_state_message[14] |= ((mb_cost_table[2] & 0xFFFF) << 16);
+        vme_state_message[20] = mb_cost_table[0];
+        vme_state_message[21] = mb_cost_table[1];
+        vme_state_message[22] = mb_cost_table[3];
+        vme_state_message[23] = mb_cost_table[4];
+
+        qp += 2;
+        if (qp > 51) {
+            qp = 51;
+        }
+        vme_state_message[15] = (mb_cost_table[2] & 0xFFFF);
+        vme_state_message[24] = mb_cost_table[0];
+        vme_state_message[25] = mb_cost_table[1];
+        vme_state_message[26] = mb_cost_table[3];
+        vme_state_message[27] = mb_cost_table[4];
+
+        qp += 2;
+        if (qp > 51) {
+            qp = 51;
+        }
+        mb_cost_table = (unsigned int *)(cost_ptr + qp * 32);
+        vme_state_message[15] |= ((mb_cost_table[2] & 0xFFFF) << 16);
+        vme_state_message[28] = mb_cost_table[0];
+        vme_state_message[29] = mb_cost_table[1];
+        vme_state_message[30] = mb_cost_table[3];
+        vme_state_message[31] = mb_cost_table[4];
     }
 
+    dri_bo_unmap(cost_bo);
     dri_bo_unmap( vme_context->vme_state.bo);
     return VA_STATUS_SUCCESS;
 }
@@ -490,7 +549,16 @@ gen7_vme_fill_vme_batchbuffer(VADriverContextP ctx,
     int mb_x = 0, mb_y = 0;
     int i, s, j;
     unsigned int *command_ptr;
+    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
+    VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
+    VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
+    int qp;
+    int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
 
+    if (encoder_context->rate_control_mode == VA_RC_CQP)
+        qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
+    else
+        qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
 
     dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
     command_ptr = vme_context->vme_batchbuffer.bo->virtual;
@@ -540,7 +608,7 @@ gen7_vme_fill_vme_batchbuffer(VADriverContextP ctx,
                     mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_D);
                 }
 
-                *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
+                *command_ptr++ = (CMD_MEDIA_OBJECT | (9 - 2));
                 *command_ptr++ = kernel;
                 *command_ptr++ = 0;
                 *command_ptr++ = 0;
@@ -551,6 +619,7 @@ gen7_vme_fill_vme_batchbuffer(VADriverContextP ctx,
                 *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x);
                 *command_ptr++ = ((encoder_context->quality_level << 24) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
 
+                *command_ptr++ = qp;
                 i += 1;
             }
 
diff --git a/src/shaders/vme/inter_bframe_ivb.asm b/src/shaders/vme/inter_bframe_ivb.asm
index 499e426..1cba8c6 100644
--- a/src/shaders/vme/inter_bframe_ivb.asm
+++ b/src/shaders/vme/inter_bframe_ivb.asm
@@ -542,8 +542,19 @@ mov  (1) vme_m1.16<1>:ud	mb_mvp_ref.0<0,1,0>:ud	{align1};
 mov  (1) vme_m1.20<1>:ud	mb_mvp_ref.4<0,1,0>:ud	{align1};
 mov  (8) vme_msg_1.0<1>:UD      vme_m1.0<8,8,1>:UD {align1};
 
+mov  (1) tmp_reg0.0<1>:ud        qp_ub<0,1,0>:ub    {align1};
+/* lut_subindex */
+and  (1) tmp_reg1.0<1>:ud        tmp_reg0.0<0,1,0>:ud 0x06:ud {align1};
+shl  (1) tmp_reg0.4<1>:ud        tmp_reg1.0<0,1,0>:ud 10:ud {align1};
+
+/* lut_index */
+and  (1) tmp_reg1.0<1>:ud        tmp_reg0.0<0,1,0>:ud 0x038:ud {align1};
+shl  (1) tmp_reg1.4<1>:ud        tmp_reg1.0<0,1,0>:ud 5:ud {align1};
+
+add  (1) tmp_reg0.0<1>:ud        tmp_reg0.4<0,1,0>:ud tmp_reg1.4<0,1,0>:ud {align1};
 /* Use one register as the descriptor of send instruction instead of hardcode*/
-mov  (1) a0.0<1>:ud              0x0a686000:ud {align1};
+
+add  (1) a0.0<1>:ud              tmp_reg0.0<0,1,0>:ud 0x0a686000:ud {align1};
 send (1) vme_wb.0<1>:ud   vme_msg_0    0x08   a0.0<0,1,0>:ud {align1};
 
 and.z.f0.0      (1)     null<1>:ud              vme_wb0.0<0,1,0>:ud     INTRAMBFLAG_MASK:ud {align1} ;
diff --git a/src/shaders/vme/inter_bframe_ivb.g7b b/src/shaders/vme/inter_bframe_ivb.g7b
index 7f24b63..79eb292 100644
--- a/src/shaders/vme/inter_bframe_ivb.g7b
+++ b/src/shaders/vme/inter_bframe_ivb.g7b
@@ -53,7 +53,7 @@
    { 0x00000001, 0x2fa40021, 0x00000b80, 0x00000000 },
    { 0x00000001, 0x2fa80061, 0x00000000, 0x00000001 },
    { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 },
-   { 0x00000020, 0x34001c00, 0x00001400, 0x0000024a },
+   { 0x00000020, 0x34001c00, 0x00001400, 0x00000256 },
    { 0x00000001, 0x2aa00129, 0x00000fe4, 0x00000000 },
    { 0x01000010, 0x20002d28, 0x00000aa0, 0x00000000 },
    { 0x00010001, 0x2af001e9, 0x00000000, 0x00010001 },
@@ -91,7 +91,7 @@
    { 0x00000001, 0x2fa40021, 0x00000b80, 0x00000000 },
    { 0x00000001, 0x2fa80061, 0x00000000, 0x00000002 },
    { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 },
-   { 0x00000020, 0x34001c00, 0x00001400, 0x000001fe },
+   { 0x00000020, 0x34001c00, 0x00001400, 0x0000020a },
    { 0x00000001, 0x2aa00129, 0x00000fe4, 0x00000000 },
    { 0x01000010, 0x20002d28, 0x00000aa0, 0x00000000 },
    { 0x00010001, 0x2b1001e9, 0x00000000, 0x00010001 },
@@ -127,7 +127,7 @@
    { 0x00000001, 0x2fa40021, 0x00000b80, 0x00000000 },
    { 0x00000001, 0x2fa80061, 0x00000000, 0x00000002 },
    { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 },
-   { 0x00000020, 0x34001c00, 0x00001400, 0x000001b6 },
+   { 0x00000020, 0x34001c00, 0x00001400, 0x000001c2 },
    { 0x00000001, 0x2aa00129, 0x00000fe4, 0x00000000 },
    { 0x01000010, 0x20002d28, 0x00000aa0, 0x00000000 },
    { 0x00010001, 0x2b3001e9, 0x00000000, 0x00010001 },
@@ -164,7 +164,7 @@
    { 0x00000001, 0x2fa40021, 0x00000b80, 0x00000000 },
    { 0x00000001, 0x2fa80061, 0x00000000, 0x00000003 },
    { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 },
-   { 0x00000020, 0x34001c00, 0x00001400, 0x0000016c },
+   { 0x00000020, 0x34001c00, 0x00001400, 0x00000178 },
    { 0x00000001, 0x2aa00129, 0x00000fe4, 0x00000000 },
    { 0x00000001, 0x2b3201ed, 0x00000000, 0x00010001 },
    { 0x01000010, 0x20002d28, 0x00000aa0, 0x00000000 },
@@ -205,13 +205,13 @@
    { 0x00000001, 0x2fa401ad, 0x00000b04, 0x00000000 },
    { 0x00000001, 0x2fa801ad, 0x00000b24, 0x00000000 },
    { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 },
-   { 0x00000020, 0x34001c00, 0x00001400, 0x000000fa },
+   { 0x00000020, 0x34001c00, 0x00001400, 0x00000106 },
    { 0x00000001, 0x2ac001ad, 0x00000fe4, 0x00000000 },
    { 0x00000001, 0x2fa001ad, 0x00000ae6, 0x00000000 },
    { 0x00000001, 0x2fa401ad, 0x00000b06, 0x00000000 },
    { 0x00000001, 0x2fa801ad, 0x00000b26, 0x00000000 },
    { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 },
-   { 0x00000020, 0x34001c00, 0x00001400, 0x000000ee },
+   { 0x00000020, 0x34001c00, 0x00001400, 0x000000fa },
    { 0x00000001, 0x2ac201ad, 0x00000fe4, 0x00000000 },
    { 0x00600001, 0x24000061, 0x00000000, 0x00000000 },
    { 0x01000010, 0x20003da4, 0x00200af6, 0x00000000 },
@@ -230,13 +230,13 @@
    { 0x00000001, 0x2fa401ad, 0x00000b08, 0x00000000 },
    { 0x00000001, 0x2fa801ad, 0x00000b28, 0x00000000 },
    { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 },
-   { 0x00000020, 0x34001c00, 0x00001400, 0x000000c8 },
+   { 0x00000020, 0x34001c00, 0x00001400, 0x000000d4 },
    { 0x00000001, 0x2ac401ad, 0x00000fe4, 0x00000000 },
    { 0x00000001, 0x2fa001ad, 0x00000aea, 0x00000000 },
    { 0x00000001, 0x2fa401ad, 0x00000b0a, 0x00000000 },
    { 0x00000001, 0x2fa801ad, 0x00000b2a, 0x00000000 },
    { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 },
-   { 0x00000020, 0x34001c00, 0x00001400, 0x000000bc },
+   { 0x00000020, 0x34001c00, 0x00001400, 0x000000c8 },
    { 0x00000001, 0x2ac601ad, 0x00000fe4, 0x00000000 },
    { 0x0040000c, 0x2a803dad, 0x00690ac0, 0x00020002 },
    { 0x00400040, 0x2a883dad, 0x00690a80, 0x00030003 },
@@ -276,7 +276,13 @@
    { 0x00000001, 0x24700021, 0x00000ac0, 0x00000000 },
    { 0x00000001, 0x24740021, 0x00000ac4, 0x00000000 },
    { 0x00600001, 0x28200021, 0x008d0460, 0x00000000 },
-   { 0x00000001, 0x22000060, 0x00000000, 0x0a686000 },
+   { 0x00000001, 0x24000221, 0x000000a8, 0x00000000 },
+   { 0x00000005, 0x24200c21, 0x00000400, 0x00000006 },
+   { 0x00000009, 0x24040c21, 0x00000420, 0x0000000a },
+   { 0x00000005, 0x24200c21, 0x00000400, 0x00000038 },
+   { 0x00000009, 0x24240c21, 0x00000420, 0x00000005 },
+   { 0x00000040, 0x24000421, 0x00000404, 0x00000424 },
+   { 0x00000040, 0x22000c20, 0x00000400, 0x0a686000 },
    { 0x08000031, 0x21800221, 0x00000800, 0x00000200 },
    { 0x01000005, 0x20000c20, 0x00000180, 0x00002000 },
    { 0x00110020, 0x34001c00, 0x00001400, 0x0000002c },
diff --git a/src/shaders/vme/inter_frame_ivb.asm b/src/shaders/vme/inter_frame_ivb.asm
index 1d67c50..8124edf 100644
--- a/src/shaders/vme/inter_frame_ivb.asm
+++ b/src/shaders/vme/inter_frame_ivb.asm
@@ -457,9 +457,19 @@ mov  (1) vme_m1.16<1>:ud	mb_mvp_ref.0<0,1,0>:ud	{align1};
 mov  (1) vme_m1.20<1>:ud	mb_mvp_ref.0<0,1,0>:ud	{align1};
 mov  (8) vme_msg_1.0<1>:UD      vme_m1.0<8,8,1>:UD {align1};
 
+mov  (1) tmp_reg0.0<1>:ud        qp_ub<0,1,0>:ub    {align1};
+/* lut_subindex */
+and  (1) tmp_reg1.0<1>:ud        tmp_reg0.0<0,1,0>:ud 0x06:ud {align1};
+shl  (1) tmp_reg0.4<1>:ud        tmp_reg1.0<0,1,0>:ud 10:ud {align1};
 
+/* lut_index */
+and  (1) tmp_reg1.0<1>:ud        tmp_reg0.0<0,1,0>:ud 0x038:ud {align1};
+shl  (1) tmp_reg1.4<1>:ud        tmp_reg1.0<0,1,0>:ud 5:ud {align1};
+
+add  (1) tmp_reg0.0<1>:ud        tmp_reg0.4<0,1,0>:ud tmp_reg1.4<0,1,0>:ud {align1};
 /* Use one register as the descriptor of send instruction instead of hardcode*/
-mov  (1) a0.0<1>:ud              0x0a686000:ud {align1};
+
+add  (1) a0.0<1>:ud              tmp_reg0.0<0,1,0>:ud 0x0a686000:ud {align1};
 send (1) vme_wb.0<1>:ud   vme_msg_0    0x08   a0.0<0,1,0>:ud {align1};
 
 and.z.f0.0      (1)     null<1>:ud              vme_wb0.0<0,1,0>:ud     INTRAMBFLAG_MASK:ud {align1} ;
diff --git a/src/shaders/vme/inter_frame_ivb.g7b b/src/shaders/vme/inter_frame_ivb.g7b
index df9572f..9f0dfae 100644
--- a/src/shaders/vme/inter_frame_ivb.g7b
+++ b/src/shaders/vme/inter_frame_ivb.g7b
@@ -141,13 +141,13 @@
    { 0x00000001, 0x2fa401ad, 0x00000b04, 0x00000000 },
    { 0x00000001, 0x2fa801ad, 0x00000b24, 0x00000000 },
    { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 },
-   { 0x00000020, 0x34001c00, 0x00001400, 0x000000d2 },
+   { 0x00000020, 0x34001c00, 0x00001400, 0x000000de },
    { 0x00000001, 0x2ac001ad, 0x00000fe4, 0x00000000 },
    { 0x00000001, 0x2fa001ad, 0x00000ae6, 0x00000000 },
    { 0x00000001, 0x2fa401ad, 0x00000b06, 0x00000000 },
    { 0x00000001, 0x2fa801ad, 0x00000b26, 0x00000000 },
    { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 },
-   { 0x00000020, 0x34001c00, 0x00001400, 0x000000c6 },
+   { 0x00000020, 0x34001c00, 0x00001400, 0x000000d2 },
    { 0x00000001, 0x2ac201ad, 0x00000fe4, 0x00000000 },
    { 0x0020000c, 0x2a803dad, 0x00450ac0, 0x00020002 },
    { 0x00200040, 0x2a883dad, 0x00450a80, 0x00030003 },
@@ -193,7 +193,13 @@
    { 0x00000001, 0x24700021, 0x00000ac0, 0x00000000 },
    { 0x00000001, 0x24740021, 0x00000ac0, 0x00000000 },
    { 0x00600001, 0x28200021, 0x008d0460, 0x00000000 },
-   { 0x00000001, 0x22000060, 0x00000000, 0x0a686000 },
+   { 0x00000001, 0x24000221, 0x000000a8, 0x00000000 },
+   { 0x00000005, 0x24200c21, 0x00000400, 0x00000006 },
+   { 0x00000009, 0x24040c21, 0x00000420, 0x0000000a },
+   { 0x00000005, 0x24200c21, 0x00000400, 0x00000038 },
+   { 0x00000009, 0x24240c21, 0x00000420, 0x00000005 },
+   { 0x00000040, 0x24000421, 0x00000404, 0x00000424 },
+   { 0x00000040, 0x22000c20, 0x00000400, 0x0a686000 },
    { 0x08000031, 0x21800221, 0x00000800, 0x00000200 },
    { 0x01000005, 0x20000c20, 0x00000180, 0x00002000 },
    { 0x00110020, 0x34001c00, 0x00001400, 0x0000002c },
diff --git a/src/shaders/vme/intra_frame_ivb.asm b/src/shaders/vme/intra_frame_ivb.asm
index 9efbfdc..97a20eb 100644
--- a/src/shaders/vme/intra_frame_ivb.asm
+++ b/src/shaders/vme/intra_frame_ivb.asm
@@ -104,8 +104,19 @@ mov  (8) vme_msg_4<1>:UD         0x0 {align1};
 mov (16) vme_msg_4.0<1>:UB       INEP_COL0.3<32,8,4>:UB {align1};
 mov  (1) vme_msg_4.16<1>:UD      INTRA_PREDICTORE_MODE {align1};
 
+mov  (1) tmp_reg0.0<1>:ud        qp_ub<0,1,0>:ub    {align1};
+/* lut_subindex */
+and  (1) tmp_reg1.0<1>:ud        tmp_reg0.0<0,1,0>:ud 0x06:ud {align1};
+shl  (1) tmp_reg0.4<1>:ud        tmp_reg1.0<0,1,0>:ud 10:ud {align1};
+
+/* lut_index */
+and  (1) tmp_reg1.0<1>:ud        tmp_reg0.0<0,1,0>:ud 0x038:ud {align1};
+shl  (1) tmp_reg1.4<1>:ud        tmp_reg1.0<0,1,0>:ud 5:ud {align1};
+
+add  (1) tmp_reg0.0<1>:ud        tmp_reg0.4<0,1,0>:ud tmp_reg1.4<0,1,0>:ud {align1};
 /* Use one register as the descriptor of send instruction instead of hardcode*/
-mov  (1) a0.0<1>:ud             0x0a184000:ud {align1};
+
+add  (1) a0.0<1>:ud              tmp_reg0.0<0,1,0>:ud 0x0a184000:ud {align1};
 send (1) vme_wb.0<1>:ud   vme_msg_0    0x08   a0.0<0,1,0>:ud {align1};
 
 /*
diff --git a/src/shaders/vme/intra_frame_ivb.g7b b/src/shaders/vme/intra_frame_ivb.g7b
index 7dd16fc..d2aa1eb 100644
--- a/src/shaders/vme/intra_frame_ivb.g7b
+++ b/src/shaders/vme/intra_frame_ivb.g7b
@@ -35,7 +35,13 @@
    { 0x00600001, 0x288000e1, 0x00000000, 0x00000000 },
    { 0x00800001, 0x28800231, 0x00cf03a3, 0x00000000 },
    { 0x00000001, 0x28900061, 0x00000000, 0x11111111 },
-   { 0x00000001, 0x22000060, 0x00000000, 0x0a184000 },
+   { 0x00000001, 0x24000221, 0x000000a8, 0x00000000 },
+   { 0x00000005, 0x24200c21, 0x00000400, 0x00000006 },
+   { 0x00000009, 0x24040c21, 0x00000420, 0x0000000a },
+   { 0x00000005, 0x24200c21, 0x00000400, 0x00000038 },
+   { 0x00000009, 0x24240c21, 0x00000420, 0x00000005 },
+   { 0x00000040, 0x24000421, 0x00000404, 0x00000424 },
+   { 0x00000040, 0x22000c20, 0x00000400, 0x0a184000 },
    { 0x08000031, 0x21800221, 0x00000800, 0x00000200 },
    { 0x00600001, 0x28000021, 0x008d0480, 0x00000000 },
    { 0x00000001, 0x28200021, 0x00000180, 0x00000000 },
diff --git a/src/shaders/vme/vme7.inc b/src/shaders/vme/vme7.inc
index acff81f..8c1731c 100644
--- a/src/shaders/vme/vme7.inc
+++ b/src/shaders/vme/vme7.inc
@@ -152,6 +152,7 @@ define(`input_mb_intra_ub',     `inline_reg0.5')
 define(`num_macroblocks',       `inline_reg0.6')
 define(`quality_level_ub',      `inline_reg0.7')
 
+define(`qp_ub',                 `inline_reg0.8')
 /*
  * GRF 6~11 -- reserved
  */
-- 
2.7.4



More information about the Libva mailing list