[Libva] [PATCH 03/14] Encoding: VME shader reads mbmv_cost from cost_table surface instead of constant buffer on Haswell

Xiang, Haihao haihao.xiang at intel.com
Fri Jul 1 05:27:00 UTC 2016


On Thu, 2016-06-30 at 10:33 +0800, Pengfei Qu wrote:
> From: Zhao Yakui <yakui.zhao at intel.com>
> 
> This is preparation for the enhanced features.
> 
> Signed-off-by: Zhao Yakui <yakui.zhao at intel.com>
> Signed-off-by: pjl <ceciliapeng at intel.com>
> Signed-off-by: Pengfei Qu <Pengfei.Qu at intel.com>
> ---
>  src/gen75_vme.c                           | 14 +++++++++++++-
>  src/shaders/vme/inter_bframe_haswell.asm  | 24
> +++++++++++++++++++++++-
>  src/shaders/vme/inter_bframe_haswell.g75b | 23 ++++++++++++++-------
> --
>  src/shaders/vme/inter_frame_haswell.asm   | 24
> ++++++++++++++++++++++--
>  src/shaders/vme/inter_frame_haswell.g75b  | 11 ++++++++---
>  src/shaders/vme/intra_frame_haswell.asm   | 23
> ++++++++++++++++++++++-
>  src/shaders/vme/intra_frame_haswell.g75b  |  7 ++++++-
>  src/shaders/vme/vme75.inc                 |  4 ++++
>  8 files changed, 112 insertions(+), 18 deletions(-)
> 
> diff --git a/src/gen75_vme.c b/src/gen75_vme.c
> index 8434e6f..b2e7812 100644
> --- a/src/gen75_vme.c
> +++ b/src/gen75_vme.c
> @@ -491,6 +491,16 @@ gen75_vme_fill_vme_batchbuffer(VADriverContextP
> ctx,
>      int mb_x = 0, mb_y = 0;
>      int i, s;
>      unsigned int *command_ptr;
> +    struct gen6_mfc_context *mfc_context = encoder_context-
> >mfc_context;
> +    VAEncPictureParameterBufferH264 *pic_param =
> (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext-
> >buffer;
> +    VAEncSliceParameterBufferH264 *slice_param =
> (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]-
> >buffer;
> +    int qp;
> +    int slice_type = intel_avc_enc_slice_type_fixup(slice_param-
> >slice_type);
> +
> +    if (encoder_context->rate_control_mode == VA_RC_CQP)
> +        qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
> +    else
> +        qp = mfc_context-
> >bit_rate_control_context[slice_type].QpPrimeY;
>  
>      dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
>      command_ptr = vme_context->vme_batchbuffer.bo->virtual;
> @@ -528,7 +538,7 @@ gen75_vme_fill_vme_batchbuffer(VADriverContextP
> ctx,
>  	    if ((i == mb_width) && slice_mb_x) {
>  		mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_D);
>  	    }
> -            *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
> +            *command_ptr++ = (CMD_MEDIA_OBJECT | (9 - 2));
>              *command_ptr++ = kernel;
>              *command_ptr++ = 0;
>              *command_ptr++ = 0;
> @@ -538,6 +548,8 @@ gen75_vme_fill_vme_batchbuffer(VADriverContextP
> ctx,
>              /*inline data */
>              *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x);
>              *command_ptr++ = ((encoder_context->quality_level << 24)
> | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
> +            /* qp occupies one byte */
> +            *command_ptr++ = qp;

[PATCH 02/14] makes a similar change. Would it be OK to merge the
changes above with the ones in that patch, so they land as a single patch?

>  
>              i += 1;
>          } 
> diff --git a/src/shaders/vme/inter_bframe_haswell.asm
> b/src/shaders/vme/inter_bframe_haswell.asm
> index ff91487..dbc15c5 100644
> --- a/src/shaders/vme/inter_bframe_haswell.asm
> +++ b/src/shaders/vme/inter_bframe_haswell.asm
> @@ -485,9 +485,31 @@ __mb_hwdep_end:
>  asr	(4)	mb_ref_win.0<1>:w	mb_mvp_ref.0<4,4,1>:w
> 	2:w	{align1};
>  add	(4)	mb_ref_win.8<1>:w	mb_ref_win.0<4,4,1>:w
> 	3:w	{align1};
>  and	(4)	mb_ref_win.16<1>:uw	mb_ref_win.8<4,4,1>
> :uw	0xFFFC:uw {align1};
> +
> +mov  (8) msg_reg0.0<1>:ud   0:ud {align1};
> +mov  (1) msg_reg0.20<1>:UB  thread_id_ub
> {align1};                  /* dispatch id */
> +mov  (1) tmp_reg0.0<1>:UD   qp_ub<0,1,0>:ub {align1};
> +mul  (1) msg_reg0.8<1>:ud tmp_reg0.0<1>:ud 2:ud {align1};
> +
> +send (16)
> +        msg_ind
> +        vme_cost_wb.0<1>:ud
> +        null
> +        data_port(
> +                OBR_CACHE_TYPE,
> +                OBR_MESSAGE_TYPE,
> +                OBR_CONTROL_2,
> +                BIND_IDX_COST,
> +                OBR_WRITE_COMMIT_CATEGORY,
> +                OBR_HEADER_PRESENT
> +        )
> +        mlen 1
> +        rlen 1
> +        {align1};
> +
>  /* m2, get the MV/Mb cost passed from constant buffer when
>  spawning thread by MEDIA_OBJECT */       
> -mov (8) vme_m2<1>:UD            r1.0<8,8,1>:UD {align1};
> +mov (8) vme_m2<1>:UD            vme_cost_wb.0<8,8,1>:UD {align1};
>  
>  mov (8) vme_msg_2<1>:UD		vme_m2.0<8,8,1>:UD {align1};
>  
> diff --git a/src/shaders/vme/inter_bframe_haswell.g75b
> b/src/shaders/vme/inter_bframe_haswell.g75b
> index cabef20..113eac0 100644
> --- a/src/shaders/vme/inter_bframe_haswell.g75b
> +++ b/src/shaders/vme/inter_bframe_haswell.g75b
> @@ -64,7 +64,7 @@
>     { 0x00000001, 0x2fa40021, 0x00000b80, 0x00000000 },
>     { 0x00000001, 0x2fa80061, 0x00000000, 0x00000001 },
>     { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 },
> -   { 0x00000020, 0x34001c00, 0x00001400, 0x000013e0 },
> +   { 0x00000020, 0x34001c00, 0x00001400, 0x00001430 },
>     { 0x00000001, 0x2aa00129, 0x00000fe4, 0x00000000 },
>     { 0x01000010, 0x20002d28, 0x00000aa0, 0x00000000 },
>     { 0x00010001, 0x2af001e9, 0x00000000, 0x00010001 },
> @@ -100,7 +100,7 @@
>     { 0x00000001, 0x2fa40021, 0x00000b80, 0x00000000 },
>     { 0x00000001, 0x2fa80061, 0x00000000, 0x00000002 },
>     { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 },
> -   { 0x00000020, 0x34001c00, 0x00001400, 0x000011a0 },
> +   { 0x00000020, 0x34001c00, 0x00001400, 0x000011f0 },
>     { 0x00000001, 0x2aa00129, 0x00000fe4, 0x00000000 },
>     { 0x01000010, 0x20002d28, 0x00000aa0, 0x00000000 },
>     { 0x00010001, 0x2b1001e9, 0x00000000, 0x00010001 },
> @@ -135,7 +135,7 @@
>     { 0x00000001, 0x2fa40021, 0x00000b80, 0x00000000 },
>     { 0x00000001, 0x2fa80061, 0x00000000, 0x00000002 },
>     { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 },
> -   { 0x00000020, 0x34001c00, 0x00001400, 0x00000f70 },
> +   { 0x00000020, 0x34001c00, 0x00001400, 0x00000fc0 },
>     { 0x00000001, 0x2aa00129, 0x00000fe4, 0x00000000 },
>     { 0x01000010, 0x20002d28, 0x00000aa0, 0x00000000 },
>     { 0x00010001, 0x2b3001e9, 0x00000000, 0x00010001 },
> @@ -170,7 +170,7 @@
>     { 0x00000001, 0x2fa40021, 0x00000b80, 0x00000000 },
>     { 0x00000001, 0x2fa80061, 0x00000000, 0x00000003 },
>     { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 },
> -   { 0x00000020, 0x34001c00, 0x00001400, 0x00000d40 },
> +   { 0x00000020, 0x34001c00, 0x00001400, 0x00000d90 },
>     { 0x00000001, 0x2aa00129, 0x00000fe4, 0x00000000 },
>     { 0x00000001, 0x2b3201ed, 0x00000000, 0x00010001 },
>     { 0x01000010, 0x20002d28, 0x00000aa0, 0x00000000 },
> @@ -211,13 +211,13 @@
>     { 0x00000001, 0x2fa401ad, 0x00000b04, 0x00000000 },
>     { 0x00000001, 0x2fa801ad, 0x00000b24, 0x00000000 },
>     { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 },
> -   { 0x00000020, 0x34001c00, 0x00001400, 0x000009b0 },
> +   { 0x00000020, 0x34001c00, 0x00001400, 0x00000a00 },
>     { 0x00000001, 0x2ac001ad, 0x00000fe4, 0x00000000 },
>     { 0x00000001, 0x2fa001ad, 0x00000ae6, 0x00000000 },
>     { 0x00000001, 0x2fa401ad, 0x00000b06, 0x00000000 },
>     { 0x00000001, 0x2fa801ad, 0x00000b26, 0x00000000 },
>     { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 },
> -   { 0x00000020, 0x34001c00, 0x00001400, 0x00000950 },
> +   { 0x00000020, 0x34001c00, 0x00001400, 0x000009a0 },
>     { 0x00000001, 0x2ac201ad, 0x00000fe4, 0x00000000 },
>     { 0x00600001, 0x24000061, 0x00000000, 0x00000000 },
>     { 0x01000010, 0x20003da4, 0x00200af6, 0x00000000 },
> @@ -236,18 +236,23 @@
>     { 0x00000001, 0x2fa401ad, 0x00000b08, 0x00000000 },
>     { 0x00000001, 0x2fa801ad, 0x00000b28, 0x00000000 },
>     { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 },
> -   { 0x00000020, 0x34001c00, 0x00001400, 0x00000820 },
> +   { 0x00000020, 0x34001c00, 0x00001400, 0x00000870 },
>     { 0x00000001, 0x2ac401ad, 0x00000fe4, 0x00000000 },
>     { 0x00000001, 0x2fa001ad, 0x00000aea, 0x00000000 },
>     { 0x00000001, 0x2fa401ad, 0x00000b0a, 0x00000000 },
>     { 0x00000001, 0x2fa801ad, 0x00000b2a, 0x00000000 },
>     { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 },
> -   { 0x00000020, 0x34001c00, 0x00001400, 0x000007c0 },
> +   { 0x00000020, 0x34001c00, 0x00001400, 0x00000810 },
>     { 0x00000001, 0x2ac601ad, 0x00000fe4, 0x00000000 },
>     { 0x0040000c, 0x2a803dad, 0x00690ac0, 0x00020002 },
>     { 0x00400040, 0x2a883dad, 0x00690a80, 0x00030003 },
>     { 0x00400005, 0x2a902d29, 0x00690a88, 0xfffcfffc },
> -   { 0x00600001, 0x25600021, 0x008d0020, 0x00000000 },
> +   { 0x00600001, 0x28000061, 0x00000000, 0x00000000 },
> +   { 0x00000001, 0x28140231, 0x00000014, 0x00000000 },
> +   { 0x00000001, 0x24000221, 0x000000a8, 0x00000000 },
> +   { 0x00000041, 0x28080c21, 0x00200400, 0x00000002 },
> +   { 0x0a800031, 0x25801ca1, 0x00000800, 0x02180208 },
> +   { 0x00600001, 0x25600021, 0x008d0580, 0x00000000 },
>     { 0x00600001, 0x28400021, 0x008d0560, 0x00000000 },
>     { 0x00600001, 0x28600061, 0x00000000, 0x00000000 },
>     { 0x00000001, 0x23800061, 0x00000000, 0x00000000 },
> diff --git a/src/shaders/vme/inter_frame_haswell.asm
> b/src/shaders/vme/inter_frame_haswell.asm
> index 8a829f6..7f08d3e 100644
> --- a/src/shaders/vme/inter_frame_haswell.asm
> +++ b/src/shaders/vme/inter_frame_haswell.asm
> @@ -387,10 +387,30 @@ __mb_hwdep_end:
>  asr	(2)	mb_ref_win.0<1>:w	mb_mvp_ref.0<2,2,1>:w
> 	2:w	{align1};
>  add	(2)	mb_ref_win.8<1>:w	mb_ref_win.0<2,2,1>:w
> 	3:w	{align1};
>  and	(2)	mb_ref_win.16<1>:uw	mb_ref_win.8<2,2,1>
> :uw	0xFFFC:uw {align1};
> -        
> +
> +mov  (8) msg_reg0.0<1>:ud   0:ud {align1};
> +mov  (1) msg_reg0.20<1>:UB  thread_id_ub
> {align1};                  /* dispatch id */
> +mov  (1) tmp_reg0.0<1>:UD   qp_ub<0,1,0>:ub {align1};
> +mul  (1) msg_reg0.8<1>:ud tmp_reg0.0<1>:ud 2:ud {align1};
> +
> +send (16)
> +        msg_ind
> +        vme_cost_wb.0<1>:ud
> +        null
> +        data_port(
> +                OBR_CACHE_TYPE,
> +                OBR_MESSAGE_TYPE,
> +                OBR_CONTROL_2,
> +                BIND_IDX_COST,
> +                OBR_WRITE_COMMIT_CATEGORY,
> +                OBR_HEADER_PRESENT
> +        )
> +        mlen 1
> +        rlen 1
> +        {align1};
>  /* m2, get the MV/Mb cost passed from constant buffer when
>  spawning thread by MEDIA_OBJECT */       
> -mov (8) vme_m2<1>:UD            r1.0<8,8,1>:UD {align1};
> +mov (8) vme_m2<1>:UD            vme_cost_wb.0<8,8,1>:UD {align1};
>  
>  mov (8) vme_msg_2<1>:UD		vme_m2.0<8,8,1>:UD {align1};
>  
> diff --git a/src/shaders/vme/inter_frame_haswell.g75b
> b/src/shaders/vme/inter_frame_haswell.g75b
> index 1a60c51..5aa520e 100644
> --- a/src/shaders/vme/inter_frame_haswell.g75b
> +++ b/src/shaders/vme/inter_frame_haswell.g75b
> @@ -145,18 +145,23 @@
>     { 0x00000001, 0x2fa401ad, 0x00000b04, 0x00000000 },
>     { 0x00000001, 0x2fa801ad, 0x00000b24, 0x00000000 },
>     { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 },
> -   { 0x00000020, 0x34001c00, 0x00001400, 0x00000930 },
> +   { 0x00000020, 0x34001c00, 0x00001400, 0x00000980 },
>     { 0x00000001, 0x2ac001ad, 0x00000fe4, 0x00000000 },
>     { 0x00000001, 0x2fa001ad, 0x00000ae6, 0x00000000 },
>     { 0x00000001, 0x2fa401ad, 0x00000b06, 0x00000000 },
>     { 0x00000001, 0x2fa801ad, 0x00000b26, 0x00000000 },
>     { 0x00000040, 0x2fe00c01, 0x00001400, 0x00000020 },
> -   { 0x00000020, 0x34001c00, 0x00001400, 0x000008d0 },
> +   { 0x00000020, 0x34001c00, 0x00001400, 0x00000920 },
>     { 0x00000001, 0x2ac201ad, 0x00000fe4, 0x00000000 },
>     { 0x0020000c, 0x2a803dad, 0x00450ac0, 0x00020002 },
>     { 0x00200040, 0x2a883dad, 0x00450a80, 0x00030003 },
>     { 0x00200005, 0x2a902d29, 0x00450a88, 0xfffcfffc },
> -   { 0x00600001, 0x25600021, 0x008d0020, 0x00000000 },
> +   { 0x00600001, 0x28000061, 0x00000000, 0x00000000 },
> +   { 0x00000001, 0x28140231, 0x00000014, 0x00000000 },
> +   { 0x00000001, 0x24000221, 0x000000a8, 0x00000000 },
> +   { 0x00000041, 0x28080c21, 0x00200400, 0x00000002 },
> +   { 0x0a800031, 0x25801ca1, 0x00000800, 0x02180208 },
> +   { 0x00600001, 0x25600021, 0x008d0580, 0x00000000 },
>     { 0x00600001, 0x28400021, 0x008d0560, 0x00000000 },
>     { 0x00600001, 0x28600061, 0x00000000, 0x00000000 },
>     { 0x00000001, 0x23800061, 0x00000000, 0x00000000 },
> diff --git a/src/shaders/vme/intra_frame_haswell.asm
> b/src/shaders/vme/intra_frame_haswell.asm
> index c4cb76c..ac4a6ba 100644
> --- a/src/shaders/vme/intra_frame_haswell.asm
> +++ b/src/shaders/vme/intra_frame_haswell.asm
> @@ -89,9 +89,30 @@ mov  (1) read1_header.8<1>:UD   BLOCK_8X4
> {align1};
>  mov  (8) msg_reg0.0<1>:UD       read1_header.0<8,8,1>:UD
> {align1};                
>  send (8) msg_ind CHROMA_COL<1>:UB null read(BIND_IDX_CBCR, 0, 0, 4)
> mlen 1 rlen 1 {align1};
>  
> +mov  (8) msg_reg0.0<1>:ud   0:ud {align1};
> +mov  (1) msg_reg0.20<1>:UB  thread_id_ub
> {align1};                  /* dispatch id */
> +mov  (1) tmp_reg0.0<1>:UD   qp_ub<0,1,0>:ub {align1};
> +mul  (1) msg_reg0.8<1>:ud tmp_reg0.0<1>:ud 2:ud {align1};
> +
> +send (16)
> +        msg_ind
> +        vme_cost_wb.0<1>:ud
> +        null
> +        data_port(
> +                OBR_CACHE_TYPE,
> +                OBR_MESSAGE_TYPE,
> +                OBR_CONTROL_2,
> +                BIND_IDX_COST,
> +                OBR_WRITE_COMMIT_CATEGORY,
> +                OBR_HEADER_PRESENT
> +        )
> +        mlen 1
> +        rlen 1
> +        {align1};
> +
>  /* m2, get the MV/Mb cost passed by constant buffer 
>  when creating EU thread by MEDIA_OBJECT */       
> -mov (8) vme_msg_2<1>:UD         r1.0<8,8,1>:UD {align1};
> +mov (8) vme_msg_2<1>:UD         vme_cost_wb<8,8,1>:UD {align1};
>  
>  /* m3 */
>  mov (8) vme_msg_3<1>:UD		0x0:UD {align1};	     
>    
> diff --git a/src/shaders/vme/intra_frame_haswell.g75b
> b/src/shaders/vme/intra_frame_haswell.g75b
> index 4bb8ad9..88866d3 100644
> --- a/src/shaders/vme/intra_frame_haswell.g75b
> +++ b/src/shaders/vme/intra_frame_haswell.g75b
> @@ -33,7 +33,12 @@
>     { 0x00000001, 0x242800e1, 0x00000000, 0x00070003 },
>     { 0x00600001, 0x28000021, 0x008d0420, 0x00000000 },
>     { 0x04600031, 0x26201cb1, 0x00000800, 0x02190006 },
> -   { 0x00600001, 0x28400021, 0x008d0020, 0x00000000 },
> +   { 0x00600001, 0x28000061, 0x00000000, 0x00000000 },
> +   { 0x00000001, 0x28140231, 0x00000014, 0x00000000 },
> +   { 0x00000001, 0x24000221, 0x000000a8, 0x00000000 },
> +   { 0x00000041, 0x28080c21, 0x00200400, 0x00000002 },
> +   { 0x0a800031, 0x25801ca1, 0x00000800, 0x02180208 },
> +   { 0x00600001, 0x28400021, 0x008d0580, 0x00000000 },
>     { 0x00600001, 0x28600061, 0x00000000, 0x00000000 },
>     { 0x00000001, 0x23800061, 0x00000000, 0x00000000 },
>     { 0x00000005, 0x23840c21, 0x00000384, 0xff000000 },
> diff --git a/src/shaders/vme/vme75.inc b/src/shaders/vme/vme75.inc
> index 75c461e..fa7aa7e 100644
> --- a/src/shaders/vme/vme75.inc
> +++ b/src/shaders/vme/vme75.inc
> @@ -56,6 +56,7 @@ define(`BIND_IDX_VME_REF0',             `1')
>  define(`BIND_IDX_VME_REF1',             `2')
>  define(`BIND_IDX_OUTPUT',               `3')
>  define(`BIND_IDX_INEP',                 `4')
> +define(`BIND_IDX_COST',                 `8')
>  
>  define(`SUB_PEL_MODE_INTEGER',          `0x00000000')
>  define(`SUB_PEL_MODE_HALF',             `0x00001000')
> @@ -153,6 +154,8 @@ define(`input_mb_intra_ub',     `inline_reg0.5')
>  define(`num_macroblocks',       `inline_reg0.6')
>  define(`quality_level_ub',      `inline_reg0.7')
>  
> +define(`qp_ub',                 `inline_reg0.8')
> +
>  /*
>   * GRF 6~11 -- reserved
>   */
> @@ -235,6 +238,7 @@ define(`tmp_uw6',               `tmp_rega.12')
>  define(`tmp_uw7',               `tmp_rega.14')
>  
>  define(`vme_m2',                `r43')
> +define(`vme_cost_wb',                `r44')
>  /*
>   * MRF registers
>   */        


More information about the Libva mailing list