[Libva] [PATCH v3 1/8] VP8 HWEnc: Add vp8 gen9 intra frame encoding shader

Zhong Li zhong.li at intel.com
Thu Jan 15 06:04:52 PST 2015


Signed-off-by: Zhong Li <zhong.li at intel.com>
---
 src/shaders/vme/Makefile.am              |   6 +-
 src/shaders/vme/vme8.inc                 |   5 +
 src/shaders/vme/vp8_intra_frame_gen9.asm | 200 +++++++++++++++++++++++++++++++
 src/shaders/vme/vp8_intra_frame_gen9.g9a |   2 +
 src/shaders/vme/vp8_intra_frame_gen9.g9b |  73 +++++++++++
 5 files changed, 283 insertions(+), 3 deletions(-)
 create mode 100644 src/shaders/vme/vp8_intra_frame_gen9.asm
 create mode 100644 src/shaders/vme/vp8_intra_frame_gen9.g9a
 create mode 100644 src/shaders/vme/vp8_intra_frame_gen9.g9b

diff --git a/src/shaders/vme/Makefile.am b/src/shaders/vme/Makefile.am
index 2d2f54a..123378f 100644
--- a/src/shaders/vme/Makefile.am
+++ b/src/shaders/vme/Makefile.am
@@ -2,7 +2,7 @@ VME_CORE	= batchbuffer.asm intra_frame.asm inter_frame.asm
 VME7_CORE	= batchbuffer.asm intra_frame_ivb.asm inter_frame_ivb.asm inter_bframe_ivb.asm mpeg2_inter_ivb.asm
 VME75_CORE	= batchbuffer.asm intra_frame_haswell.asm inter_frame_haswell.asm inter_bframe_haswell.asm mpeg2_inter_haswell.asm
 VME8_CORE	= intra_frame_gen8.asm	inter_frame_gen8.asm inter_bframe_gen8.asm 
-VME9_CORE	= $(VME8_CORE) 
+VME9_CORE	= $(VME8_CORE) vp8_intra_frame_gen9.asm
 
 INTEL_G6B	= batchbuffer.g6b intra_frame.g6b inter_frame.g6b
 INTEL_G6A	= batchbuffer.g6a intra_frame.g6a inter_frame.g6a
@@ -26,8 +26,8 @@ INTEL_GEN8_INC	= vme8.inc vme75_mpeg2.inc
 INTEL_GEN8_ASM	= $(INTEL_G8A:%.g8a=%.gen8.asm)
 
 
-INTEL_G9B	= intra_frame_gen9.g9b inter_frame_gen9.g9b inter_bframe_gen9.g9b mpeg2_inter_gen9.g9b
-INTEL_G9A	= intra_frame_gen9.g9a inter_frame_gen9.g9a inter_bframe_gen9.g9a mpeg2_inter_gen9.g9a
+INTEL_G9B	= intra_frame_gen9.g9b inter_frame_gen9.g9b inter_bframe_gen9.g9b mpeg2_inter_gen9.g9b vp8_intra_frame_gen9.g9b
+INTEL_G9A	= intra_frame_gen9.g9a inter_frame_gen9.g9a inter_bframe_gen9.g9a mpeg2_inter_gen9.g9a vp8_intra_frame_gen9.g9a
 INTEL_GEN9_INC	= $(INTEL_GEN8_INC)
 INTEL_GEN9_ASM	= $(INTEL_G9A:%.g9a=%.gen9.asm)
 
diff --git a/src/shaders/vme/vme8.inc b/src/shaders/vme/vme8.inc
index 5d8dfa4..660571d 100644
--- a/src/shaders/vme/vme8.inc
+++ b/src/shaders/vme/vme8.inc
@@ -46,6 +46,9 @@ define(`LUMA_INTRA_16x16_DISABLE',      `0x1')
 define(`LUMA_INTRA_8x8_DISABLE',        `0x2')
 define(`LUMA_INTRA_4x4_DISABLE',        `0x4')
 
+define(`SUB_PART_8x4_DISABLE',          `0x20')
+define(`SUB_PART_4x8_DISABLE',          `0x40')
+
 define(`INTRA_PRED_AVAIL_FLAG_AE',      `0x60')
 define(`INTRA_PRED_AVAIL_FLAG_B',       `0x10')
 define(`INTRA_PRED_AVAIL_FLAG_C',       `0x8')
@@ -88,6 +91,8 @@ define(`START_CENTER',                  `0x30000000')
 define(`ADAPTIVE_SEARCH_ENABLE',        `0x00000002') 
 define(`INTRA_PREDICTORE_MODE',         `0x11111111:UD')
 
+define(`INTRA_PLANAR_MODE_MASK',        `0x10001000:UD')
+
 define(`INTER_VME_OUTPUT_IN_OWS',       `10')
 define(`INTER_VME_OUTPUT_MV_IN_OWS',    `8')
 
diff --git a/src/shaders/vme/vp8_intra_frame_gen9.asm b/src/shaders/vme/vp8_intra_frame_gen9.asm
new file mode 100644
index 0000000..f1e7891
--- /dev/null
+++ b/src/shaders/vme/vp8_intra_frame_gen9.asm
@@ -0,0 +1,200 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Zhao Yakui <yakui.zhao at intel.com>
+ *    Xiang Haihao <haihao.xiang at intel.com>
+ *    Li Zhong <zhong.li at intel.com>
+ *
+ */
+
+/*
+ * __START
+ */
+__INTRA_START:
+mov  (16) tmp_reg0.0<1>:UD      0x0:UD {align1};
+mov  (16) tmp_reg2.0<1>:UD      0x0:UD {align1};
+mov  (16) tmp_reg4.0<1>:UD      0x0:UD {align1} ;
+mov  (16) tmp_reg6.0<1>:UD      0x0:UD {align1} ;
+
+shl  (2) read0_header.0<1>:D    orig_xy_ub<2,2,1>:UB 4:UW {align1};    /* (x, y) * 16 */
+add  (1) read0_header.0<1>:D    read0_header.0<0,1,0>:D -8:W {align1};     /* X offset */
+add  (1) read0_header.4<1>:D    read0_header.4<0,1,0>:D -1:W {align1};     /* Y offset */ 
+mov  (1) read0_header.8<1>:UD   BLOCK_32X1 {align1};
+mov  (1) read0_header.20<1>:UB  thread_id_ub {align1};                  /* dispatch id */
+
+shl  (2) read1_header.0<1>:D    orig_xy_ub<2,2,1>:UB 4:UW {align1};    /* (x, y) * 16 */
+add  (1) read1_header.0<1>:D    read1_header.0<0,1,0>:D -4:W {align1};     /* X offset */
+mov  (1) read1_header.8<1>:UD   BLOCK_4X16 {align1};
+mov  (1) read1_header.20<1>:UB  thread_id_ub {align1};                  /* dispatch id */
+        
+shl  (2) vme_m0.8<1>:UW         orig_xy_ub<2,2,1>:UB 4:UW {align1};    /* (x, y) * 16 */
+mov  (1) vme_m0.20<1>:UB        thread_id_ub {align1};                  /* dispatch id */
+
+mul  (1) obw_m0.8<1>:UD         w_in_mb_uw<0,1,0>:UW orig_y_ub<0,1,0>:UB {align1};
+add  (1) obw_m0.8<1>:UD         obw_m0.8<0,1,0>:UD orig_x_ub<0,1,0>:UB {align1};
+mul  (1) obw_m0.8<1>:UD         obw_m0.8<0,1,0>:UD 0x02:UD {align1};
+mov  (1) obw_m0.20<1>:UB        thread_id_ub {align1};                  /* dispatch id */
+        
+/*
+ * Media Read Message -- fetch Luma neighbor edge pixels 
+ */
+/* ROW */
+mov  (8) msg_reg0.0<1>:UD       read0_header.0<8,8,1>:UD {align1};        
+send (8) msg_ind INEP_ROW<1>:UB null read(BIND_IDX_INEP, 0, 0, 4) mlen 1 rlen 1 {align1};
+
+/* COL */
+mov  (8) msg_reg0.0<1>:UD       read1_header.0<8,8,1>:UD {align1};                
+send (8) msg_ind INEP_COL0<1>:UB null read(BIND_IDX_INEP, 0, 0, 4) mlen 1 rlen 2 {align1};
+        
+/*
+ * Media Read Message -- fetch Chroma neighbor edge pixels 
+ */
+/* ROW: read0_header.8 still holds BLOCK_32X1 from the luma row read */
+shl  (2) read0_header.0<1>:D    orig_xy_ub<2,2,1>:UB 3:UW {align1};    /* (x, y) * 8 */
+mul  (1) read0_header.0<1>:D    read0_header.0<0,1,0>:D  2:W {align1};     /* x * 16 (presumably interleaved CbCr, 2 bytes per chroma pixel -- confirm) */
+add  (1) read0_header.0<1>:D    read0_header.0<0,1,0>:D -8:W {align1};     /* X offset */
+add  (1) read0_header.4<1>:D    read0_header.4<0,1,0>:D -1:W {align1};     /* Y offset */ 
+mov  (8) msg_reg0.0<1>:UD       read0_header.0<8,8,1>:UD {align1};        
+send (8) msg_ind CHROMA_ROW<1>:UB null read(BIND_IDX_CBCR, 0, 0, 4) mlen 1 rlen 1 {align1};
+
+/* COL: same origin as the row read, but 4 bytes to the left and with an explicit BLOCK_8X4 size */
+shl  (2) read1_header.0<1>:D    orig_xy_ub<2,2,1>:UB 3:UW {align1};    /* (x, y) * 8 */
+mul  (1) read1_header.0<1>:D    read1_header.0<0,1,0>:D  2:W {align1};     /* x * 16 */
+add  (1) read1_header.0<1>:D    read1_header.0<0,1,0>:D -4:W {align1};     /* X offset */
+mov  (1) read1_header.8<1>:UD   BLOCK_8X4 {align1};
+mov  (8) msg_reg0.0<1>:UD       read1_header.0<8,8,1>:UD {align1};                
+send (8) msg_ind CHROMA_COL<1>:UB null read(BIND_IDX_CBCR, 0, 0, 4) mlen 1 rlen 1 {align1};
+
+/* m2: the MV/MB cost, passed in through the constant buffer
+when the EU thread is created by MEDIA_OBJECT */       
+mov (8) vme_msg_2<1>:UD         r1.0<8,8,1>:UD {align1};
+
+/* m3. This is changed for FWD/BWD cost center */
+mov (8) vme_msg_3<1>:UD		0x0:UD {align1};	        
+
+/* m4.*/
+mov (8) vme_msg_4<1>:ud		0x0:ud	{align1};
+
+/* m5: the fetched luma row, with dword 0 cleared and only the top byte of dword 1 kept */
+mov  (1) INEP_ROW.0<1>:UD       0x0:UD {align1};
+and  (1) INEP_ROW.4<1>:UD       INEP_ROW.4<0,1,0>:UD            0xFF000000:UD {align1};
+mov  (8) vme_msg_5<1>:UD        INEP_ROW.0<8,8,1>:UD {align1};
+
+mov  (1) tmp_reg0.0<1>:UB	INTRA_PLANAR_MODE_MASK {align1}; /* VP8 doesn't support planar intra mode. NOTE(review): the mask is defined as 0x10001000:UD but the destination is one :UB -- confirm the intended byte value */
+mov  (1) tmp_reg0.1<1>:UB	LUMA_CHROMA_MODE {align1}; /* Intra type: Luma + Chroma */
+
+/* Intra mode mask and intra compute type: bytes 0-1 of tmp_reg0 go into bytes 4-5 of vme_msg_5 */
+mov  (1) vme_msg_5.4<1>:UW	tmp_reg0.0<0,1,0>:UW {align1};
+
+/* m6 */        
+mov  (8) vme_msg_6<1>:UD         0x0:UD {align1};
+mov (16) vme_msg_6.0<1>:UB       INEP_COL0.3<32,8,4>:UB {align1};
+mov  (1) vme_msg_6.16<1>:UD      INTRA_PREDICTORE_MODE {align1};
+
+/* the penalty for Intra mode */
+mov  (1) vme_msg_6.28<1>:UD	0x010101:UD {align1};
+mov  (1) vme_msg_6.20<1>:UW      CHROMA_ROW.6<0,1,0>:UW {align1};
+
+
+/* m7 */
+
+mov  (4) vme_msg_7.16<1>:UD      CHROMA_ROW.8<4,4,1>:UD {align1};
+mov  (8) vme_msg_7.0<1>:UW       CHROMA_COL.2<16,8,2>:UW {align1};
+
+/*
+ * VME message
+ */
+
+/* m1 */
+mov  (1) intra_flag<1>:UW       0x0:UW {align1};
+mov  (1) intra_part_mask_ub<1>:UB  LUMA_INTRA_8x8_DISABLE {align1}; /* VP8 doesn't support intra 8x8 mode */
+
+/* assign the MB intra struct from the thread payload */
+mov (1) mb_intra_struct_ub<1>:UB input_mb_intra_ub<0,1,0>:UB {align1}; 
+                           
+/* Disable the DC Haar component when calculating the Haar SATD block */
+mov  (1) tmp_reg0.0<1>:UW	DC_HARR_DISABLE:UW		{align1};
+mov  (1) vme_m1.30<1>:UB	tmp_reg0.0<0,1,0>:UB  {align1}; /* only the low byte of tmp_reg0 is copied into byte 30 of vme_m1 */
+
+mov  (8) vme_msg_1<1>:UD        vme_m1.0<8,8,1>:UD {align1}; /* m1 is complete: copy it into the message register */
+
+/* m0 */        
+add  (1) vme_m0.12<1>:UD        vme_m0.12<0,1,0>:ud	INTRA_SAD_HAAR:UD {align1};/* 16x16 source, intra SAD Haar */
+mov  (1) vme_m0.15<1>:UB        SUB_PART_8x4_DISABLE + SUB_PART_4x8_DISABLE {align1}; /* VP8 doesn't support 8x4 and 4x8 partitions; byte 15 is the top byte of the dword updated above */
+mov  (8) vme_msg_0<1>:UD        vme_m0.0<8,8,1>:UD {align1};
+
+/* after verification it will be passed by using payload */
+send (8)
+        vme_msg_ind
+        vme_wb<1>:UD
+        null
+        cre(
+                BIND_IDX_VME,
+                VME_SIC_MESSAGE_TYPE
+        )
+        mlen sic_vme_msg_length
+        rlen vme_wb_length
+        {align1};
+/*
+ * Oword Block Write message
+ */
+mov  (8) msg_reg0.0<1>:UD       obw_m0<8,8,1>:UD {align1};
+        
+mov  (1) msg_reg1.0<1>:UD       vme_wb.0<0,1,0>:UD      {align1};
+mov  (1) msg_reg1.4<1>:UD       vme_wb.16<0,1,0>:UD     {align1};
+mov  (1) msg_reg1.8<1>:UD       vme_wb.20<0,1,0>:UD     {align1};
+mov  (1) msg_reg1.12<1>:UD      vme_wb.24<0,1,0>:UD     {align1};
+
+/* Distortion, Intra (17-16), */
+mov  (1) msg_reg1.16<1>:UW      vme_wb.12<0,1,0>:UW     {align1};
+
+mov  (1) msg_reg1.20<1>:UD      vme_wb.8<0,1,0>:UD     {align1};
+/* VME clock counts */
+mov  (1) msg_reg1.24<1>:UD      vme_wb.28<0,1,0>:UD     {align1};
+
+mov  (1) msg_reg1.28<1>:UD      obw_m0.8<0,1,0>:UD     {align1};
+
+/* bind index 3, write 2 oword (32bytes), msg type: 8(OWord Block Write) */
+send (16)
+        msg_ind
+        obw_wb
+        null
+        data_port(
+                OBW_CACHE_TYPE,
+                OBW_MESSAGE_TYPE,
+                OBW_CONTROL_2,
+                OBW_BIND_IDX,
+                OBW_WRITE_COMMIT_CATEGORY,
+                OBW_HEADER_PRESENT
+        )
+        mlen 2
+        rlen obw_wb_length
+        {align1};
+
+__EXIT: 
+/*
+ * kill thread
+ */        
+mov  (8) ts_msg_reg0<1>:UD         r0<8,8,1>:UD {align1};
+send (16) ts_msg_ind acc0<1>UW null thread_spawner(0, 0, 1) mlen 1 rlen 0 {align1 EOT};
diff --git a/src/shaders/vme/vp8_intra_frame_gen9.g9a b/src/shaders/vme/vp8_intra_frame_gen9.g9a
new file mode 100644
index 0000000..db0b9ff
--- /dev/null
+++ b/src/shaders/vme/vp8_intra_frame_gen9.g9a
@@ -0,0 +1,2 @@
+#include "vme8.inc"
+#include "vp8_intra_frame_gen9.asm"
diff --git a/src/shaders/vme/vp8_intra_frame_gen9.g9b b/src/shaders/vme/vp8_intra_frame_gen9.g9b
new file mode 100644
index 0000000..c4f9baf
--- /dev/null
+++ b/src/shaders/vme/vp8_intra_frame_gen9.g9b
@@ -0,0 +1,73 @@
+   { 0x00800001, 0x24000608, 0x00000000, 0x00000000 },
+   { 0x00800001, 0x24400608, 0x00000000, 0x00000000 },
+   { 0x00800001, 0x24800608, 0x00000000, 0x00000000 },
+   { 0x00800001, 0x24c00608, 0x00000000, 0x00000000 },
+   { 0x00200009, 0x24002228, 0x164500a0, 0x00040004 },
+   { 0x00000040, 0x24000a28, 0x1e000400, 0xfff8fff8 },
+   { 0x00000040, 0x24040a28, 0x1e000404, 0xffffffff },
+   { 0x00000001, 0x24080e08, 0x08000000, 0x0000001f },
+   { 0x00000001, 0x24142288, 0x00000014, 0x00000000 },
+   { 0x00200009, 0x24202228, 0x164500a0, 0x00040004 },
+   { 0x00000040, 0x24200a28, 0x1e000420, 0xfffcfffc },
+   { 0x00000001, 0x24280e08, 0x08000000, 0x000f0003 },
+   { 0x00000001, 0x24342288, 0x00000014, 0x00000000 },
+   { 0x00200009, 0x24482248, 0x164500a0, 0x00040004 },
+   { 0x00000001, 0x24542288, 0x00000014, 0x00000000 },
+   { 0x00000041, 0x24881208, 0x220000a2, 0x000000a1 },
+   { 0x00000040, 0x24880208, 0x22000488, 0x000000a0 },
+   { 0x00000041, 0x24880208, 0x06000488, 0x00000002 },
+   { 0x00000001, 0x24942288, 0x00000014, 0x00000000 },
+   { 0x00600001, 0x28000208, 0x008d0400, 0x00000000 },
+   { 0x0c600031, 0x23800a88, 0x06000800, 0x02190004 },
+   { 0x00600001, 0x28000208, 0x008d0420, 0x00000000 },
+   { 0x0c600031, 0x23a00a88, 0x06000800, 0x02290004 },
+   { 0x00200009, 0x24002228, 0x164500a0, 0x00030003 },
+   { 0x00000041, 0x24000a28, 0x1e000400, 0x00020002 },
+   { 0x00000040, 0x24000a28, 0x1e000400, 0xfff8fff8 },
+   { 0x00000040, 0x24040a28, 0x1e000404, 0xffffffff },
+   { 0x00600001, 0x28000208, 0x008d0400, 0x00000000 },
+   { 0x0c600031, 0x26000a88, 0x06000800, 0x02190006 },
+   { 0x00200009, 0x24202228, 0x164500a0, 0x00030003 },
+   { 0x00000041, 0x24200a28, 0x1e000420, 0x00020002 },
+   { 0x00000040, 0x24200a28, 0x1e000420, 0xfffcfffc },
+   { 0x00000001, 0x24280e08, 0x08000000, 0x00070003 },
+   { 0x00600001, 0x28000208, 0x008d0420, 0x00000000 },
+   { 0x0c600031, 0x26200a88, 0x06000800, 0x02190006 },
+   { 0x00600001, 0x28400208, 0x008d0020, 0x00000000 },
+   { 0x00600001, 0x28600608, 0x00000000, 0x00000000 },
+   { 0x00600001, 0x28800608, 0x00000000, 0x00000000 },
+   { 0x00000001, 0x23800608, 0x00000000, 0x00000000 },
+   { 0x00000005, 0x23840208, 0x06000384, 0xff000000 },
+   { 0x00600001, 0x28a00208, 0x008d0380, 0x00000000 },
+   { 0x00000001, 0x24000688, 0x00000000, 0x10001000 },
+   { 0x00000001, 0x24010e88, 0x08000000, 0x00000000 },
+   { 0x00000001, 0x28a41248, 0x00000400, 0x00000000 },
+   { 0x00600001, 0x28c00608, 0x00000000, 0x00000000 },
+   { 0x00800001, 0x28c02288, 0x00cf03a3, 0x00000000 },
+   { 0x00000001, 0x28d00608, 0x00000000, 0x11111111 },
+   { 0x00000001, 0x28dc0608, 0x00000000, 0x00010101 },
+   { 0x00000001, 0x28d41248, 0x00000606, 0x00000000 },
+   { 0x00400001, 0x28f00208, 0x00690608, 0x00000000 },
+   { 0x00600001, 0x28e01248, 0x00ae0622, 0x00000000 },
+   { 0x00000001, 0x247c1648, 0x10000000, 0x00000000 },
+   { 0x00000001, 0x247c0e88, 0x08000000, 0x00000002 },
+   { 0x00000001, 0x247d2288, 0x000000a5, 0x00000000 },
+   { 0x00000001, 0x24001648, 0x10000000, 0x00200020 },
+   { 0x00000001, 0x247e2288, 0x00000400, 0x00000000 },
+   { 0x00600001, 0x28200208, 0x008d0460, 0x00000000 },
+   { 0x00000040, 0x244c0208, 0x0600044c, 0x00800000 },
+   { 0x00000001, 0x244f0e88, 0x08000000, 0x00000060 },
+   { 0x00600001, 0x28000208, 0x008d0440, 0x00000000 },
+   { 0x0d600031, 0x21800a08, 0x06000800, 0x10782000 },
+   { 0x00600001, 0x28000208, 0x008d0480, 0x00000000 },
+   { 0x00000001, 0x28200208, 0x00000180, 0x00000000 },
+   { 0x00000001, 0x28240208, 0x00000190, 0x00000000 },
+   { 0x00000001, 0x28280208, 0x00000194, 0x00000000 },
+   { 0x00000001, 0x282c0208, 0x00000198, 0x00000000 },
+   { 0x00000001, 0x28301248, 0x0000018c, 0x00000000 },
+   { 0x00000001, 0x28340208, 0x00000188, 0x00000000 },
+   { 0x00000001, 0x28380208, 0x0000019c, 0x00000000 },
+   { 0x00000001, 0x283c0208, 0x00000488, 0x00000000 },
+   { 0x0a800031, 0x20000a60, 0x06000800, 0x040a0203 },
+   { 0x00600001, 0x2e000208, 0x008d0000, 0x00000000 },
+   { 0x07800031, 0x24000a40, 0x06000e00, 0x82000010 },
-- 
1.9.1



More information about the Libva mailing list