xf86-video-ati: Branch 'r6xx-r7xx-support' - 2 commits

Alex Deucher agd5f at kemper.freedesktop.org
Wed Feb 11 12:32:04 PST 2009


 src/r600_exa.c                 |  554 +++++++++++++++++++++++++----------------
 src/r600_textured_videofuncs.c |   15 -
 2 files changed, 352 insertions(+), 217 deletions(-)

New commits:
commit bd141aa73a77f68301715fb3b5664e2082327a80
Author: Alex Deucher <alexdeucher at gmail.com>
Date:   Wed Feb 11 15:31:47 2009 -0500

    R6xx/R7xx Xv: switch packed over to Yang's new shader code

diff --git a/src/r600_exa.c b/src/r600_exa.c
index 34e67d8..542d42d 100644
--- a/src/r600_exa.c
+++ b/src/r600_exa.c
@@ -2841,7 +2841,7 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
     // 2
     ps[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
 				      TYPE(SQ_EXPORT_PIXEL),
-				      RW_GPR(3),
+				      RW_GPR(2),
 				      RW_REL(ABSOLUTE),
 				      INDEX_GPR(0),
 				      ELEM_SIZE(3));
@@ -2856,96 +2856,88 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 					   CF_INST(SQ_CF_INST_EXPORT_DONE),
 					   WHOLE_QUAD_MODE(0),
 					   BARRIER(1));
+    /* Undo scaling of Y'CbCr values
+     *  Y' is scaled from 16:235
+     *  Cb/Cr are scaled from 16:240
+     */
     // 3 - alu 0
-    // DP4 gpr[2].x gpr[1].x c[0].x
+    // MULADD gpr[1].x gpr[1].x c[3].x c[3].y
     ps[i++] = ALU_DWORD0(SRC0_SEL(1),
 			 SRC0_REL(ABSOLUTE),
 			 SRC0_ELEM(ELEM_X),
 			 SRC0_NEG(0),
-			 SRC1_SEL(256),
+			 SRC1_SEL(259),
 			 SRC1_REL(ABSOLUTE),
 			 SRC1_ELEM(ELEM_X),
 			 SRC1_NEG(0),
 			 INDEX_MODE(SQ_INDEX_LOOP),
 			 PRED_SEL(SQ_PRED_SEL_OFF),
 			 LAST(0));
-    ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
-			     SRC0_ABS(0),
-			     SRC1_ABS(0),
-			     UPDATE_EXECUTE_MASK(0),
-			     UPDATE_PRED(0),
-			     WRITE_MASK(1),
-			     FOG_MERGE(0),
-			     OMOD(SQ_ALU_OMOD_OFF),
-			     ALU_INST(SQ_OP2_INST_DOT4),
-			     BANK_SWIZZLE(SQ_ALU_VEC_102),
-			     DST_GPR(2),
+    ps[i++] = ALU_DWORD1_OP3(SRC2_SEL(259),
+			     SRC2_REL(ABSOLUTE),
+			     SRC2_ELEM(ELEM_Y),
+			     SRC2_NEG(0),
+			     ALU_INST(SQ_OP3_INST_MULADD),
+			     BANK_SWIZZLE(SQ_ALU_VEC_012),
+			     DST_GPR(1),
 			     DST_REL(ABSOLUTE),
 			     DST_ELEM(ELEM_X),
 			     CLAMP(1));
     // 4 - alu 1
-    // DP4 gpr[2].y gpr[1].y c[0].y
+    // MULADD gpr[1].y gpr[1].y c[3].z c[3].w
     ps[i++] = ALU_DWORD0(SRC0_SEL(1),
 			 SRC0_REL(ABSOLUTE),
 			 SRC0_ELEM(ELEM_Y),
 			 SRC0_NEG(0),
-			 SRC1_SEL(256),
+			 SRC1_SEL(259),
 			 SRC1_REL(ABSOLUTE),
-			 SRC1_ELEM(ELEM_Y),
+			 SRC1_ELEM(ELEM_Z),
 			 SRC1_NEG(0),
 			 INDEX_MODE(SQ_INDEX_LOOP),
 			 PRED_SEL(SQ_PRED_SEL_OFF),
 			 LAST(0));
-    ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
-			     SRC0_ABS(0),
-			     SRC1_ABS(0),
-			     UPDATE_EXECUTE_MASK(0),
-			     UPDATE_PRED(0),
-			     WRITE_MASK(0),
-			     FOG_MERGE(0),
-			     OMOD(SQ_ALU_OMOD_OFF),
-			     ALU_INST(SQ_OP2_INST_DOT4),
-			     BANK_SWIZZLE(SQ_ALU_VEC_102),
-			     DST_GPR(2),
+    ps[i++] = ALU_DWORD1_OP3(SRC2_SEL(259),
+			     SRC2_REL(ABSOLUTE),
+			     SRC2_ELEM(ELEM_W),
+			     SRC2_NEG(0),
+			     ALU_INST(SQ_OP3_INST_MULADD),
+			     BANK_SWIZZLE(SQ_ALU_VEC_012),
+			     DST_GPR(1),
 			     DST_REL(ABSOLUTE),
 			     DST_ELEM(ELEM_Y),
-			     CLAMP(1));
+			     CLAMP(0));
     // 5 - alu 2
-    // DP4 gpr[2].z gpr[1].z c[0].z
+    // MULADD gpr[1].z gpr[1].z c[3].z c[3].w
     ps[i++] = ALU_DWORD0(SRC0_SEL(1),
 			 SRC0_REL(ABSOLUTE),
 			 SRC0_ELEM(ELEM_Z),
 			 SRC0_NEG(0),
-			 SRC1_SEL(256),
+			 SRC1_SEL(259),
 			 SRC1_REL(ABSOLUTE),
 			 SRC1_ELEM(ELEM_Z),
 			 SRC1_NEG(0),
 			 INDEX_MODE(SQ_INDEX_LOOP),
 			 PRED_SEL(SQ_PRED_SEL_OFF),
 			 LAST(0));
-    ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
-			     SRC0_ABS(0),
-			     SRC1_ABS(0),
-			     UPDATE_EXECUTE_MASK(0),
-			     UPDATE_PRED(0),
-			     WRITE_MASK(0),
-			     FOG_MERGE(0),
-			     OMOD(SQ_ALU_OMOD_OFF),
-			     ALU_INST(SQ_OP2_INST_DOT4),
-			     BANK_SWIZZLE(SQ_ALU_VEC_102),
-			     DST_GPR(2),
+    ps[i++] = ALU_DWORD1_OP3(SRC2_SEL(259),
+			     SRC2_REL(ABSOLUTE),
+			     SRC2_ELEM(ELEM_W),
+			     SRC2_NEG(0),
+			     ALU_INST(SQ_OP3_INST_MULADD),
+			     BANK_SWIZZLE(SQ_ALU_VEC_012),
+			     DST_GPR(1),
 			     DST_REL(ABSOLUTE),
 			     DST_ELEM(ELEM_Z),
-			     CLAMP(1));
+			     CLAMP(0));
     // 6 - alu 3
-    // DP4 gpr[2].w gpr[1].w c[0].w
-    ps[i++] = ALU_DWORD0(SRC0_SEL(1),
+    // MOV gpr[1].w 0.0
+    ps[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
 			 SRC0_REL(ABSOLUTE),
-			 SRC0_ELEM(ELEM_W),
+			 SRC0_ELEM(ELEM_X),
 			 SRC0_NEG(0),
-			 SRC1_SEL(256),
+			 SRC1_SEL(SQ_ALU_SRC_0),
 			 SRC1_REL(ABSOLUTE),
-			 SRC1_ELEM(ELEM_W),
+			 SRC1_ELEM(ELEM_X),
 			 SRC1_NEG(0),
 			 INDEX_MODE(SQ_INDEX_LOOP),
 			 PRED_SEL(SQ_PRED_SEL_OFF),
@@ -2955,22 +2947,22 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     SRC1_ABS(0),
 			     UPDATE_EXECUTE_MASK(0),
 			     UPDATE_PRED(0),
-			     WRITE_MASK(0),
+			     WRITE_MASK(1),
 			     FOG_MERGE(0),
 			     OMOD(SQ_ALU_OMOD_OFF),
 			     ALU_INST(SQ_OP2_INST_DOT4),
-			     BANK_SWIZZLE(SQ_ALU_VEC_021),
-			     DST_GPR(2),
+			     BANK_SWIZZLE(SQ_ALU_VEC_102),
+			     DST_GPR(1),
 			     DST_REL(ABSOLUTE),
 			     DST_ELEM(ELEM_W),
-			     CLAMP(1));
+			     CLAMP(0));
     // 7 - alu 4
-    // DP4 gpr[2].x gpr[1].x c[1].x
+    // DP4 gpr[2].x gpr[1].x c[0].x
     ps[i++] = ALU_DWORD0(SRC0_SEL(1),
 			 SRC0_REL(ABSOLUTE),
 			 SRC0_ELEM(ELEM_X),
 			 SRC0_NEG(0),
-			 SRC1_SEL(257),
+			 SRC1_SEL(256),
 			 SRC1_REL(ABSOLUTE),
 			 SRC1_ELEM(ELEM_X),
 			 SRC1_NEG(0),
@@ -2982,7 +2974,7 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     SRC1_ABS(0),
 			     UPDATE_EXECUTE_MASK(0),
 			     UPDATE_PRED(0),
-			     WRITE_MASK(0),
+			     WRITE_MASK(1),
 			     FOG_MERGE(0),
 			     OMOD(SQ_ALU_OMOD_OFF),
 			     ALU_INST(SQ_OP2_INST_DOT4),
@@ -2992,12 +2984,12 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     DST_ELEM(ELEM_X),
 			     CLAMP(1));
     // 8 - alu 5
-    // DP4 gpr[2].y gpr[1].y c[1].y
+    // DP4 gpr[2].y gpr[1].y c[0].y
     ps[i++] = ALU_DWORD0(SRC0_SEL(1),
 			 SRC0_REL(ABSOLUTE),
 			 SRC0_ELEM(ELEM_Y),
 			 SRC0_NEG(0),
-			 SRC1_SEL(257),
+			 SRC1_SEL(256),
 			 SRC1_REL(ABSOLUTE),
 			 SRC1_ELEM(ELEM_Y),
 			 SRC1_NEG(0),
@@ -3009,7 +3001,7 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     SRC1_ABS(0),
 			     UPDATE_EXECUTE_MASK(0),
 			     UPDATE_PRED(0),
-			     WRITE_MASK(1),
+			     WRITE_MASK(0),
 			     FOG_MERGE(0),
 			     OMOD(SQ_ALU_OMOD_OFF),
 			     ALU_INST(SQ_OP2_INST_DOT4),
@@ -3019,12 +3011,12 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     DST_ELEM(ELEM_Y),
 			     CLAMP(1));
     // 9 - alu 6
-    // DP4 gpr[2].z gpr[1].z c[1].z
+    // DP4 gpr[2].z gpr[1].z c[0].z
     ps[i++] = ALU_DWORD0(SRC0_SEL(1),
 			 SRC0_REL(ABSOLUTE),
 			 SRC0_ELEM(ELEM_Z),
 			 SRC0_NEG(0),
-			 SRC1_SEL(257),
+			 SRC1_SEL(256),
 			 SRC1_REL(ABSOLUTE),
 			 SRC1_ELEM(ELEM_Z),
 			 SRC1_NEG(0),
@@ -3046,12 +3038,12 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     DST_ELEM(ELEM_Z),
 			     CLAMP(1));
     // 10 - alu 7
-    // DP4 gpr[2].w gpr[1].w c[1].w
+    // DP4 gpr[2].w gpr[1].w c[0].w
     ps[i++] = ALU_DWORD0(SRC0_SEL(1),
 			 SRC0_REL(ABSOLUTE),
 			 SRC0_ELEM(ELEM_W),
 			 SRC0_NEG(0),
-			 SRC1_SEL(257),
+			 SRC1_SEL(256),
 			 SRC1_REL(ABSOLUTE),
 			 SRC1_ELEM(ELEM_W),
 			 SRC1_NEG(0),
@@ -3073,12 +3065,12 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     DST_ELEM(ELEM_W),
 			     CLAMP(1));
     // 11 - alu 8
-    // DP4 gpr[2].x gpr[1].x c[2].x
+    // DP4 gpr[2].x gpr[1].x c[1].x
     ps[i++] = ALU_DWORD0(SRC0_SEL(1),
 			 SRC0_REL(ABSOLUTE),
 			 SRC0_ELEM(ELEM_X),
 			 SRC0_NEG(0),
-			 SRC1_SEL(258),
+			 SRC1_SEL(257),
 			 SRC1_REL(ABSOLUTE),
 			 SRC1_ELEM(ELEM_X),
 			 SRC1_NEG(0),
@@ -3100,12 +3092,12 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     DST_ELEM(ELEM_X),
 			     CLAMP(1));
     // 12 - alu 9
-    // DP4 gpr[2].y gpr[1].y c[2].y
+    // DP4 gpr[2].y gpr[1].y c[1].y
     ps[i++] = ALU_DWORD0(SRC0_SEL(1),
 			 SRC0_REL(ABSOLUTE),
 			 SRC0_ELEM(ELEM_Y),
 			 SRC0_NEG(0),
-			 SRC1_SEL(258),
+			 SRC1_SEL(257),
 			 SRC1_REL(ABSOLUTE),
 			 SRC1_ELEM(ELEM_Y),
 			 SRC1_NEG(0),
@@ -3117,7 +3109,7 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     SRC1_ABS(0),
 			     UPDATE_EXECUTE_MASK(0),
 			     UPDATE_PRED(0),
-			     WRITE_MASK(0),
+			     WRITE_MASK(1),
 			     FOG_MERGE(0),
 			     OMOD(SQ_ALU_OMOD_OFF),
 			     ALU_INST(SQ_OP2_INST_DOT4),
@@ -3127,12 +3119,12 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     DST_ELEM(ELEM_Y),
 			     CLAMP(1));
     // 13 - alu 10
-    // DP4 gpr[2].z gpr[1].z c[2].z
+    // DP4 gpr[2].z gpr[1].z c[1].z
     ps[i++] = ALU_DWORD0(SRC0_SEL(1),
 			 SRC0_REL(ABSOLUTE),
 			 SRC0_ELEM(ELEM_Z),
 			 SRC0_NEG(0),
-			 SRC1_SEL(258),
+			 SRC1_SEL(257),
 			 SRC1_REL(ABSOLUTE),
 			 SRC1_ELEM(ELEM_Z),
 			 SRC1_NEG(0),
@@ -3144,7 +3136,7 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     SRC1_ABS(0),
 			     UPDATE_EXECUTE_MASK(0),
 			     UPDATE_PRED(0),
-			     WRITE_MASK(1),
+			     WRITE_MASK(0),
 			     FOG_MERGE(0),
 			     OMOD(SQ_ALU_OMOD_OFF),
 			     ALU_INST(SQ_OP2_INST_DOT4),
@@ -3154,12 +3146,12 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     DST_ELEM(ELEM_Z),
 			     CLAMP(1));
     // 14 - alu 11
-    // DP4 gpr[2].w gpr[1].w c[2].w
+    // DP4 gpr[2].w gpr[1].w c[1].w
     ps[i++] = ALU_DWORD0(SRC0_SEL(1),
 			 SRC0_REL(ABSOLUTE),
 			 SRC0_ELEM(ELEM_W),
 			 SRC0_NEG(0),
-			 SRC1_SEL(258),
+			 SRC1_SEL(257),
 			 SRC1_REL(ABSOLUTE),
 			 SRC1_ELEM(ELEM_W),
 			 SRC1_NEG(0),
@@ -3181,12 +3173,12 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     DST_ELEM(ELEM_W),
 			     CLAMP(1));
     // 15 - alu 12
-    // MOV gpr[3].x gpr[2].x
-    ps[i++] = ALU_DWORD0(SRC0_SEL(2),
+    // DP4 gpr[2].x gpr[1].x c[2].x
+    ps[i++] = ALU_DWORD0(SRC0_SEL(1),
 			 SRC0_REL(ABSOLUTE),
 			 SRC0_ELEM(ELEM_X),
 			 SRC0_NEG(0),
-			 SRC1_SEL(0),
+			 SRC1_SEL(258),
 			 SRC1_REL(ABSOLUTE),
 			 SRC1_ELEM(ELEM_X),
 			 SRC1_NEG(0),
@@ -3198,24 +3190,24 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     SRC1_ABS(0),
 			     UPDATE_EXECUTE_MASK(0),
 			     UPDATE_PRED(0),
-			     WRITE_MASK(1),
+			     WRITE_MASK(0),
 			     FOG_MERGE(0),
 			     OMOD(SQ_ALU_OMOD_OFF),
-			     ALU_INST(SQ_OP2_INST_MOV),
-			     BANK_SWIZZLE(SQ_ALU_VEC_210),
-			     DST_GPR(3),
+			     ALU_INST(SQ_OP2_INST_DOT4),
+			     BANK_SWIZZLE(SQ_ALU_VEC_102),
+			     DST_GPR(2),
 			     DST_REL(ABSOLUTE),
 			     DST_ELEM(ELEM_X),
-			     CLAMP(0));
+			     CLAMP(1));
     // 16 - alu 13
-    // MOV gpr[3].y gpr[2].y
-    ps[i++] = ALU_DWORD0(SRC0_SEL(2),
+    // DP4 gpr[2].y gpr[1].y c[2].y
+    ps[i++] = ALU_DWORD0(SRC0_SEL(1),
 			 SRC0_REL(ABSOLUTE),
 			 SRC0_ELEM(ELEM_Y),
 			 SRC0_NEG(0),
-			 SRC1_SEL(0),
+			 SRC1_SEL(258),
 			 SRC1_REL(ABSOLUTE),
-			 SRC1_ELEM(ELEM_X),
+			 SRC1_ELEM(ELEM_Y),
 			 SRC1_NEG(0),
 			 INDEX_MODE(SQ_INDEX_LOOP),
 			 PRED_SEL(SQ_PRED_SEL_OFF),
@@ -3225,24 +3217,24 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     SRC1_ABS(0),
 			     UPDATE_EXECUTE_MASK(0),
 			     UPDATE_PRED(0),
-			     WRITE_MASK(1),
+			     WRITE_MASK(0),
 			     FOG_MERGE(0),
 			     OMOD(SQ_ALU_OMOD_OFF),
-			     ALU_INST(SQ_OP2_INST_MOV),
-			     BANK_SWIZZLE(SQ_ALU_VEC_210),
-			     DST_GPR(3),
+			     ALU_INST(SQ_OP2_INST_DOT4),
+			     BANK_SWIZZLE(SQ_ALU_VEC_102),
+			     DST_GPR(2),
 			     DST_REL(ABSOLUTE),
 			     DST_ELEM(ELEM_Y),
-			     CLAMP(0));
+			     CLAMP(1));
     // 17 - alu 14
-    // MOV gpr[3].z gpr[2].z
-    ps[i++] = ALU_DWORD0(SRC0_SEL(2),
+    // DP4 gpr[2].z gpr[1].z c[2].z
+    ps[i++] = ALU_DWORD0(SRC0_SEL(1),
 			 SRC0_REL(ABSOLUTE),
 			 SRC0_ELEM(ELEM_Z),
 			 SRC0_NEG(0),
-			 SRC1_SEL(0),
+			 SRC1_SEL(258),
 			 SRC1_REL(ABSOLUTE),
-			 SRC1_ELEM(ELEM_X),
+			 SRC1_ELEM(ELEM_Z),
 			 SRC1_NEG(0),
 			 INDEX_MODE(SQ_INDEX_LOOP),
 			 PRED_SEL(SQ_PRED_SEL_OFF),
@@ -3255,21 +3247,21 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     WRITE_MASK(1),
 			     FOG_MERGE(0),
 			     OMOD(SQ_ALU_OMOD_OFF),
-			     ALU_INST(SQ_OP2_INST_MOV),
-			     BANK_SWIZZLE(SQ_ALU_VEC_210),
-			     DST_GPR(3),
+			     ALU_INST(SQ_OP2_INST_DOT4),
+			     BANK_SWIZZLE(SQ_ALU_VEC_102),
+			     DST_GPR(2),
 			     DST_REL(ABSOLUTE),
 			     DST_ELEM(ELEM_Z),
-			     CLAMP(0));
+			     CLAMP(1));
     // 18 - alu 15
-    // MOV gpr[3].w gpr[2].w
-    ps[i++] = ALU_DWORD0(SRC0_SEL(2),
+    // DP4 gpr[2].w gpr[1].w c[2].w
+    ps[i++] = ALU_DWORD0(SRC0_SEL(1),
 			 SRC0_REL(ABSOLUTE),
 			 SRC0_ELEM(ELEM_W),
 			 SRC0_NEG(0),
-			 SRC1_SEL(0),
+			 SRC1_SEL(258),
 			 SRC1_REL(ABSOLUTE),
-			 SRC1_ELEM(ELEM_X),
+			 SRC1_ELEM(ELEM_W),
 			 SRC1_NEG(0),
 			 INDEX_MODE(SQ_INDEX_LOOP),
 			 PRED_SEL(SQ_PRED_SEL_OFF),
@@ -3279,15 +3271,15 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     SRC1_ABS(0),
 			     UPDATE_EXECUTE_MASK(0),
 			     UPDATE_PRED(0),
-			     WRITE_MASK(1),
+			     WRITE_MASK(0),
 			     FOG_MERGE(0),
 			     OMOD(SQ_ALU_OMOD_OFF),
-			     ALU_INST(SQ_OP2_INST_MOV),
-			     BANK_SWIZZLE(SQ_ALU_VEC_012),
-			     DST_GPR(3),
+			     ALU_INST(SQ_OP2_INST_DOT4),
+			     BANK_SWIZZLE(SQ_ALU_VEC_021),
+			     DST_GPR(2),
 			     DST_REL(ABSOLUTE),
 			     DST_ELEM(ELEM_W),
-			     CLAMP(0));
+			     CLAMP(1));
     // 19 - alignment
     ps[i++] = 0x00000000;
     ps[i++] = 0x00000000;
diff --git a/src/r600_textured_videofuncs.c b/src/r600_textured_videofuncs.c
index 4a7391c..c06512a 100644
--- a/src/r600_textured_videofuncs.c
+++ b/src/r600_textured_videofuncs.c
@@ -202,7 +202,7 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
 			accel_state->ps_size, accel_state->ps_mc_addr);
 
     ps_conf.shader_addr         = accel_state->ps_mc_addr;
-    ps_conf.num_gprs            = 4;
+    ps_conf.num_gprs            = 3;
     ps_conf.stack_size          = 0;
     ps_conf.uncached_first_inst = 1;
     ps_conf.clamp_consts        = 0;
commit cb4a1ceaa32d6847b146a31291772e1183972ee7
Author: Yang Zhao <yang at yangman.ca>
Date:   Wed Feb 11 15:18:00 2009 -0500

    R6xx/R7xx Xv: Planar - Properly scale Y'CbCr values before converting to RGB
    
    According to MPEG-2 spec, Y' and Cb/Cr values are scaled to [16, 235]
    and [16, 240], respectively, when packed into bytes. Properly take care
    of the reverse scaling before translating to RGB.
    
    Conversion matrix has been simplified to remove 3rd column, as the fitting
    to [-0.5, 0.5] can be done with scaling.
    
    Redundant MOV instructions were also removed, and now only 3 GPRs are required.

diff --git a/src/r600_exa.c b/src/r600_exa.c
index a0f227b..34e67d8 100644
--- a/src/r600_exa.c
+++ b/src/r600_exa.c
@@ -3349,7 +3349,7 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
     ps[i++] = TEX_DWORD_PAD;
 
     // xv ps planar ----------------------------------
-    i = accel_state->xv_ps_offset_planar / 4;
+     i = accel_state->xv_ps_offset_planar / 4;
     // 0
     ps[i++] = CF_DWORD0(ADDR(20));
     ps[i++] = CF_DWORD1(POP_COUNT(0),
@@ -3378,7 +3378,7 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
     // 2
     ps[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
 				      TYPE(SQ_EXPORT_PIXEL),
-				      RW_GPR(3),
+				      RW_GPR(2),
 				      RW_REL(ABSOLUTE),
 				      INDEX_GPR(0),
 				      ELEM_SIZE(3));
@@ -3393,96 +3393,88 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 					   CF_INST(SQ_CF_INST_EXPORT_DONE),
 					   WHOLE_QUAD_MODE(0),
 					   BARRIER(1));
+    /* Undo scaling of Y'CbCr values
+     *  Y' is scaled from 16:235
+     *  Cb/Cr are scaled from 16:240
+     */
     // 3 - alu 0
-    // DP4 gpr[2].x gpr[1].x c[0].x
+    // MULADD gpr[1].x gpr[1].x c[3].x c[3].y
     ps[i++] = ALU_DWORD0(SRC0_SEL(1),
 			 SRC0_REL(ABSOLUTE),
 			 SRC0_ELEM(ELEM_X),
 			 SRC0_NEG(0),
-			 SRC1_SEL(256),
+			 SRC1_SEL(259),
 			 SRC1_REL(ABSOLUTE),
 			 SRC1_ELEM(ELEM_X),
 			 SRC1_NEG(0),
 			 INDEX_MODE(SQ_INDEX_LOOP),
 			 PRED_SEL(SQ_PRED_SEL_OFF),
 			 LAST(0));
-    ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
-			     SRC0_ABS(0),
-			     SRC1_ABS(0),
-			     UPDATE_EXECUTE_MASK(0),
-			     UPDATE_PRED(0),
-			     WRITE_MASK(1),
-			     FOG_MERGE(0),
-			     OMOD(SQ_ALU_OMOD_OFF),
-			     ALU_INST(SQ_OP2_INST_DOT4),
-			     BANK_SWIZZLE(SQ_ALU_VEC_102),
-			     DST_GPR(2),
+    ps[i++] = ALU_DWORD1_OP3(SRC2_SEL(259),
+			     SRC2_REL(ABSOLUTE),
+			     SRC2_ELEM(ELEM_Y),
+			     SRC2_NEG(0),
+			     ALU_INST(SQ_OP3_INST_MULADD),
+			     BANK_SWIZZLE(SQ_ALU_VEC_012),
+			     DST_GPR(1),
 			     DST_REL(ABSOLUTE),
 			     DST_ELEM(ELEM_X),
 			     CLAMP(1));
     // 4 - alu 1
-    // DP4 gpr[2].y gpr[1].y c[0].y
+    // MULADD gpr[1].y gpr[1].y c[3].z c[3].w
     ps[i++] = ALU_DWORD0(SRC0_SEL(1),
 			 SRC0_REL(ABSOLUTE),
 			 SRC0_ELEM(ELEM_Y),
 			 SRC0_NEG(0),
-			 SRC1_SEL(256),
+			 SRC1_SEL(259),
 			 SRC1_REL(ABSOLUTE),
-			 SRC1_ELEM(ELEM_Y),
+			 SRC1_ELEM(ELEM_Z),
 			 SRC1_NEG(0),
 			 INDEX_MODE(SQ_INDEX_LOOP),
 			 PRED_SEL(SQ_PRED_SEL_OFF),
 			 LAST(0));
-    ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
-			     SRC0_ABS(0),
-			     SRC1_ABS(0),
-			     UPDATE_EXECUTE_MASK(0),
-			     UPDATE_PRED(0),
-			     WRITE_MASK(0),
-			     FOG_MERGE(0),
-			     OMOD(SQ_ALU_OMOD_OFF),
-			     ALU_INST(SQ_OP2_INST_DOT4),
-			     BANK_SWIZZLE(SQ_ALU_VEC_102),
-			     DST_GPR(2),
+    ps[i++] = ALU_DWORD1_OP3(SRC2_SEL(259),
+			     SRC2_REL(ABSOLUTE),
+			     SRC2_ELEM(ELEM_W),
+			     SRC2_NEG(0),
+			     ALU_INST(SQ_OP3_INST_MULADD),
+			     BANK_SWIZZLE(SQ_ALU_VEC_012),
+			     DST_GPR(1),
 			     DST_REL(ABSOLUTE),
 			     DST_ELEM(ELEM_Y),
-			     CLAMP(1));
+			     CLAMP(0));
     // 5 - alu 2
-    // DP4 gpr[2].z gpr[1].z c[0].z
+    // MULADD gpr[1].z gpr[1].z c[3].z c[3].w
     ps[i++] = ALU_DWORD0(SRC0_SEL(1),
 			 SRC0_REL(ABSOLUTE),
 			 SRC0_ELEM(ELEM_Z),
 			 SRC0_NEG(0),
-			 SRC1_SEL(256),
+			 SRC1_SEL(259),
 			 SRC1_REL(ABSOLUTE),
 			 SRC1_ELEM(ELEM_Z),
 			 SRC1_NEG(0),
 			 INDEX_MODE(SQ_INDEX_LOOP),
 			 PRED_SEL(SQ_PRED_SEL_OFF),
 			 LAST(0));
-    ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
-			     SRC0_ABS(0),
-			     SRC1_ABS(0),
-			     UPDATE_EXECUTE_MASK(0),
-			     UPDATE_PRED(0),
-			     WRITE_MASK(0),
-			     FOG_MERGE(0),
-			     OMOD(SQ_ALU_OMOD_OFF),
-			     ALU_INST(SQ_OP2_INST_DOT4),
-			     BANK_SWIZZLE(SQ_ALU_VEC_102),
-			     DST_GPR(2),
+    ps[i++] = ALU_DWORD1_OP3(SRC2_SEL(259),
+			     SRC2_REL(ABSOLUTE),
+			     SRC2_ELEM(ELEM_W),
+			     SRC2_NEG(0),
+			     ALU_INST(SQ_OP3_INST_MULADD),
+			     BANK_SWIZZLE(SQ_ALU_VEC_012),
+			     DST_GPR(1),
 			     DST_REL(ABSOLUTE),
 			     DST_ELEM(ELEM_Z),
-			     CLAMP(1));
+			     CLAMP(0));
     // 6 - alu 3
-    // DP4 gpr[2].w gpr[1].w c[0].w
-    ps[i++] = ALU_DWORD0(SRC0_SEL(1),
+    // MOV gpr[1].w 0.0
+    ps[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
 			 SRC0_REL(ABSOLUTE),
-			 SRC0_ELEM(ELEM_W),
+			 SRC0_ELEM(ELEM_X),
 			 SRC0_NEG(0),
-			 SRC1_SEL(256),
+			 SRC1_SEL(SQ_ALU_SRC_0),
 			 SRC1_REL(ABSOLUTE),
-			 SRC1_ELEM(ELEM_W),
+			 SRC1_ELEM(ELEM_X),
 			 SRC1_NEG(0),
 			 INDEX_MODE(SQ_INDEX_LOOP),
 			 PRED_SEL(SQ_PRED_SEL_OFF),
@@ -3492,22 +3484,22 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     SRC1_ABS(0),
 			     UPDATE_EXECUTE_MASK(0),
 			     UPDATE_PRED(0),
-			     WRITE_MASK(0),
+			     WRITE_MASK(1),
 			     FOG_MERGE(0),
 			     OMOD(SQ_ALU_OMOD_OFF),
 			     ALU_INST(SQ_OP2_INST_DOT4),
-			     BANK_SWIZZLE(SQ_ALU_VEC_021),
-			     DST_GPR(2),
+			     BANK_SWIZZLE(SQ_ALU_VEC_102),
+			     DST_GPR(1),
 			     DST_REL(ABSOLUTE),
 			     DST_ELEM(ELEM_W),
-			     CLAMP(1));
+			     CLAMP(0));
     // 7 - alu 4
-    // DP4 gpr[2].x gpr[1].x c[1].x
+    // DP4 gpr[2].x gpr[1].x c[0].x
     ps[i++] = ALU_DWORD0(SRC0_SEL(1),
 			 SRC0_REL(ABSOLUTE),
 			 SRC0_ELEM(ELEM_X),
 			 SRC0_NEG(0),
-			 SRC1_SEL(257),
+			 SRC1_SEL(256),
 			 SRC1_REL(ABSOLUTE),
 			 SRC1_ELEM(ELEM_X),
 			 SRC1_NEG(0),
@@ -3519,7 +3511,7 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     SRC1_ABS(0),
 			     UPDATE_EXECUTE_MASK(0),
 			     UPDATE_PRED(0),
-			     WRITE_MASK(0),
+			     WRITE_MASK(1),
 			     FOG_MERGE(0),
 			     OMOD(SQ_ALU_OMOD_OFF),
 			     ALU_INST(SQ_OP2_INST_DOT4),
@@ -3529,12 +3521,12 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     DST_ELEM(ELEM_X),
 			     CLAMP(1));
     // 8 - alu 5
-    // DP4 gpr[2].y gpr[1].y c[1].y
+    // DP4 gpr[2].y gpr[1].y c[0].y
     ps[i++] = ALU_DWORD0(SRC0_SEL(1),
 			 SRC0_REL(ABSOLUTE),
 			 SRC0_ELEM(ELEM_Y),
 			 SRC0_NEG(0),
-			 SRC1_SEL(257),
+			 SRC1_SEL(256),
 			 SRC1_REL(ABSOLUTE),
 			 SRC1_ELEM(ELEM_Y),
 			 SRC1_NEG(0),
@@ -3546,7 +3538,7 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     SRC1_ABS(0),
 			     UPDATE_EXECUTE_MASK(0),
 			     UPDATE_PRED(0),
-			     WRITE_MASK(1),
+			     WRITE_MASK(0),
 			     FOG_MERGE(0),
 			     OMOD(SQ_ALU_OMOD_OFF),
 			     ALU_INST(SQ_OP2_INST_DOT4),
@@ -3556,12 +3548,12 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     DST_ELEM(ELEM_Y),
 			     CLAMP(1));
     // 9 - alu 6
-    // DP4 gpr[2].z gpr[1].z c[1].z
+    // DP4 gpr[2].z gpr[1].z c[0].z
     ps[i++] = ALU_DWORD0(SRC0_SEL(1),
 			 SRC0_REL(ABSOLUTE),
 			 SRC0_ELEM(ELEM_Z),
 			 SRC0_NEG(0),
-			 SRC1_SEL(257),
+			 SRC1_SEL(256),
 			 SRC1_REL(ABSOLUTE),
 			 SRC1_ELEM(ELEM_Z),
 			 SRC1_NEG(0),
@@ -3583,12 +3575,12 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     DST_ELEM(ELEM_Z),
 			     CLAMP(1));
     // 10 - alu 7
-    // DP4 gpr[2].w gpr[1].w c[1].w
+    // DP4 gpr[2].w gpr[1].w c[0].w
     ps[i++] = ALU_DWORD0(SRC0_SEL(1),
 			 SRC0_REL(ABSOLUTE),
 			 SRC0_ELEM(ELEM_W),
 			 SRC0_NEG(0),
-			 SRC1_SEL(257),
+			 SRC1_SEL(256),
 			 SRC1_REL(ABSOLUTE),
 			 SRC1_ELEM(ELEM_W),
 			 SRC1_NEG(0),
@@ -3610,12 +3602,12 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     DST_ELEM(ELEM_W),
 			     CLAMP(1));
     // 11 - alu 8
-    // DP4 gpr[2].x gpr[1].x c[2].x
+    // DP4 gpr[2].x gpr[1].x c[1].x
     ps[i++] = ALU_DWORD0(SRC0_SEL(1),
 			 SRC0_REL(ABSOLUTE),
 			 SRC0_ELEM(ELEM_X),
 			 SRC0_NEG(0),
-			 SRC1_SEL(258),
+			 SRC1_SEL(257),
 			 SRC1_REL(ABSOLUTE),
 			 SRC1_ELEM(ELEM_X),
 			 SRC1_NEG(0),
@@ -3637,12 +3629,12 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     DST_ELEM(ELEM_X),
 			     CLAMP(1));
     // 12 - alu 9
-    // DP4 gpr[2].y gpr[1].y c[2].y
+    // DP4 gpr[2].y gpr[1].y c[1].y
     ps[i++] = ALU_DWORD0(SRC0_SEL(1),
 			 SRC0_REL(ABSOLUTE),
 			 SRC0_ELEM(ELEM_Y),
 			 SRC0_NEG(0),
-			 SRC1_SEL(258),
+			 SRC1_SEL(257),
 			 SRC1_REL(ABSOLUTE),
 			 SRC1_ELEM(ELEM_Y),
 			 SRC1_NEG(0),
@@ -3654,7 +3646,7 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     SRC1_ABS(0),
 			     UPDATE_EXECUTE_MASK(0),
 			     UPDATE_PRED(0),
-			     WRITE_MASK(0),
+			     WRITE_MASK(1),
 			     FOG_MERGE(0),
 			     OMOD(SQ_ALU_OMOD_OFF),
 			     ALU_INST(SQ_OP2_INST_DOT4),
@@ -3664,12 +3656,12 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     DST_ELEM(ELEM_Y),
 			     CLAMP(1));
     // 13 - alu 10
-    // DP4 gpr[2].z gpr[1].z c[2].z
+    // DP4 gpr[2].z gpr[1].z c[1].z
     ps[i++] = ALU_DWORD0(SRC0_SEL(1),
 			 SRC0_REL(ABSOLUTE),
 			 SRC0_ELEM(ELEM_Z),
 			 SRC0_NEG(0),
-			 SRC1_SEL(258),
+			 SRC1_SEL(257),
 			 SRC1_REL(ABSOLUTE),
 			 SRC1_ELEM(ELEM_Z),
 			 SRC1_NEG(0),
@@ -3681,7 +3673,7 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     SRC1_ABS(0),
 			     UPDATE_EXECUTE_MASK(0),
 			     UPDATE_PRED(0),
-			     WRITE_MASK(1),
+			     WRITE_MASK(0),
 			     FOG_MERGE(0),
 			     OMOD(SQ_ALU_OMOD_OFF),
 			     ALU_INST(SQ_OP2_INST_DOT4),
@@ -3691,12 +3683,12 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     DST_ELEM(ELEM_Z),
 			     CLAMP(1));
     // 14 - alu 11
-    // DP4 gpr[2].w gpr[1].w c[2].w
+    // DP4 gpr[2].w gpr[1].w c[1].w
     ps[i++] = ALU_DWORD0(SRC0_SEL(1),
 			 SRC0_REL(ABSOLUTE),
 			 SRC0_ELEM(ELEM_W),
 			 SRC0_NEG(0),
-			 SRC1_SEL(258),
+			 SRC1_SEL(257),
 			 SRC1_REL(ABSOLUTE),
 			 SRC1_ELEM(ELEM_W),
 			 SRC1_NEG(0),
@@ -3718,12 +3710,12 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     DST_ELEM(ELEM_W),
 			     CLAMP(1));
     // 15 - alu 12
-    // MOV gpr[3].x gpr[2].x
-    ps[i++] = ALU_DWORD0(SRC0_SEL(2),
+    // DP4 gpr[2].x gpr[1].x c[2].x
+    ps[i++] = ALU_DWORD0(SRC0_SEL(1),
 			 SRC0_REL(ABSOLUTE),
 			 SRC0_ELEM(ELEM_X),
 			 SRC0_NEG(0),
-			 SRC1_SEL(0),
+			 SRC1_SEL(258),
 			 SRC1_REL(ABSOLUTE),
 			 SRC1_ELEM(ELEM_X),
 			 SRC1_NEG(0),
@@ -3735,24 +3727,24 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     SRC1_ABS(0),
 			     UPDATE_EXECUTE_MASK(0),
 			     UPDATE_PRED(0),
-			     WRITE_MASK(1),
+			     WRITE_MASK(0),
 			     FOG_MERGE(0),
 			     OMOD(SQ_ALU_OMOD_OFF),
-			     ALU_INST(SQ_OP2_INST_MOV),
-			     BANK_SWIZZLE(SQ_ALU_VEC_210),
-			     DST_GPR(3),
+			     ALU_INST(SQ_OP2_INST_DOT4),
+			     BANK_SWIZZLE(SQ_ALU_VEC_102),
+			     DST_GPR(2),
 			     DST_REL(ABSOLUTE),
 			     DST_ELEM(ELEM_X),
-			     CLAMP(0));
+			     CLAMP(1));
     // 16 - alu 13
-    // MOV gpr[3].y gpr[2].y
-    ps[i++] = ALU_DWORD0(SRC0_SEL(2),
+    // DP4 gpr[2].y gpr[1].y c[2].y
+    ps[i++] = ALU_DWORD0(SRC0_SEL(1),
 			 SRC0_REL(ABSOLUTE),
 			 SRC0_ELEM(ELEM_Y),
 			 SRC0_NEG(0),
-			 SRC1_SEL(0),
+			 SRC1_SEL(258),
 			 SRC1_REL(ABSOLUTE),
-			 SRC1_ELEM(ELEM_X),
+			 SRC1_ELEM(ELEM_Y),
 			 SRC1_NEG(0),
 			 INDEX_MODE(SQ_INDEX_LOOP),
 			 PRED_SEL(SQ_PRED_SEL_OFF),
@@ -3762,24 +3754,24 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     SRC1_ABS(0),
 			     UPDATE_EXECUTE_MASK(0),
 			     UPDATE_PRED(0),
-			     WRITE_MASK(1),
+			     WRITE_MASK(0),
 			     FOG_MERGE(0),
 			     OMOD(SQ_ALU_OMOD_OFF),
-			     ALU_INST(SQ_OP2_INST_MOV),
-			     BANK_SWIZZLE(SQ_ALU_VEC_210),
-			     DST_GPR(3),
+			     ALU_INST(SQ_OP2_INST_DOT4),
+			     BANK_SWIZZLE(SQ_ALU_VEC_102),
+			     DST_GPR(2),
 			     DST_REL(ABSOLUTE),
 			     DST_ELEM(ELEM_Y),
-			     CLAMP(0));
+			     CLAMP(1));
     // 17 - alu 14
-    // MOV gpr[3].z gpr[2].z
-    ps[i++] = ALU_DWORD0(SRC0_SEL(2),
+    // DP4 gpr[2].z gpr[1].z c[2].z
+    ps[i++] = ALU_DWORD0(SRC0_SEL(1),
 			 SRC0_REL(ABSOLUTE),
 			 SRC0_ELEM(ELEM_Z),
 			 SRC0_NEG(0),
-			 SRC1_SEL(0),
+			 SRC1_SEL(258),
 			 SRC1_REL(ABSOLUTE),
-			 SRC1_ELEM(ELEM_X),
+			 SRC1_ELEM(ELEM_Z),
 			 SRC1_NEG(0),
 			 INDEX_MODE(SQ_INDEX_LOOP),
 			 PRED_SEL(SQ_PRED_SEL_OFF),
@@ -3792,21 +3784,21 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     WRITE_MASK(1),
 			     FOG_MERGE(0),
 			     OMOD(SQ_ALU_OMOD_OFF),
-			     ALU_INST(SQ_OP2_INST_MOV),
-			     BANK_SWIZZLE(SQ_ALU_VEC_210),
-			     DST_GPR(3),
+			     ALU_INST(SQ_OP2_INST_DOT4),
+			     BANK_SWIZZLE(SQ_ALU_VEC_102),
+			     DST_GPR(2),
 			     DST_REL(ABSOLUTE),
 			     DST_ELEM(ELEM_Z),
-			     CLAMP(0));
+			     CLAMP(1));
     // 18 - alu 15
-    // MOV gpr[3].w gpr[2].w
-    ps[i++] = ALU_DWORD0(SRC0_SEL(2),
+    // DP4 gpr[2].w gpr[1].w c[2].w
+    ps[i++] = ALU_DWORD0(SRC0_SEL(1),
 			 SRC0_REL(ABSOLUTE),
 			 SRC0_ELEM(ELEM_W),
 			 SRC0_NEG(0),
-			 SRC1_SEL(0),
+			 SRC1_SEL(258),
 			 SRC1_REL(ABSOLUTE),
-			 SRC1_ELEM(ELEM_X),
+			 SRC1_ELEM(ELEM_W),
 			 SRC1_NEG(0),
 			 INDEX_MODE(SQ_INDEX_LOOP),
 			 PRED_SEL(SQ_PRED_SEL_OFF),
@@ -3816,15 +3808,15 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     SRC1_ABS(0),
 			     UPDATE_EXECUTE_MASK(0),
 			     UPDATE_PRED(0),
-			     WRITE_MASK(1),
+			     WRITE_MASK(0),
 			     FOG_MERGE(0),
 			     OMOD(SQ_ALU_OMOD_OFF),
-			     ALU_INST(SQ_OP2_INST_MOV),
-			     BANK_SWIZZLE(SQ_ALU_VEC_012),
-			     DST_GPR(3),
+			     ALU_INST(SQ_OP2_INST_DOT4),
+			     BANK_SWIZZLE(SQ_ALU_VEC_021),
+			     DST_GPR(2),
 			     DST_REL(ABSOLUTE),
 			     DST_ELEM(ELEM_W),
-			     CLAMP(0));
+			     CLAMP(1));
     // 19 - alignment
     ps[i++] = 0x00000000;
     ps[i++] = 0x00000000;
@@ -4057,6 +4049,150 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			 MEGA_FETCH(0));
     vs[i++] = VTX_DWORD_PAD;
 
+    // comp mask vs ---------------------------------------
+    i = accel_state->comp_mask_vs_offset / 4;
+    //0
+    vs[i++] = CF_DWORD0(ADDR(4));
+    vs[i++] = CF_DWORD1(POP_COUNT(0),
+			CF_CONST(0),
+			COND(SQ_CF_COND_ACTIVE),
+			I_COUNT(3),
+			CALL_COUNT(0),
+			END_OF_PROGRAM(0),
+			VALID_PIXEL_MODE(0),
+			CF_INST(SQ_CF_INST_VTX),
+			WHOLE_QUAD_MODE(0),
+			BARRIER(1));
+    //1 - dst
+    vs[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
+				      TYPE(SQ_EXPORT_POS),
+				      RW_GPR(2),
+				      RW_REL(ABSOLUTE),
+				      INDEX_GPR(0),
+				      ELEM_SIZE(0));
+    vs[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+					   SRC_SEL_Y(SQ_SEL_Y),
+					   SRC_SEL_Z(SQ_SEL_Z),
+					   SRC_SEL_W(SQ_SEL_W),
+					   R6xx_ELEM_LOOP(0),
+					   BURST_COUNT(1),
+					   END_OF_PROGRAM(0),
+					   VALID_PIXEL_MODE(0),
+					   CF_INST(SQ_CF_INST_EXPORT_DONE),
+					   WHOLE_QUAD_MODE(0),
+					   BARRIER(1));
+    //2 - src
+    vs[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
+				      TYPE(SQ_EXPORT_PARAM),
+				      RW_GPR(1),
+				      RW_REL(ABSOLUTE),
+				      INDEX_GPR(0),
+				      ELEM_SIZE(0));
+    vs[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+					   SRC_SEL_Y(SQ_SEL_Y),
+					   SRC_SEL_Z(SQ_SEL_Z),
+					   SRC_SEL_W(SQ_SEL_W),
+					   R6xx_ELEM_LOOP(0),
+					   BURST_COUNT(1),
+					   END_OF_PROGRAM(0),
+					   VALID_PIXEL_MODE(0),
+					   CF_INST(SQ_CF_INST_EXPORT),
+					   WHOLE_QUAD_MODE(0),
+					   BARRIER(0));
+    //3 - mask
+    vs[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(1),
+				      TYPE(SQ_EXPORT_PARAM),
+				      RW_GPR(0),
+				      RW_REL(ABSOLUTE),
+				      INDEX_GPR(0),
+				      ELEM_SIZE(0));
+    vs[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+					   SRC_SEL_Y(SQ_SEL_Y),
+					   SRC_SEL_Z(SQ_SEL_Z),
+					   SRC_SEL_W(SQ_SEL_W),
+					   R6xx_ELEM_LOOP(0),
+					   BURST_COUNT(1),
+					   END_OF_PROGRAM(1),
+					   VALID_PIXEL_MODE(0),
+					   CF_INST(SQ_CF_INST_EXPORT_DONE),
+					   WHOLE_QUAD_MODE(0),
+					   BARRIER(0));
+    //4/5 - dst
+    vs[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+			 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+			 FETCH_WHOLE_QUAD(0),
+			 BUFFER_ID(0),
+			 SRC_GPR(0),
+			 SRC_REL(ABSOLUTE),
+			 SRC_SEL_X(SQ_SEL_X),
+			 MEGA_FETCH_COUNT(24));
+    vs[i++] = VTX_DWORD1_GPR(DST_GPR(2),
+			     DST_REL(0),
+			     DST_SEL_X(SQ_SEL_X),
+			     DST_SEL_Y(SQ_SEL_Y),
+			     DST_SEL_Z(SQ_SEL_0),
+			     DST_SEL_W(SQ_SEL_1),
+			     USE_CONST_FIELDS(0),
+			     DATA_FORMAT(FMT_32_32_FLOAT), //xxx
+			     NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), //xxx
+			     FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), //xxx
+			     SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+    vs[i++] = VTX_DWORD2(OFFSET(0),
+			 ENDIAN_SWAP(ENDIAN_NONE),
+			 CONST_BUF_NO_STRIDE(0),
+			 MEGA_FETCH(1));
+    vs[i++] = VTX_DWORD_PAD;
+    //6/7 - src
+    vs[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+			 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+			 FETCH_WHOLE_QUAD(0),
+			 BUFFER_ID(0),
+			 SRC_GPR(0),
+			 SRC_REL(ABSOLUTE),
+			 SRC_SEL_X(SQ_SEL_X),
+			 MEGA_FETCH_COUNT(8));
+    vs[i++] = VTX_DWORD1_GPR(DST_GPR(1),
+			     DST_REL(0),
+			     DST_SEL_X(SQ_SEL_X),
+			     DST_SEL_Y(SQ_SEL_Y),
+			     DST_SEL_Z(SQ_SEL_0),
+			     DST_SEL_W(SQ_SEL_1),
+			     USE_CONST_FIELDS(0),
+			     DATA_FORMAT(FMT_32_32_FLOAT), //xxx
+			     NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), //xxx
+			     FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), //xxx
+			     SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+    vs[i++] = VTX_DWORD2(OFFSET(8),
+			 ENDIAN_SWAP(ENDIAN_NONE),
+			 CONST_BUF_NO_STRIDE(0),
+			 MEGA_FETCH(0));
+    vs[i++] = VTX_DWORD_PAD;
+    //8/9 - mask
+    vs[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+			 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+			 FETCH_WHOLE_QUAD(0),
+			 BUFFER_ID(0),
+			 SRC_GPR(0),
+			 SRC_REL(ABSOLUTE),
+			 SRC_SEL_X(SQ_SEL_X),
+			 MEGA_FETCH_COUNT(8));
+    vs[i++] = VTX_DWORD1_GPR(DST_GPR(0),
+			     DST_REL(0),
+			     DST_SEL_X(SQ_SEL_X),
+			     DST_SEL_Y(SQ_SEL_Y),
+			     DST_SEL_Z(SQ_SEL_0),
+			     DST_SEL_W(SQ_SEL_1),
+			     USE_CONST_FIELDS(0),
+			     DATA_FORMAT(FMT_32_32_FLOAT), //xxx
+			     NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), //xxx
+			     FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), //xxx
+			     SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+    vs[i++] = VTX_DWORD2(OFFSET(16),
+			 ENDIAN_SWAP(ENDIAN_NONE),
+			 CONST_BUF_NO_STRIDE(0),
+			 MEGA_FETCH(0));
+    vs[i++] = VTX_DWORD_PAD;
+
     // comp mask ps ---------------------------------------
     // not yet
 
diff --git a/src/r600_textured_videofuncs.c b/src/r600_textured_videofuncs.c
index d5add19..4a7391c 100644
--- a/src/r600_textured_videofuncs.c
+++ b/src/r600_textured_videofuncs.c
@@ -118,9 +118,16 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
     int uv_offset;
 
     static float ps_alu_consts[] = {
-	1.0,  0.0,      1.13983,  -1.13983/2,        // r - c[0]
-	1.0, -0.39465, -0.5806,  (0.39465+0.5806)/2, // g - c[1]
-	1.0,  2.03211,  0.0,     -2.03211/2,         // b - c[2]
+        1.0,  0.0,      1.4020,    0,  // r - c[0]
+        1.0, -0.34414, -0.71414,  0,  // g - c[1]
+        1.0,  1.7720,   0.0,        0,  // b - c[2]
+	/* Constants for undoing Y'CbCr scaling
+	 *  - Y' is scaled from 16:235
+	 *  - Cb/Cr are scaled from 16:240
+	 * Unscaled value N' = N * N_mul + N_shift (Y' in range [0, 1], Cb/Cr in range [-0.5, 0.5])
+	 * Vector is [Y_mul, Y_shift, C_mul, C_shift]
+	 */
+        256.0/219.0, -16.0/219.0, 256.0/224.0, -128.0/224.0,
     };
 
     CLEAR (cb_conf);


More information about the xorg-commit mailing list