xf86-video-ati: Branch 'master' - 3 commits
Alex Deucher
agd5f at kemper.freedesktop.org
Wed May 13 13:15:18 PDT 2009
src/r600_exa.c | 93 ++---
src/r600_shader.c | 620 ++++++++++++++++++++++++++++++++++++---
src/r600_textured_videofuncs.c | 22 +
src/radeon_commonfuncs.c | 306 +++++++++++++++----
src/radeon_exa_render.c | 99 +++++-
src/radeon_reg.h | 13
src/radeon_textured_videofuncs.c | 40 +-
7 files changed, 1002 insertions(+), 191 deletions(-)
New commits:
commit fa09b058c7a17689989e600ffd465856a058579d
Author: Alex Deucher <alexdeucher at gmail.com>
Date: Wed May 13 16:10:40 2009 -0400
R6xx/R7xx Xv: normalize texture coordinates in the vertex shader
diff --git a/src/r600_shader.c b/src/r600_shader.c
index fba8dcb..ceabad8 100644
--- a/src/r600_shader.c
+++ b/src/r600_shader.c
@@ -457,7 +457,7 @@ int R600_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader)
int i = 0;
/* 0 */
- shader[i++] = CF_DWORD0(ADDR(4));
+ shader[i++] = CF_DWORD0(ADDR(6));
shader[i++] = CF_DWORD1(POP_COUNT(0),
CF_CONST(0),
COND(SQ_CF_COND_ACTIVE),
@@ -468,7 +468,22 @@ int R600_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader)
CF_INST(SQ_CF_INST_VTX),
WHOLE_QUAD_MODE(0),
BARRIER(1));
- /* 1 */
+
+ /* 1 - ALU */
+ shader[i++] = CF_ALU_DWORD0(ADDR(4),
+ KCACHE_BANK0(0),
+ KCACHE_BANK1(0),
+ KCACHE_MODE0(SQ_CF_KCACHE_NOP));
+ shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+ KCACHE_ADDR0(0),
+ KCACHE_ADDR1(0),
+ I_COUNT(2),
+ USES_WATERFALL(0),
+ CF_INST(SQ_CF_INST_ALU),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+
+ /* 2 */
shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
TYPE(SQ_EXPORT_POS),
RW_GPR(1),
@@ -486,7 +501,7 @@ int R600_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader)
CF_INST(SQ_CF_INST_EXPORT_DONE),
WHOLE_QUAD_MODE(0),
BARRIER(1));
- /* 2 */
+ /* 3 */
shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
TYPE(SQ_EXPORT_PARAM),
RW_GPR(0),
@@ -504,9 +519,63 @@ int R600_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader)
CF_INST(SQ_CF_INST_EXPORT_DONE),
WHOLE_QUAD_MODE(0),
BARRIER(0));
- shader[i++] = 0x00000000;
- shader[i++] = 0x00000000;
- /* 4/5 */
+
+
+ /* 4 texX / w */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(256),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(ChipSet,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MUL),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(1));
+
+ /* 5 texY / h */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(256),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(ChipSet,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MUL),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(1));
+
+ /* 6/7 */
shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
FETCH_WHOLE_QUAD(0),
@@ -531,7 +600,7 @@ int R600_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader)
CONST_BUF_NO_STRIDE(0),
MEGA_FETCH(1));
shader[i++] = VTX_DWORD_PAD;
- /* 6/7 */
+ /* 8/9 */
shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
FETCH_WHOLE_QUAD(0),
diff --git a/src/r600_textured_videofuncs.c b/src/r600_textured_videofuncs.c
index 23e7f40..6af0949 100644
--- a/src/r600_textured_videofuncs.c
+++ b/src/r600_textured_videofuncs.c
@@ -160,6 +160,7 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
int ref = pPriv->transform_index;
Bool needgamma = FALSE;
float ps_alu_consts[12];
+ float vs_alu_consts[4];
cont = RTFContrast(pPriv->contrast);
bright = RTFBrightness(pPriv->brightness);
@@ -521,6 +522,15 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
EREG(accel_state->ib, SPI_INTERP_CONTROL_0, 0);
+ vs_alu_consts[0] = 1.0 / pPriv->w;
+ vs_alu_consts[1] = 1.0 / pPriv->h;
+ vs_alu_consts[2] = 0.0;
+ vs_alu_consts[3] = 0.0;
+
+ /* VS alu constants */
+ set_alu_consts(pScrn, accel_state->ib, SQ_ALU_CONSTANT_vs,
+ sizeof(vs_alu_consts) / SQ_ALU_CONSTANT_offset, vs_alu_consts);
+
if (pPriv->vsync) {
xf86CrtcPtr crtc = radeon_xv_pick_best_crtc(pScrn,
pPriv->drw_x,
@@ -571,18 +581,18 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
vb[0] = (float)dstX;
vb[1] = (float)dstY;
- vb[2] = (float)srcX / pPriv->w;
- vb[3] = (float)srcY / pPriv->h;
+ vb[2] = (float)srcX;
+ vb[3] = (float)srcY;
vb[4] = (float)dstX;
vb[5] = (float)(dstY + dsth);
- vb[6] = (float)srcX / pPriv->w;
- vb[7] = (float)(srcY + srch) / pPriv->h;
+ vb[6] = (float)srcX;
+ vb[7] = (float)(srcY + srch);
vb[8] = (float)(dstX + dstw);
vb[9] = (float)(dstY + dsth);
- vb[10] = (float)(srcX + srcw) / pPriv->w;
- vb[11] = (float)(srcY + srch) / pPriv->h;
+ vb[10] = (float)(srcX + srcw);
+ vb[11] = (float)(srcY + srch);
accel_state->vb_index += 3;
commit 026b6f820d6caea17d2a082193e850713d5770a8
Author: Alex Deucher <alexdeucher at gmail.com>
Date: Wed May 13 15:48:32 2009 -0400
R6xx/R7xx: do EXA transforms in the vertex shader
diff --git a/src/r600_exa.c b/src/r600_exa.c
index 2dc33a8..18831f7 100644
--- a/src/r600_exa.c
+++ b/src/r600_exa.c
@@ -924,17 +924,6 @@ do { \
#define xFixedToFloat(f) (((float) (f)) / 65536)
-static inline void transformPoint(PictTransform *transform, xPointFixed *point)
-{
- PictVector v;
- v.vector[0] = point->x;
- v.vector[1] = point->y;
- v.vector[2] = xFixed1;
- PictureTransformPoint(transform, &v);
- point->x = v.vector[0];
- point->y = v.vector[1];
-}
-
struct blendinfo {
Bool dst_alpha;
Bool src_alpha;
@@ -1099,6 +1088,7 @@ static Bool R600TextureSetup(PicturePtr pPict, PixmapPtr pPix,
tex_resource_t tex_res;
tex_sampler_t tex_samp;
int pix_r, pix_g, pix_b, pix_a;
+ float vs_alu_consts[8];
CLEAR (tex_res);
CLEAR (tex_samp);
@@ -1118,9 +1108,6 @@ static Bool R600TextureSetup(PicturePtr pPict, PixmapPtr pPix,
break;
}
- accel_state->texW[unit] = w;
- accel_state->texH[unit] = h;
-
/* ErrorF("Tex %d setup %dx%d\n", unit, w, h); */
/* flush texture cache */
@@ -1294,9 +1281,34 @@ static Bool R600TextureSetup(PicturePtr pPict, PixmapPtr pPix,
if (pPict->transform != 0) {
accel_state->is_transform[unit] = TRUE;
accel_state->transform[unit] = pPict->transform;
- } else
+
+ vs_alu_consts[0] = xFixedToFloat(pPict->transform->matrix[0][0]);
+ vs_alu_consts[1] = xFixedToFloat(pPict->transform->matrix[0][1]);
+ vs_alu_consts[2] = xFixedToFloat(pPict->transform->matrix[0][2]);
+ vs_alu_consts[3] = 1.0 / w;
+
+ vs_alu_consts[4] = xFixedToFloat(pPict->transform->matrix[1][0]);
+ vs_alu_consts[5] = xFixedToFloat(pPict->transform->matrix[1][1]);
+ vs_alu_consts[6] = xFixedToFloat(pPict->transform->matrix[1][2]);
+ vs_alu_consts[7] = 1.0 / h;
+ } else {
accel_state->is_transform[unit] = FALSE;
+ vs_alu_consts[0] = 1.0;
+ vs_alu_consts[1] = 0.0;
+ vs_alu_consts[2] = 0.0;
+ vs_alu_consts[3] = 1.0 / w;
+
+ vs_alu_consts[4] = 0.0;
+ vs_alu_consts[5] = 1.0;
+ vs_alu_consts[6] = 0.0;
+ vs_alu_consts[7] = 1.0 / h;
+ }
+
+ /* VS alu constants */
+ set_alu_consts(pScrn, accel_state->ib, SQ_ALU_CONSTANT_vs + (unit * 2),
+ sizeof(vs_alu_consts) / SQ_ALU_CONSTANT_offset, vs_alu_consts);
+
return TRUE;
}
@@ -1586,14 +1598,6 @@ static void R600Composite(PixmapPtr pDst,
srcBottomRight.x = IntToxFixed(srcX + w);
srcBottomRight.y = IntToxFixed(srcY + h);
- /* XXX do transform in vertex shader */
- if (accel_state->is_transform[0]) {
- transformPoint(accel_state->transform[0], &srcTopLeft);
- transformPoint(accel_state->transform[0], &srcTopRight);
- transformPoint(accel_state->transform[0], &srcBottomLeft);
- transformPoint(accel_state->transform[0], &srcBottomRight);
- }
-
if (accel_state->has_mask) {
xPointFixed maskTopLeft, maskTopRight, maskBottomLeft, maskBottomRight;
@@ -1616,33 +1620,26 @@ static void R600Composite(PixmapPtr pDst,
maskBottomRight.x = IntToxFixed(maskX + w);
maskBottomRight.y = IntToxFixed(maskY + h);
- if (accel_state->is_transform[1]) {
- transformPoint(accel_state->transform[1], &maskTopLeft);
- transformPoint(accel_state->transform[1], &maskTopRight);
- transformPoint(accel_state->transform[1], &maskBottomLeft);
- transformPoint(accel_state->transform[1], &maskBottomRight);
- }
-
vb[0] = (float)dstX;
vb[1] = (float)dstY;
- vb[2] = xFixedToFloat(srcTopLeft.x) / accel_state->texW[0];
- vb[3] = xFixedToFloat(srcTopLeft.y) / accel_state->texH[0];
- vb[4] = xFixedToFloat(maskTopLeft.x) / accel_state->texW[1];
- vb[5] = xFixedToFloat(maskTopLeft.y) / accel_state->texH[1];
+ vb[2] = xFixedToFloat(srcTopLeft.x);
+ vb[3] = xFixedToFloat(srcTopLeft.y);
+ vb[4] = xFixedToFloat(maskTopLeft.x);
+ vb[5] = xFixedToFloat(maskTopLeft.y);
vb[6] = (float)dstX;
vb[7] = (float)(dstY + h);
- vb[8] = xFixedToFloat(srcBottomLeft.x) / accel_state->texW[0];
- vb[9] = xFixedToFloat(srcBottomLeft.y) / accel_state->texH[0];
- vb[10] = xFixedToFloat(maskBottomLeft.x) / accel_state->texW[1];
- vb[11] = xFixedToFloat(maskBottomLeft.y) / accel_state->texH[1];
+ vb[8] = xFixedToFloat(srcBottomLeft.x);
+ vb[9] = xFixedToFloat(srcBottomLeft.y);
+ vb[10] = xFixedToFloat(maskBottomLeft.x);
+ vb[11] = xFixedToFloat(maskBottomLeft.y);
vb[12] = (float)(dstX + w);
vb[13] = (float)(dstY + h);
- vb[14] = xFixedToFloat(srcBottomRight.x) / accel_state->texW[0];
- vb[15] = xFixedToFloat(srcBottomRight.y) / accel_state->texH[0];
- vb[16] = xFixedToFloat(maskBottomRight.x) / accel_state->texW[1];
- vb[17] = xFixedToFloat(maskBottomRight.y) / accel_state->texH[1];
+ vb[14] = xFixedToFloat(srcBottomRight.x);
+ vb[15] = xFixedToFloat(srcBottomRight.y);
+ vb[16] = xFixedToFloat(maskBottomRight.x);
+ vb[17] = xFixedToFloat(maskBottomRight.y);
} else {
if (((accel_state->vb_index + 3) * 16) > (accel_state->ib->total / 2)) {
@@ -1657,18 +1654,18 @@ static void R600Composite(PixmapPtr pDst,
vb[0] = (float)dstX;
vb[1] = (float)dstY;
- vb[2] = xFixedToFloat(srcTopLeft.x) / accel_state->texW[0];
- vb[3] = xFixedToFloat(srcTopLeft.y) / accel_state->texH[0];
+ vb[2] = xFixedToFloat(srcTopLeft.x);
+ vb[3] = xFixedToFloat(srcTopLeft.y);
vb[4] = (float)dstX;
vb[5] = (float)(dstY + h);
- vb[6] = xFixedToFloat(srcBottomLeft.x) / accel_state->texW[0];
- vb[7] = xFixedToFloat(srcBottomLeft.y) / accel_state->texH[0];
+ vb[6] = xFixedToFloat(srcBottomLeft.x);
+ vb[7] = xFixedToFloat(srcBottomLeft.y);
vb[8] = (float)(dstX + w);
vb[9] = (float)(dstY + h);
- vb[10] = xFixedToFloat(srcBottomRight.x) / accel_state->texW[0];
- vb[11] = xFixedToFloat(srcBottomRight.y) / accel_state->texH[0];
+ vb[10] = xFixedToFloat(srcBottomRight.x);
+ vb[11] = xFixedToFloat(srcBottomRight.y);
}
accel_state->vb_index += 3;
diff --git a/src/r600_shader.c b/src/r600_shader.c
index 0a820cf..fba8dcb 100644
--- a/src/r600_shader.c
+++ b/src/r600_shader.c
@@ -1322,7 +1322,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
WHOLE_QUAD_MODE(0),
BARRIER(0));
/* 1 */
- shader[i++] = CF_DWORD0(ADDR(14));
+ shader[i++] = CF_DWORD0(ADDR(28));
shader[i++] = CF_DWORD1(POP_COUNT(0),
CF_CONST(0),
COND(SQ_CF_COND_NOT_BOOL),
@@ -1346,7 +1346,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
WHOLE_QUAD_MODE(0),
BARRIER(1));
/* 3 - mask sub */
- shader[i++] = CF_DWORD0(ADDR(8));
+ shader[i++] = CF_DWORD0(ADDR(22));
shader[i++] = CF_DWORD1(POP_COUNT(0),
CF_CONST(0),
COND(SQ_CF_COND_ACTIVE),
@@ -1357,7 +1357,22 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
CF_INST(SQ_CF_INST_VTX),
WHOLE_QUAD_MODE(0),
BARRIER(1));
- /* 4 - dst */
+
+ /* 4 - ALU */
+ shader[i++] = CF_ALU_DWORD0(ADDR(9),
+ KCACHE_BANK0(0),
+ KCACHE_BANK1(0),
+ KCACHE_MODE0(SQ_CF_KCACHE_NOP));
+ shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+ KCACHE_ADDR0(0),
+ KCACHE_ADDR1(0),
+ I_COUNT(12),
+ USES_WATERFALL(0),
+ CF_INST(SQ_CF_INST_ALU),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+
+ /* 5 - dst */
shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
TYPE(SQ_EXPORT_POS),
RW_GPR(2),
@@ -1366,8 +1381,8 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
ELEM_SIZE(0));
shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
SRC_SEL_Y(SQ_SEL_Y),
- SRC_SEL_Z(SQ_SEL_Z),
- SRC_SEL_W(SQ_SEL_W),
+ SRC_SEL_Z(SQ_SEL_0),
+ SRC_SEL_W(SQ_SEL_1),
R6xx_ELEM_LOOP(0),
BURST_COUNT(1),
END_OF_PROGRAM(0),
@@ -1375,7 +1390,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
CF_INST(SQ_CF_INST_EXPORT_DONE),
WHOLE_QUAD_MODE(0),
BARRIER(1));
- /* 5 - src */
+ /* 6 - src */
shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
TYPE(SQ_EXPORT_PARAM),
RW_GPR(1),
@@ -1384,8 +1399,8 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
ELEM_SIZE(0));
shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
SRC_SEL_Y(SQ_SEL_Y),
- SRC_SEL_Z(SQ_SEL_Z),
- SRC_SEL_W(SQ_SEL_W),
+ SRC_SEL_Z(SQ_SEL_0),
+ SRC_SEL_W(SQ_SEL_1),
R6xx_ELEM_LOOP(0),
BURST_COUNT(1),
END_OF_PROGRAM(0),
@@ -1393,7 +1408,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
CF_INST(SQ_CF_INST_EXPORT),
WHOLE_QUAD_MODE(0),
BARRIER(0));
- /* 6 - mask */
+ /* 7 - mask */
shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(1),
TYPE(SQ_EXPORT_PARAM),
RW_GPR(0),
@@ -1402,8 +1417,8 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
ELEM_SIZE(0));
shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
SRC_SEL_Y(SQ_SEL_Y),
- SRC_SEL_Z(SQ_SEL_Z),
- SRC_SEL_W(SQ_SEL_W),
+ SRC_SEL_Z(SQ_SEL_0),
+ SRC_SEL_W(SQ_SEL_1),
R6xx_ELEM_LOOP(0),
BURST_COUNT(1),
END_OF_PROGRAM(0),
@@ -1411,7 +1426,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
CF_INST(SQ_CF_INST_EXPORT_DONE),
WHOLE_QUAD_MODE(0),
BARRIER(0));
- /* 7 */
+ /* 8 */
shader[i++] = CF_DWORD0(ADDR(0));
shader[i++] = CF_DWORD1(POP_COUNT(0),
CF_CONST(0),
@@ -1423,7 +1438,301 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
CF_INST(SQ_CF_INST_RETURN),
WHOLE_QUAD_MODE(0),
BARRIER(1));
- /* 8/9 - dst */
+
+
+ /* 9 srcX MAD */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(256),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(256),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_Z),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0));
+ /* 10 srcY MAD */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(257),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(257),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_Z),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0));
+
+ /* 11 srcX MAD */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(256),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(1),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_Z),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0));
+ /* 12 srcY MAD */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(257),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(1),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(1),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_W),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0));
+
+ /* 13 maskX MAD */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(258),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(258),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_Z),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0));
+
+ /* 14 maskY MAD */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(259),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(259),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_Z),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0));
+
+ /* 15 maskX MAD */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(258),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(0),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_Z),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0));
+ /* 16 maskY MAD */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(259),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(0),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_W),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0));
+
+ /* 17 srcX / w */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(256),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(ChipSet,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MUL),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(1));
+
+ /* 18 srcY / h */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(257),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(ChipSet,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MUL),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(1));
+
+ /* 19 maskX / w */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(258),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(ChipSet,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MUL),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(1));
+
+ /* 20 maskY / h */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(259),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(ChipSet,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MUL),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(1));
+ /* 21 */
+ shader[i++] = 0x00000000;
+ shader[i++] = 0x00000000;
+
+ /* 22/23 - dst */
shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
FETCH_WHOLE_QUAD(0),
@@ -1448,7 +1757,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
CONST_BUF_NO_STRIDE(0),
MEGA_FETCH(1));
shader[i++] = VTX_DWORD_PAD;
- /* 10/11 - src */
+ /* 24/25 - src */
shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
FETCH_WHOLE_QUAD(0),
@@ -1461,8 +1770,8 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
DST_REL(0),
DST_SEL_X(SQ_SEL_X),
DST_SEL_Y(SQ_SEL_Y),
- DST_SEL_Z(SQ_SEL_0),
- DST_SEL_W(SQ_SEL_1),
+ DST_SEL_Z(SQ_SEL_1),
+ DST_SEL_W(SQ_SEL_0),
USE_CONST_FIELDS(0),
DATA_FORMAT(FMT_32_32_FLOAT), /* xxx */
NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), /* xxx */
@@ -1473,7 +1782,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
CONST_BUF_NO_STRIDE(0),
MEGA_FETCH(0));
shader[i++] = VTX_DWORD_PAD;
- /* 12/13 - mask */
+ /* 26/27 - mask */
shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
FETCH_WHOLE_QUAD(0),
@@ -1486,8 +1795,8 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
DST_REL(0),
DST_SEL_X(SQ_SEL_X),
DST_SEL_Y(SQ_SEL_Y),
- DST_SEL_Z(SQ_SEL_0),
- DST_SEL_W(SQ_SEL_1),
+ DST_SEL_Z(SQ_SEL_1),
+ DST_SEL_W(SQ_SEL_0),
USE_CONST_FIELDS(0),
DATA_FORMAT(FMT_32_32_FLOAT), /* xxx */
NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), /* xxx */
@@ -1499,8 +1808,8 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
MEGA_FETCH(0));
shader[i++] = VTX_DWORD_PAD;
- /* 14 - non-mask sub */
- shader[i++] = CF_DWORD0(ADDR(18));
+ /* 28 - non-mask sub */
+ shader[i++] = CF_DWORD0(ADDR(40));
shader[i++] = CF_DWORD1(POP_COUNT(0),
CF_CONST(0),
COND(SQ_CF_COND_ACTIVE),
@@ -1511,7 +1820,22 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
CF_INST(SQ_CF_INST_VTX),
WHOLE_QUAD_MODE(0),
BARRIER(1));
- /* 15 - dst */
+
+ /* 29 - ALU */
+ shader[i++] = CF_ALU_DWORD0(ADDR(33),
+ KCACHE_BANK0(0),
+ KCACHE_BANK1(0),
+ KCACHE_MODE0(SQ_CF_KCACHE_NOP));
+ shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+ KCACHE_ADDR0(0),
+ KCACHE_ADDR1(0),
+ I_COUNT(6),
+ USES_WATERFALL(0),
+ CF_INST(SQ_CF_INST_ALU),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+
+ /* 30 - dst */
shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
TYPE(SQ_EXPORT_POS),
RW_GPR(1),
@@ -1520,8 +1844,8 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
ELEM_SIZE(0));
shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
SRC_SEL_Y(SQ_SEL_Y),
- SRC_SEL_Z(SQ_SEL_Z),
- SRC_SEL_W(SQ_SEL_W),
+ SRC_SEL_Z(SQ_SEL_0),
+ SRC_SEL_W(SQ_SEL_1),
R6xx_ELEM_LOOP(0),
BURST_COUNT(0),
END_OF_PROGRAM(0),
@@ -1529,7 +1853,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
CF_INST(SQ_CF_INST_EXPORT_DONE),
WHOLE_QUAD_MODE(0),
BARRIER(1));
- /* 16 - src */
+ /* 31 - src */
shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
TYPE(SQ_EXPORT_PARAM),
RW_GPR(0),
@@ -1538,8 +1862,8 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
ELEM_SIZE(0));
shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
SRC_SEL_Y(SQ_SEL_Y),
- SRC_SEL_Z(SQ_SEL_Z),
- SRC_SEL_W(SQ_SEL_W),
+ SRC_SEL_Z(SQ_SEL_0),
+ SRC_SEL_W(SQ_SEL_1),
R6xx_ELEM_LOOP(0),
BURST_COUNT(0),
END_OF_PROGRAM(0),
@@ -1547,7 +1871,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
CF_INST(SQ_CF_INST_EXPORT_DONE),
WHOLE_QUAD_MODE(0),
BARRIER(0));
- /* 17 */
+ /* 32 */
shader[i++] = CF_DWORD0(ADDR(0));
shader[i++] = CF_DWORD1(POP_COUNT(0),
CF_CONST(0),
@@ -1559,7 +1883,156 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
CF_INST(SQ_CF_INST_RETURN),
WHOLE_QUAD_MODE(0),
BARRIER(1));
- /* 18/19 - dst */
+
+
+ /* 33 srcX MAD */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(256),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(256),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_Z),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(0));
+ /* 34 srcY MAD */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(257),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(257),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_Z),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(0));
+
+ /* 35 srcX MAD */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(256),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(0),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_Z),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(0));
+ /* 36 srcY MAD */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(257),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(0),
+ SRC2_REL(ABSOLUTE),
+ SRC2_ELEM(ELEM_W),
+ SRC2_NEG(0),
+ ALU_INST(SQ_OP3_INST_MULADD),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(0));
+ /* 37 srcX / w */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(256),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(ChipSet,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MUL),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(1));
+
+ /* 38 srcY / h */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(0),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(257),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_AR_X),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(ChipSet,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MUL),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(1));
+
+ /* 39 */
+ shader[i++] = 0x00000000;
+ shader[i++] = 0x00000000;
+
+ /* 40/41 - dst */
shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
FETCH_WHOLE_QUAD(0),
@@ -1584,7 +2057,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
CONST_BUF_NO_STRIDE(0),
MEGA_FETCH(1));
shader[i++] = VTX_DWORD_PAD;
- /* 20/21 - src */
+ /* 42/43 - src */
shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
FETCH_WHOLE_QUAD(0),
@@ -1597,8 +2070,8 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
DST_REL(0),
DST_SEL_X(SQ_SEL_X),
DST_SEL_Y(SQ_SEL_Y),
- DST_SEL_Z(SQ_SEL_0),
- DST_SEL_W(SQ_SEL_1),
+ DST_SEL_Z(SQ_SEL_1),
+ DST_SEL_W(SQ_SEL_0),
USE_CONST_FIELDS(0),
DATA_FORMAT(FMT_32_32_FLOAT), /* xxx */
NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), /* xxx */
commit cd89241396d1931b04cfbdd8d553be16dbf9c360
Author: Alex Deucher <alexdeucher at gmail.com>
Date: Tue May 12 17:30:02 2009 -0400
R3xx-R5xx: do EXA transforms in the vertex shader
diff --git a/src/radeon_commonfuncs.c b/src/radeon_commonfuncs.c
index ba358ab..28bb6e5 100644
--- a/src/radeon_commonfuncs.c
+++ b/src/radeon_commonfuncs.c
@@ -220,10 +220,10 @@ static void FUNC_NAME(RADEONInit3DEngine)(ScrnInfoPtr pScrn)
/* pre-load the vertex shaders */
if (info->accel_state->has_tcl) {
- /* exa mask/Xv bicubic shader program */
- BEGIN_ACCEL(13);
- OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_INDX_REG, 0);
- /* PVS inst 0 */
+ BEGIN_ACCEL(37);
+ /* exa composite shader program */
+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_INDX_REG, R300_PVS_VECTOR_INST_INDEX(0));
+ /* PVS inst 0 - dst X,Y */
OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
(R300_PVS_DST_OPCODE(R300_VE_ADD) |
R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_OUT) |
@@ -235,8 +235,8 @@ static void FUNC_NAME(RADEONInit3DEngine)(ScrnInfoPtr pScrn)
R300_PVS_SRC_OFFSET(0) |
R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_X) |
R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_Y) |
- R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_Z) |
- R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_W)));
+ R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) |
+ R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_1)));
OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
(R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
R300_PVS_SRC_OFFSET(0) |
@@ -252,20 +252,26 @@ static void FUNC_NAME(RADEONInit3DEngine)(ScrnInfoPtr pScrn)
R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) |
R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0)));
- /* PVS inst 1 */
+ /* PVS inst 1 - src X */
OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
- (R300_PVS_DST_OPCODE(R300_VE_ADD) |
- R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_OUT) |
- R300_PVS_DST_OFFSET(1) |
- R300_PVS_DST_WE_X | R300_PVS_DST_WE_Y |
- R300_PVS_DST_WE_Z | R300_PVS_DST_WE_W));
+ (R300_PVS_DST_OPCODE(R300_VE_DOT_PRODUCT) |
+ R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_TEMPORARY) |
+ R300_PVS_DST_OFFSET(0) |
+ R300_PVS_DST_WE_X));
OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
(R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
R300_PVS_SRC_OFFSET(6) |
R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_X) |
R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_Y) |
+ R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_1) |
+ R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0)));
+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
+ (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_CONSTANT) |
+ R300_PVS_SRC_OFFSET(0) |
+ R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_X) |
+ R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_Y) |
R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_Z) |
- R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_W)));
+ R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0)));
OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
(R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
R300_PVS_SRC_OFFSET(6) |
@@ -273,6 +279,27 @@ static void FUNC_NAME(RADEONInit3DEngine)(ScrnInfoPtr pScrn)
R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_FORCE_0) |
R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) |
R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0)));
+
+ /* PVS inst 2 - src Y */
+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
+ (R300_PVS_DST_OPCODE(R300_VE_DOT_PRODUCT) |
+ R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_TEMPORARY) |
+ R300_PVS_DST_OFFSET(0) |
+ R300_PVS_DST_WE_Y));
+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
+ (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
+ R300_PVS_SRC_OFFSET(6) |
+ R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_X) |
+ R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_Y) |
+ R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_1) |
+ R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0)));
+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
+ (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_CONSTANT) |
+ R300_PVS_SRC_OFFSET(1) |
+ R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_X) |
+ R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_Y) |
+ R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_Z) |
+ R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0)));
OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
(R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
R300_PVS_SRC_OFFSET(6) |
@@ -281,82 +308,138 @@ static void FUNC_NAME(RADEONInit3DEngine)(ScrnInfoPtr pScrn)
R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) |
R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0)));
- /* PVS inst 2 */
+ /* PVS inst 3 - src X / w */
OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
- (R300_PVS_DST_OPCODE(R300_VE_ADD) |
+ (R300_PVS_DST_OPCODE(R300_VE_MULTIPLY) |
R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_OUT) |
- R300_PVS_DST_OFFSET(2) |
- R300_PVS_DST_WE_X | R300_PVS_DST_WE_Y |
- R300_PVS_DST_WE_Z | R300_PVS_DST_WE_W));
+ R300_PVS_DST_OFFSET(1) |
+ R300_PVS_DST_WE_X));
OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
- (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
- R300_PVS_SRC_OFFSET(7) |
+ (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_TEMPORARY) |
+ R300_PVS_SRC_OFFSET(0) |
R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_X) |
- R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_Y) |
- R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_Z) |
- R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_W)));
+ R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_FORCE_0) |
+ R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) |
+ R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0)));
+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
+ (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_CONSTANT) |
+ R300_PVS_SRC_OFFSET(0) |
+ R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_W) |
+ R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_FORCE_0) |
+ R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) |
+ R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0)));
OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
(R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
- R300_PVS_SRC_OFFSET(7) |
+ R300_PVS_SRC_OFFSET(6) |
R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_FORCE_0) |
R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_FORCE_0) |
R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) |
R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0)));
+
+ /* PVS inst 4 - src Y / h */
+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
+ (R300_PVS_DST_OPCODE(R300_VE_MULTIPLY) |
+ R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_OUT) |
+ R300_PVS_DST_OFFSET(1) |
+ R300_PVS_DST_WE_Y));
+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
+ (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_TEMPORARY) |
+ R300_PVS_SRC_OFFSET(0) |
+ R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_FORCE_0) |
+ R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_Y) |
+ R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) |
+ R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0)));
+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
+ (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_CONSTANT) |
+ R300_PVS_SRC_OFFSET(1) |
+ R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_FORCE_0) |
+ R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_W) |
+ R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) |
+ R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0)));
OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
(R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
- R300_PVS_SRC_OFFSET(7) |
+ R300_PVS_SRC_OFFSET(6) |
R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_FORCE_0) |
R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_FORCE_0) |
R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) |
R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0)));
- FINISH_ACCEL();
- BEGIN_ACCEL(9);
- /* exa no mask instruction */
- OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_INDX_REG, 3);
- /* PVS inst 0 */
+ /* PVS inst 5 - mask X */
OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
- (R300_PVS_DST_OPCODE(R300_VE_ADD) |
- R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_OUT) |
+ (R300_PVS_DST_OPCODE(R300_VE_DOT_PRODUCT) |
+ R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_TEMPORARY) |
R300_PVS_DST_OFFSET(0) |
- R300_PVS_DST_WE_X | R300_PVS_DST_WE_Y |
- R300_PVS_DST_WE_Z | R300_PVS_DST_WE_W));
+ R300_PVS_DST_WE_Z));
OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
(R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
- R300_PVS_SRC_OFFSET(0) |
+ R300_PVS_SRC_OFFSET(7) |
+ R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_X) |
+ R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_Y) |
+ R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_1) |
+ R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0)));
+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
+ (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_CONSTANT) |
+ R300_PVS_SRC_OFFSET(2) |
R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_X) |
R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_Y) |
R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_Z) |
- R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_W)));
+ R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0)));
OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
(R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
- R300_PVS_SRC_OFFSET(0) |
+ R300_PVS_SRC_OFFSET(7) |
R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_FORCE_0) |
R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_FORCE_0) |
R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) |
R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0)));
+
+ /* PVS inst 6 - mask Y */
+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
+ (R300_PVS_DST_OPCODE(R300_VE_DOT_PRODUCT) |
+ R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_TEMPORARY) |
+ R300_PVS_DST_OFFSET(0) |
+ R300_PVS_DST_WE_W));
OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
(R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
- R300_PVS_SRC_OFFSET(0) |
+ R300_PVS_SRC_OFFSET(7) |
+ R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_X) |
+ R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_Y) |
+ R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_1) |
+ R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0)));
+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
+ (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_CONSTANT) |
+ R300_PVS_SRC_OFFSET(3) |
+ R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_X) |
+ R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_Y) |
+ R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_Z) |
+ R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0)));
+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
+ (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
+ R300_PVS_SRC_OFFSET(7) |
R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_FORCE_0) |
R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_FORCE_0) |
R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) |
R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0)));
- /* PVS inst 1 */
+ /* PVS inst 7 - mask X / w */
OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
- (R300_PVS_DST_OPCODE(R300_VE_ADD) |
+ (R300_PVS_DST_OPCODE(R300_VE_MULTIPLY) |
R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_OUT) |
- R300_PVS_DST_OFFSET(1) |
- R300_PVS_DST_WE_X | R300_PVS_DST_WE_Y |
- R300_PVS_DST_WE_Z | R300_PVS_DST_WE_W));
+ R300_PVS_DST_OFFSET(2) |
+ R300_PVS_DST_WE_X));
OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
- (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
- R300_PVS_SRC_OFFSET(6) |
- R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_X) |
- R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_Y) |
- R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_Z) |
- R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_W)));
+ (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_TEMPORARY) |
+ R300_PVS_SRC_OFFSET(0) |
+ R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_Z) |
+ R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_FORCE_0) |
+ R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) |
+ R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0)));
+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
+ (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_CONSTANT) |
+ R300_PVS_SRC_OFFSET(2) |
+ R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_W) |
+ R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_FORCE_0) |
+ R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) |
+ R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0)));
OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
(R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
R300_PVS_SRC_OFFSET(6) |
@@ -364,6 +447,27 @@ static void FUNC_NAME(RADEONInit3DEngine)(ScrnInfoPtr pScrn)
R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_FORCE_0) |
R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) |
R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0)));
+
+ /* PVS inst 8 - mask Y / h */
+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
+ (R300_PVS_DST_OPCODE(R300_VE_MULTIPLY) |
+ R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_OUT) |
+ R300_PVS_DST_OFFSET(2) |
+ R300_PVS_DST_WE_Y));
+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
+ (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_TEMPORARY) |
+ R300_PVS_SRC_OFFSET(0) |
+ R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_FORCE_0) |
+ R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_W) |
+ R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) |
+ R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0)));
+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
+ (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_CONSTANT) |
+ R300_PVS_SRC_OFFSET(3) |
+ R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_FORCE_0) |
+ R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_W) |
+ R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) |
+ R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0)));
OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
(R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
R300_PVS_SRC_OFFSET(6) |
@@ -375,7 +479,7 @@ static void FUNC_NAME(RADEONInit3DEngine)(ScrnInfoPtr pScrn)
/* Xv shader program */
BEGIN_ACCEL(9);
- OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_INDX_REG, 5);
+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_INDX_REG, R300_PVS_VECTOR_INST_INDEX(9));
OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
(R300_PVS_DST_OPCODE(R300_VE_ADD) |
@@ -388,8 +492,8 @@ static void FUNC_NAME(RADEONInit3DEngine)(ScrnInfoPtr pScrn)
R300_PVS_SRC_OFFSET(0) |
R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_X) |
R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_Y) |
- R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_Z) |
- R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_W)));
+ R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) |
+ R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_1)));
OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
(R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
R300_PVS_SRC_OFFSET(0) |
@@ -409,15 +513,14 @@ static void FUNC_NAME(RADEONInit3DEngine)(ScrnInfoPtr pScrn)
(R300_PVS_DST_OPCODE(R300_VE_ADD) |
R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_OUT) |
R300_PVS_DST_OFFSET(1) |
- R300_PVS_DST_WE_X | R300_PVS_DST_WE_Y |
- R300_PVS_DST_WE_Z | R300_PVS_DST_WE_W));
+ R300_PVS_DST_WE_X | R300_PVS_DST_WE_Y));
OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
(R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
R300_PVS_SRC_OFFSET(6) |
R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_X) |
R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_Y) |
- R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_Z) |
- R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_W)));
+ R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) |
+ R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_1)));
OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
(R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
R300_PVS_SRC_OFFSET(6) |
@@ -433,6 +536,97 @@ static void FUNC_NAME(RADEONInit3DEngine)(ScrnInfoPtr pScrn)
R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) |
R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0)));
FINISH_ACCEL();
+
+ /* Xv bicubic shader program */
+ BEGIN_ACCEL(13);
+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_INDX_REG, R300_PVS_VECTOR_INST_INDEX(11));
+ /* PVS inst 0 */
+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
+ (R300_PVS_DST_OPCODE(R300_VE_ADD) |
+ R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_OUT) |
+ R300_PVS_DST_OFFSET(0) |
+ R300_PVS_DST_WE_X | R300_PVS_DST_WE_Y |
+ R300_PVS_DST_WE_Z | R300_PVS_DST_WE_W));
+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
+ (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
+ R300_PVS_SRC_OFFSET(0) |
+ R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_X) |
+ R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_Y) |
+ R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) |
+ R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_1)));
+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
+ (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
+ R300_PVS_SRC_OFFSET(0) |
+ R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_FORCE_0) |
+ R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_FORCE_0) |
+ R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) |
+ R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0)));
+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
+ (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
+ R300_PVS_SRC_OFFSET(0) |
+ R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_FORCE_0) |
+ R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_FORCE_0) |
+ R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) |
+ R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0)));
+
+ /* PVS inst 1 */
+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
+ (R300_PVS_DST_OPCODE(R300_VE_ADD) |
+ R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_OUT) |
+ R300_PVS_DST_OFFSET(1) |
+ R300_PVS_DST_WE_X | R300_PVS_DST_WE_Y |
+ R300_PVS_DST_WE_Z | R300_PVS_DST_WE_W));
+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
+ (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
+ R300_PVS_SRC_OFFSET(6) |
+ R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_X) |
+ R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_Y) |
+ R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) |
+ R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_1)));
+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
+ (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
+ R300_PVS_SRC_OFFSET(6) |
+ R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_FORCE_0) |
+ R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_FORCE_0) |
+ R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) |
+ R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0)));
+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
+ (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
+ R300_PVS_SRC_OFFSET(6) |
+ R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_FORCE_0) |
+ R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_FORCE_0) |
+ R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) |
+ R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0)));
+
+ /* PVS inst 2 */
+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
+ (R300_PVS_DST_OPCODE(R300_VE_ADD) |
+ R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_OUT) |
+ R300_PVS_DST_OFFSET(2) |
+ R300_PVS_DST_WE_X | R300_PVS_DST_WE_Y |
+ R300_PVS_DST_WE_Z | R300_PVS_DST_WE_W));
+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
+ (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
+ R300_PVS_SRC_OFFSET(7) |
+ R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_X) |
+ R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_Y) |
+ R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) |
+ R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_1)));
+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
+ (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
+ R300_PVS_SRC_OFFSET(7) |
+ R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_FORCE_0) |
+ R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_FORCE_0) |
+ R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) |
+ R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0)));
+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
+ (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
+ R300_PVS_SRC_OFFSET(7) |
+ R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_FORCE_0) |
+ R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_FORCE_0) |
+ R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) |
+ R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0)));
+ FINISH_ACCEL();
}
/* pre-load the RS instructions */
diff --git a/src/radeon_exa_render.c b/src/radeon_exa_render.c
index 8dbbee9..89c803a 100644
--- a/src/radeon_exa_render.c
+++ b/src/radeon_exa_render.c
@@ -1099,9 +1099,6 @@ static Bool FUNC_NAME(R300TextureSetup)(PicturePtr pPict, PixmapPtr pPix,
*/
txformat0 |= R300_TXPITCH_EN;
- info->accel_state->texW[unit] = w;
- info->accel_state->texH[unit] = h;
-
txfilter = (unit << R300_TX_ID_SHIFT);
if (pPict->repeat) {
@@ -1160,8 +1157,61 @@ static Bool FUNC_NAME(R300TextureSetup)(PicturePtr pPict, PixmapPtr pPix,
if (pPict->transform != 0) {
info->accel_state->is_transform[unit] = TRUE;
info->accel_state->transform[unit] = pPict->transform;
+
+ /* setup the PVS consts */
+ if (info->accel_state->has_tcl) {
+ info->accel_state->texW[unit] = 1;
+ info->accel_state->texH[unit] = 1;
+ BEGIN_ACCEL(9);
+ if (IS_R300_3D)
+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_INDX_REG, R300_PVS_VECTOR_CONST_INDEX(unit * 2));
+ else
+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_INDX_REG, R500_PVS_VECTOR_CONST_INDEX(unit * 2));
+
+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(xFixedToFloat(pPict->transform->matrix[0][0])));
+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(xFixedToFloat(pPict->transform->matrix[0][1])));
+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(xFixedToFloat(pPict->transform->matrix[0][2])));
+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(1.0/w));
+
+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(xFixedToFloat(pPict->transform->matrix[1][0])));
+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(xFixedToFloat(pPict->transform->matrix[1][1])));
+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(xFixedToFloat(pPict->transform->matrix[1][2])));
+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(1.0/h));
+
+ FINISH_ACCEL();
+ } else {
+ info->accel_state->texW[unit] = w;
+ info->accel_state->texH[unit] = h;
+ }
} else {
info->accel_state->is_transform[unit] = FALSE;
+
+ /* setup the PVS consts */
+ if (info->accel_state->has_tcl) {
+ info->accel_state->texW[unit] = 1;
+ info->accel_state->texH[unit] = 1;
+
+ BEGIN_ACCEL(9);
+ if (IS_R300_3D)
+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_INDX_REG, R300_PVS_VECTOR_CONST_INDEX(unit * 2));
+ else
+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_INDX_REG, R500_PVS_VECTOR_CONST_INDEX(unit * 2));
+
+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(1.0));
+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(0.0));
+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(0.0));
+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(1.0/w));
+
+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(0.0));
+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(1.0));
+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(0.0));
+ OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG, F_TO_DW(1.0/h));
+
+ FINISH_ACCEL();
+ } else {
+ info->accel_state->texW[unit] = w;
+ info->accel_state->texH[unit] = h;
+ }
}
return TRUE;
@@ -1310,9 +1360,10 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture,
/* setup the VAP */
if (info->accel_state->has_tcl) {
if (pMask)
- BEGIN_ACCEL(8);
+ BEGIN_ACCEL(10);
else
- BEGIN_ACCEL(7);
+ BEGIN_ACCEL(9);
+ OUT_ACCEL_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0);
} else {
if (pMask)
BEGIN_ACCEL(6);
@@ -1363,22 +1414,28 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture,
/* load the vertex shader
* We pre-load vertex programs in RADEONInit3DEngine():
- * - exa no mask
- * - exa mask
+ * - exa
* - Xv
+ * - Xv bicubic
* Here we select the offset of the vertex program we want to use
*/
if (info->accel_state->has_tcl) {
if (pMask) {
+ /* consts used by vertex shaders */
+ OUT_ACCEL_REG(R300_VAP_PVS_CONST_CNTL, (R300_PVS_CONST_BASE_OFFSET(0) |
+ R300_PVS_MAX_CONST_ADDR(3)));
OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_0,
((0 << R300_PVS_FIRST_INST_SHIFT) |
- (2 << R300_PVS_XYZW_VALID_INST_SHIFT) |
- (2 << R300_PVS_LAST_INST_SHIFT)));
+ (8 << R300_PVS_XYZW_VALID_INST_SHIFT) |
+ (8 << R300_PVS_LAST_INST_SHIFT)));
OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_1,
- (2 << R300_PVS_LAST_VTX_SRC_INST_SHIFT));
+ (8 << R300_PVS_LAST_VTX_SRC_INST_SHIFT));
} else {
+ /* consts used by vertex shaders */
+ OUT_ACCEL_REG(R300_VAP_PVS_CONST_CNTL, (R300_PVS_CONST_BASE_OFFSET(0) |
+ R300_PVS_MAX_CONST_ADDR(3)));
OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_0,
- ((3 << R300_PVS_FIRST_INST_SHIFT) |
+ ((0 << R300_PVS_FIRST_INST_SHIFT) |
(4 << R300_PVS_XYZW_VALID_INST_SHIFT) |
(4 << R300_PVS_LAST_INST_SHIFT)));
OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_1,
@@ -2054,10 +2111,12 @@ static void FUNC_NAME(RadeonCompositeTile)(ScrnInfoPtr pScrn,
srcBottomRight.y = IntToxFixed(srcY + h);
if (info->accel_state->is_transform[0]) {
- transformPoint(info->accel_state->transform[0], &srcTopLeft);
- transformPoint(info->accel_state->transform[0], &srcTopRight);
- transformPoint(info->accel_state->transform[0], &srcBottomLeft);
- transformPoint(info->accel_state->transform[0], &srcBottomRight);
+ if ((info->ChipFamily < CHIP_FAMILY_R300) || !info->accel_state->has_tcl) {
+ transformPoint(info->accel_state->transform[0], &srcTopLeft);
+ transformPoint(info->accel_state->transform[0], &srcTopRight);
+ transformPoint(info->accel_state->transform[0], &srcBottomLeft);
+ transformPoint(info->accel_state->transform[0], &srcBottomRight);
+ }
}
if (info->accel_state->has_mask) {
@@ -2071,10 +2130,12 @@ static void FUNC_NAME(RadeonCompositeTile)(ScrnInfoPtr pScrn,
maskBottomRight.y = IntToxFixed(maskY + h);
if (info->accel_state->is_transform[1]) {
- transformPoint(info->accel_state->transform[1], &maskTopLeft);
- transformPoint(info->accel_state->transform[1], &maskTopRight);
- transformPoint(info->accel_state->transform[1], &maskBottomLeft);
- transformPoint(info->accel_state->transform[1], &maskBottomRight);
+ if ((info->ChipFamily < CHIP_FAMILY_R300) || !info->accel_state->has_tcl) {
+ transformPoint(info->accel_state->transform[1], &maskTopLeft);
+ transformPoint(info->accel_state->transform[1], &maskTopRight);
+ transformPoint(info->accel_state->transform[1], &maskBottomLeft);
+ transformPoint(info->accel_state->transform[1], &maskBottomRight);
+ }
}
vtx_count = 6;
diff --git a/src/radeon_reg.h b/src/radeon_reg.h
index 248cb42..9261b39 100644
--- a/src/radeon_reg.h
+++ b/src/radeon_reg.h
@@ -4284,6 +4284,12 @@
#define R300_VAP_PVS_CODE_CNTL_1 0x22D8
# define R300_PVS_LAST_VTX_SRC_INST_SHIFT 0
#define R300_VAP_PVS_VECTOR_INDX_REG 0x2200
+# define R300_PVS_CODE_START 0
+# define R300_PVS_CONST_START 512
+# define R500_PVS_CONST_START 1024
+# define R300_PVS_VECTOR_INST_INDEX(x) ((x) + R300_PVS_CODE_START)
+# define R300_PVS_VECTOR_CONST_INDEX(x) ((x) + R300_PVS_CONST_START)
+# define R500_PVS_VECTOR_CONST_INDEX(x) ((x) + R500_PVS_CONST_START)
#define R300_VAP_PVS_VECTOR_DATA_REG 0x2204
/* PVS instructions */
/* Opcode and dst instruction */
@@ -4402,6 +4408,10 @@
#define R300_PVS_SRC_ADDR_SEL(x) ((x) << 29)
#define R300_PVS_SRC_ADDR_MODE_1 (1 << 31)
+#define R300_VAP_PVS_CONST_CNTL 0x22d4
+# define R300_PVS_CONST_BASE_OFFSET(x) ((x) << 0)
+# define R300_PVS_MAX_CONST_ADDR(x) ((x) << 16)
+
#define R300_VAP_PVS_FLOW_CNTL_OPC 0x22dc
#define R300_VAP_OUT_VTX_FMT_0 0x2090
# define R300_VTX_POS_PRESENT (1 << 0)
@@ -5441,9 +5451,6 @@
# define R500_W_SRC_US (0 << 2)
# define R500_W_SRC_RAS (1 << 2)
-#define R500_GA_US_VECTOR_INDEX 0x4250
-#define R500_GA_US_VECTOR_DATA 0x4254
-
#define R500_RS_INST_0 0x4320
#define R500_RS_INST_1 0x4324
# define R500_RS_INST_TEX_ID_SHIFT 0
diff --git a/src/radeon_textured_videofuncs.c b/src/radeon_textured_videofuncs.c
index bbc5caf..8ead2a4 100644
--- a/src/radeon_textured_videofuncs.c
+++ b/src/radeon_textured_videofuncs.c
@@ -1213,26 +1213,26 @@ FUNC_NAME(R300DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
/* load the vertex shader
* We pre-load vertex programs in RADEONInit3DEngine():
- * - exa mask/Xv bicubic
- * - exa no mask
+ * - exa
* - Xv
+ * - Xv bicubic
* Here we select the offset of the vertex program we want to use
*/
if (info->accel_state->has_tcl) {
if (pPriv->bicubic_enabled) {
OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_0,
- ((0 << R300_PVS_FIRST_INST_SHIFT) |
- (2 << R300_PVS_XYZW_VALID_INST_SHIFT) |
- (2 << R300_PVS_LAST_INST_SHIFT)));
+ ((11 << R300_PVS_FIRST_INST_SHIFT) |
+ (13 << R300_PVS_XYZW_VALID_INST_SHIFT) |
+ (13 << R300_PVS_LAST_INST_SHIFT)));
OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_1,
- (2 << R300_PVS_LAST_VTX_SRC_INST_SHIFT));
+ (13 << R300_PVS_LAST_VTX_SRC_INST_SHIFT));
} else {
OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_0,
- ((5 << R300_PVS_FIRST_INST_SHIFT) |
- (6 << R300_PVS_XYZW_VALID_INST_SHIFT) |
- (6 << R300_PVS_LAST_INST_SHIFT)));
+ ((9 << R300_PVS_FIRST_INST_SHIFT) |
+ (10 << R300_PVS_XYZW_VALID_INST_SHIFT) |
+ (10 << R300_PVS_LAST_INST_SHIFT)));
OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_1,
- (6 << R300_PVS_LAST_VTX_SRC_INST_SHIFT));
+ (10 << R300_PVS_LAST_VTX_SRC_INST_SHIFT));
}
}
@@ -2643,26 +2643,26 @@ FUNC_NAME(R500DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
/* load the vertex shader
* We pre-load vertex programs in RADEONInit3DEngine():
- * - exa mask/Xv bicubic
- * - exa no mask
+ * - exa
* - Xv
+ * - Xv bicubic
* Here we select the offset of the vertex program we want to use
*/
if (info->accel_state->has_tcl) {
if (pPriv->bicubic_enabled) {
OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_0,
- ((0 << R300_PVS_FIRST_INST_SHIFT) |
- (2 << R300_PVS_XYZW_VALID_INST_SHIFT) |
- (2 << R300_PVS_LAST_INST_SHIFT)));
+ ((11 << R300_PVS_FIRST_INST_SHIFT) |
+ (13 << R300_PVS_XYZW_VALID_INST_SHIFT) |
+ (13 << R300_PVS_LAST_INST_SHIFT)));
OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_1,
- (2 << R300_PVS_LAST_VTX_SRC_INST_SHIFT));
+ (13 << R300_PVS_LAST_VTX_SRC_INST_SHIFT));
} else {
OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_0,
- ((5 << R300_PVS_FIRST_INST_SHIFT) |
- (6 << R300_PVS_XYZW_VALID_INST_SHIFT) |
- (6 << R300_PVS_LAST_INST_SHIFT)));
+ ((9 << R300_PVS_FIRST_INST_SHIFT) |
+ (10 << R300_PVS_XYZW_VALID_INST_SHIFT) |
+ (10 << R300_PVS_LAST_INST_SHIFT)));
OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_1,
- (6 << R300_PVS_LAST_VTX_SRC_INST_SHIFT));
+ (10 << R300_PVS_LAST_VTX_SRC_INST_SHIFT));
}
}
More information about the xorg-commit mailing list