xf86-video-ati: Branch 'master' - 2 commits
Alex Deucher
agd5f at kemper.freedesktop.org
Fri Jul 30 14:25:22 PDT 2010
src/r600_exa.c | 19 -
src/r600_shader.c | 860 ++++++++++++++++++++++++++++--------------------------
src/r600_shader.h | 1
src/radeon.h | 1
4 files changed, 458 insertions(+), 423 deletions(-)
New commits:
commit 82254b59268140c4102ae3cd713743ae2be15c00
Author: Alex Deucher <alexdeucher at gmail.com>
Date: Fri Jul 30 17:15:05 2010 -0400
r6xx/r7xx: unify composite mask and non-mask pixel shader
diff --git a/src/r600_exa.c b/src/r600_exa.c
index 89d5877..72c4ff8 100644
--- a/src/r600_exa.c
+++ b/src/r600_exa.c
@@ -1427,7 +1427,7 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
&src_obj,
&mask_obj,
&dst_obj,
- accel_state->comp_vs_offset, accel_state->comp_mask_ps_offset,
+ accel_state->comp_vs_offset, accel_state->comp_ps_offset,
3, 0xffffffff))
return FALSE;
@@ -1491,10 +1491,13 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
} else
accel_state->is_transform[1] = FALSE;
- if (pMask)
+ if (pMask) {
set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_vs, (1 << 0));
- else
+ set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_ps, (1 << 0));
+ } else {
set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_vs, (0 << 0));
+ set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_ps, (0 << 0));
+ }
/* Shader */
@@ -1516,7 +1519,7 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
ps_conf.shader_addr = accel_state->ps_mc_addr;
ps_conf.num_gprs = 3;
- ps_conf.stack_size = 0;
+ ps_conf.stack_size = 1;
ps_conf.uncached_first_inst = 1;
ps_conf.clamp_consts = 0;
ps_conf.export_mode = 2;
@@ -2224,16 +2227,12 @@ R600LoadShaders(ScrnInfoPtr pScrn)
accel_state->comp_ps_offset = 2560;
R600_comp_ps(ChipSet, shader + accel_state->comp_ps_offset / 4);
- /* comp mask ps --------------------------------------- */
- accel_state->comp_mask_ps_offset = 3072;
- R600_comp_mask_ps(ChipSet, shader + accel_state->comp_mask_ps_offset / 4);
-
/* xv vs --------------------------------------- */
- accel_state->xv_vs_offset = 3584;
+ accel_state->xv_vs_offset = 3072;
R600_xv_vs(ChipSet, shader + accel_state->xv_vs_offset / 4);
/* xv ps --------------------------------------- */
- accel_state->xv_ps_offset = 4096;
+ accel_state->xv_ps_offset = 3584;
R600_xv_ps(ChipSet, shader + accel_state->xv_ps_offset / 4);
#ifdef XF86DRM_MODE
diff --git a/src/r600_shader.c b/src/r600_shader.c
index 47bc007..e2a4163 100644
--- a/src/r600_shader.c
+++ b/src/r600_shader.c
@@ -1149,230 +1149,6 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
return i;
}
-/* comp mask ps --------------------------------------- */
-int R600_comp_mask_ps(RADEONChipFamily ChipSet, uint32_t* shader)
-{
- int i = 0;
-
- /* 0 */
- shader[i++] = CF_DWORD0(ADDR(8));
- shader[i++] = CF_DWORD1(POP_COUNT(0),
- CF_CONST(0),
- COND(SQ_CF_COND_ACTIVE),
- I_COUNT(2),
- CALL_COUNT(0),
- END_OF_PROGRAM(0),
- VALID_PIXEL_MODE(0),
- CF_INST(SQ_CF_INST_TEX),
- WHOLE_QUAD_MODE(0),
- BARRIER(1));
-
- /* 1 */
- shader[i++] = CF_ALU_DWORD0(ADDR(3),
- KCACHE_BANK0(0),
- KCACHE_BANK1(0),
- KCACHE_MODE0(SQ_CF_KCACHE_NOP));
- shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
- KCACHE_ADDR0(0),
- KCACHE_ADDR1(0),
- I_COUNT(4),
- USES_WATERFALL(0),
- CF_INST(SQ_CF_INST_ALU),
- WHOLE_QUAD_MODE(0),
- BARRIER(1));
-
- /* 2 */
- shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
- TYPE(SQ_EXPORT_PIXEL),
- RW_GPR(2),
- RW_REL(ABSOLUTE),
- INDEX_GPR(0),
- ELEM_SIZE(1));
-
- shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
- SRC_SEL_Y(SQ_SEL_Y),
- SRC_SEL_Z(SQ_SEL_Z),
- SRC_SEL_W(SQ_SEL_W),
- R6xx_ELEM_LOOP(0),
- BURST_COUNT(1),
- END_OF_PROGRAM(1),
- VALID_PIXEL_MODE(0),
- CF_INST(SQ_CF_INST_EXPORT_DONE),
- WHOLE_QUAD_MODE(0),
- BARRIER(1));
-
- /* 3 - alu 0 */
- /* MUL gpr[2].x gpr[1].x gpr[0].x */
- shader[i++] = ALU_DWORD0(SRC0_SEL(1),
- SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_X),
- SRC0_NEG(0),
- SRC1_SEL(0),
- SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_X),
- SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_LOOP),
- PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(0));
- shader[i++] = ALU_DWORD1_OP2(ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(1),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_MUL),
- BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(2),
- DST_REL(ABSOLUTE),
- DST_ELEM(ELEM_X),
- CLAMP(1));
- /* 4 - alu 1 */
- /* MUL gpr[2].y gpr[1].y gpr[0].y */
- shader[i++] = ALU_DWORD0(SRC0_SEL(1),
- SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_Y),
- SRC0_NEG(0),
- SRC1_SEL(0),
- SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_Y),
- SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_LOOP),
- PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(0));
- shader[i++] = ALU_DWORD1_OP2(ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(1),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_MUL),
- BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(2),
- DST_REL(ABSOLUTE),
- DST_ELEM(ELEM_Y),
- CLAMP(1));
- /* 5 - alu 2 */
- /* MUL gpr[2].z gpr[1].z gpr[0].z */
- shader[i++] = ALU_DWORD0(SRC0_SEL(1),
- SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_Z),
- SRC0_NEG(0),
- SRC1_SEL(0),
- SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_Z),
- SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_LOOP),
- PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(0));
- shader[i++] = ALU_DWORD1_OP2(ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(1),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_MUL),
- BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(2),
- DST_REL(ABSOLUTE),
- DST_ELEM(ELEM_Z),
- CLAMP(1));
- /* 6 - alu 3 */
- /* MUL gpr[2].w gpr[1].w gpr[0].w */
- shader[i++] = ALU_DWORD0(SRC0_SEL(1),
- SRC0_REL(ABSOLUTE),
- SRC0_ELEM(ELEM_W),
- SRC0_NEG(0),
- SRC1_SEL(0),
- SRC1_REL(ABSOLUTE),
- SRC1_ELEM(ELEM_W),
- SRC1_NEG(0),
- INDEX_MODE(SQ_INDEX_LOOP),
- PRED_SEL(SQ_PRED_SEL_OFF),
- LAST(1));
- shader[i++] = ALU_DWORD1_OP2(ChipSet,
- SRC0_ABS(0),
- SRC1_ABS(0),
- UPDATE_EXECUTE_MASK(0),
- UPDATE_PRED(0),
- WRITE_MASK(1),
- FOG_MERGE(0),
- OMOD(SQ_ALU_OMOD_OFF),
- ALU_INST(SQ_OP2_INST_MUL),
- BANK_SWIZZLE(SQ_ALU_VEC_012),
- DST_GPR(2),
- DST_REL(ABSOLUTE),
- DST_ELEM(ELEM_W),
- CLAMP(1));
- /* 7 */
- shader[i++] = 0x00000000;
- shader[i++] = 0x00000000;
-
- /* 8/9 - src */
- shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
- BC_FRAC_MODE(0),
- FETCH_WHOLE_QUAD(0),
- RESOURCE_ID(0),
- SRC_GPR(0),
- SRC_REL(ABSOLUTE),
- R7xx_ALT_CONST(0));
- shader[i++] = TEX_DWORD1(DST_GPR(0),
- DST_REL(ABSOLUTE),
- DST_SEL_X(SQ_SEL_X),
- DST_SEL_Y(SQ_SEL_Y),
- DST_SEL_Z(SQ_SEL_Z),
- DST_SEL_W(SQ_SEL_W),
- LOD_BIAS(0),
- COORD_TYPE_X(TEX_NORMALIZED),
- COORD_TYPE_Y(TEX_NORMALIZED),
- COORD_TYPE_Z(TEX_NORMALIZED),
- COORD_TYPE_W(TEX_NORMALIZED));
- shader[i++] = TEX_DWORD2(OFFSET_X(0),
- OFFSET_Y(0),
- OFFSET_Z(0),
- SAMPLER_ID(0),
- SRC_SEL_X(SQ_SEL_X),
- SRC_SEL_Y(SQ_SEL_Y),
- SRC_SEL_Z(SQ_SEL_0),
- SRC_SEL_W(SQ_SEL_1));
- shader[i++] = TEX_DWORD_PAD;
- /* 10/11 - mask */
- shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
- BC_FRAC_MODE(0),
- FETCH_WHOLE_QUAD(0),
- RESOURCE_ID(1),
- SRC_GPR(1),
- SRC_REL(ABSOLUTE),
- R7xx_ALT_CONST(0));
- shader[i++] = TEX_DWORD1(DST_GPR(1),
- DST_REL(ABSOLUTE),
- DST_SEL_X(SQ_SEL_X),
- DST_SEL_Y(SQ_SEL_Y),
- DST_SEL_Z(SQ_SEL_Z),
- DST_SEL_W(SQ_SEL_W),
- LOD_BIAS(0),
- COORD_TYPE_X(TEX_NORMALIZED),
- COORD_TYPE_Y(TEX_NORMALIZED),
- COORD_TYPE_Z(TEX_NORMALIZED),
- COORD_TYPE_W(TEX_NORMALIZED));
- shader[i++] = TEX_DWORD2(OFFSET_X(0),
- OFFSET_Y(0),
- OFFSET_Z(0),
- SAMPLER_ID(1),
- SRC_SEL_X(SQ_SEL_X),
- SRC_SEL_Y(SQ_SEL_Y),
- SRC_SEL_Z(SQ_SEL_0),
- SRC_SEL_W(SQ_SEL_1));
- shader[i++] = TEX_DWORD_PAD;
-
- return i;
-}
-
/* comp vs --------------------------------------- */
int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
{
@@ -2152,7 +1928,102 @@ int R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
int i = 0;
/* 0 */
- shader[i++] = CF_DWORD0(ADDR(2));
+ shader[i++] = CF_DWORD0(ADDR(3));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_BOOL),
+ I_COUNT(0),
+ CALL_COUNT(0),
+ END_OF_PROGRAM(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_CALL),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(0));
+ /* 1 */
+ shader[i++] = CF_DWORD0(ADDR(7));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_NOT_BOOL),
+ I_COUNT(0),
+ CALL_COUNT(0),
+ END_OF_PROGRAM(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_CALL),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(0));
+ /* 2 */
+ shader[i++] = CF_DWORD0(ADDR(0));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(0),
+ CALL_COUNT(0),
+ END_OF_PROGRAM(1),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_NOP),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+
+ /* 3 - mask sub */
+ shader[i++] = CF_DWORD0(ADDR(14));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(2),
+ CALL_COUNT(0),
+ END_OF_PROGRAM(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_TEX),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+
+ /* 4 */
+ shader[i++] = CF_ALU_DWORD0(ADDR(10),
+ KCACHE_BANK0(0),
+ KCACHE_BANK1(0),
+ KCACHE_MODE0(SQ_CF_KCACHE_NOP));
+ shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+ KCACHE_ADDR0(0),
+ KCACHE_ADDR1(0),
+ I_COUNT(4),
+ USES_WATERFALL(0),
+ CF_INST(SQ_CF_INST_ALU),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+
+ /* 5 */
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
+ TYPE(SQ_EXPORT_PIXEL),
+ RW_GPR(2),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(1));
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_Z),
+ SRC_SEL_W(SQ_SEL_W),
+ R6xx_ELEM_LOOP(0),
+ BURST_COUNT(1),
+ END_OF_PROGRAM(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+ /* 6 */
+ shader[i++] = CF_DWORD0(ADDR(0));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(0),
+ CALL_COUNT(0),
+ END_OF_PROGRAM(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_RETURN),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+
+ /* 7 non-mask sub */
+ shader[i++] = CF_DWORD0(ADDR(18));
shader[i++] = CF_DWORD1(POP_COUNT(0),
CF_CONST(0),
COND(SQ_CF_COND_ACTIVE),
@@ -2163,28 +2034,204 @@ int R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
CF_INST(SQ_CF_INST_TEX),
WHOLE_QUAD_MODE(0),
BARRIER(1));
- /* 1 */
+ /* 8 */
shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
TYPE(SQ_EXPORT_PIXEL),
RW_GPR(0),
RW_REL(ABSOLUTE),
INDEX_GPR(0),
ELEM_SIZE(1));
-
shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
SRC_SEL_Y(SQ_SEL_Y),
SRC_SEL_Z(SQ_SEL_Z),
SRC_SEL_W(SQ_SEL_W),
R6xx_ELEM_LOOP(0),
BURST_COUNT(1),
- END_OF_PROGRAM(1),
+ END_OF_PROGRAM(0),
VALID_PIXEL_MODE(0),
CF_INST(SQ_CF_INST_EXPORT_DONE),
WHOLE_QUAD_MODE(0),
BARRIER(1));
+ /* 9 */
+ shader[i++] = CF_DWORD0(ADDR(0));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(0),
+ CALL_COUNT(0),
+ END_OF_PROGRAM(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_RETURN),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+
+ /* 10 - alu 0 */
+ /* MUL gpr[2].x gpr[1].x gpr[0].x */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_X),
+ SRC0_NEG(0),
+ SRC1_SEL(0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_X),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(ChipSet,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MUL),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_X),
+ CLAMP(1));
+ /* 11 - alu 1 */
+ /* MUL gpr[2].y gpr[1].y gpr[0].y */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Y),
+ SRC0_NEG(0),
+ SRC1_SEL(0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Y),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(ChipSet,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MUL),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Y),
+ CLAMP(1));
+ /* 12 - alu 2 */
+ /* MUL gpr[2].z gpr[1].z gpr[0].z */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_Z),
+ SRC0_NEG(0),
+ SRC1_SEL(0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_Z),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(0));
+ shader[i++] = ALU_DWORD1_OP2(ChipSet,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MUL),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_Z),
+ CLAMP(1));
+ /* 13 - alu 3 */
+ /* MUL gpr[2].w gpr[1].w gpr[0].w */
+ shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+ SRC0_REL(ABSOLUTE),
+ SRC0_ELEM(ELEM_W),
+ SRC0_NEG(0),
+ SRC1_SEL(0),
+ SRC1_REL(ABSOLUTE),
+ SRC1_ELEM(ELEM_W),
+ SRC1_NEG(0),
+ INDEX_MODE(SQ_INDEX_LOOP),
+ PRED_SEL(SQ_PRED_SEL_OFF),
+ LAST(1));
+ shader[i++] = ALU_DWORD1_OP2(ChipSet,
+ SRC0_ABS(0),
+ SRC1_ABS(0),
+ UPDATE_EXECUTE_MASK(0),
+ UPDATE_PRED(0),
+ WRITE_MASK(1),
+ FOG_MERGE(0),
+ OMOD(SQ_ALU_OMOD_OFF),
+ ALU_INST(SQ_OP2_INST_MUL),
+ BANK_SWIZZLE(SQ_ALU_VEC_012),
+ DST_GPR(2),
+ DST_REL(ABSOLUTE),
+ DST_ELEM(ELEM_W),
+ CLAMP(1));
+ /* 14/15 - src - mask */
+ shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
+ BC_FRAC_MODE(0),
+ FETCH_WHOLE_QUAD(0),
+ RESOURCE_ID(0),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ R7xx_ALT_CONST(0));
+ shader[i++] = TEX_DWORD1(DST_GPR(0),
+ DST_REL(ABSOLUTE),
+ DST_SEL_X(SQ_SEL_X),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_Z),
+ DST_SEL_W(SQ_SEL_W),
+ LOD_BIAS(0),
+ COORD_TYPE_X(TEX_NORMALIZED),
+ COORD_TYPE_Y(TEX_NORMALIZED),
+ COORD_TYPE_Z(TEX_NORMALIZED),
+ COORD_TYPE_W(TEX_NORMALIZED));
+ shader[i++] = TEX_DWORD2(OFFSET_X(0),
+ OFFSET_Y(0),
+ OFFSET_Z(0),
+ SAMPLER_ID(0),
+ SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_0),
+ SRC_SEL_W(SQ_SEL_1));
+ shader[i++] = TEX_DWORD_PAD;
+ /* 16/17 - mask */
+ shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
+ BC_FRAC_MODE(0),
+ FETCH_WHOLE_QUAD(0),
+ RESOURCE_ID(1),
+ SRC_GPR(1),
+ SRC_REL(ABSOLUTE),
+ R7xx_ALT_CONST(0));
+ shader[i++] = TEX_DWORD1(DST_GPR(1),
+ DST_REL(ABSOLUTE),
+ DST_SEL_X(SQ_SEL_X),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_Z),
+ DST_SEL_W(SQ_SEL_W),
+ LOD_BIAS(0),
+ COORD_TYPE_X(TEX_NORMALIZED),
+ COORD_TYPE_Y(TEX_NORMALIZED),
+ COORD_TYPE_Z(TEX_NORMALIZED),
+ COORD_TYPE_W(TEX_NORMALIZED));
+ shader[i++] = TEX_DWORD2(OFFSET_X(0),
+ OFFSET_Y(0),
+ OFFSET_Z(0),
+ SAMPLER_ID(1),
+ SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_0),
+ SRC_SEL_W(SQ_SEL_1));
+ shader[i++] = TEX_DWORD_PAD;
- /* 2/3 - src */
+ /* 18/19 - src - non-mask */
shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
BC_FRAC_MODE(0),
FETCH_WHOLE_QUAD(0),
diff --git a/src/r600_shader.h b/src/r600_shader.h
index 6c12614..a68d6c2 100644
--- a/src/r600_shader.h
+++ b/src/r600_shader.h
@@ -353,7 +353,6 @@ extern int R600_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader);
extern int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader);
extern int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* vs);
-extern int R600_comp_mask_ps(RADEONChipFamily ChipSet, uint32_t* ps);
extern int R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* ps);
#endif
diff --git a/src/radeon.h b/src/radeon.h
index 56bc076..61f07ba 100644
--- a/src/radeon.h
+++ b/src/radeon.h
@@ -751,7 +751,6 @@ struct radeon_accel_state {
uint32_t copy_ps_offset;
uint32_t comp_vs_offset;
uint32_t comp_ps_offset;
- uint32_t comp_mask_ps_offset;
uint32_t xv_vs_offset;
uint32_t xv_ps_offset;
commit 1c17f3a192f644e8e38b5cfb1470f49434bfba27
Author: Alex Deucher <alexdeucher at gmail.com>
Date: Fri Jul 30 16:34:54 2010 -0400
r6xx/r7xx: clean up composite vertex shader
keep CF, ALU, Fetch instructions in separate groups
diff --git a/src/r600_shader.c b/src/r600_shader.c
index 7e25f6d..47bc007 100644
--- a/src/r600_shader.c
+++ b/src/r600_shader.c
@@ -1391,7 +1391,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
WHOLE_QUAD_MODE(0),
BARRIER(0));
/* 1 */
- shader[i++] = CF_DWORD0(ADDR(28));
+ shader[i++] = CF_DWORD0(ADDR(9));
shader[i++] = CF_DWORD1(POP_COUNT(0),
CF_CONST(0),
COND(SQ_CF_COND_NOT_BOOL),
@@ -1415,7 +1415,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
WHOLE_QUAD_MODE(0),
BARRIER(1));
/* 3 - mask sub */
- shader[i++] = CF_DWORD0(ADDR(22));
+ shader[i++] = CF_DWORD0(ADDR(32));
shader[i++] = CF_DWORD1(POP_COUNT(0),
CF_CONST(0),
COND(SQ_CF_COND_ACTIVE),
@@ -1428,7 +1428,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
BARRIER(1));
/* 4 - ALU */
- shader[i++] = CF_ALU_DWORD0(ADDR(9),
+ shader[i++] = CF_ALU_DWORD0(ADDR(14),
KCACHE_BANK0(0),
KCACHE_BANK1(0),
KCACHE_MODE0(SQ_CF_KCACHE_NOP));
@@ -1507,9 +1507,84 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
CF_INST(SQ_CF_INST_RETURN),
WHOLE_QUAD_MODE(0),
BARRIER(1));
+ /* 9 - non-mask sub */
+ shader[i++] = CF_DWORD0(ADDR(38));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(2),
+ CALL_COUNT(0),
+ END_OF_PROGRAM(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_VTX),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+
+ /* 10 - ALU */
+ shader[i++] = CF_ALU_DWORD0(ADDR(26),
+ KCACHE_BANK0(0),
+ KCACHE_BANK1(0),
+ KCACHE_MODE0(SQ_CF_KCACHE_NOP));
+ shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+ KCACHE_ADDR0(0),
+ KCACHE_ADDR1(0),
+ I_COUNT(6),
+ USES_WATERFALL(0),
+ CF_INST(SQ_CF_INST_ALU),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+
+ /* 11 - dst */
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
+ TYPE(SQ_EXPORT_POS),
+ RW_GPR(1),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(0));
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_0),
+ SRC_SEL_W(SQ_SEL_1),
+ R6xx_ELEM_LOOP(0),
+ BURST_COUNT(0),
+ END_OF_PROGRAM(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
+ /* 12 - src */
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
+ TYPE(SQ_EXPORT_PARAM),
+ RW_GPR(0),
+ RW_REL(ABSOLUTE),
+ INDEX_GPR(0),
+ ELEM_SIZE(0));
+ shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+ SRC_SEL_Y(SQ_SEL_Y),
+ SRC_SEL_Z(SQ_SEL_0),
+ SRC_SEL_W(SQ_SEL_1),
+ R6xx_ELEM_LOOP(0),
+ BURST_COUNT(0),
+ END_OF_PROGRAM(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_EXPORT_DONE),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(0));
+ /* 13 */
+ shader[i++] = CF_DWORD0(ADDR(0));
+ shader[i++] = CF_DWORD1(POP_COUNT(0),
+ CF_CONST(0),
+ COND(SQ_CF_COND_ACTIVE),
+ I_COUNT(0),
+ CALL_COUNT(0),
+ END_OF_PROGRAM(0),
+ VALID_PIXEL_MODE(0),
+ CF_INST(SQ_CF_INST_RETURN),
+ WHOLE_QUAD_MODE(0),
+ BARRIER(1));
- /* 9 srcX MAD */
+ /* 14 srcX MAD - mask */
shader[i++] = ALU_DWORD0(SRC0_SEL(256),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Y),
@@ -1531,7 +1606,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Z),
CLAMP(0));
- /* 10 srcY MAD */
+ /* 15 srcY MAD */
shader[i++] = ALU_DWORD0(SRC0_SEL(257),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Y),
@@ -1554,7 +1629,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
DST_ELEM(ELEM_W),
CLAMP(0));
- /* 11 srcX MAD */
+ /* 16 srcX MAD */
shader[i++] = ALU_DWORD0(SRC0_SEL(256),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
@@ -1576,7 +1651,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_X),
CLAMP(0));
- /* 12 srcY MAD */
+ /* 17 srcY MAD */
shader[i++] = ALU_DWORD0(SRC0_SEL(257),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
@@ -1599,7 +1674,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
DST_ELEM(ELEM_Y),
CLAMP(0));
- /* 13 maskX MAD */
+ /* 18 maskX MAD */
shader[i++] = ALU_DWORD0(SRC0_SEL(258),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Y),
@@ -1622,7 +1697,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
DST_ELEM(ELEM_Z),
CLAMP(0));
- /* 14 maskY MAD */
+ /* 19 maskY MAD */
shader[i++] = ALU_DWORD0(SRC0_SEL(259),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Y),
@@ -1645,7 +1720,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
DST_ELEM(ELEM_W),
CLAMP(0));
- /* 15 srcX MAD */
+ /* 20 srcX MAD */
shader[i++] = ALU_DWORD0(SRC0_SEL(258),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
@@ -1667,7 +1742,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_X),
CLAMP(0));
- /* 16 srcY MAD */
+ /* 21 srcY MAD */
shader[i++] = ALU_DWORD0(SRC0_SEL(259),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
@@ -1690,7 +1765,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
DST_ELEM(ELEM_Y),
CLAMP(0));
- /* 17 srcX / w */
+ /* 22 srcX / w */
shader[i++] = ALU_DWORD0(SRC0_SEL(1),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
@@ -1717,7 +1792,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
DST_ELEM(ELEM_X),
CLAMP(0));
- /* 18 srcY / h */
+ /* 23 srcY / h */
shader[i++] = ALU_DWORD0(SRC0_SEL(1),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Y),
@@ -1744,7 +1819,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
DST_ELEM(ELEM_Y),
CLAMP(0));
- /* 19 maskX / w */
+ /* 24 maskX / w */
shader[i++] = ALU_DWORD0(SRC0_SEL(0),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
@@ -1771,7 +1846,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
DST_ELEM(ELEM_X),
CLAMP(0));
- /* 20 maskY / h */
+ /* 25 maskY / h */
shader[i++] = ALU_DWORD0(SRC0_SEL(0),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Y),
@@ -1797,164 +1872,8 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Y),
CLAMP(0));
- /* 21 */
- shader[i++] = 0x00000000;
- shader[i++] = 0x00000000;
-
- /* 22/23 - dst */
- shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
- FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
- FETCH_WHOLE_QUAD(0),
- BUFFER_ID(0),
- SRC_GPR(0),
- SRC_REL(ABSOLUTE),
- SRC_SEL_X(SQ_SEL_X),
- MEGA_FETCH_COUNT(24));
- shader[i++] = VTX_DWORD1_GPR(DST_GPR(2),
- DST_REL(0),
- DST_SEL_X(SQ_SEL_X),
- DST_SEL_Y(SQ_SEL_Y),
- DST_SEL_Z(SQ_SEL_0),
- DST_SEL_W(SQ_SEL_1),
- USE_CONST_FIELDS(0),
- DATA_FORMAT(FMT_32_32_FLOAT),
- NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
- FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
- SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
- shader[i++] = VTX_DWORD2(OFFSET(0),
- ENDIAN_SWAP(ENDIAN_NONE),
- CONST_BUF_NO_STRIDE(0),
- MEGA_FETCH(1));
- shader[i++] = VTX_DWORD_PAD;
- /* 24/25 - src */
- shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
- FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
- FETCH_WHOLE_QUAD(0),
- BUFFER_ID(0),
- SRC_GPR(0),
- SRC_REL(ABSOLUTE),
- SRC_SEL_X(SQ_SEL_X),
- MEGA_FETCH_COUNT(8));
- shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
- DST_REL(0),
- DST_SEL_X(SQ_SEL_X),
- DST_SEL_Y(SQ_SEL_Y),
- DST_SEL_Z(SQ_SEL_1),
- DST_SEL_W(SQ_SEL_0),
- USE_CONST_FIELDS(0),
- DATA_FORMAT(FMT_32_32_FLOAT),
- NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
- FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
- SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
- shader[i++] = VTX_DWORD2(OFFSET(8),
- ENDIAN_SWAP(ENDIAN_NONE),
- CONST_BUF_NO_STRIDE(0),
- MEGA_FETCH(0));
- shader[i++] = VTX_DWORD_PAD;
- /* 26/27 - mask */
- shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
- FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
- FETCH_WHOLE_QUAD(0),
- BUFFER_ID(0),
- SRC_GPR(0),
- SRC_REL(ABSOLUTE),
- SRC_SEL_X(SQ_SEL_X),
- MEGA_FETCH_COUNT(8));
- shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
- DST_REL(0),
- DST_SEL_X(SQ_SEL_X),
- DST_SEL_Y(SQ_SEL_Y),
- DST_SEL_Z(SQ_SEL_1),
- DST_SEL_W(SQ_SEL_0),
- USE_CONST_FIELDS(0),
- DATA_FORMAT(FMT_32_32_FLOAT),
- NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
- FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
- SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
- shader[i++] = VTX_DWORD2(OFFSET(16),
- ENDIAN_SWAP(ENDIAN_NONE),
- CONST_BUF_NO_STRIDE(0),
- MEGA_FETCH(0));
- shader[i++] = VTX_DWORD_PAD;
- /* 28 - non-mask sub */
- shader[i++] = CF_DWORD0(ADDR(40));
- shader[i++] = CF_DWORD1(POP_COUNT(0),
- CF_CONST(0),
- COND(SQ_CF_COND_ACTIVE),
- I_COUNT(2),
- CALL_COUNT(0),
- END_OF_PROGRAM(0),
- VALID_PIXEL_MODE(0),
- CF_INST(SQ_CF_INST_VTX),
- WHOLE_QUAD_MODE(0),
- BARRIER(1));
-
- /* 29 - ALU */
- shader[i++] = CF_ALU_DWORD0(ADDR(33),
- KCACHE_BANK0(0),
- KCACHE_BANK1(0),
- KCACHE_MODE0(SQ_CF_KCACHE_NOP));
- shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
- KCACHE_ADDR0(0),
- KCACHE_ADDR1(0),
- I_COUNT(6),
- USES_WATERFALL(0),
- CF_INST(SQ_CF_INST_ALU),
- WHOLE_QUAD_MODE(0),
- BARRIER(1));
-
- /* 30 - dst */
- shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
- TYPE(SQ_EXPORT_POS),
- RW_GPR(1),
- RW_REL(ABSOLUTE),
- INDEX_GPR(0),
- ELEM_SIZE(0));
- shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
- SRC_SEL_Y(SQ_SEL_Y),
- SRC_SEL_Z(SQ_SEL_0),
- SRC_SEL_W(SQ_SEL_1),
- R6xx_ELEM_LOOP(0),
- BURST_COUNT(0),
- END_OF_PROGRAM(0),
- VALID_PIXEL_MODE(0),
- CF_INST(SQ_CF_INST_EXPORT_DONE),
- WHOLE_QUAD_MODE(0),
- BARRIER(1));
- /* 31 - src */
- shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
- TYPE(SQ_EXPORT_PARAM),
- RW_GPR(0),
- RW_REL(ABSOLUTE),
- INDEX_GPR(0),
- ELEM_SIZE(0));
- shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
- SRC_SEL_Y(SQ_SEL_Y),
- SRC_SEL_Z(SQ_SEL_0),
- SRC_SEL_W(SQ_SEL_1),
- R6xx_ELEM_LOOP(0),
- BURST_COUNT(0),
- END_OF_PROGRAM(0),
- VALID_PIXEL_MODE(0),
- CF_INST(SQ_CF_INST_EXPORT_DONE),
- WHOLE_QUAD_MODE(0),
- BARRIER(0));
- /* 32 */
- shader[i++] = CF_DWORD0(ADDR(0));
- shader[i++] = CF_DWORD1(POP_COUNT(0),
- CF_CONST(0),
- COND(SQ_CF_COND_ACTIVE),
- I_COUNT(0),
- CALL_COUNT(0),
- END_OF_PROGRAM(0),
- VALID_PIXEL_MODE(0),
- CF_INST(SQ_CF_INST_RETURN),
- WHOLE_QUAD_MODE(0),
- BARRIER(1));
-
-
- /* 33 srcX MAD */
+ /* 26 srcX MAD - non-mask */
shader[i++] = ALU_DWORD0(SRC0_SEL(256),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Y),
@@ -1976,7 +1895,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Z),
CLAMP(0));
- /* 34 srcY MAD */
+ /* 27 srcY MAD */
shader[i++] = ALU_DWORD0(SRC0_SEL(257),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Y),
@@ -1999,7 +1918,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
DST_ELEM(ELEM_W),
CLAMP(0));
- /* 35 srcX MAD */
+ /* 28 srcX MAD */
shader[i++] = ALU_DWORD0(SRC0_SEL(256),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
@@ -2021,7 +1940,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_X),
CLAMP(0));
- /* 36 srcY MAD */
+ /* 29 srcY MAD */
shader[i++] = ALU_DWORD0(SRC0_SEL(257),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
@@ -2043,7 +1962,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
DST_REL(ABSOLUTE),
DST_ELEM(ELEM_Y),
CLAMP(0));
- /* 37 srcX / w */
+ /* 30 srcX / w */
shader[i++] = ALU_DWORD0(SRC0_SEL(0),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_X),
@@ -2070,7 +1989,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
DST_ELEM(ELEM_X),
CLAMP(0));
- /* 38 srcY / h */
+ /* 31 srcY / h */
shader[i++] = ALU_DWORD0(SRC0_SEL(0),
SRC0_REL(ABSOLUTE),
SRC0_ELEM(ELEM_Y),
@@ -2097,11 +2016,83 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
DST_ELEM(ELEM_Y),
CLAMP(0));
- /* 39 */
- shader[i++] = 0x00000000;
- shader[i++] = 0x00000000;
+ /* 32/33 - dst - mask */
+ shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+ FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+ FETCH_WHOLE_QUAD(0),
+ BUFFER_ID(0),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ SRC_SEL_X(SQ_SEL_X),
+ MEGA_FETCH_COUNT(24));
+ shader[i++] = VTX_DWORD1_GPR(DST_GPR(2),
+ DST_REL(0),
+ DST_SEL_X(SQ_SEL_X),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_0),
+ DST_SEL_W(SQ_SEL_1),
+ USE_CONST_FIELDS(0),
+ DATA_FORMAT(FMT_32_32_FLOAT),
+ NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
+ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
+ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+ shader[i++] = VTX_DWORD2(OFFSET(0),
+ ENDIAN_SWAP(ENDIAN_NONE),
+ CONST_BUF_NO_STRIDE(0),
+ MEGA_FETCH(1));
+ shader[i++] = VTX_DWORD_PAD;
+ /* 34/35 - src */
+ shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+ FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+ FETCH_WHOLE_QUAD(0),
+ BUFFER_ID(0),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ SRC_SEL_X(SQ_SEL_X),
+ MEGA_FETCH_COUNT(8));
+ shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
+ DST_REL(0),
+ DST_SEL_X(SQ_SEL_X),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_1),
+ DST_SEL_W(SQ_SEL_0),
+ USE_CONST_FIELDS(0),
+ DATA_FORMAT(FMT_32_32_FLOAT),
+ NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
+ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
+ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+ shader[i++] = VTX_DWORD2(OFFSET(8),
+ ENDIAN_SWAP(ENDIAN_NONE),
+ CONST_BUF_NO_STRIDE(0),
+ MEGA_FETCH(0));
+ shader[i++] = VTX_DWORD_PAD;
+ /* 36/37 - mask */
+ shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+ FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+ FETCH_WHOLE_QUAD(0),
+ BUFFER_ID(0),
+ SRC_GPR(0),
+ SRC_REL(ABSOLUTE),
+ SRC_SEL_X(SQ_SEL_X),
+ MEGA_FETCH_COUNT(8));
+ shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
+ DST_REL(0),
+ DST_SEL_X(SQ_SEL_X),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_1),
+ DST_SEL_W(SQ_SEL_0),
+ USE_CONST_FIELDS(0),
+ DATA_FORMAT(FMT_32_32_FLOAT),
+ NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
+ FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
+ SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+ shader[i++] = VTX_DWORD2(OFFSET(16),
+ ENDIAN_SWAP(ENDIAN_NONE),
+ CONST_BUF_NO_STRIDE(0),
+ MEGA_FETCH(0));
+ shader[i++] = VTX_DWORD_PAD;
- /* 40/41 - dst */
+ /* 38/39 - dst - non-mask */
shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
FETCH_WHOLE_QUAD(0),
@@ -2126,7 +2117,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
CONST_BUF_NO_STRIDE(0),
MEGA_FETCH(1));
shader[i++] = VTX_DWORD_PAD;
- /* 42/43 - src */
+ /* 40/41 - src */
shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
FETCH_WHOLE_QUAD(0),
More information about the xorg-commit
mailing list