[Mesa-dev] [PATCH] r600g: fix the representation of control-flow instructions

Marek Olšák maraeo at gmail.com
Mon Nov 14 09:37:48 PST 2011


We need something that looks like a compiler and not like some hacker
put some functions together. /rant

This is a band-aid for these two problems:
- The R600 and EG control-flow instructions appear in switch statements
  next to each other, causing conflicts when adding new instructions.
- The ALU control-flow instructions are bit-shifted left by 3 (moving them from
  the 4-bit CF_INST field at bits 26:29 to the 7-bit CF_INST field at bits
  23:29, as defined by the R600 ISA) even for EG, where CF_INST occupies bits
  22:29.

To fix this mess, the 'inst' values are now pre-shifted to the left by 22, 23,
or 26 bits (directly in the definitions), so that they can simply be OR'd into
the bytecode without any further shifting. All switch statements have been
split in two, one for R600 and the other for EG.

Of course, there is a better way to do this, but that is left for future
work.

Tested on RV730 and REDWOOD with no regressions.
---
 src/gallium/drivers/r600/eg_asm.c       |   18 +-
 src/gallium/drivers/r600/eg_sq.h        |    8 -
 src/gallium/drivers/r600/r600_asm.c     |  581 ++++++++++++++++++++-----------
 src/gallium/drivers/r600/r600_asm.h     |    6 +
 src/gallium/drivers/r600/r600_opcodes.h |  264 ++++++++-------
 src/gallium/drivers/r600/r600_shader.c  |   12 +-
 src/gallium/drivers/r600/r600d.h        |   34 --
 src/gallium/drivers/r600/r700_asm.c     |    2 +-
 src/gallium/drivers/r600/r700_sq.h      |   41 ---
 9 files changed, 540 insertions(+), 426 deletions(-)

diff --git a/src/gallium/drivers/r600/eg_asm.c b/src/gallium/drivers/r600/eg_asm.c
index ca25b34..f6b8631 100644
--- a/src/gallium/drivers/r600/eg_asm.c
+++ b/src/gallium/drivers/r600/eg_asm.c
@@ -34,15 +34,15 @@ int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf)
 	unsigned id = cf->id;
 
 	switch (cf->inst) {
-	case (EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3):
-	case (EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3):
-	case (EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3):
-	case (EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3):
+	case EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU:
+	case EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER:
+	case EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER:
+	case EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE:
 		bc->bytecode[id++] = S_SQ_CF_ALU_WORD0_ADDR(cf->addr >> 1) |
 			S_SQ_CF_ALU_WORD0_KCACHE_MODE0(cf->kcache[0].mode) |
 			S_SQ_CF_ALU_WORD0_KCACHE_BANK0(cf->kcache[0].bank) |
 			S_SQ_CF_ALU_WORD0_KCACHE_BANK1(cf->kcache[1].bank);
-		bc->bytecode[id++] = S_SQ_CF_ALU_WORD1_CF_INST(cf->inst >> 3) |
+		bc->bytecode[id++] = cf->inst |
 			S_SQ_CF_ALU_WORD1_KCACHE_MODE1(cf->kcache[1].mode) |
 			S_SQ_CF_ALU_WORD1_KCACHE_ADDR0(cf->kcache[0].addr) |
 			S_SQ_CF_ALU_WORD1_KCACHE_ADDR1(cf->kcache[1].addr) |
@@ -52,7 +52,7 @@ int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf)
 	case EG_V_SQ_CF_WORD1_SQ_CF_INST_TEX:
 	case EG_V_SQ_CF_WORD1_SQ_CF_INST_VTX:
 		bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->addr >> 1);
-		bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(cf->inst) |
+		bc->bytecode[id++] = cf->inst |
 					S_SQ_CF_WORD1_BARRIER(1) |
 					S_SQ_CF_WORD1_COUNT((cf->ndw / 4) - 1);
 		break;
@@ -68,11 +68,10 @@ int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf)
 			S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(cf->output.swizzle_z) |
 			S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(cf->output.swizzle_w) |
 			S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->output.barrier) |
-			S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(cf->output.inst);
+			cf->output.inst;
 		if (bc->chip_class == EVERGREEN) /* no EOP on cayman */
 			bc->bytecode[id] |= S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->output.end_of_program);
 		id++;
-
 		break;
 	case EG_V_SQ_CF_WORD1_SQ_CF_INST_JUMP:
 	case EG_V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
@@ -85,11 +84,10 @@ int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf)
 	case EG_V_SQ_CF_WORD1_SQ_CF_INST_RETURN:
 	case CM_V_SQ_CF_WORD1_SQ_CF_INST_END:
 		bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->cf_addr >> 1);
-		bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(cf->inst) |
+		bc->bytecode[id++] = cf->inst |
 					S_SQ_CF_WORD1_BARRIER(1) |
 					S_SQ_CF_WORD1_COND(cf->cond) |
 					S_SQ_CF_WORD1_POP_COUNT(cf->pop_count);
-
 		break;
 	default:
 		R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst);
diff --git a/src/gallium/drivers/r600/eg_sq.h b/src/gallium/drivers/r600/eg_sq.h
index f80e8bd..854c1b8 100644
--- a/src/gallium/drivers/r600/eg_sq.h
+++ b/src/gallium/drivers/r600/eg_sq.h
@@ -97,14 +97,6 @@
 #define   S_SQ_CF_ALU_WORD1_CF_INST(x)                               (((x) & 0xF) << 26)
 #define   G_SQ_CF_ALU_WORD1_CF_INST(x)                               (((x) >> 26) & 0xF)
 #define   C_SQ_CF_ALU_WORD1_CF_INST                                  0xC3FFFFFF
-#define     V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU                         0x00000008
-#define     V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE             0x00000009
-#define     V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER               0x0000000A
-#define     V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER              0x0000000B
-#define     V_SQ_CF_ALU_WORD1_SQ_CF_INST_EXTENDED                    0x0000000C
-#define     V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_CONTINUE                0x0000000D
-#define     V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_BREAK                   0x0000000E
-#define     V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_ELSE_AFTER              0x0000000F
 #define   S_SQ_CF_ALU_WORD1_WHOLE_QUAD_MODE(x)                       (((x) & 0x1) << 30)
 #define   G_SQ_CF_ALU_WORD1_WHOLE_QUAD_MODE(x)                       (((x) >> 30) & 0x1)
 #define   C_SQ_CF_ALU_WORD1_WHOLE_QUAD_MODE                          0xBFFFFFFF
diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index f923c62..02586c9 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -1160,7 +1160,7 @@ static int r600_bytecode_alloc_kcache_lines(struct r600_bytecode *bc, struct r60
 		if ((r = r600_bytecode_add_cf(bc))) {
 			return r;
 		}
-		bc->cf_last->inst = (type << 3);
+		bc->cf_last->inst = type;
 		kcache = bc->cf_last->kcache;
 	}
 
@@ -1248,10 +1248,10 @@ int r600_bytecode_add_alu_type(struct r600_bytecode *bc, const struct r600_bytec
 		return -ENOMEM;
 	memcpy(nalu, alu, sizeof(struct r600_bytecode_alu));
 
-	if (bc->cf_last != NULL && bc->cf_last->inst != (type << 3)) {
+	if (bc->cf_last != NULL && bc->cf_last->inst != type) {
 		/* check if we could add it anyway */
-		if (bc->cf_last->inst == (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3) &&
-			type == V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE) {
+		if (bc->cf_last->inst == BC_INST(bc, V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU) &&
+			type == BC_INST(bc, V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE)) {
 			LIST_FOR_EACH_ENTRY(lalu, &bc->cf_last->alu, list) {
 				if (lalu->predicate) {
 					bc->force_add_cf = 1;
@@ -1270,7 +1270,7 @@ int r600_bytecode_add_alu_type(struct r600_bytecode *bc, const struct r600_bytec
 			return r;
 		}
 	}
-	bc->cf_last->inst = (type << 3);
+	bc->cf_last->inst = type;
 
 	/* Check AR usage and load it if required */
 	for (i = 0; i < 3; i++)
@@ -1379,17 +1379,21 @@ static unsigned r600_bytecode_num_tex_and_vtx_instructions(const struct r600_byt
 	}
 }
 
-static inline boolean last_inst_was_vtx_fetch(struct r600_bytecode *bc)
+static inline boolean last_inst_was_not_vtx_fetch(struct r600_bytecode *bc)
 {
-	if (bc->chip_class == CAYMAN) {
-		if (bc->cf_last->inst != CM_V_SQ_CF_WORD1_SQ_CF_INST_TC)
-			return TRUE;
-	} else {
-		if (bc->cf_last->inst != V_SQ_CF_WORD1_SQ_CF_INST_VTX &&
-		    bc->cf_last->inst != V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC)
-			return TRUE;
+	switch (bc->chip_class) {
+	case R700:
+	case R600:
+		return bc->cf_last->inst != V_SQ_CF_WORD1_SQ_CF_INST_VTX &&
+		       bc->cf_last->inst != V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC;
+	case EVERGREEN:
+		return bc->cf_last->inst != EG_V_SQ_CF_WORD1_SQ_CF_INST_VTX;
+	case CAYMAN:
+		return bc->cf_last->inst != CM_V_SQ_CF_WORD1_SQ_CF_INST_TC;
+	default:
+		R600_ERR("Unknown chip class %d.\n", bc->chip_class);
+		return FALSE;
 	}
-	return FALSE;
 }
 
 int r600_bytecode_add_vtx(struct r600_bytecode *bc, const struct r600_bytecode_vtx *vtx)
@@ -1403,17 +1407,28 @@ int r600_bytecode_add_vtx(struct r600_bytecode *bc, const struct r600_bytecode_v
 
 	/* cf can contains only alu or only vtx or only tex */
 	if (bc->cf_last == NULL ||
-	    last_inst_was_vtx_fetch(bc) ||
+	    last_inst_was_not_vtx_fetch(bc) ||
 	    bc->force_add_cf) {
 		r = r600_bytecode_add_cf(bc);
 		if (r) {
 			free(nvtx);
 			return r;
 		}
-		if (bc->chip_class == CAYMAN)
-			bc->cf_last->inst = CM_V_SQ_CF_WORD1_SQ_CF_INST_TC;
-		else
+		switch (bc->chip_class) {
+		case R600:
+		case R700:
 			bc->cf_last->inst = V_SQ_CF_WORD1_SQ_CF_INST_VTX;
+			break;
+		case EVERGREEN:
+			bc->cf_last->inst = EG_V_SQ_CF_WORD1_SQ_CF_INST_VTX;
+			break;
+		case CAYMAN:
+			bc->cf_last->inst = CM_V_SQ_CF_WORD1_SQ_CF_INST_TC;
+			break;
+		default:
+			R600_ERR("Unknown chip class %d.\n", bc->chip_class);
+			return -EINVAL;
+		}
 	}
 	LIST_ADDTAIL(&nvtx->list, &bc->cf_last->vtx);
 	/* each fetch use 4 dwords */
@@ -1435,7 +1450,7 @@ int r600_bytecode_add_tex(struct r600_bytecode *bc, const struct r600_bytecode_t
 
 	/* we can't fetch data und use it as texture lookup address in the same TEX clause */
 	if (bc->cf_last != NULL &&
-		bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_TEX) {
+		bc->cf_last->inst == BC_INST(bc, V_SQ_CF_WORD1_SQ_CF_INST_TEX)) {
 		struct r600_bytecode_tex *ttex;
 		LIST_FOR_EACH_ENTRY(ttex, &bc->cf_last->tex, list) {
 			if (ttex->dst_gpr == ntex->src_gpr) {
@@ -1450,14 +1465,14 @@ int r600_bytecode_add_tex(struct r600_bytecode *bc, const struct r600_bytecode_t
 
 	/* cf can contains only alu or only vtx or only tex */
 	if (bc->cf_last == NULL ||
-		bc->cf_last->inst != V_SQ_CF_WORD1_SQ_CF_INST_TEX ||
+		bc->cf_last->inst != BC_INST(bc, V_SQ_CF_WORD1_SQ_CF_INST_TEX) ||
 	        bc->force_add_cf) {
 		r = r600_bytecode_add_cf(bc);
 		if (r) {
 			free(ntex);
 			return r;
 		}
-		bc->cf_last->inst = V_SQ_CF_WORD1_SQ_CF_INST_TEX;
+		bc->cf_last->inst = BC_INST(bc, V_SQ_CF_WORD1_SQ_CF_INST_TEX);
 	}
 	if (ntex->src_gpr >= bc->ngpr) {
 		bc->ngpr = ntex->src_gpr + 1;
@@ -1595,7 +1610,7 @@ static int r600_bytecode_alu_build(struct r600_bytecode *bc, struct r600_bytecod
 static void r600_bytecode_cf_vtx_build(uint32_t *bytecode, const struct r600_bytecode_cf *cf)
 {
 	*bytecode++ = S_SQ_CF_WORD0_ADDR(cf->addr >> 1);
-	*bytecode++ = S_SQ_CF_WORD1_CF_INST(cf->inst) |
+	*bytecode++ = cf->inst |
 			S_SQ_CF_WORD1_BARRIER(1) |
 			S_SQ_CF_WORD1_COUNT((cf->ndw / 4) - 1);
 }
@@ -1606,16 +1621,16 @@ static int r600_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode
 	unsigned id = cf->id;
 
 	switch (cf->inst) {
-	case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3):
-	case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3):
-	case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3):
-	case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3):
+	case V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU:
+	case V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE:
+	case V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER:
+	case V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER:
 		bc->bytecode[id++] = S_SQ_CF_ALU_WORD0_ADDR(cf->addr >> 1) |
 			S_SQ_CF_ALU_WORD0_KCACHE_MODE0(cf->kcache[0].mode) |
 			S_SQ_CF_ALU_WORD0_KCACHE_BANK0(cf->kcache[0].bank) |
 			S_SQ_CF_ALU_WORD0_KCACHE_BANK1(cf->kcache[1].bank);
 
-		bc->bytecode[id++] = S_SQ_CF_ALU_WORD1_CF_INST(cf->inst >> 3) |
+		bc->bytecode[id++] = cf->inst |
 			S_SQ_CF_ALU_WORD1_KCACHE_MODE1(cf->kcache[1].mode) |
 			S_SQ_CF_ALU_WORD1_KCACHE_ADDR0(cf->kcache[0].addr) |
 			S_SQ_CF_ALU_WORD1_KCACHE_ADDR1(cf->kcache[1].addr) |
@@ -1643,7 +1658,7 @@ static int r600_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode
 			S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(cf->output.swizzle_z) |
 			S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(cf->output.swizzle_w) |
 			S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->output.barrier) |
-			S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(cf->output.inst) |
+			cf->output.inst |
 			S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->output.end_of_program);
 		break;
 	case V_SQ_CF_WORD1_SQ_CF_INST_JUMP:
@@ -1656,7 +1671,7 @@ static int r600_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode
 	case V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS:
 	case V_SQ_CF_WORD1_SQ_CF_INST_RETURN:
 		bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->cf_addr >> 1);
-		bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(cf->inst) |
+		bc->bytecode[id++] = cf->inst |
 					S_SQ_CF_WORD1_BARRIER(1) |
 			                S_SQ_CF_WORD1_COND(cf->cond) |
 			                S_SQ_CF_WORD1_POP_COUNT(cf->pop_count);
@@ -1690,38 +1705,64 @@ int r600_bytecode_build(struct r600_bytecode *bc)
 	/* addr start after all the CF instructions */
 	addr = bc->cf_last->id + 2;
 	LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
-		switch (cf->inst) {
-		case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3):
-		case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3):
-		case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3):
-		case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3):
-			break;
-		case V_SQ_CF_WORD1_SQ_CF_INST_TEX:
-		case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
-		case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC:
-			/* fetch node need to be 16 bytes aligned*/
-			addr += 3;
-			addr &= 0xFFFFFFFCUL;
-			break;
-		case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT:
-		case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE:
-		case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT:
-		case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE:
-			break;
-		case V_SQ_CF_WORD1_SQ_CF_INST_JUMP:
-		case V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
-		case V_SQ_CF_WORD1_SQ_CF_INST_POP:
-		case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL:
-		case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END:
-		case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE:
-		case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK:
-		case V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS:
-		case V_SQ_CF_WORD1_SQ_CF_INST_RETURN:
-		case CM_V_SQ_CF_WORD1_SQ_CF_INST_END:
-			break;
-		default:
-			R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst);
-			return -EINVAL;
+		if (bc->chip_class >= EVERGREEN) {
+			switch (cf->inst) {
+			case EG_V_SQ_CF_WORD1_SQ_CF_INST_TEX:
+			case EG_V_SQ_CF_WORD1_SQ_CF_INST_VTX:
+				/* fetch node need to be 16 bytes aligned*/
+				addr += 3;
+				addr &= 0xFFFFFFFCUL;
+				break;
+			case EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU:
+			case EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER:
+			case EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER:
+			case EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE:
+			case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT:
+			case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE:
+			case EG_V_SQ_CF_WORD1_SQ_CF_INST_JUMP:
+			case EG_V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
+			case EG_V_SQ_CF_WORD1_SQ_CF_INST_POP:
+			case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL:
+			case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END:
+			case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE:
+			case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK:
+			case EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS:
+			case EG_V_SQ_CF_WORD1_SQ_CF_INST_RETURN:
+			case CM_V_SQ_CF_WORD1_SQ_CF_INST_END:
+				break;
+			default:
+				R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst);
+				return -EINVAL;
+			}
+		} else {
+			switch (cf->inst) {
+			case V_SQ_CF_WORD1_SQ_CF_INST_TEX:
+			case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
+			case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC:
+				/* fetch node need to be 16 bytes aligned*/
+				addr += 3;
+				addr &= 0xFFFFFFFCUL;
+				break;
+			case V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU:
+			case V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER:
+			case V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER:
+			case V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE:
+			case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT:
+			case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE:
+			case V_SQ_CF_WORD1_SQ_CF_INST_JUMP:
+			case V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
+			case V_SQ_CF_WORD1_SQ_CF_INST_POP:
+			case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL:
+			case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END:
+			case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE:
+			case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK:
+			case V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS:
+			case V_SQ_CF_WORD1_SQ_CF_INST_RETURN:
+				break;
+			default:
+				R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst);
+				return -EINVAL;
+			}
 		}
 		cf->addr = addr;
 		addr += cf->ndw;
@@ -1733,92 +1774,158 @@ int r600_bytecode_build(struct r600_bytecode *bc)
 		return -ENOMEM;
 	LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
 		addr = cf->addr;
-		if (bc->chip_class >= EVERGREEN)
+		if (bc->chip_class >= EVERGREEN) {
 			r = eg_bytecode_cf_build(bc, cf);
-		else
-			r = r600_bytecode_cf_build(bc, cf);
-		if (r)
-			return r;
-		switch (cf->inst) {
-		case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3):
-		case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3):
-		case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3):
-		case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3):
-			nliteral = 0;
-			memset(literal, 0, sizeof(literal));
-			LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) {
-				r = r600_bytecode_alu_nliterals(bc, alu, literal, &nliteral);
-				if (r)
-					return r;
-				r600_bytecode_alu_adjust_literals(bc, alu, literal, nliteral);
-				switch(bc->chip_class) {
-				case R600:
-					r = r600_bytecode_alu_build(bc, alu, addr);
-					break;
-				case R700:
-				case EVERGREEN: /* eg alu is same encoding as r700 */
-				case CAYMAN: /* eg alu is same encoding as r700 */
-					r = r700_bytecode_alu_build(bc, alu, addr);
-					break;
-				default:
-					R600_ERR("unknown chip class %d.\n", bc->chip_class);
-					return -EINVAL;
+			if (r)
+				return r;
+
+			switch (cf->inst) {
+			case EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU:
+			case EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER:
+			case EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER:
+			case EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE:
+				nliteral = 0;
+				memset(literal, 0, sizeof(literal));
+				LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) {
+					r = r600_bytecode_alu_nliterals(bc, alu, literal, &nliteral);
+					if (r)
+						return r;
+					r600_bytecode_alu_adjust_literals(bc, alu, literal, nliteral);
+					switch(bc->chip_class) {
+					case EVERGREEN: /* eg alu is same encoding as r700 */
+					case CAYMAN:
+						r = r700_bytecode_alu_build(bc, alu, addr);
+						break;
+					default:
+						R600_ERR("unknown chip class %d.\n", bc->chip_class);
+						return -EINVAL;
+					}
+					if (r)
+						return r;
+					addr += 2;
+					if (alu->last) {
+						for (i = 0; i < align(nliteral, 2); ++i) {
+							bc->bytecode[addr++] = literal[i];
+						}
+						nliteral = 0;
+						memset(literal, 0, sizeof(literal));
+					}
 				}
-				if (r)
-					return r;
-				addr += 2;
-				if (alu->last) {
-					for (i = 0; i < align(nliteral, 2); ++i) {
-						bc->bytecode[addr++] = literal[i];
+				break;
+			case EG_V_SQ_CF_WORD1_SQ_CF_INST_VTX:
+				LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
+					r = r600_bytecode_vtx_build(bc, vtx, addr);
+					if (r)
+						return r;
+					addr += 4;
+				}
+				break;
+			case EG_V_SQ_CF_WORD1_SQ_CF_INST_TEX:
+				if (bc->chip_class == CAYMAN) {
+					LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
+						r = r600_bytecode_vtx_build(bc, vtx, addr);
+						if (r)
+							return r;
+						addr += 4;
 					}
-					nliteral = 0;
-					memset(literal, 0, sizeof(literal));
 				}
+				LIST_FOR_EACH_ENTRY(tex, &cf->tex, list) {
+					r = r600_bytecode_tex_build(bc, tex, addr);
+					if (r)
+						return r;
+					addr += 4;
+				}
+				break;
+			case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT:
+			case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE:
+			case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL:
+			case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END:
+			case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE:
+			case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK:
+			case EG_V_SQ_CF_WORD1_SQ_CF_INST_JUMP:
+			case EG_V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
+			case EG_V_SQ_CF_WORD1_SQ_CF_INST_POP:
+			case EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS:
+			case EG_V_SQ_CF_WORD1_SQ_CF_INST_RETURN:
+			case CM_V_SQ_CF_WORD1_SQ_CF_INST_END:
+				break;
+			default:
+				R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst);
+				return -EINVAL;
 			}
-			break;
-		case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
-		case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC:
-			LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
-				r = r600_bytecode_vtx_build(bc, vtx, addr);
-				if (r)
-					return r;
-				addr += 4;
-			}
-			break;
-		case V_SQ_CF_WORD1_SQ_CF_INST_TEX:
-			if (bc->chip_class == CAYMAN) {
+		} else {
+			r = r600_bytecode_cf_build(bc, cf);
+			if (r)
+				return r;
+
+			switch (cf->inst) {
+			case V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU:
+			case V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER:
+			case V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER:
+			case V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE:
+				nliteral = 0;
+				memset(literal, 0, sizeof(literal));
+				LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) {
+					r = r600_bytecode_alu_nliterals(bc, alu, literal, &nliteral);
+					if (r)
+						return r;
+					r600_bytecode_alu_adjust_literals(bc, alu, literal, nliteral);
+					switch(bc->chip_class) {
+					case R600:
+						r = r600_bytecode_alu_build(bc, alu, addr);
+						break;
+					case R700:
+						r = r700_bytecode_alu_build(bc, alu, addr);
+						break;
+					default:
+						R600_ERR("unknown chip class %d.\n", bc->chip_class);
+						return -EINVAL;
+					}
+					if (r)
+						return r;
+					addr += 2;
+					if (alu->last) {
+						for (i = 0; i < align(nliteral, 2); ++i) {
+							bc->bytecode[addr++] = literal[i];
+						}
+						nliteral = 0;
+						memset(literal, 0, sizeof(literal));
+					}
+				}
+				break;
+			case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
+			case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC:
 				LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
 					r = r600_bytecode_vtx_build(bc, vtx, addr);
 					if (r)
 						return r;
 					addr += 4;
 				}
+				break;
+			case V_SQ_CF_WORD1_SQ_CF_INST_TEX:
+				LIST_FOR_EACH_ENTRY(tex, &cf->tex, list) {
+					r = r600_bytecode_tex_build(bc, tex, addr);
+					if (r)
+						return r;
+					addr += 4;
+				}
+				break;
+			case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT:
+			case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE:
+			case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL:
+			case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END:
+			case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE:
+			case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK:
+			case V_SQ_CF_WORD1_SQ_CF_INST_JUMP:
+			case V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
+			case V_SQ_CF_WORD1_SQ_CF_INST_POP:
+			case V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS:
+			case V_SQ_CF_WORD1_SQ_CF_INST_RETURN:
+				break;
+			default:
+				R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst);
+				return -EINVAL;
 			}
-			LIST_FOR_EACH_ENTRY(tex, &cf->tex, list) {
-				r = r600_bytecode_tex_build(bc, tex, addr);
-				if (r)
-					return r;
-				addr += 4;
-			}
-			break;
-		case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT:
-		case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE:
-		case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT:
-		case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE:
-		case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL:
-		case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END:
-		case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE:
-		case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK:
-		case V_SQ_CF_WORD1_SQ_CF_INST_JUMP:
-		case V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
-		case V_SQ_CF_WORD1_SQ_CF_INST_POP:
-		case V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS:
-		case V_SQ_CF_WORD1_SQ_CF_INST_RETURN:
-		case CM_V_SQ_CF_WORD1_SQ_CF_INST_END:
-			break;
-		default:
-			R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst);
-			return -EINVAL;
 		}
 	}
 	return 0;
@@ -1893,72 +2000,144 @@ void r600_bytecode_dump(struct r600_bytecode *bc)
 	LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
 		id = cf->id;
 
-		switch (cf->inst) {
-		case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3):
-		case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3):
-		case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3):
-		case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3):
-			fprintf(stderr, "%04d %08X ALU ", id, bc->bytecode[id]);
-			fprintf(stderr, "ADDR:%d ", cf->addr);
-			fprintf(stderr, "KCACHE_MODE0:%X ", cf->kcache[0].mode);
-			fprintf(stderr, "KCACHE_BANK0:%X ", cf->kcache[0].bank);
-			fprintf(stderr, "KCACHE_BANK1:%X\n", cf->kcache[1].bank);
-			id++;
-			fprintf(stderr, "%04d %08X ALU ", id, bc->bytecode[id]);
-			fprintf(stderr, "INST:0x%x ", cf->inst);
-			fprintf(stderr, "KCACHE_MODE1:%X ", cf->kcache[1].mode);
-			fprintf(stderr, "KCACHE_ADDR0:%X ", cf->kcache[0].addr);
-			fprintf(stderr, "KCACHE_ADDR1:%X ", cf->kcache[1].addr);
-			fprintf(stderr, "COUNT:%d\n", cf->ndw / 2);
-			break;
-		case V_SQ_CF_WORD1_SQ_CF_INST_TEX:
-		case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
-		case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC:
-			fprintf(stderr, "%04d %08X TEX/VTX ", id, bc->bytecode[id]);
-			fprintf(stderr, "ADDR:%d\n", cf->addr);
-			id++;
-			fprintf(stderr, "%04d %08X TEX/VTX ", id, bc->bytecode[id]);
-			fprintf(stderr, "INST:0x%x ", cf->inst);
-			fprintf(stderr, "COUNT:%d\n", cf->ndw / 4);
-			break;
-		case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT:
-		case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE:
-		case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT:
-		case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE:
-			fprintf(stderr, "%04d %08X EXPORT ", id, bc->bytecode[id]);
-			fprintf(stderr, "GPR:%X ", cf->output.gpr);
-			fprintf(stderr, "ELEM_SIZE:%X ", cf->output.elem_size);
-			fprintf(stderr, "ARRAY_BASE:%X ", cf->output.array_base);
-			fprintf(stderr, "TYPE:%X\n", cf->output.type);
-			id++;
-			fprintf(stderr, "%04d %08X EXPORT ", id, bc->bytecode[id]);
-			fprintf(stderr, "SWIZ_X:%X ", cf->output.swizzle_x);
-			fprintf(stderr, "SWIZ_Y:%X ", cf->output.swizzle_y);
-			fprintf(stderr, "SWIZ_Z:%X ", cf->output.swizzle_z);
-			fprintf(stderr, "SWIZ_W:%X ", cf->output.swizzle_w);
-			fprintf(stderr, "BARRIER:%X ", cf->output.barrier);
-			fprintf(stderr, "INST:0x%x ", cf->output.inst);
-			fprintf(stderr, "BURST_COUNT:%d ", cf->output.burst_count);
-			fprintf(stderr, "EOP:%X\n", cf->output.end_of_program);
-			break;
-		case V_SQ_CF_WORD1_SQ_CF_INST_JUMP:
-		case V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
-		case V_SQ_CF_WORD1_SQ_CF_INST_POP:
-		case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL:
-		case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END:
-		case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE:
-		case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK:
-		case V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS:
-		case V_SQ_CF_WORD1_SQ_CF_INST_RETURN:
-		case CM_V_SQ_CF_WORD1_SQ_CF_INST_END:
-			fprintf(stderr, "%04d %08X CF ", id, bc->bytecode[id]);
-			fprintf(stderr, "ADDR:%d\n", cf->cf_addr);
-			id++;
-			fprintf(stderr, "%04d %08X CF ", id, bc->bytecode[id]);
-			fprintf(stderr, "INST:0x%x ", cf->inst);
-			fprintf(stderr, "COND:%X ", cf->cond);
-			fprintf(stderr, "POP_COUNT:%X\n", cf->pop_count);
-			break;
+		if (bc->chip_class >= EVERGREEN) {
+			switch (cf->inst) {
+			case EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU:
+			case EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER:
+			case EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER:
+			case EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE:
+				fprintf(stderr, "%04d %08X ALU ", id, bc->bytecode[id]);
+				fprintf(stderr, "ADDR:%d ", cf->addr);
+				fprintf(stderr, "KCACHE_MODE0:%X ", cf->kcache[0].mode);
+				fprintf(stderr, "KCACHE_BANK0:%X ", cf->kcache[0].bank);
+				fprintf(stderr, "KCACHE_BANK1:%X\n", cf->kcache[1].bank);
+				id++;
+				fprintf(stderr, "%04d %08X ALU ", id, bc->bytecode[id]);
+				fprintf(stderr, "INST:0x%x ", EG_G_SQ_CF_ALU_WORD1_CF_INST(cf->inst));
+				fprintf(stderr, "KCACHE_MODE1:%X ", cf->kcache[1].mode);
+				fprintf(stderr, "KCACHE_ADDR0:%X ", cf->kcache[0].addr);
+				fprintf(stderr, "KCACHE_ADDR1:%X ", cf->kcache[1].addr);
+				fprintf(stderr, "COUNT:%d\n", cf->ndw / 2);
+				break;
+			case EG_V_SQ_CF_WORD1_SQ_CF_INST_TEX:
+			case EG_V_SQ_CF_WORD1_SQ_CF_INST_VTX:
+				fprintf(stderr, "%04d %08X TEX/VTX ", id, bc->bytecode[id]);
+				fprintf(stderr, "ADDR:%d\n", cf->addr);
+				id++;
+				fprintf(stderr, "%04d %08X TEX/VTX ", id, bc->bytecode[id]);
+				fprintf(stderr, "INST:0x%x ", EG_G_SQ_CF_WORD1_CF_INST(cf->inst));
+				fprintf(stderr, "COUNT:%d\n", cf->ndw / 4);
+				break;
+			case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT:
+			case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE:
+				fprintf(stderr, "%04d %08X EXPORT ", id, bc->bytecode[id]);
+				fprintf(stderr, "GPR:%X ", cf->output.gpr);
+				fprintf(stderr, "ELEM_SIZE:%X ", cf->output.elem_size);
+				fprintf(stderr, "ARRAY_BASE:%X ", cf->output.array_base);
+				fprintf(stderr, "TYPE:%X\n", cf->output.type);
+				id++;
+				fprintf(stderr, "%04d %08X EXPORT ", id, bc->bytecode[id]);
+				fprintf(stderr, "SWIZ_X:%X ", cf->output.swizzle_x);
+				fprintf(stderr, "SWIZ_Y:%X ", cf->output.swizzle_y);
+				fprintf(stderr, "SWIZ_Z:%X ", cf->output.swizzle_z);
+				fprintf(stderr, "SWIZ_W:%X ", cf->output.swizzle_w);
+				fprintf(stderr, "BARRIER:%X ", cf->output.barrier);
+				fprintf(stderr, "INST:0x%x ", EG_G_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(cf->output.inst));
+				fprintf(stderr, "BURST_COUNT:%d ", cf->output.burst_count);
+				fprintf(stderr, "EOP:%X\n", cf->output.end_of_program);
+				break;
+			case EG_V_SQ_CF_WORD1_SQ_CF_INST_JUMP:
+			case EG_V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
+			case EG_V_SQ_CF_WORD1_SQ_CF_INST_POP:
+			case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL:
+			case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END:
+			case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE:
+			case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK:
+			case EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS:
+			case EG_V_SQ_CF_WORD1_SQ_CF_INST_RETURN:
+			case CM_V_SQ_CF_WORD1_SQ_CF_INST_END:
+				fprintf(stderr, "%04d %08X CF ", id, bc->bytecode[id]);
+				fprintf(stderr, "ADDR:%d\n", cf->cf_addr);
+				id++;
+				fprintf(stderr, "%04d %08X CF ", id, bc->bytecode[id]);
+				if (bc->chip_class >= EVERGREEN) {
+					fprintf(stderr, "INST:0x%x ", EG_G_SQ_CF_WORD1_CF_INST(cf->inst));
+				} else {
+					fprintf(stderr, "INST:0x%x ", R600_G_SQ_CF_WORD1_CF_INST(cf->inst));
+				}
+				fprintf(stderr, "COND:%X ", cf->cond);
+				fprintf(stderr, "POP_COUNT:%X\n", cf->pop_count);
+				break;
+			default:
+				R600_ERR("Unknown instruction %0x\n", cf->inst);
+			}
+		} else {
+			switch (cf->inst) {
+			case V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU:
+			case V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER:
+			case V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER:
+			case V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE:
+				fprintf(stderr, "%04d %08X ALU ", id, bc->bytecode[id]);
+				fprintf(stderr, "ADDR:%d ", cf->addr);
+				fprintf(stderr, "KCACHE_MODE0:%X ", cf->kcache[0].mode);
+				fprintf(stderr, "KCACHE_BANK0:%X ", cf->kcache[0].bank);
+				fprintf(stderr, "KCACHE_BANK1:%X\n", cf->kcache[1].bank);
+				id++;
+				fprintf(stderr, "%04d %08X ALU ", id, bc->bytecode[id]);
+				fprintf(stderr, "INST:0x%x ", R600_G_SQ_CF_ALU_WORD1_CF_INST(cf->inst));
+				fprintf(stderr, "KCACHE_MODE1:%X ", cf->kcache[1].mode);
+				fprintf(stderr, "KCACHE_ADDR0:%X ", cf->kcache[0].addr);
+				fprintf(stderr, "KCACHE_ADDR1:%X ", cf->kcache[1].addr);
+				fprintf(stderr, "COUNT:%d\n", cf->ndw / 2);
+				break;
+			case V_SQ_CF_WORD1_SQ_CF_INST_TEX:
+			case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
+			case V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC:
+				fprintf(stderr, "%04d %08X TEX/VTX ", id, bc->bytecode[id]);
+				fprintf(stderr, "ADDR:%d\n", cf->addr);
+				id++;
+				fprintf(stderr, "%04d %08X TEX/VTX ", id, bc->bytecode[id]);
+				fprintf(stderr, "INST:0x%x ", R600_G_SQ_CF_WORD1_CF_INST(cf->inst));
+				fprintf(stderr, "COUNT:%d\n", cf->ndw / 4);
+				break;
+			case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT:
+			case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE:
+				fprintf(stderr, "%04d %08X EXPORT ", id, bc->bytecode[id]);
+				fprintf(stderr, "GPR:%X ", cf->output.gpr);
+				fprintf(stderr, "ELEM_SIZE:%X ", cf->output.elem_size);
+				fprintf(stderr, "ARRAY_BASE:%X ", cf->output.array_base);
+				fprintf(stderr, "TYPE:%X\n", cf->output.type);
+				id++;
+				fprintf(stderr, "%04d %08X EXPORT ", id, bc->bytecode[id]);
+				fprintf(stderr, "SWIZ_X:%X ", cf->output.swizzle_x);
+				fprintf(stderr, "SWIZ_Y:%X ", cf->output.swizzle_y);
+				fprintf(stderr, "SWIZ_Z:%X ", cf->output.swizzle_z);
+				fprintf(stderr, "SWIZ_W:%X ", cf->output.swizzle_w);
+				fprintf(stderr, "BARRIER:%X ", cf->output.barrier);
+				fprintf(stderr, "INST:0x%x ", R600_G_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(cf->output.inst));
+				fprintf(stderr, "BURST_COUNT:%d ", cf->output.burst_count);
+				fprintf(stderr, "EOP:%X\n", cf->output.end_of_program);
+				break;
+			case V_SQ_CF_WORD1_SQ_CF_INST_JUMP:
+			case V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
+			case V_SQ_CF_WORD1_SQ_CF_INST_POP:
+			case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL:
+			case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END:
+			case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE:
+			case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK:
+			case V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS:
+			case V_SQ_CF_WORD1_SQ_CF_INST_RETURN:
+				fprintf(stderr, "%04d %08X CF ", id, bc->bytecode[id]);
+				fprintf(stderr, "ADDR:%d\n", cf->cf_addr);
+				id++;
+				fprintf(stderr, "%04d %08X CF ", id, bc->bytecode[id]);
+				fprintf(stderr, "INST:0x%x ", R600_G_SQ_CF_WORD1_CF_INST(cf->inst));
+				fprintf(stderr, "COND:%X ", cf->cond);
+				fprintf(stderr, "POP_COUNT:%X\n", cf->pop_count);
+				break;
+			default:
+				R600_ERR("Unknown instruction %0x\n", cf->inst);
+			}
 		}
 
 		id = cf->addr;
diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h
index f4a6cfd..0fd4467 100644
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -109,7 +109,10 @@ struct r600_bytecode_output {
 	unsigned			array_base;
 	unsigned			type;
 	unsigned			end_of_program;
+
+	/* CF_INST, already shifted into its final bit position; it only needs to be OR'ed into the bytecode word. */
 	unsigned			inst;
+
 	unsigned			elem_size;
 	unsigned			gpr;
 	unsigned			swizzle_x;
@@ -128,7 +131,10 @@ struct r600_bytecode_kcache {
 
 struct r600_bytecode_cf {
 	struct list_head		list;
+
+	/* CF_INST, already shifted into its final bit position; it only needs to be OR'ed into the bytecode word. */
 	unsigned			inst;
+
 	unsigned			addr;
 	unsigned			ndw;
 	unsigned			id;
diff --git a/src/gallium/drivers/r600/r600_opcodes.h b/src/gallium/drivers/r600/r600_opcodes.h
index 024b3a7..bc000ed 100644
--- a/src/gallium/drivers/r600/r600_opcodes.h
+++ b/src/gallium/drivers/r600/r600_opcodes.h
@@ -2,39 +2,53 @@
 #ifndef R600_OPCODES_H
 #define R600_OPCODES_H
 
-#define     V_SQ_CF_WORD1_SQ_CF_INST_NOP                             0x00000000
-#define     V_SQ_CF_WORD1_SQ_CF_INST_TEX                             0x00000001
-#define     V_SQ_CF_WORD1_SQ_CF_INST_VTX                             0x00000002
-#define     V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC                          0x00000003
-#define     V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START                      0x00000004
-#define     V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END                        0x00000005
-#define     V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_DX10                 0x00000006
-#define     V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL                0x00000007
-#define     V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE                   0x00000008
-#define     V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK                      0x00000009
-#define     V_SQ_CF_WORD1_SQ_CF_INST_JUMP                            0x0000000A
-#define     V_SQ_CF_WORD1_SQ_CF_INST_PUSH                            0x0000000B
-#define     V_SQ_CF_WORD1_SQ_CF_INST_PUSH_ELSE                       0x0000000C
-#define     V_SQ_CF_WORD1_SQ_CF_INST_ELSE                            0x0000000D
-#define     V_SQ_CF_WORD1_SQ_CF_INST_POP                             0x0000000E
-#define     V_SQ_CF_WORD1_SQ_CF_INST_POP_JUMP                        0x0000000F
-#define     V_SQ_CF_WORD1_SQ_CF_INST_POP_PUSH                        0x00000010
-#define     V_SQ_CF_WORD1_SQ_CF_INST_POP_PUSH_ELSE                   0x00000011
-#define     V_SQ_CF_WORD1_SQ_CF_INST_CALL                            0x00000012
-#define     V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS                         0x00000013
-#define     V_SQ_CF_WORD1_SQ_CF_INST_RETURN                          0x00000014
-#define     V_SQ_CF_WORD1_SQ_CF_INST_EMIT_VERTEX                     0x00000015
-#define     V_SQ_CF_WORD1_SQ_CF_INST_EMIT_CUT_VERTEX                 0x00000016
-#define     V_SQ_CF_WORD1_SQ_CF_INST_CUT_VERTEX                      0x00000017
-#define     V_SQ_CF_WORD1_SQ_CF_INST_KILL                            0x00000018
+#define R600_S_SQ_CF_WORD1_CF_INST(x)                              (((x) & 0x7F) << 23)
+#define R600_G_SQ_CF_WORD1_CF_INST(x)                              (((x) >> 23) & 0x7F)
+#define R600_S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(x)                 (((x) & 0x7F) << 23)
+#define R600_G_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(x)                 (((x) >> 23) & 0x7F)
+#define R600_S_SQ_CF_ALU_WORD1_CF_INST(x)                          (((x) & 0xF) << 26)
+#define R600_G_SQ_CF_ALU_WORD1_CF_INST(x)                          (((x) >> 26) & 0xF)
 
-#define     V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU                         0x00000008
-#define     V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE             0x00000009
-#define     V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER               0x0000000A
-#define     V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER              0x0000000B
-#define     V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_CONTINUE                0x0000000D
-#define     V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_BREAK                   0x0000000E
-#define     V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_ELSE_AFTER              0x0000000F
+#define EG_S_SQ_CF_WORD1_CF_INST(x)                                (((x) & 0xFF) << 22)
+#define EG_G_SQ_CF_WORD1_CF_INST(x)                                (((x) >> 22) & 0xFF)
+#define EG_S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(x)                   (((x) & 0xFF) << 22)
+#define EG_G_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(x)                   (((x) >> 22) & 0xFF)
+#define EG_S_SQ_CF_ALU_WORD1_CF_INST(x)                            (((x) & 0xF) << 26)
+#define EG_G_SQ_CF_ALU_WORD1_CF_INST(x)                            (((x) >> 26) & 0xF)
+
+#define     V_SQ_CF_WORD1_SQ_CF_INST_NOP                             R600_S_SQ_CF_WORD1_CF_INST(0x00000000)
+#define     V_SQ_CF_WORD1_SQ_CF_INST_TEX                             R600_S_SQ_CF_WORD1_CF_INST(0x00000001)
+#define     V_SQ_CF_WORD1_SQ_CF_INST_VTX                             R600_S_SQ_CF_WORD1_CF_INST(0x00000002)
+#define     V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC                          R600_S_SQ_CF_WORD1_CF_INST(0x00000003)
+#define     V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START                      R600_S_SQ_CF_WORD1_CF_INST(0x00000004)
+#define     V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END                        R600_S_SQ_CF_WORD1_CF_INST(0x00000005)
+#define     V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_DX10                 R600_S_SQ_CF_WORD1_CF_INST(0x00000006)
+#define     V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL                R600_S_SQ_CF_WORD1_CF_INST(0x00000007)
+#define     V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE                   R600_S_SQ_CF_WORD1_CF_INST(0x00000008)
+#define     V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK                      R600_S_SQ_CF_WORD1_CF_INST(0x00000009)
+#define     V_SQ_CF_WORD1_SQ_CF_INST_JUMP                            R600_S_SQ_CF_WORD1_CF_INST(0x0000000A)
+#define     V_SQ_CF_WORD1_SQ_CF_INST_PUSH                            R600_S_SQ_CF_WORD1_CF_INST(0x0000000B)
+#define     V_SQ_CF_WORD1_SQ_CF_INST_PUSH_ELSE                       R600_S_SQ_CF_WORD1_CF_INST(0x0000000C)
+#define     V_SQ_CF_WORD1_SQ_CF_INST_ELSE                            R600_S_SQ_CF_WORD1_CF_INST(0x0000000D)
+#define     V_SQ_CF_WORD1_SQ_CF_INST_POP                             R600_S_SQ_CF_WORD1_CF_INST(0x0000000E)
+#define     V_SQ_CF_WORD1_SQ_CF_INST_POP_JUMP                        R600_S_SQ_CF_WORD1_CF_INST(0x0000000F)
+#define     V_SQ_CF_WORD1_SQ_CF_INST_POP_PUSH                        R600_S_SQ_CF_WORD1_CF_INST(0x00000010)
+#define     V_SQ_CF_WORD1_SQ_CF_INST_POP_PUSH_ELSE                   R600_S_SQ_CF_WORD1_CF_INST(0x00000011)
+#define     V_SQ_CF_WORD1_SQ_CF_INST_CALL                            R600_S_SQ_CF_WORD1_CF_INST(0x00000012)
+#define     V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS                         R600_S_SQ_CF_WORD1_CF_INST(0x00000013)
+#define     V_SQ_CF_WORD1_SQ_CF_INST_RETURN                          R600_S_SQ_CF_WORD1_CF_INST(0x00000014)
+#define     V_SQ_CF_WORD1_SQ_CF_INST_EMIT_VERTEX                     R600_S_SQ_CF_WORD1_CF_INST(0x00000015)
+#define     V_SQ_CF_WORD1_SQ_CF_INST_EMIT_CUT_VERTEX                 R600_S_SQ_CF_WORD1_CF_INST(0x00000016)
+#define     V_SQ_CF_WORD1_SQ_CF_INST_CUT_VERTEX                      R600_S_SQ_CF_WORD1_CF_INST(0x00000017)
+#define     V_SQ_CF_WORD1_SQ_CF_INST_KILL                            R600_S_SQ_CF_WORD1_CF_INST(0x00000018)
+
+#define     V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU                         R600_S_SQ_CF_ALU_WORD1_CF_INST(0x00000008)
+#define     V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE             R600_S_SQ_CF_ALU_WORD1_CF_INST(0x00000009)
+#define     V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER               R600_S_SQ_CF_ALU_WORD1_CF_INST(0x0000000A)
+#define     V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER              R600_S_SQ_CF_ALU_WORD1_CF_INST(0x0000000B)
+#define     V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_CONTINUE                R600_S_SQ_CF_ALU_WORD1_CF_INST(0x0000000D)
+#define     V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_BREAK                   R600_S_SQ_CF_ALU_WORD1_CF_INST(0x0000000E)
+#define     V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_ELSE_AFTER              R600_S_SQ_CF_ALU_WORD1_CF_INST(0x0000000F)
 
 #define     V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD                       0x00000000
 #define     V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL                       0x00000001
@@ -161,77 +175,77 @@
 #define     V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGT_INT                 0x0000001D
 #define     V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT                 0x0000001E
 
-#define     V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0        0x00000020
-#define     V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1        0x00000021
-#define     V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2        0x00000022
-#define     V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3        0x00000023
-#define     V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_SCRATCH        0x00000024
-#define     V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_REDUCTION      0x00000025
-#define     V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_RING           0x00000026
-#define     V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT             0x00000027
-#define     V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE        0x00000028
+#define     V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0        R600_S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(0x00000020)
+#define     V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1        R600_S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(0x00000021)
+#define     V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2        R600_S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(0x00000022)
+#define     V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3        R600_S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(0x00000023)
+#define     V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_SCRATCH        R600_S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(0x00000024)
+#define     V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_REDUCTION      R600_S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(0x00000025)
+#define     V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_RING           R600_S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(0x00000026)
+#define     V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT             R600_S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(0x00000027)
+#define     V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE        R600_S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(0x00000028)
 
 /* cayman doesn't have VTX */
-#define     EG_V_SQ_CF_WORD1_SQ_CF_INST_NOP                             0x00000000
-#define     EG_V_SQ_CF_WORD1_SQ_CF_INST_TEX                             0x00000001
-#define     CM_V_SQ_CF_WORD1_SQ_CF_INST_TC                              0x00000001
-#define     EG_V_SQ_CF_WORD1_SQ_CF_INST_VTX                             0x00000002
-#define     CM_V_SQ_CF_WORD1_SQ_CF_INST_RSVD_2                          0x00000002
-#define     EG_V_SQ_CF_WORD1_SQ_CF_INST_GDS                             0x00000003
-#define     EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START                      0x00000004
-#define     EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END                        0x00000005
-#define     EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_DX10                 0x00000006
-#define     EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL                0x00000007
-#define     EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE                   0x00000008
-#define     EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK                      0x00000009
-#define     EG_V_SQ_CF_WORD1_SQ_CF_INST_JUMP                            0x0000000A
-#define     EG_V_SQ_CF_WORD1_SQ_CF_INST_PUSH                            0x0000000B
-#define     EG_V_SQ_CF_WORD1_SQ_CF_INST_RSVD_12                                     0x0000000C /* resvd */
-#define     EG_V_SQ_CF_WORD1_SQ_CF_INST_ELSE                            0x0000000D
-#define     EG_V_SQ_CF_WORD1_SQ_CF_INST_POP                             0x0000000E
-#define     EG_V_SQ_CF_WORD1_SQ_CF_INST_RSVD_15                                     0x0000000F
-#define     EG_V_SQ_CF_WORD1_SQ_CF_INST_RSVD_16                                     0x00000010
-#define     EG_V_SQ_CF_WORD1_SQ_CF_INST_RSVD_17                                     0x00000011
-#define     EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL                            0x00000012
-#define     EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS                         0x00000013
-#define     EG_V_SQ_CF_WORD1_SQ_CF_INST_RETURN                          0x00000014
-#define     EG_V_SQ_CF_WORD1_SQ_CF_INST_EMIT_VERTEX                     0x00000015
-#define     EG_V_SQ_CF_WORD1_SQ_CF_INST_EMIT_CUT_VERTEX                 0x00000016
-#define     EG_V_SQ_CF_WORD1_SQ_CF_INST_CUT_VERTEX                      0x00000017
-#define     EG_V_SQ_CF_WORD1_SQ_CF_INST_KILL                            0x00000018
-#define     EG_V_SQ_CF_WORD1_SQ_CF_INST_RSVD_25                                     0x00000019
-#define     EG_V_SQ_CF_WORD1_SQ_CF_INST_WAIT_ACK                        0x0000001a
-#define     EG_V_SQ_CF_WORD1_SQ_CF_INST_TC_ACK                          0x0000001b
-#define     EG_V_SQ_CF_WORD1_SQ_CF_INST_VC_ACK                          0x0000001c
-#define     CM_V_SQ_CF_WORD1_SQ_CF_INST_RSVD_28                         0x0000001c
-#define     EG_V_SQ_CF_WORD1_SQ_CF_INST_JUMPTABLE                       0x0000001d
-#define     EG_V_SQ_CF_WORD1_SQ_CF_INST_GLOBAL_WAVE_SYNC                0x0000001e
-#define     EG_V_SQ_CF_WORD1_SQ_CF_INST_HALT                            0x0000001f
+#define     EG_V_SQ_CF_WORD1_SQ_CF_INST_NOP                             EG_S_SQ_CF_WORD1_CF_INST(0x00000000)
+#define     EG_V_SQ_CF_WORD1_SQ_CF_INST_TEX                             EG_S_SQ_CF_WORD1_CF_INST(0x00000001)
+#define     CM_V_SQ_CF_WORD1_SQ_CF_INST_TC                              EG_S_SQ_CF_WORD1_CF_INST(0x00000001)
+#define     EG_V_SQ_CF_WORD1_SQ_CF_INST_VTX                             EG_S_SQ_CF_WORD1_CF_INST(0x00000002)
+#define     CM_V_SQ_CF_WORD1_SQ_CF_INST_RSVD_2                          EG_S_SQ_CF_WORD1_CF_INST(0x00000002)
+#define     EG_V_SQ_CF_WORD1_SQ_CF_INST_GDS                             EG_S_SQ_CF_WORD1_CF_INST(0x00000003)
+#define     EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START                      EG_S_SQ_CF_WORD1_CF_INST(0x00000004)
+#define     EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END                        EG_S_SQ_CF_WORD1_CF_INST(0x00000005)
+#define     EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_DX10                 EG_S_SQ_CF_WORD1_CF_INST(0x00000006)
+#define     EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL                EG_S_SQ_CF_WORD1_CF_INST(0x00000007)
+#define     EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE                   EG_S_SQ_CF_WORD1_CF_INST(0x00000008)
+#define     EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK                      EG_S_SQ_CF_WORD1_CF_INST(0x00000009)
+#define     EG_V_SQ_CF_WORD1_SQ_CF_INST_JUMP                            EG_S_SQ_CF_WORD1_CF_INST(0x0000000A)
+#define     EG_V_SQ_CF_WORD1_SQ_CF_INST_PUSH                            EG_S_SQ_CF_WORD1_CF_INST(0x0000000B)
+#define     EG_V_SQ_CF_WORD1_SQ_CF_INST_RSVD_12                         EG_S_SQ_CF_WORD1_CF_INST(0x0000000C) /* resvd */
+#define     EG_V_SQ_CF_WORD1_SQ_CF_INST_ELSE                            EG_S_SQ_CF_WORD1_CF_INST(0x0000000D)
+#define     EG_V_SQ_CF_WORD1_SQ_CF_INST_POP                             EG_S_SQ_CF_WORD1_CF_INST(0x0000000E)
+#define     EG_V_SQ_CF_WORD1_SQ_CF_INST_RSVD_15                         EG_S_SQ_CF_WORD1_CF_INST(0x0000000F)
+#define     EG_V_SQ_CF_WORD1_SQ_CF_INST_RSVD_16                         EG_S_SQ_CF_WORD1_CF_INST(0x00000010)
+#define     EG_V_SQ_CF_WORD1_SQ_CF_INST_RSVD_17                         EG_S_SQ_CF_WORD1_CF_INST(0x00000011)
+#define     EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL                            EG_S_SQ_CF_WORD1_CF_INST(0x00000012)
+#define     EG_V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS                         EG_S_SQ_CF_WORD1_CF_INST(0x00000013)
+#define     EG_V_SQ_CF_WORD1_SQ_CF_INST_RETURN                          EG_S_SQ_CF_WORD1_CF_INST(0x00000014)
+#define     EG_V_SQ_CF_WORD1_SQ_CF_INST_EMIT_VERTEX                     EG_S_SQ_CF_WORD1_CF_INST(0x00000015)
+#define     EG_V_SQ_CF_WORD1_SQ_CF_INST_EMIT_CUT_VERTEX                 EG_S_SQ_CF_WORD1_CF_INST(0x00000016)
+#define     EG_V_SQ_CF_WORD1_SQ_CF_INST_CUT_VERTEX                      EG_S_SQ_CF_WORD1_CF_INST(0x00000017)
+#define     EG_V_SQ_CF_WORD1_SQ_CF_INST_KILL                            EG_S_SQ_CF_WORD1_CF_INST(0x00000018)
+#define     EG_V_SQ_CF_WORD1_SQ_CF_INST_RSVD_25                         EG_S_SQ_CF_WORD1_CF_INST(0x00000019)
+#define     EG_V_SQ_CF_WORD1_SQ_CF_INST_WAIT_ACK                        EG_S_SQ_CF_WORD1_CF_INST(0x0000001a)
+#define     EG_V_SQ_CF_WORD1_SQ_CF_INST_TC_ACK                          EG_S_SQ_CF_WORD1_CF_INST(0x0000001b)
+#define     EG_V_SQ_CF_WORD1_SQ_CF_INST_VC_ACK                          EG_S_SQ_CF_WORD1_CF_INST(0x0000001c)
+#define     CM_V_SQ_CF_WORD1_SQ_CF_INST_RSVD_28                         EG_S_SQ_CF_WORD1_CF_INST(0x0000001c)
+#define     EG_V_SQ_CF_WORD1_SQ_CF_INST_JUMPTABLE                       EG_S_SQ_CF_WORD1_CF_INST(0x0000001d)
+#define     EG_V_SQ_CF_WORD1_SQ_CF_INST_GLOBAL_WAVE_SYNC                EG_S_SQ_CF_WORD1_CF_INST(0x0000001e)
+#define     EG_V_SQ_CF_WORD1_SQ_CF_INST_HALT                            EG_S_SQ_CF_WORD1_CF_INST(0x0000001f)
 
 /* cayman extras */
-#define     CM_V_SQ_CF_WORD1_SQ_CF_INST_END                             0x00000020
-#define     CM_V_SQ_CF_WORD1_SQ_CF_INST_LDS_DEALLOC                     0x00000021
-#define     CM_V_SQ_CF_WORD1_SQ_CF_INST_PUSH_WQM                        0x00000022
-#define     CM_V_SQ_CF_WORD1_SQ_CF_INST_POP_WQM                         0x00000023
-#define     CM_V_SQ_CF_WORD1_SQ_CF_INST_ELSE_WQM                        0x00000024
-#define     CM_V_SQ_CF_WORD1_SQ_CF_INST_JUMP_ANY                        0x00000025
-#define     CM_V_SQ_CF_WORD1_SQ_CF_INST_REACTIVATE                      0x00000026
-#define     CM_V_SQ_CF_WORD1_SQ_CF_INST_REACTIVATE_WQM                  0x00000027
-#define     CM_V_SQ_CF_WORD1_SQ_CF_INST_INTERRUPT                       0x00000028
-#define     CM_V_SQ_CF_WORD1_SQ_CF_INST_INTERRUPT_AND_SLEEP             0x00000029
-#define     CM_V_SQ_CF_WORD1_SQ_CF_INST_SET_PRIORITY                    0x00000030
+#define     CM_V_SQ_CF_WORD1_SQ_CF_INST_END                             EG_S_SQ_CF_WORD1_CF_INST(0x00000020)
+#define     CM_V_SQ_CF_WORD1_SQ_CF_INST_LDS_DEALLOC                     EG_S_SQ_CF_WORD1_CF_INST(0x00000021)
+#define     CM_V_SQ_CF_WORD1_SQ_CF_INST_PUSH_WQM                        EG_S_SQ_CF_WORD1_CF_INST(0x00000022)
+#define     CM_V_SQ_CF_WORD1_SQ_CF_INST_POP_WQM                         EG_S_SQ_CF_WORD1_CF_INST(0x00000023)
+#define     CM_V_SQ_CF_WORD1_SQ_CF_INST_ELSE_WQM                        EG_S_SQ_CF_WORD1_CF_INST(0x00000024)
+#define     CM_V_SQ_CF_WORD1_SQ_CF_INST_JUMP_ANY                        EG_S_SQ_CF_WORD1_CF_INST(0x00000025)
+#define     CM_V_SQ_CF_WORD1_SQ_CF_INST_REACTIVATE                      EG_S_SQ_CF_WORD1_CF_INST(0x00000026)
+#define     CM_V_SQ_CF_WORD1_SQ_CF_INST_REACTIVATE_WQM                  EG_S_SQ_CF_WORD1_CF_INST(0x00000027)
+#define     CM_V_SQ_CF_WORD1_SQ_CF_INST_INTERRUPT                       EG_S_SQ_CF_WORD1_CF_INST(0x00000028)
+#define     CM_V_SQ_CF_WORD1_SQ_CF_INST_INTERRUPT_AND_SLEEP             EG_S_SQ_CF_WORD1_CF_INST(0x00000029)
+#define     CM_V_SQ_CF_WORD1_SQ_CF_INST_SET_PRIORITY                    EG_S_SQ_CF_WORD1_CF_INST(0x0000002A) /* opcode 42, follows INTERRUPT_AND_SLEEP (41); was mistyped as 0x30 */
 
-#define     EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU                         0x00000008
-#define     EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE             0x00000009
-#define     EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER               0x0000000A
-#define     EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER              0x0000000B
-#define     EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_EXTENDED                    0x0000000C
-#define     EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_CONTINUE                0x0000000D /* different on CAYMAN */
-#define     EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_BREAK                   0x0000000E /* different on CAYMAN */
-#define     EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_ELSE_AFTER              0x0000000F
+#define     EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU                         EG_S_SQ_CF_ALU_WORD1_CF_INST(0x00000008)
+#define     EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE             EG_S_SQ_CF_ALU_WORD1_CF_INST(0x00000009)
+#define     EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER               EG_S_SQ_CF_ALU_WORD1_CF_INST(0x0000000A)
+#define     EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER              EG_S_SQ_CF_ALU_WORD1_CF_INST(0x0000000B)
+#define     EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_EXTENDED                    EG_S_SQ_CF_ALU_WORD1_CF_INST(0x0000000C)
+#define     EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_CONTINUE                EG_S_SQ_CF_ALU_WORD1_CF_INST(0x0000000D) /* different on CAYMAN */
+#define     EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_BREAK                   EG_S_SQ_CF_ALU_WORD1_CF_INST(0x0000000E) /* different on CAYMAN */
+#define     EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_ELSE_AFTER              EG_S_SQ_CF_ALU_WORD1_CF_INST(0x0000000F)
 
-#define     CM_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_REACTIVATE_BEFORE       0x0000000D
-#define     CM_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_VALID_PIXEL_MODE        0x0000000E
+#define     CM_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_REACTIVATE_BEFORE       EG_S_SQ_CF_ALU_WORD1_CF_INST(0x0000000D)
+#define     CM_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_VALID_PIXEL_MODE        EG_S_SQ_CF_ALU_WORD1_CF_INST(0x0000000E)
 
 #define     EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD                       0x00000000
 #define     EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL                       0x00000001
@@ -380,34 +394,34 @@
 #define     EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_CNDGE_INT                 0x0000001E
 #define     EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT                   0x0000001F
 
-#define     EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF0   0x00000040
-#define     EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF1   0x00000041
-#define     EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF2   0x00000042
-#define     EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF3   0x00000043
-#define     EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1_BUF0   0x00000044
-#define     EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1_BUF1   0x00000045
-#define     EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1_BUF2   0x00000046
-#define     EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1_BUF3   0x00000047
-#define     EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2_BUF0   0x00000048
-#define     EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2_BUF1   0x00000049
-#define     EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2_BUF2   0x0000004A
-#define     EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2_BUF3   0x0000004B
-#define     EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3_BUF0   0x0000004C
-#define     EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3_BUF1   0x0000004D
-#define     EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3_BUF2   0x0000004E
-#define     EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3_BUF3   0x0000004F
-#define     EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_SCRATCH        0x00000050
-#define     EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_RING           0x00000052
-#define     EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT             0x00000053
-#define     EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE        0x00000054
-#define     EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_EXPORT         0x00000055
-#define     EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_RAT            0x00000056
-#define     EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_RAT_CACHELESS  0x00000057
-#define     EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_RING1          0x00000058
-#define     EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_RING2          0x00000059
-#define     EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_RING3          0x0000005A
-#define     EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_EXPORT_COMBINED 0x0000005B
-#define     EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_RAT_COMBINED_CACHELESS  0x0000005C
+#define     EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF0   EG_S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(0x00000040)
+#define     EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF1   EG_S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(0x00000041)
+#define     EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF2   EG_S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(0x00000042)
+#define     EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0_BUF3   EG_S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(0x00000043)
+#define     EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1_BUF0   EG_S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(0x00000044)
+#define     EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1_BUF1   EG_S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(0x00000045)
+#define     EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1_BUF2   EG_S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(0x00000046)
+#define     EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1_BUF3   EG_S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(0x00000047)
+#define     EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2_BUF0   EG_S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(0x00000048)
+#define     EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2_BUF1   EG_S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(0x00000049)
+#define     EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2_BUF2   EG_S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(0x0000004A)
+#define     EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2_BUF3   EG_S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(0x0000004B)
+#define     EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3_BUF0   EG_S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(0x0000004C)
+#define     EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3_BUF1   EG_S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(0x0000004D)
+#define     EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3_BUF2   EG_S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(0x0000004E)
+#define     EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3_BUF3   EG_S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(0x0000004F)
+#define     EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_SCRATCH        EG_S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(0x00000050)
+#define     EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_RING           EG_S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(0x00000052)
+#define     EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT             EG_S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(0x00000053)
+#define     EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE        EG_S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(0x00000054)
+#define     EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_EXPORT         EG_S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(0x00000055)
+#define     EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_RAT            EG_S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(0x00000056)
+#define     EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_RAT_CACHELESS  EG_S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(0x00000057)
+#define     EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_RING1          EG_S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(0x00000058)
+#define     EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_RING2          EG_S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(0x00000059)
+#define     EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_RING3          EG_S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(0x0000005A)
+#define     EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_EXPORT_COMBINED EG_S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(0x0000005B)
+#define     EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_RAT_COMBINED_CACHELESS  EG_S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(0x0000005C)
 
 #define BC_INST(bc, x) ((bc)->chip_class >= EVERGREEN ? EG_##x : x)
 
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 48a2f7b..5645547 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -3082,17 +3082,17 @@ static int pops(struct r600_shader_ctx *ctx, int pops)
 	if (!force_pop) {
 		int alu_pop = 3;
 		if (ctx->bc->cf_last) {
-			if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU) << 3)
+			if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU))
 				alu_pop = 0;
-			else if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3)
+			else if (ctx->bc->cf_last->inst == CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER))
 				alu_pop = 1;
 		}
 		alu_pop += pops;
 		if (alu_pop == 1) {
-			ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER) << 3;
+			ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER);
 			ctx->bc->force_add_cf = 1;
 		} else if (alu_pop == 2) {
-			ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER) << 3;
+			ctx->bc->cf_last->inst = CTX_INST(V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER);
 			ctx->bc->force_add_cf = 1;
 		} else {
 			force_pop = 1;
@@ -3200,14 +3200,14 @@ static void fc_poplevel(struct r600_shader_ctx *ctx)
 #if 0
 static int emit_return(struct r600_shader_ctx *ctx)
 {
-	r600_bytecode_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN);
+	r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_RETURN));
 	return 0;
 }
 
 static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset)
 {
 
-	r600_bytecode_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP);
+	r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_JUMP));
 	ctx->bc->cf_last->pop_count = pops;
 	/* TODO work out offset */
 	return 0;
diff --git a/src/gallium/drivers/r600/r600d.h b/src/gallium/drivers/r600/r600d.h
index 079cd72..7a2fe02 100644
--- a/src/gallium/drivers/r600/r600d.h
+++ b/src/gallium/drivers/r600/r600d.h
@@ -1776,31 +1776,6 @@
 #define   S_008DFC_CF_INST(x)                          (((x) & 0x7F) << 23)
 #define   G_008DFC_CF_INST(x)                          (((x) >> 23) & 0x7F)
 #define   C_008DFC_CF_INST                             0xC07FFFFF
-#define     V_008DFC_SQ_CF_INST_NOP                    0x00000000
-#define     V_008DFC_SQ_CF_INST_TEX                    0x00000001
-#define     V_008DFC_SQ_CF_INST_VTX                    0x00000002
-#define     V_008DFC_SQ_CF_INST_VTX_TC                 0x00000003
-#define     V_008DFC_SQ_CF_INST_LOOP_START             0x00000004
-#define     V_008DFC_SQ_CF_INST_LOOP_END               0x00000005
-#define     V_008DFC_SQ_CF_INST_LOOP_START_DX10        0x00000006
-#define     V_008DFC_SQ_CF_INST_LOOP_START_NO_AL       0x00000007
-#define     V_008DFC_SQ_CF_INST_LOOP_CONTINUE          0x00000008
-#define     V_008DFC_SQ_CF_INST_LOOP_BREAK             0x00000009
-#define     V_008DFC_SQ_CF_INST_JUMP                   0x0000000A
-#define     V_008DFC_SQ_CF_INST_PUSH                   0x0000000B
-#define     V_008DFC_SQ_CF_INST_PUSH_ELSE              0x0000000C
-#define     V_008DFC_SQ_CF_INST_ELSE                   0x0000000D
-#define     V_008DFC_SQ_CF_INST_POP                    0x0000000E
-#define     V_008DFC_SQ_CF_INST_POP_JUMP               0x0000000F
-#define     V_008DFC_SQ_CF_INST_POP_PUSH               0x00000010
-#define     V_008DFC_SQ_CF_INST_POP_PUSH_ELSE          0x00000011
-#define     V_008DFC_SQ_CF_INST_CALL                   0x00000012
-#define     V_008DFC_SQ_CF_INST_CALL_FS                0x00000013
-#define     V_008DFC_SQ_CF_INST_RETURN                 0x00000014
-#define     V_008DFC_SQ_CF_INST_EMIT_VERTEX            0x00000015
-#define     V_008DFC_SQ_CF_INST_EMIT_CUT_VERTEX        0x00000016
-#define     V_008DFC_SQ_CF_INST_CUT_VERTEX             0x00000017
-#define     V_008DFC_SQ_CF_INST_KILL                   0x00000018
 #define   S_008DFC_WHOLE_QUAD_MODE(x)                  (((x) & 0x1) << 30)
 #define   G_008DFC_WHOLE_QUAD_MODE(x)                  (((x) >> 30) & 0x1)
 #define   C_008DFC_WHOLE_QUAD_MODE                     0xBFFFFFFF
@@ -1884,15 +1859,6 @@
 #define   S_008DFC_CF_INST(x)                          (((x) & 0x7F) << 23)
 #define   G_008DFC_CF_INST(x)                          (((x) >> 23) & 0x7F)
 #define   C_008DFC_CF_INST                             0xC07FFFFF
-#define     V_008DFC_SQ_CF_INST_MEM_STREAM0            0x00000020
-#define     V_008DFC_SQ_CF_INST_MEM_STREAM1            0x00000021
-#define     V_008DFC_SQ_CF_INST_MEM_STREAM2            0x00000022
-#define     V_008DFC_SQ_CF_INST_MEM_STREAM3            0x00000023
-#define     V_008DFC_SQ_CF_INST_MEM_SCRATCH            0x00000024
-#define     V_008DFC_SQ_CF_INST_MEM_REDUCTION          0x00000025
-#define     V_008DFC_SQ_CF_INST_MEM_RING               0x00000026
-#define     V_008DFC_SQ_CF_INST_EXPORT                 0x00000027
-#define     V_008DFC_SQ_CF_INST_EXPORT_DONE            0x00000028
 #define   S_008DFC_WHOLE_QUAD_MODE(x)                  (((x) & 0x1) << 30)
 #define   G_008DFC_WHOLE_QUAD_MODE(x)                  (((x) >> 30) & 0x1)
 #define   C_008DFC_WHOLE_QUAD_MODE                     0xBFFFFFFF
diff --git a/src/gallium/drivers/r600/r700_asm.c b/src/gallium/drivers/r600/r700_asm.c
index 74efe22..fa304ed 100644
--- a/src/gallium/drivers/r600/r700_asm.c
+++ b/src/gallium/drivers/r600/r700_asm.c
@@ -30,7 +30,7 @@ void r700_bytecode_cf_vtx_build(uint32_t *bytecode, const struct r600_bytecode_c
 {
 	unsigned count = (cf->ndw / 4) - 1;
 	*bytecode++ = S_SQ_CF_WORD0_ADDR(cf->addr >> 1);
-	*bytecode++ = S_SQ_CF_WORD1_CF_INST(cf->inst) |
+	*bytecode++ = cf->inst |
 			S_SQ_CF_WORD1_BARRIER(1) |
 			S_SQ_CF_WORD1_COUNT(count) |
 			S_SQ_CF_WORD1_COUNT_3(count >> 3);
diff --git a/src/gallium/drivers/r600/r700_sq.h b/src/gallium/drivers/r600/r700_sq.h
index 9a117ae..fa1387b 100644
--- a/src/gallium/drivers/r600/r700_sq.h
+++ b/src/gallium/drivers/r600/r700_sq.h
@@ -55,31 +55,6 @@
 #define   S_SQ_CF_WORD1_CF_INST(x)                                   (((x) & 0x7F) << 23)
 #define   G_SQ_CF_WORD1_CF_INST(x)                                   (((x) >> 23) & 0x7F)
 #define   C_SQ_CF_WORD1_CF_INST                                      0xC07FFFFF
-#define     V_SQ_CF_WORD1_SQ_CF_INST_NOP                             0x00000000
-#define     V_SQ_CF_WORD1_SQ_CF_INST_TEX                             0x00000001
-#define     V_SQ_CF_WORD1_SQ_CF_INST_VTX                             0x00000002
-#define     V_SQ_CF_WORD1_SQ_CF_INST_VTX_TC                          0x00000003
-#define     V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START                      0x00000004
-#define     V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END                        0x00000005
-#define     V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_DX10                 0x00000006
-#define     V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL                0x00000007
-#define     V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE                   0x00000008
-#define     V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK                      0x00000009
-#define     V_SQ_CF_WORD1_SQ_CF_INST_JUMP                            0x0000000A
-#define     V_SQ_CF_WORD1_SQ_CF_INST_PUSH                            0x0000000B
-#define     V_SQ_CF_WORD1_SQ_CF_INST_PUSH_ELSE                       0x0000000C
-#define     V_SQ_CF_WORD1_SQ_CF_INST_ELSE                            0x0000000D
-#define     V_SQ_CF_WORD1_SQ_CF_INST_POP                             0x0000000E
-#define     V_SQ_CF_WORD1_SQ_CF_INST_POP_JUMP                        0x0000000F
-#define     V_SQ_CF_WORD1_SQ_CF_INST_POP_PUSH                        0x00000010
-#define     V_SQ_CF_WORD1_SQ_CF_INST_POP_PUSH_ELSE                   0x00000011
-#define     V_SQ_CF_WORD1_SQ_CF_INST_CALL                            0x00000012
-#define     V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS                         0x00000013
-#define     V_SQ_CF_WORD1_SQ_CF_INST_RETURN                          0x00000014
-#define     V_SQ_CF_WORD1_SQ_CF_INST_EMIT_VERTEX                     0x00000015
-#define     V_SQ_CF_WORD1_SQ_CF_INST_EMIT_CUT_VERTEX                 0x00000016
-#define     V_SQ_CF_WORD1_SQ_CF_INST_CUT_VERTEX                      0x00000017
-#define     V_SQ_CF_WORD1_SQ_CF_INST_KILL                            0x00000018
 #define   S_SQ_CF_WORD1_WHOLE_QUAD_MODE(x)                           (((x) & 0x1) << 30)
 #define   G_SQ_CF_WORD1_WHOLE_QUAD_MODE(x)                           (((x) >> 30) & 0x1)
 #define   C_SQ_CF_WORD1_WHOLE_QUAD_MODE                              0xBFFFFFFF
@@ -121,13 +96,6 @@
 #define   S_SQ_CF_ALU_WORD1_CF_INST(x)                               (((x) & 0xF) << 26)
 #define   G_SQ_CF_ALU_WORD1_CF_INST(x)                               (((x) >> 26) & 0xF)
 #define   C_SQ_CF_ALU_WORD1_CF_INST                                  0xC3FFFFFF
-#define     V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU                         0x00000008
-#define     V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE             0x00000009
-#define     V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER               0x0000000A
-#define     V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER              0x0000000B
-#define     V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_CONTINUE                0x0000000D
-#define     V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_BREAK                   0x0000000E
-#define     V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_ELSE_AFTER              0x0000000F
 #define   S_SQ_CF_ALU_WORD1_WHOLE_QUAD_MODE(x)                       (((x) & 0x1) << 30)
 #define   G_SQ_CF_ALU_WORD1_WHOLE_QUAD_MODE(x)                       (((x) >> 30) & 0x1)
 #define   C_SQ_CF_ALU_WORD1_WHOLE_QUAD_MODE                          0xBFFFFFFF
@@ -173,15 +141,6 @@
 #define   S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(x)                      (((x) & 0x7F) << 23)
 #define   G_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(x)                      (((x) >> 23) & 0x7F)
 #define   C_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST                         0xC07FFFFF
-#define     V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM0        0x00000020
-#define     V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM1        0x00000021
-#define     V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2        0x00000022
-#define     V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3        0x00000023
-#define     V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_SCRATCH        0x00000024
-#define     V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_REDUCTION      0x00000025
-#define     V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_RING           0x00000026
-#define     V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT             0x00000027
-#define     V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE        0x00000028
 #define   S_SQ_CF_ALLOC_EXPORT_WORD1_WHOLE_QUAD_MODE(x)              (((x) & 0x1) << 30)
 #define   G_SQ_CF_ALLOC_EXPORT_WORD1_WHOLE_QUAD_MODE(x)              (((x) >> 30) & 0x1)
 #define   C_SQ_CF_ALLOC_EXPORT_WORD1_WHOLE_QUAD_MODE                 0xBFFFFFFF
-- 
1.7.5.4



More information about the mesa-dev mailing list