Mesa (master): r600g: add initial if/else/endif support

Dave Airlie airlied at kemper.freedesktop.org
Thu Aug 26 22:34:36 PDT 2010


Module: Mesa
Branch: master
Commit: a03d456f5a41926e39194de70b2d50776e64b8a2
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=a03d456f5a41926e39194de70b2d50776e64b8a2

Author: Dave Airlie <airlied at redhat.com>
Date:   Wed Aug 25 15:57:41 2010 +1000

r600g: add initial if/else/endif support

This adds handling for some more CF (control flow) instructions and
conditions, and also adds a parameter for stack size emission.

These seem to pass on vertex shaders (VS) with the stack size hack
(nstack is hard-coded to 3), but not on fragment shaders (FS).

TODO: fix FS and the stack size calculations.

---

 src/gallium/drivers/r600/r600_asm.c    |   58 +++++++++++++++++--
 src/gallium/drivers/r600/r600_asm.h    |   21 +++++++-
 src/gallium/drivers/r600/r600_shader.c |   98 ++++++++++++++++++++++++++++++--
 src/gallium/drivers/r600/r600_sq.h     |    5 ++
 4 files changed, 170 insertions(+), 12 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index e6efae4..d83bb34 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -128,7 +128,7 @@ int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output)
 	return 0;
 }
 
-int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu)
+int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int type)
 {
 	struct r600_bc_alu *nalu = r600_bc_alu();
 	struct r600_bc_alu *lalu;
@@ -140,7 +140,7 @@ int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu)
 	nalu->nliteral = 0;
 
 	/* cf can contains only alu or only vtx or only tex */
-	if (bc->cf_last == NULL || bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3) ||
+	if (bc->cf_last == NULL || bc->cf_last->inst != (type << 3) ||
 		bc->force_add_cf) {
 		/* at most 128 slots, one add alu can add 4 slots + 4 constant worst case */
 		r = r600_bc_add_cf(bc);
@@ -148,7 +148,7 @@ int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu)
 			free(nalu);
 			return r;
 		}
-		bc->cf_last->inst = V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3;
+		bc->cf_last->inst = (type << 3);
 	}
 	if (alu->last && (bc->cf_last->ndw >> 1) >= 124) {
 		bc->force_add_cf = 1;
@@ -183,6 +183,11 @@ int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu)
 	return 0;
 }
 
+int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu)
+{
+	return r600_bc_add_alu_type(bc, alu, V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU);
+}
+
 int r600_bc_add_literal(struct r600_bc *bc, const u32 *value)
 {
 	struct r600_bc_alu *alu;
@@ -193,7 +198,13 @@ int r600_bc_add_literal(struct r600_bc *bc, const u32 *value)
 	if (bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_TEX) {
 		return 0;
 	}
-	if (bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3) ||
+	if (bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_JUMP ||
+	    bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_ELSE ||
+	    bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_POP) {
+		return 0;
+	}
+	if (((bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3)) &&
+	     (bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3))) ||
 		LIST_IS_EMPTY(&bc->cf_last->alu)) {
 		R600_ERR("last CF is not ALU (%p)\n", bc->cf_last);
 		return -EINVAL;
@@ -262,6 +273,18 @@ int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex)
 	return 0;
 }
 
+int r600_bc_add_cfinst(struct r600_bc *bc, int inst)
+{
+	int r;
+	r = r600_bc_add_cf(bc);
+	if (r)
+		return r;
+
+	bc->cf_last->cond = V_SQ_CF_COND_ACTIVE;
+	bc->cf_last->inst = inst;
+	return 0;
+}
+
 static int r600_bc_vtx_build(struct r600_bc *bc, struct r600_bc_vtx *vtx, unsigned id)
 {
 	bc->bytecode[id++] = S_SQ_VTX_WORD0_BUFFER_ID(vtx->buffer_id) |
@@ -342,7 +365,9 @@ static int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsign
 					S_SQ_ALU_WORD1_OP2_SRC1_ABS(alu->src[1].abs) |
 					S_SQ_ALU_WORD1_OP2_WRITE_MASK(alu->dst.write) |
 					S_SQ_ALU_WORD1_OP2_ALU_INST(alu->inst) |
-					S_SQ_ALU_WORD1_BANK_SWIZZLE(0);
+					S_SQ_ALU_WORD1_BANK_SWIZZLE(0) |
+			                S_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(alu->predicate) |
+		 	                S_SQ_ALU_WORD1_OP2_UPDATE_PRED(alu->predicate);
 	}
 	if (alu->last) {
 		for (i = 0; i < alu->nliteral; i++) {
@@ -358,6 +383,7 @@ static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
 
 	switch (cf->inst) {
 	case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3):
+	case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3):
 		bc->bytecode[id++] = S_SQ_CF_ALU_WORD0_ADDR(cf->addr >> 1);
 		bc->bytecode[id++] = S_SQ_CF_ALU_WORD1_CF_INST(cf->inst >> 3) |
 					S_SQ_CF_ALU_WORD1_BARRIER(1) |
@@ -385,6 +411,16 @@ static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
 			S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(cf->output.inst) |
 			S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->output.end_of_program);
 		break;
+	case V_SQ_CF_WORD1_SQ_CF_INST_JUMP:
+	case V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
+	case V_SQ_CF_WORD1_SQ_CF_INST_POP:
+		bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->cf_addr >> 1);
+		bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(cf->inst) |
+					S_SQ_CF_WORD1_BARRIER(1) |
+			                S_SQ_CF_WORD1_COND(cf->cond) |
+			                S_SQ_CF_WORD1_POP_COUNT(cf->pop_count);
+
+		break;
 	default:
 		R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst);
 		return -EINVAL;
@@ -401,13 +437,13 @@ int r600_bc_build(struct r600_bc *bc)
 	unsigned addr;
 	int r;
 
-
 	/* first path compute addr of each CF block */
 	/* addr start after all the CF instructions */
 	addr = bc->cf_last->id + 2;
 	LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
 		switch (cf->inst) {
 		case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3):
+		case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3):
 			break;
 		case V_SQ_CF_WORD1_SQ_CF_INST_TEX:
 		case V_SQ_CF_WORD1_SQ_CF_INST_VTX:
@@ -419,6 +455,12 @@ int r600_bc_build(struct r600_bc *bc)
 		case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT:
 		case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE:
 			break;
+		case V_SQ_CF_WORD1_SQ_CF_INST_JUMP:
+		case V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
+		case V_SQ_CF_WORD1_SQ_CF_INST_POP:
+			/* hack */
+			bc->nstack = 3;
+			break;
 		default:
 			R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst);
 			return -EINVAL;
@@ -438,6 +480,7 @@ int r600_bc_build(struct r600_bc *bc)
 			return r;
 		switch (cf->inst) {
 		case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3):
+		case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3):
 			LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) {
 				switch(bc->chiprev) {
 				case 0:
@@ -477,6 +520,9 @@ int r600_bc_build(struct r600_bc *bc)
 			break;
 		case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT:
 		case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE:
+		case V_SQ_CF_WORD1_SQ_CF_INST_JUMP:
+		case V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
+		case V_SQ_CF_WORD1_SQ_CF_INST_POP:
 			break;
 		default:
 			R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst);
diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h
index e944bd0..dbd885c 100644
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -47,6 +47,7 @@ struct r600_bc_alu {
 	unsigned			inst;
 	unsigned			last;
 	unsigned			is_op3;
+	unsigned                        predicate;
 	unsigned			nliteral;
 	unsigned			literal_added;
 	u32				value[4];
@@ -114,12 +115,25 @@ struct r600_bc_cf {
 	unsigned			addr;
 	unsigned			ndw;
 	unsigned			id;
+	unsigned                        cond;
+	unsigned                        pop_count;
+	unsigned                        cf_addr; /* control flow addr */
 	struct list_head		alu;
 	struct list_head		tex;
 	struct list_head		vtx;
 	struct r600_bc_output		output;
 };
 
+#define FC_NONE 0
+#define FC_IF 1
+#define FC_LOOP 2
+
+struct r600_cf_stack_entry {
+	int type;
+	struct r600_bc_cf *start;
+	struct r600_bc_cf *mid; /* used to store the else point */
+};
+	
 struct r600_bc {
 	enum radeon_family		family;
 	int chiprev; /* 0 - r600, 1 - r700, 2 - evergreen */
@@ -128,9 +142,13 @@ struct r600_bc {
 	unsigned			ndw;
 	unsigned			ncf;
 	unsigned			ngpr;
+	unsigned                        nstack;
 	unsigned			nresource;
 	unsigned			force_add_cf;
 	u32				*bytecode;
+
+	u32 fc_sp;
+	struct r600_cf_stack_entry fc_stack[32];
 };
 
 int r600_bc_init(struct r600_bc *bc, enum radeon_family family);
@@ -140,5 +158,6 @@ int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx);
 int r600_bc_add_tex(struct r600_bc *bc, const struct r600_bc_tex *tex);
 int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output);
 int r600_bc_build(struct r600_bc *bc);
-
+int r600_bc_add_cfinst(struct r600_bc *bc, int inst);
+int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int type);
 #endif
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 1470bb5..052b497 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -144,7 +144,8 @@ static int r600_pipe_shader_vs(struct pipe_context *ctx, struct r600_context_sta
 		state->states[R600_VS_SHADER__SPI_VS_OUT_ID_0 + i / 4] |= tmp;
 	}
 	state->states[R600_VS_SHADER__SPI_VS_OUT_CONFIG] = S_0286C4_VS_EXPORT_COUNT(rshader->noutput - 2);
-	state->states[R600_VS_SHADER__SQ_PGM_RESOURCES_VS] = S_028868_NUM_GPRS(rshader->bc.ngpr);
+	state->states[R600_VS_SHADER__SQ_PGM_RESOURCES_VS] = S_028868_NUM_GPRS(rshader->bc.ngpr) |
+		S_028868_STACK_SIZE(rshader->bc.nstack);
 	rpshader->rstate = state;
 	rpshader->rstate->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo);
 	rpshader->rstate->bo[1] = radeon_bo_incref(rscreen->rw, rpshader->bo);
@@ -200,7 +201,8 @@ static int r600_pipe_shader_ps(struct pipe_context *ctx, struct r600_context_sta
 	state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_0] = S_0286CC_NUM_INTERP(rshader->ninput) |
 							S_0286CC_PERSP_GRADIENT_ENA(1);
 	state->states[R600_PS_SHADER__SPI_PS_IN_CONTROL_1] = 0x00000000;
-	state->states[R600_PS_SHADER__SQ_PGM_RESOURCES_PS] = S_028868_NUM_GPRS(rshader->bc.ngpr);
+	state->states[R600_PS_SHADER__SQ_PGM_RESOURCES_PS] = S_028868_NUM_GPRS(rshader->bc.ngpr) |
+		S_028868_STACK_SIZE(rshader->bc.nstack);
 	state->states[R600_PS_SHADER__SQ_PGM_EXPORTS_PS] = exports_ps;
 	rpshader->rstate = state;
 	rpshader->rstate->bo[0] = radeon_bo_incref(rscreen->rw, rpshader->bo);
@@ -276,10 +278,12 @@ static int tgsi_is_supported(struct r600_shader_ctx *ctx)
 		R600_ERR("predicate unsupported\n");
 		return -EINVAL;
 	}
+#if 0
 	if (i->Instruction.Label) {
 		R600_ERR("label unsupported\n");
 		return -EINVAL;
 	}
+#endif
 	for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
 		if (i->Src[j].Register.Indirect ||
 			i->Src[j].Register.Dimension ||
@@ -1721,6 +1725,90 @@ static int tgsi_exp(struct r600_shader_ctx *ctx)
 	return tgsi_helper_copy(ctx, inst);
 }
 
+static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
+{
+	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+	struct r600_bc_alu alu, *lalu;
+	struct r600_bc_cf *last;
+	int r;
+
+	memset(&alu, 0, sizeof(struct r600_bc_alu));
+	alu.inst = opcode;
+	alu.predicate = 1;
+
+	alu.dst.sel = ctx->temp_reg;
+	alu.dst.write = 1;
+	alu.dst.chan = 0;
+
+	r = tgsi_src(ctx, &inst->Src[0], &alu.src[0]);
+	if (r)
+		return r;
+	alu.src[0].chan = tgsi_chan(&inst->Src[0], 0);
+	alu.src[1].sel = V_SQ_ALU_SRC_0;
+	alu.src[1].chan = 0;
+	
+	alu.last = 1;
+
+	r = r600_bc_add_alu_type(ctx->bc, &alu, V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE);
+	if (r)
+		return r;
+
+	return 0;
+}
+
+static int pops(struct r600_shader_ctx *ctx, int pops)
+{
+	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_POP);	
+	ctx->bc->cf_last->pop_count = pops;
+	return 0;
+}
+
+static int tgsi_if(struct r600_shader_ctx *ctx)
+{
+	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+
+	emit_logic_pred(ctx, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE);
+
+	ctx->bc->fc_sp++;
+	ctx->bc->fc_stack[ctx->bc->fc_sp].type = FC_IF;
+	ctx->bc->fc_stack[ctx->bc->fc_sp].mid = NULL;
+	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP);
+
+	ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last;
+	return 0;
+}
+
+static int tgsi_else(struct r600_shader_ctx *ctx)
+{
+	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_ELSE);
+	ctx->bc->cf_last->pop_count = 1;
+
+	/* fixup mid */
+	ctx->bc->fc_stack[ctx->bc->fc_sp].mid = ctx->bc->cf_last;
+	ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id;
+	return 0;
+}
+
+static int tgsi_endif(struct r600_shader_ctx *ctx)
+{
+	pops(ctx, 1);
+	if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) {
+		R600_ERR("if/endif unbalanced in shader\n");
+		return -1;
+	}
+
+	if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) {
+		ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
+		ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1;
+	} else {
+		ctx->bc->fc_stack[ctx->bc->fc_sp].mid->cf_addr = ctx->bc->cf_last->id + 2;
+	}
+	ctx->bc->fc_sp--;
+
+	return 0;
+}
+
 static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
 	{TGSI_OPCODE_ARL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
 	{TGSI_OPCODE_MOV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV, tgsi_op2},
@@ -1799,12 +1887,12 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
 	{TGSI_OPCODE_DP2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
 	{TGSI_OPCODE_TXL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
 	{TGSI_OPCODE_BRK,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
-	{TGSI_OPCODE_IF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+	{TGSI_OPCODE_IF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
 	/* gap */
 	{75,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
 	{76,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
-	{TGSI_OPCODE_ELSE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
-	{TGSI_OPCODE_ENDIF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+	{TGSI_OPCODE_ELSE,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_else},
+	{TGSI_OPCODE_ENDIF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endif},
 	/* gap */
 	{79,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
 	{80,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
diff --git a/src/gallium/drivers/r600/r600_sq.h b/src/gallium/drivers/r600/r600_sq.h
index ad4de0b..b4ed435 100644
--- a/src/gallium/drivers/r600/r600_sq.h
+++ b/src/gallium/drivers/r600/r600_sq.h
@@ -603,4 +603,9 @@
 #define   G_SQ_TEX_WORD2_SRC_SEL_W(x)                                (((x) >> 29) & 0x7)
 #define   C_SQ_TEX_WORD2_SRC_SEL_W                                   0x1FFFFFFF
 
+#define V_SQ_CF_COND_ACTIVE                             0x00
+#define V_SQ_CF_COND_FALSE                              0x01
+#define V_SQ_CF_COND_BOOL                               0x02
+#define V_SQ_CF_COND_NOT_BOOL                           0x03
+
 #endif



More information about the mesa-commit mailing list