Mesa (master): r600g : add basic loop support.

Dave Airlie airlied at kemper.freedesktop.org
Mon Aug 30 03:40:41 UTC 2010


Module: Mesa
Branch: master
Commit: 09547e1bcee7df3444dd8682770d1b31da1a5822
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=09547e1bcee7df3444dd8682770d1b31da1a5822

Author: Dave Airlie <airlied at redhat.com>
Date:   Fri Aug 27 16:08:55 2010 +1000

r600g : add basic loop support.

Adds BGNLOOP, BRK, CONT, ENDLOOP support, ported from r600c.

17 more piglits pass on r300g.tests.

---

 src/gallium/drivers/r600/r600_asm.c    |   21 +++-
 src/gallium/drivers/r600/r600_asm.h    |   17 +++-
 src/gallium/drivers/r600/r600_shader.c |  232 +++++++++++++++++++++++++++++---
 3 files changed, 247 insertions(+), 23 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index d83bb34..03fe950 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -200,6 +200,10 @@ int r600_bc_add_literal(struct r600_bc *bc, const u32 *value)
 	}
 	if (bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_JUMP ||
 	    bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_ELSE ||
+	    bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL ||
+	    bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK ||
+	    bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE ||
+	    bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END ||
 	    bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_POP) {
 		return 0;
 	}
@@ -414,6 +418,10 @@ static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
 	case V_SQ_CF_WORD1_SQ_CF_INST_JUMP:
 	case V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
 	case V_SQ_CF_WORD1_SQ_CF_INST_POP:
+	case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL:
+	case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END:
+	case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE:
+	case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK:
 		bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->cf_addr >> 1);
 		bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(cf->inst) |
 					S_SQ_CF_WORD1_BARRIER(1) |
@@ -437,6 +445,9 @@ int r600_bc_build(struct r600_bc *bc)
 	unsigned addr;
 	int r;
 
+	if (bc->callstack[0].max > 0)
+	    bc->nstack = ((bc->callstack[0].max + 3) >> 2) + 2;
+
 	/* first path compute addr of each CF block */
 	/* addr start after all the CF instructions */
 	addr = bc->cf_last->id + 2;
@@ -458,8 +469,10 @@ int r600_bc_build(struct r600_bc *bc)
 		case V_SQ_CF_WORD1_SQ_CF_INST_JUMP:
 		case V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
 		case V_SQ_CF_WORD1_SQ_CF_INST_POP:
-			/* hack */
-			bc->nstack = 3;
+		case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL:
+		case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END:
+		case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE:
+		case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK:
 			break;
 		default:
 			R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst);
@@ -520,6 +533,10 @@ int r600_bc_build(struct r600_bc *bc)
 			break;
 		case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT:
 		case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE:
+		case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL:
+		case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END:
+		case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE:
+		case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK:
 		case V_SQ_CF_WORD1_SQ_CF_INST_JUMP:
 		case V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
 		case V_SQ_CF_WORD1_SQ_CF_INST_POP:
diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h
index dbd885c..bb4f4b7 100644
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -127,11 +127,23 @@ struct r600_bc_cf {
 #define FC_NONE 0
 #define FC_IF 1
 #define FC_LOOP 2
+#define FC_REP 3
+#define FC_PUSH_VPM 4
+#define FC_PUSH_WQM 5
 
 struct r600_cf_stack_entry {
 	int type;
 	struct r600_bc_cf *start;
-	struct r600_bc_cf *mid; /* used to store the else point */
+	struct r600_bc_cf **mid; /* used to store the else point */
+	int num_mid;
+};
+
+#define SQ_MAX_CALL_DEPTH 0x00000020
+struct r600_cf_callstack {
+	unsigned fc_sp_before_entry;
+	int sub_desc_index;
+	int current;
+	int max;
 };
 	
 struct r600_bc {
@@ -149,6 +161,9 @@ struct r600_bc {
 
 	u32 fc_sp;
 	struct r600_cf_stack_entry fc_stack[32];
+
+	unsigned call_sp;
+	struct r600_cf_callstack callstack[SQ_MAX_CALL_DEPTH];
 };
 
 int r600_bc_init(struct r600_bc *bc, enum radeon_family family);
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index dabc7be..82f4d73 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -1650,8 +1650,7 @@ static int tgsi_exp(struct r600_shader_ctx *ctx)
 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
 	struct r600_bc_alu_src r600_src[3];
 	struct r600_bc_alu alu;
-	uint32_t use_temp = 0;
-	int i, r;
+	int r;
 
 	/* result.x = 2^floor(src); */
 	if (inst->Dst[0].Register.WriteMask & 1) {
@@ -1753,8 +1752,7 @@ static int tgsi_exp(struct r600_shader_ctx *ctx)
 static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
 {
 	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
-	struct r600_bc_alu alu, *lalu;
-	struct r600_bc_cf *last;
+	struct r600_bc_alu alu;
 	int r;
 
 	memset(&alu, 0, sizeof(struct r600_bc_alu));
@@ -1777,7 +1775,6 @@ static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode)
 	r = r600_bc_add_alu_type(ctx->bc, &alu, V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE);
 	if (r)
 		return r;
-
 	return 0;
 }
 
@@ -1788,29 +1785,158 @@ static int pops(struct r600_shader_ctx *ctx, int pops)
 	return 0;
 }
 
-static int tgsi_if(struct r600_shader_ctx *ctx)
+static inline void callstack_decrease_current(struct r600_shader_ctx *ctx, unsigned reason)
 {
-	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
+	switch(reason) {
+	case FC_PUSH_VPM:
+		ctx->bc->callstack[ctx->bc->call_sp].current--;
+		break;
+	case FC_PUSH_WQM:
+	case FC_LOOP:
+		ctx->bc->callstack[ctx->bc->call_sp].current -= 4;
+		break;
+	case FC_REP:
+		/* TODO : for 16 vp asic should -= 2; */
+		ctx->bc->callstack[ctx->bc->call_sp].current --;
+		break;
+	}
+}
 
-	emit_logic_pred(ctx, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE);
+static inline void callstack_check_depth(struct r600_shader_ctx *ctx, unsigned reason, unsigned check_max_only)
+{
+	if (check_max_only) {
+		int diff;
+		switch (reason) {
+		case FC_PUSH_VPM:
+			diff = 1;
+			break;
+		case FC_PUSH_WQM:
+			diff = 4;
+			break;
+		}
+		if ((ctx->bc->callstack[ctx->bc->call_sp].current + diff) >
+		    ctx->bc->callstack[ctx->bc->call_sp].max) {
+			ctx->bc->callstack[ctx->bc->call_sp].max =
+				ctx->bc->callstack[ctx->bc->call_sp].current + diff;
+		}
+		return;
+	}					
+	switch (reason) {
+	case FC_PUSH_VPM:
+		ctx->bc->callstack[ctx->bc->call_sp].current++;
+		break;
+	case FC_PUSH_WQM:
+	case FC_LOOP:
+		ctx->bc->callstack[ctx->bc->call_sp].current += 4;
+		break;
+	case FC_REP:
+		ctx->bc->callstack[ctx->bc->call_sp].current++;
+		break;
+	}
+
+	if ((ctx->bc->callstack[ctx->bc->call_sp].current) >
+	    ctx->bc->callstack[ctx->bc->call_sp].max) {
+		ctx->bc->callstack[ctx->bc->call_sp].max =
+			ctx->bc->callstack[ctx->bc->call_sp].current;
+	}
+}
+
+static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp)
+{
+	struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp];
+
+	sp->mid = (struct r600_bc_cf **)realloc((void *)sp->mid,
+						sizeof(struct r600_bc_cf *) * (sp->num_mid + 1));
+	sp->mid[sp->num_mid] = ctx->bc->cf_last;
+	sp->num_mid++;
+}
 
+static void fc_pushlevel(struct r600_shader_ctx *ctx, int type)
+{
 	ctx->bc->fc_sp++;
-	ctx->bc->fc_stack[ctx->bc->fc_sp].type = FC_IF;
-	ctx->bc->fc_stack[ctx->bc->fc_sp].mid = NULL;
+	ctx->bc->fc_stack[ctx->bc->fc_sp].type = type;
+	ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last;
+}
+
+static void fc_poplevel(struct r600_shader_ctx *ctx)
+{
+	struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp];
+	if (sp->mid) {
+		free(sp->mid);
+		sp->mid = NULL;
+	}
+	sp->num_mid = 0;
+	sp->start = NULL;
+	sp->type = 0;
+	ctx->bc->fc_sp--;
+}
+
+#if 0
+static int emit_return(struct r600_shader_ctx *ctx)
+{
+	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_RETURN);
+	return 0;
+}
+
+static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset)
+{
+
 	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP);
+	ctx->bc->cf_last->pop_count = pops;
+	/* TODO work out offset */
+	return 0;
+}
 
-	ctx->bc->fc_stack[ctx->bc->fc_sp].start = ctx->bc->cf_last;
+static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value)
+{
+	return 0;
+}
+
+static void emit_testflag(struct r600_shader_ctx *ctx)
+{
+	
+}
+
+static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx)
+{
+	emit_testflag(ctx);
+	emit_jump_to_offset(ctx, 1, 4);
+	emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0);
+	pops(ctx, ifidx + 1);
+	emit_return(ctx);
+}
+
+static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp)
+{
+	emit_testflag(ctx);
+
+	r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
+	ctx->bc->cf_last->pop_count = 1;
+
+	fc_set_mid(ctx, fc_sp);
+
+	pops(ctx, 1);
+}
+#endif
+
+static int tgsi_if(struct r600_shader_ctx *ctx)
+{
+	emit_logic_pred(ctx, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE);
+
+	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_JUMP);
+
+	fc_pushlevel(ctx, FC_IF);
+
+	callstack_check_depth(ctx, FC_PUSH_VPM, 0);
 	return 0;
 }
 
 static int tgsi_else(struct r600_shader_ctx *ctx)
 {
-	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
 	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_ELSE);
 	ctx->bc->cf_last->pop_count = 1;
 
-	/* fixup mid */
-	ctx->bc->fc_stack[ctx->bc->fc_sp].mid = ctx->bc->cf_last;
+	fc_set_mid(ctx, ctx->bc->fc_sp);
 	ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id;
 	return 0;
 }
@@ -1827,10 +1953,76 @@ static int tgsi_endif(struct r600_shader_ctx *ctx)
 		ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
 		ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1;
 	} else {
-		ctx->bc->fc_stack[ctx->bc->fc_sp].mid->cf_addr = ctx->bc->cf_last->id + 2;
+		ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2;
 	}
-	ctx->bc->fc_sp--;
+	fc_poplevel(ctx);
+
+	callstack_decrease_current(ctx, FC_PUSH_VPM);
+	return 0;
+}
+
+static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
+{
+	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL);
+
+	fc_pushlevel(ctx, FC_LOOP);
 
+	/* check stack depth */
+	callstack_check_depth(ctx, FC_LOOP, 0);
+	return 0;
+}
+
+static int tgsi_endloop(struct r600_shader_ctx *ctx)
+{
+	int i;
+
+	r600_bc_add_cfinst(ctx->bc, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END);
+
+	if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) {
+		R600_ERR("loop/endloop in shader code are not paired.\n");
+		return -EINVAL;
+	}
+
+	/* fixup loop pointers - from r600isa
+	   LOOP END points to CF after LOOP START,
+	   LOOP START points to CF after LOOP END
+	   BRK/CONT point to LOOP END CF
+	*/
+	ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2;
+
+	ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
+
+	for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) {
+		ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id;
+	}
+	/* TODO add LOOPRET support */
+	fc_poplevel(ctx);
+	callstack_decrease_current(ctx, FC_LOOP);
+	return 0;
+}
+
+static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
+{
+	unsigned int fscp;
+
+	for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
+	{
+		if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
+			break;
+	}
+
+	if (fscp == 0) {
+		R600_ERR("Break not inside loop/endloop pair\n");
+		return -EINVAL;
+	}
+
+	r600_bc_add_cfinst(ctx->bc, ctx->inst_info->r600_opcode);
+	ctx->bc->cf_last->pop_count = 1;
+
+	fc_set_mid(ctx, fscp);
+
+	pops(ctx, 1);
+	callstack_check_depth(ctx, FC_PUSH_VPM, 1);
 	return 0;
 }
 
@@ -1911,7 +2103,7 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
 	{TGSI_OPCODE_DIV,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
 	{TGSI_OPCODE_DP2,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4, tgsi_dp},
 	{TGSI_OPCODE_TXL,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
-	{TGSI_OPCODE_BRK,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+	{TGSI_OPCODE_BRK,	0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK, tgsi_loop_brk_cont},
 	{TGSI_OPCODE_IF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_if},
 	/* gap */
 	{75,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
@@ -1937,12 +2129,12 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
 	{TGSI_OPCODE_SAD,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
 	{TGSI_OPCODE_TXF,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
 	{TGSI_OPCODE_TXQ,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
-	{TGSI_OPCODE_CONT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+	{TGSI_OPCODE_CONT,	0, V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE, tgsi_loop_brk_cont},
 	{TGSI_OPCODE_EMIT,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
 	{TGSI_OPCODE_ENDPRIM,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
-	{TGSI_OPCODE_BGNLOOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+	{TGSI_OPCODE_BGNLOOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_bgnloop},
 	{TGSI_OPCODE_BGNSUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
-	{TGSI_OPCODE_ENDLOOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
+	{TGSI_OPCODE_ENDLOOP,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_endloop},
 	{TGSI_OPCODE_ENDSUB,	0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},
 	/* gap */
 	{103,			0, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP, tgsi_unsupported},




More information about the mesa-commit mailing list