[Mesa-dev] [PATCH] r600g: move barrier and end_of_program bits from output to cf struct (v2)

Dave Airlie airlied at gmail.com
Thu Jan 30 18:11:03 PST 2014


From: Vadim Girlin <vadimgirlin at gmail.com>

v2: fix regression on r600 NOP instructions.

Signed-off-by: Vadim Girlin <vadimgirlin at gmail.com>
Signed-off-by: Dave Airlie <airlied at redhat.com>

Fix regression since eop moving
---
 src/gallium/drivers/r600/eg_asm.c      | 10 ++++++----
 src/gallium/drivers/r600/r600_asm.c    | 24 +++++++++++++-----------
 src/gallium/drivers/r600/r600_asm.h    |  4 ++--
 src/gallium/drivers/r600/r600_shader.c | 26 +++++++++++++-------------
 4 files changed, 34 insertions(+), 30 deletions(-)

diff --git a/src/gallium/drivers/r600/eg_asm.c b/src/gallium/drivers/r600/eg_asm.c
index fffc436..42e78c0 100644
--- a/src/gallium/drivers/r600/eg_asm.c
+++ b/src/gallium/drivers/r600/eg_asm.c
@@ -86,11 +86,11 @@ int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf)
 					S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(cf->output.swizzle_y) |
 					S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(cf->output.swizzle_z) |
 					S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(cf->output.swizzle_w) |
-					S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->output.barrier) |
+					S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->barrier) |
 					S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(opcode);
 
 			if (bc->chip_class == EVERGREEN) /* no EOP on cayman */
-				bc->bytecode[id] |= S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->output.end_of_program);
+				bc->bytecode[id] |= S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->end_of_program);
 			id++;
 		} else if (cfop->flags & CF_STRM) {
 			/* MEM_STREAM instructions */
@@ -99,12 +99,12 @@ int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf)
 					S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(cf->output.array_base) |
 					S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(cf->output.type);
 			bc->bytecode[id] = S_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(cf->output.burst_count - 1) |
-					S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->output.barrier) |
+					S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->barrier) |
 					S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(opcode) |
 					S_SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK(cf->output.comp_mask) |
 					S_SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE(cf->output.array_size);
 			if (bc->chip_class == EVERGREEN) /* no EOP on cayman */
-				bc->bytecode[id] |= S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->output.end_of_program);
+				bc->bytecode[id] |= S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->end_of_program);
 			id++;
 		} else {
 			/* branch, loop, call, return instructions */
@@ -118,6 +118,7 @@ int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf)
 	return 0;
 }
 
+#if 0
 void eg_bytecode_export_read(struct r600_bytecode *bc,
 		struct r600_bytecode_output *output, uint32_t word0, uint32_t word1)
 {
@@ -138,3 +139,4 @@ void eg_bytecode_export_read(struct r600_bytecode *bc,
 	output->array_size = G_SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE(word1);
 	output->comp_mask = G_SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK(word1);
 }
+#endif
diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index 86f79e2..7f7731c 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -193,7 +193,6 @@ int r600_bytecode_add_output(struct r600_bytecode *bc,
 		if ((output->gpr + output->burst_count) == bc->cf_last->output.gpr &&
 			(output->array_base + output->burst_count) == bc->cf_last->output.array_base) {
 
-			bc->cf_last->output.end_of_program |= output->end_of_program;
 			bc->cf_last->op = bc->cf_last->output.op = output->op;
 			bc->cf_last->output.gpr = output->gpr;
 			bc->cf_last->output.array_base = output->array_base;
@@ -203,7 +202,6 @@ int r600_bytecode_add_output(struct r600_bytecode *bc,
 		} else if (output->gpr == (bc->cf_last->output.gpr + bc->cf_last->output.burst_count) &&
 			output->array_base == (bc->cf_last->output.array_base + bc->cf_last->output.burst_count)) {
 
-			bc->cf_last->output.end_of_program |= output->end_of_program;
 			bc->cf_last->op = bc->cf_last->output.op = output->op;
 			bc->cf_last->output.burst_count += output->burst_count;
 			return 0;
@@ -215,6 +213,7 @@ int r600_bytecode_add_output(struct r600_bytecode *bc,
 		return r;
 	bc->cf_last->op = output->op;
 	memcpy(&bc->cf_last->output, output, sizeof(struct r600_bytecode_output));
+	bc->cf_last->barrier = 1;
 	return 0;
 }
 
@@ -1532,18 +1531,18 @@ static int r600_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode
 			S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(cf->output.swizzle_y) |
 			S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(cf->output.swizzle_z) |
 			S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(cf->output.swizzle_w) |
-			S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->output.barrier) |
+			S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->barrier) |
 			S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(opcode) |
-			S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->output.end_of_program);
+			S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->end_of_program);
 	} else if (cfop->flags & CF_STRM) {
 		bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(cf->output.gpr) |
 			S_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(cf->output.elem_size) |
 			S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(cf->output.array_base) |
 			S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(cf->output.type);
 		bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(cf->output.burst_count - 1) |
-			S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->output.barrier) |
+			S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->barrier) |
 			S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(opcode) |
-			S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->output.end_of_program) |
+			S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->end_of_program) |
 			S_SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE(cf->output.array_size) |
 			S_SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK(cf->output.comp_mask);
 	} else {
@@ -1551,7 +1550,8 @@ static int r600_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode
 		bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(opcode) |
 					S_SQ_CF_WORD1_BARRIER(1) |
 			                S_SQ_CF_WORD1_COND(cf->cond) |
-			                S_SQ_CF_WORD1_POP_COUNT(cf->pop_count);
+			                S_SQ_CF_WORD1_POP_COUNT(cf->pop_count) |
+					S_SQ_CF_WORD1_END_OF_PROGRAM(cf->end_of_program);
 	}
 	return 0;
 }
@@ -1932,9 +1932,9 @@ void r600_bytecode_disasm(struct r600_bytecode *bc)
 				print_indent(o, 67);
 
 				fprintf(stderr, " ES:%X ", cf->output.elem_size);
-				if (!cf->output.barrier)
+				if (!cf->barrier)
 					fprintf(stderr, "NO_BARRIER ");
-				if (cf->output.end_of_program)
+				if (cf->end_of_program)
 					fprintf(stderr, "EOP ");
 				fprintf(stderr, "\n");
 			} else if (r600_isa_cf(cf->op)->flags & CF_STRM) {
@@ -1968,9 +1968,9 @@ void r600_bytecode_disasm(struct r600_bytecode *bc)
 				fprintf(stderr, " ES:%i ", cf->output.elem_size);
 				if (cf->output.array_size != 0xFFF)
 					fprintf(stderr, "AS:%i ", cf->output.array_size);
-				if (!cf->output.barrier)
+				if (!cf->barrier)
 					fprintf(stderr, "NO_BARRIER ");
-				if (cf->output.end_of_program)
+				if (cf->end_of_program)
 					fprintf(stderr, "EOP ");
 				fprintf(stderr, "\n");
 			} else {
@@ -2486,6 +2486,7 @@ void r600_bytecode_alu_read(struct r600_bytecode *bc,
 	}
 }
 
+#if 0
 void r600_bytecode_export_read(struct r600_bytecode *bc,
 		struct r600_bytecode_output *output, uint32_t word0, uint32_t word1)
 {
@@ -2506,3 +2507,4 @@ void r600_bytecode_export_read(struct r600_bytecode *bc,
 	output->array_size = G_SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE(word1);
 	output->comp_mask = G_SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK(word1);
 }
+#endif
diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h
index 82c6c8d..3bfbcb2 100644
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@ -115,7 +115,6 @@ struct r600_bytecode_output {
 	unsigned			array_size;
 	unsigned			comp_mask;
 	unsigned			type;
-	unsigned			end_of_program;
 
 	unsigned			op;
 
@@ -126,7 +125,6 @@ struct r600_bytecode_output {
 	unsigned			swizzle_z;
 	unsigned			swizzle_w;
 	unsigned			burst_count;
-	unsigned			barrier;
 };
 
 struct r600_bytecode_kcache {
@@ -148,6 +146,8 @@ struct r600_bytecode_cf {
 	struct r600_bytecode_kcache		kcache[4];
 	unsigned			r6xx_uses_waterfall;
 	unsigned			eg_alu_extended;
+	unsigned			barrier;
+	unsigned			end_of_program;
 	struct list_head		alu;
 	struct list_head		tex;
 	struct list_head		vtx;
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 5fd445e..32d2aa7 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -939,7 +939,6 @@ static int emit_streamout(struct r600_shader_ctx *ctx, struct pipe_stream_output
 		output.array_base = so->output[i].dst_offset - so->output[i].start_component;
 		output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE;
 		output.burst_count = 1;
-		output.barrier = 1;
 		/* array_size is an upper limit for the burst_count
 		 * with MEM_STREAM instructions */
 		output.array_size = 0xFFF;
@@ -1384,7 +1383,6 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
 		output[j].swizzle_z = 2;
 		output[j].swizzle_w = 3;
 		output[j].burst_count = 1;
-		output[j].barrier = 1;
 		output[j].type = -1;
 		output[j].op = CF_OP_EXPORT;
 		switch (ctx.type) {
@@ -1445,7 +1443,6 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
 						output[j].swizzle_z = 2;
 						output[j].swizzle_w = key.alpha_to_one ? 5 : 3;
 						output[j].burst_count = 1;
-						output[j].barrier = 1;
 						output[j].array_base = k;
 						output[j].op = CF_OP_EXPORT;
 						output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
@@ -1492,7 +1489,6 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
 			output[j].swizzle_z = 7;
 			output[j].swizzle_w = 7;
 			output[j].burst_count = 1;
-			output[j].barrier = 1;
 			output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
 			output[j].array_base = next_pos_base;
 			output[j].op = CF_OP_EXPORT;
@@ -1509,7 +1505,6 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
 			output[j].swizzle_z = 7;
 			output[j].swizzle_w = 7;
 			output[j].burst_count = 1;
-			output[j].barrier = 1;
 			output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
 			output[j].array_base = 0;
 			output[j].op = CF_OP_EXPORT;
@@ -1526,7 +1521,6 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
 		output[j].swizzle_z = 7;
 		output[j].swizzle_w = 7;
 		output[j].burst_count = 1;
-		output[j].barrier = 1;
 		output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
 		output[j].array_base = 0;
 		output[j].op = CF_OP_EXPORT;
@@ -1537,11 +1531,6 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
 
 	/* set export done on last export of each type */
 	for (i = noutput - 1, output_done = 0; i >= 0; i--) {
-		if (ctx.bc->chip_class < CAYMAN) {
-			if (i == (noutput - 1)) {
-				output[i].end_of_program = 1;
-			}
-		}
 		if (!(output_done & (1 << output[i].type))) {
 			output_done |= (1 << output[i].type);
 			output[i].op = CF_OP_EXPORT_DONE;
@@ -1555,9 +1544,20 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
 				goto out_err;
 		}
 	}
+
 	/* add program end */
-	if (!use_llvm && ctx.bc->chip_class == CAYMAN)
-		cm_bytecode_add_cf_end(ctx.bc);
+	if (!use_llvm) {
+		if (ctx.bc->chip_class == CAYMAN)
+			cm_bytecode_add_cf_end(ctx.bc);
+		else {
+			const struct cf_op_info *last = r600_isa_cf(ctx.bc->cf_last->op);
+
+			if (last->flags & CF_CLAUSE)
+				r600_bytecode_add_cfinst(ctx.bc, CF_OP_NOP);
+
+			ctx.bc->cf_last->end_of_program = 1;
+		}
+	}
 
 	/* check GPR limit - we have 124 = 128 - 4
 	 * (4 are reserved as alu clause temporary registers) */
-- 
1.8.3.1



More information about the mesa-dev mailing list