Mesa (master): r600g: lower stream outputs with dst_offset < start_component

Marek Olšák mareko at kemper.freedesktop.org
Thu Dec 20 16:13:37 UTC 2012


Module: Mesa
Branch: master
Commit: 72362ebefb48cb79355b1e295ca21341d45ea85c
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=72362ebefb48cb79355b1e295ca21341d45ea85c

Author: Marek Olšák <maraeo at gmail.com>
Date:   Wed Dec 19 15:09:54 2012 +0100

r600g: lower stream outputs with dst_offset < start_component

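This fixes streamout breakage caused by the varying packing. Stream outputs
whose dst_offset is less than start_component are no longer rejected with
-EINVAL; their components are copied with MOV into a temporary register
starting at channel X and written out from there.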

Reviewed-by: Dave Airlie <airlied at redhat.com>

---

 src/gallium/drivers/r600/r600_shader.c |   54 +++++++++++++++++++++++++++----
 1 files changed, 47 insertions(+), 7 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index 0ffa1fa..55b5db3 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -1530,23 +1530,63 @@ static int r600_shader_from_tgsi(struct r600_screen *rscreen,
 
 	/* Add stream outputs. */
 	if (ctx.type == TGSI_PROCESSOR_VERTEX && so.num_outputs) {
-		for (i = 0; i < so.num_outputs; i++) {
-			struct r600_bytecode_output output;
+		unsigned so_gpr[PIPE_MAX_SHADER_OUTPUTS];
 
+		/* Sanity checking. */
+		if (so.num_outputs > PIPE_MAX_SHADER_OUTPUTS) {
+			R600_ERR("Too many stream outputs: %d\n", so.num_outputs);
+			r = -EINVAL;
+			goto out_err;
+		}
+		for (i = 0; i < so.num_outputs; i++) {
 			if (so.output[i].output_buffer >= 4) {
-				R600_ERR("exceeded the max number of stream output buffers, got: %d\n",
+				R600_ERR("Exceeded the max number of stream output buffers, got: %d\n",
 					 so.output[i].output_buffer);
 				r = -EINVAL;
 				goto out_err;
 			}
+		}
+
+		/* Initialize locations where the outputs are stored. */
+		for (i = 0; i < so.num_outputs; i++) {
+			so_gpr[i] = shader->output[so.output[i].register_index].gpr;
+
+			/* Lower outputs with dst_offset < start_component.
+			 *
+			 * We can only output 4D vectors with a write mask, e.g. we can
+			 * only output the W component at offset 3, etc. If we want
+			 * to store Y, Z, or W at buffer offset 0, we need to use MOV
+			 * to move it to X and output X. */
 			if (so.output[i].dst_offset < so.output[i].start_component) {
-			   R600_ERR("stream_output - dst_offset cannot be less than start_component\n");
-			   r = -EINVAL;
-			   goto out_err;
+				unsigned tmp = r600_get_temp(&ctx);
+
+				for (j = 0; j < so.output[i].num_components; j++) {
+					struct r600_bytecode_alu alu;
+					memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+					alu.inst = BC_INST(ctx.bc, V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV);
+					alu.src[0].sel = so_gpr[i];
+					alu.src[0].chan = so.output[i].start_component + j;
+
+					alu.dst.sel = tmp;
+					alu.dst.chan = j;
+					alu.dst.write = 1;
+					if (j == so.output[i].num_components - 1)
+						alu.last = 1;
+					r = r600_bytecode_add_alu(ctx.bc, &alu);
+					if (r)
+						return r;
+				}
+				so.output[i].start_component = 0;
+				so_gpr[i] = tmp;
 			}
+		}
+
+		/* Write outputs to buffers. */
+		for (i = 0; i < so.num_outputs; i++) {
+			struct r600_bytecode_output output;
 
 			memset(&output, 0, sizeof(struct r600_bytecode_output));
-			output.gpr = shader->output[so.output[i].register_index].gpr;
+			output.gpr = so_gpr[i];
 			output.elem_size = 0;
 			output.array_base = so.output[i].dst_offset - so.output[i].start_component;
 			output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE;




More information about the mesa-commit mailing list