[Mesa-dev] [PATCH 1/1] r600g: work around shaders allocating too many superfluous temporaries

Gert Wollny gw.fossdev at gmail.com
Tue May 30 12:14:03 UTC 2017


Related bugs:
https://bugs.freedesktop.org/show_bug.cgi?id=99349
https://bugs.freedesktop.org/show_bug.cgi?id=50338

1. Allocate ctx.temp_reg and a limited number of registers
(R600_TEMP_REG_RESERVED=10) that are given out via r600_get_temp() before
the temporaries of the TGSI are allocated. That makes it possible for
tgsi_split_constants() to allocate registers inside the proper GPR range,
so that r600_asm.c:check_and_set_bank_swizzle doesn't fail.

2. Move the test for the register use limit (124) to after the optimization
in r600_pipe_shader_create(). Still add a test for a hard limit of 191 in
r600_shader_from_tgsi(), though, to avoid interference with reserved values.
---
 src/gallium/drivers/r600/r600_shader.c | 52 +++++++++++++++++++++++++++-------
 1 file changed, 42 insertions(+), 10 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index bdaf28ced2..d550f4cd7f 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -83,6 +83,13 @@ The compiler must issue the source argument to slots z, y, and x
       face_gpr.w = SampleID
 */
 #define R600_SHADER_BUFFER_INFO_SEL (512 + R600_BUFFER_INFO_OFFSET / 16)
+
+/* Number of GPRs reserved before the temporaries in order to work around
+   problems with shaders that request too many temporaries that can be
+   optimized away in the sb pass.
+*/
+#define R600_TEMP_REG_RESERVED 10
+
 static int r600_shader_from_tgsi(struct r600_context *rctx,
 				 struct r600_pipe_shader *pipeshader,
 				 union r600_shader_key key);
@@ -216,6 +223,13 @@ int r600_pipe_shader_create(struct pipe_context *ctx,
 		}
 	}
 
+	if (shader->shader.bc.ngpr > 124) {
+		r = -ENOMEM;
+		R600_ERR("Shader GPR limit exceeded - shader requires %d registers.\n",
+		         shader->shader.bc.ngpr);
+		goto error;
+	}
+
 	if (shader->gs_copy_shader) {
 		if (dump) {
 			// dump copy shader
@@ -322,6 +336,7 @@ struct r600_shader_ctx {
 	unsigned				type;
 	unsigned				file_offset[TGSI_FILE_COUNT];
 	unsigned				temp_reg;
+	unsigned				temp_reg_highmem;
 	const struct r600_shader_tgsi_instruction	*inst_info;
 	struct r600_bytecode			*bc;
 	struct r600_shader			*shader;
@@ -814,7 +829,11 @@ static inline int get_address_file_reg(struct r600_shader_ctx *ctx, int index)
 
 static int r600_get_temp(struct r600_shader_ctx *ctx)
 {
-	return ctx->temp_reg + ctx->max_driver_temp_used++;
+	if (ctx->max_driver_temp_used < R600_TEMP_REG_RESERVED)
+		return ctx->temp_reg + ctx->max_driver_temp_used++;
+	else
+		return ctx->temp_reg_highmem + ctx->max_driver_temp_used++ -
+		       R600_TEMP_REG_RESERVED;
 }
 
 static int vs_add_primid_output(struct r600_shader_ctx *ctx, int prim_id_sid)
@@ -2213,6 +2232,8 @@ static int generate_gs_copy_shader(struct r600_context *rctx,
 		r600_bytecode_add_vtx(ctx.bc, &vtx);
 	}
 	ctx.temp_reg = i + 1;
+	ctx.temp_reg_highmem = ctx.temp_reg + R600_TEMP_REG_RESERVED;
+
 	for (ring = 3; ring >= 0; --ring) {
 		bool enabled = false;
 		for (i = 0; i < so->num_outputs; i++) {
@@ -3065,8 +3086,11 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
 	ctx.file_offset[TGSI_FILE_OUTPUT] =
 			ctx.file_offset[TGSI_FILE_INPUT] +
 			ctx.info.file_max[TGSI_FILE_INPUT] + 1;
-	ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
-						ctx.info.file_max[TGSI_FILE_OUTPUT] + 1;
+
+        ctx.temp_reg = ctx.file_offset[TGSI_FILE_OUTPUT] +
+                       ctx.info.file_max[TGSI_FILE_OUTPUT] + 1;
+
+        ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.temp_reg + R600_TEMP_REG_RESERVED;
 
 	/* Outside the GPR range. This will be translated to one of the
 	 * kcache banks later. */
@@ -3081,19 +3105,19 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
 	if (ctx.type == PIPE_SHADER_TESS_CTRL) {
 		ctx.tess_input_info = ctx.bc->ar_reg + 3;
 		ctx.tess_output_info = ctx.bc->ar_reg + 4;
-		ctx.temp_reg = ctx.bc->ar_reg + 5;
+		ctx.temp_reg_highmem = ctx.bc->ar_reg + 5;
 	} else if (ctx.type == PIPE_SHADER_TESS_EVAL) {
 		ctx.tess_input_info = 0;
 		ctx.tess_output_info = ctx.bc->ar_reg + 3;
-		ctx.temp_reg = ctx.bc->ar_reg + 4;
+		ctx.temp_reg_highmem = ctx.bc->ar_reg + 4;
 	} else if (ctx.type == PIPE_SHADER_GEOMETRY) {
 		ctx.gs_export_gpr_tregs[0] = ctx.bc->ar_reg + 3;
 		ctx.gs_export_gpr_tregs[1] = ctx.bc->ar_reg + 4;
 		ctx.gs_export_gpr_tregs[2] = ctx.bc->ar_reg + 5;
 		ctx.gs_export_gpr_tregs[3] = ctx.bc->ar_reg + 6;
-		ctx.temp_reg = ctx.bc->ar_reg + 7;
+		ctx.temp_reg_highmem = ctx.bc->ar_reg + 7;
 	} else {
-		ctx.temp_reg = ctx.bc->ar_reg + 3;
+		ctx.temp_reg_highmem = ctx.bc->ar_reg + 3;
 	}
 
 	shader->max_arrays = 0;
@@ -3656,9 +3680,17 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
 	}
 
 	/* check GPR limit - we have 124 = 128 - 4
-	 * (4 are reserved as alu clause temporary registers) */
-	if (ctx.bc->ngpr > 124) {
-		R600_ERR("GPR limit exceeded - shader requires %d registers\n", ctx.bc->ngpr);
+	 * (4 are reserved as alu clause temporary registers)
+	 * Use this as a soft limit since the sb optimization pass
+	 * might reduce this number. */
+	if (ctx.bc->ngpr > 124)
+		fprintf(stderr, "Warning: GPR limit exceeded prior to optimization"
+			    " - shader requires %d registers\n", ctx.bc->ngpr);
+
+    /* Set a hard limit for register usage */
+	if (ctx.bc->ngpr > 191) {
+		R600_ERR("GPR limit exceeded - shader requires %d registers\n",
+		         ctx.bc->ngpr);
 		r = -ENOMEM;
 		goto out_err;
 	}
-- 
2.13.0



More information about the mesa-dev mailing list