[Mesa-dev] [PATCH 1/1] r600g: work around shaders allocating too many superfluous temporaries
Gert Wollny
gw.fossdev at gmail.com
Tue May 30 12:14:03 UTC 2017
Related bugs:
https://bugs.freedesktop.org/show_bug.cgi?id=99349
https://bugs.freedesktop.org/show_bug.cgi?id=50338
1. Allocate ctx.temp_reg and a limited number of registers
(R600_TEMP_REG_RESERVED=10) that are given out via r600_get_temp() before
the temporaries of the TGSI are allocated. That makes it possible for
tgsi_split_constants() to allocate registers inside the proper GPR range,
so that r600_asm.c:check_and_set_bank_swizzle doesn't fail.
2. Move the test for the register use limit (124) to after the optimization
in r600_pipe_shader_create(). However, add a test for a hard limit of 191 in
r600_shader_from_tgsi() to avoid interference with reserved values.
---
src/gallium/drivers/r600/r600_shader.c | 52 +++++++++++++++++++++++++++-------
1 file changed, 42 insertions(+), 10 deletions(-)
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index bdaf28ced2..d550f4cd7f 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -83,6 +83,13 @@ The compiler must issue the source argument to slots z, y, and x
face_gpr.w = SampleID
*/
#define R600_SHADER_BUFFER_INFO_SEL (512 + R600_BUFFER_INFO_OFFSET / 16)
+
+/* Number of GPRs reserved before the temporaries in order to work around
+ problems with shaders that request too many temporaries that can be
+ optimized away in the sb pass.
+*/
+#define R600_TEMP_REG_RESERVED 10
+
static int r600_shader_from_tgsi(struct r600_context *rctx,
struct r600_pipe_shader *pipeshader,
union r600_shader_key key);
@@ -216,6 +223,13 @@ int r600_pipe_shader_create(struct pipe_context *ctx,
}
}
+ if (shader->shader.bc.ngpr > 124) {
+ r = -ENOMEM;
+ R600_ERR("Shader GPR limit exceeded - shader requires %d registers.\n",
+ shader->shader.bc.ngpr);
+ goto error;
+ }
+
if (shader->gs_copy_shader) {
if (dump) {
// dump copy shader
@@ -322,6 +336,7 @@ struct r600_shader_ctx {
unsigned type;
unsigned file_offset[TGSI_FILE_COUNT];
unsigned temp_reg;
+ unsigned temp_reg_highmem;
const struct r600_shader_tgsi_instruction *inst_info;
struct r600_bytecode *bc;
struct r600_shader *shader;
@@ -814,7 +829,11 @@ static inline int get_address_file_reg(struct r600_shader_ctx *ctx, int index)
static int r600_get_temp(struct r600_shader_ctx *ctx)
{
- return ctx->temp_reg + ctx->max_driver_temp_used++;
+ if (ctx->max_driver_temp_used < R600_TEMP_REG_RESERVED)
+ return ctx->temp_reg + ctx->max_driver_temp_used++;
+ else
+ return ctx->temp_reg_highmem + ctx->max_driver_temp_used++ -
+ R600_TEMP_REG_RESERVED;
}
static int vs_add_primid_output(struct r600_shader_ctx *ctx, int prim_id_sid)
@@ -2213,6 +2232,8 @@ static int generate_gs_copy_shader(struct r600_context *rctx,
r600_bytecode_add_vtx(ctx.bc, &vtx);
}
ctx.temp_reg = i + 1;
+ ctx.temp_reg_highmem = ctx.temp_reg + R600_TEMP_REG_RESERVED;
+
for (ring = 3; ring >= 0; --ring) {
bool enabled = false;
for (i = 0; i < so->num_outputs; i++) {
@@ -3065,8 +3086,11 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
ctx.file_offset[TGSI_FILE_OUTPUT] =
ctx.file_offset[TGSI_FILE_INPUT] +
ctx.info.file_max[TGSI_FILE_INPUT] + 1;
- ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
- ctx.info.file_max[TGSI_FILE_OUTPUT] + 1;
+
+ ctx.temp_reg = ctx.file_offset[TGSI_FILE_OUTPUT] +
+ ctx.info.file_max[TGSI_FILE_OUTPUT] + 1;
+
+ ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.temp_reg + R600_TEMP_REG_RESERVED;
/* Outside the GPR range. This will be translated to one of the
* kcache banks later. */
@@ -3081,19 +3105,19 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
if (ctx.type == PIPE_SHADER_TESS_CTRL) {
ctx.tess_input_info = ctx.bc->ar_reg + 3;
ctx.tess_output_info = ctx.bc->ar_reg + 4;
- ctx.temp_reg = ctx.bc->ar_reg + 5;
+ ctx.temp_reg_highmem = ctx.bc->ar_reg + 5;
} else if (ctx.type == PIPE_SHADER_TESS_EVAL) {
ctx.tess_input_info = 0;
ctx.tess_output_info = ctx.bc->ar_reg + 3;
- ctx.temp_reg = ctx.bc->ar_reg + 4;
+ ctx.temp_reg_highmem = ctx.bc->ar_reg + 4;
} else if (ctx.type == PIPE_SHADER_GEOMETRY) {
ctx.gs_export_gpr_tregs[0] = ctx.bc->ar_reg + 3;
ctx.gs_export_gpr_tregs[1] = ctx.bc->ar_reg + 4;
ctx.gs_export_gpr_tregs[2] = ctx.bc->ar_reg + 5;
ctx.gs_export_gpr_tregs[3] = ctx.bc->ar_reg + 6;
- ctx.temp_reg = ctx.bc->ar_reg + 7;
+ ctx.temp_reg_highmem = ctx.bc->ar_reg + 7;
} else {
- ctx.temp_reg = ctx.bc->ar_reg + 3;
+ ctx.temp_reg_highmem = ctx.bc->ar_reg + 3;
}
shader->max_arrays = 0;
@@ -3656,9 +3680,17 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
}
/* check GPR limit - we have 124 = 128 - 4
- * (4 are reserved as alu clause temporary registers) */
- if (ctx.bc->ngpr > 124) {
- R600_ERR("GPR limit exceeded - shader requires %d registers\n", ctx.bc->ngpr);
+ * (4 are reserved as alu clause temporary registers)
+ * Use this as a soft limit since the sb optimiation pass
+ * might reduce this number. */
+ if (ctx.bc->ngpr > 124)
+ fprintf(stderr, "Warning: GPR limit exceeded prior to optimization"
+ " - shader requires %d registers\n", ctx.bc->ngpr);
+
+ /* Set a hard limit for register usage */
+ if (ctx.bc->ngpr > 191) {
+ R600_ERR("GPR limit exceeded - shader requires %d registers\n",
+ ctx.bc->ngpr);
r = -ENOMEM;
goto out_err;
}
--
2.13.0
More information about the mesa-dev
mailing list