Mesa (master): radeonsi: apply a multi-wave workgroup SPI bug workaround to affected CIK chips

Marek Olšák mareko at kemper.freedesktop.org
Thu Dec 1 01:17:11 UTC 2016


Module: Mesa
Branch: master
Commit: 72d48fcd8eb5862c72d27e5462c289c5de65396e
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=72d48fcd8eb5862c72d27e5462c289c5de65396e

Author: Marek Olšák <marek.olsak at amd.com>
Date:   Tue Nov 29 19:25:03 2016 +0100

radeonsi: apply a multi-wave workgroup SPI bug workaround to affected CIK chips

All codepaths are handled except for clover.

Cc: 13.0 <mesa-stable at lists.freedesktop.org>
Reviewed-by: Nicolai Hähnle <nicolai.haehnle at amd.com>

---

 src/gallium/drivers/radeonsi/si_compute.c    |  1 +
 src/gallium/drivers/radeonsi/si_shader.c     | 24 ++++++++++++++++++++++--
 src/gallium/drivers/radeonsi/si_shader.h     |  2 ++
 src/gallium/drivers/radeonsi/si_state_draw.c |  6 ++++--
 4 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
index 91f1b0a..9d83cb3 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -348,6 +348,7 @@ static bool si_switch_compute_shader(struct si_context *sctx,
 			lds_blocks += align(program->local_size, 512) >> 9;
 		}
 
+		/* TODO: use si_multiwave_lds_size_workaround */
 		assert(lds_blocks <= 0xFF);
 
 		config->rsrc2 &= C_00B84C_LDS_SIZE;
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index b19c61e..80c063a 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -8198,11 +8198,31 @@ static bool si_shader_select_ps_parts(struct si_screen *sscreen,
 	return true;
 }
 
-static void si_fix_num_sgprs(struct si_shader *shader)
+void si_multiwave_lds_size_workaround(struct si_screen *sscreen,
+				      unsigned *lds_size)
+{
+	/* SPI barrier management bug:
+	 *   Make sure we have at least 4k of LDS in use to avoid the bug.
+	 *   It applies to workgroup sizes of more than one wavefront.
+	 */
+	if (sscreen->b.family == CHIP_BONAIRE ||
+	    sscreen->b.family == CHIP_KABINI ||
+	    sscreen->b.family == CHIP_MULLINS)
+		*lds_size = MAX2(*lds_size, 8);
+}
+
+static void si_fix_resource_usage(struct si_screen *sscreen,
+				  struct si_shader *shader)
 {
 	unsigned min_sgprs = shader->info.num_input_sgprs + 2; /* VCC */
 
 	shader->config.num_sgprs = MAX2(shader->config.num_sgprs, min_sgprs);
+
+	if (shader->selector->type == PIPE_SHADER_COMPUTE &&
+	    si_get_max_workgroup_size(shader) > 64) {
+		si_multiwave_lds_size_workaround(sscreen,
+						 &shader->config.lds_size);
+	}
 }
 
 int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
@@ -8297,7 +8317,7 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
 		}
 	}
 
-	si_fix_num_sgprs(shader);
+	si_fix_resource_usage(sscreen, shader);
 	si_shader_dump(sscreen, shader, debug, sel->info.processor,
 		       stderr);
 
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index d4bc47b..129e571 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -547,6 +547,8 @@ int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader)
 void si_shader_dump(struct si_screen *sscreen, struct si_shader *shader,
 		    struct pipe_debug_callback *debug, unsigned processor,
 		    FILE *f);
+void si_multiwave_lds_size_workaround(struct si_screen *sscreen,
+				      unsigned *lds_size);
 void si_shader_apply_scratch_relocs(struct si_context *sctx,
 			struct si_shader *shader,
 			struct si_shader_config *config,
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index e904164..10073ef 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -176,11 +176,13 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
 
 	if (sctx->b.chip_class >= CIK) {
 		assert(lds_size <= 65536);
-		ls_rsrc2 |= S_00B52C_LDS_SIZE(align(lds_size, 512) / 512);
+		lds_size = align(lds_size, 512) / 512;
 	} else {
 		assert(lds_size <= 32768);
-		ls_rsrc2 |= S_00B52C_LDS_SIZE(align(lds_size, 256) / 256);
+		lds_size = align(lds_size, 256) / 256;
 	}
+	si_multiwave_lds_size_workaround(sctx->screen, &lds_size);
+	ls_rsrc2 |= S_00B52C_LDS_SIZE(lds_size);
 
 	/* Due to a hw bug, RSRC2_LS must be written twice with another
 	 * LS register written in between. */




More information about the mesa-commit mailing list