[Mesa-dev] [PATCH 2/8] radeonsi: apply a multi-wave workgroup SPI bug workaround to affected CIK chips
Marek Olšák
maraeo at gmail.com
Wed Nov 30 01:36:33 UTC 2016
From: Marek Olšák <marek.olsak at amd.com>
All codepaths are handled except for clover.
Cc: 13.0 <mesa-stable at lists.freedesktop.org>
---
src/gallium/drivers/radeonsi/si_compute.c | 1 +
src/gallium/drivers/radeonsi/si_shader.c | 24 ++++++++++++++++++++++--
src/gallium/drivers/radeonsi/si_shader.h | 2 ++
src/gallium/drivers/radeonsi/si_state_draw.c | 6 ++++--
4 files changed, 29 insertions(+), 4 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
index 91f1b0a..9d83cb3 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -341,20 +341,21 @@ static bool si_switch_compute_shader(struct si_context *sctx,
* LDS in blocks of 256 bytes, so if there are 4 bytes lds
* allocated in the shader and 4 bytes allocated by the state
* tracker, then we will set LDS_SIZE to 512 bytes rather than 256.
*/
if (sctx->b.chip_class <= SI) {
lds_blocks += align(program->local_size, 256) >> 8;
} else {
lds_blocks += align(program->local_size, 512) >> 9;
}
+ /* TODO: use si_multiwave_lds_size_workaround */
assert(lds_blocks <= 0xFF);
config->rsrc2 &= C_00B84C_LDS_SIZE;
config->rsrc2 |= S_00B84C_LDS_SIZE(lds_blocks);
}
if (!si_setup_compute_scratch_buffer(sctx, shader, config))
return false;
if (shader->scratch_bo) {
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index b19c61e..80c063a 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -8191,25 +8191,45 @@ static bool si_shader_select_ps_parts(struct si_screen *sscreen,
/* The sample mask input is always enabled, because the API shader always
* passes it through to the epilog. Disable it here if it's unused.
*/
if (!shader->key.part.ps.epilog.poly_line_smoothing &&
!shader->selector->info.reads_samplemask)
shader->config.spi_ps_input_ena &= C_0286CC_SAMPLE_COVERAGE_ENA;
return true;
}
-static void si_fix_num_sgprs(struct si_shader *shader)
+void si_multiwave_lds_size_workaround(struct si_screen *sscreen,
+ unsigned *lds_size)
+{
+ /* SPI barrier management bug:
+ * Make sure we have at least 4k of LDS in use to avoid the bug.
+ * It applies to workgroup sizes of more than one wavefront.
+ */
+ if (sscreen->b.family == CHIP_BONAIRE ||
+ sscreen->b.family == CHIP_KABINI ||
+ sscreen->b.family == CHIP_MULLINS)
+ *lds_size = MAX2(*lds_size, 8);
+}
+
+static void si_fix_resource_usage(struct si_screen *sscreen,
+ struct si_shader *shader)
{
unsigned min_sgprs = shader->info.num_input_sgprs + 2; /* VCC */
shader->config.num_sgprs = MAX2(shader->config.num_sgprs, min_sgprs);
+
+ if (shader->selector->type == PIPE_SHADER_COMPUTE &&
+ si_get_max_workgroup_size(shader) > 64) {
+ si_multiwave_lds_size_workaround(sscreen,
+ &shader->config.lds_size);
+ }
}
int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
struct si_shader *shader,
struct pipe_debug_callback *debug)
{
struct si_shader_selector *sel = shader->selector;
struct si_shader *mainp = sel->main_shader_part;
int r;
@@ -8290,21 +8310,21 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
shader->prolog->config.num_vgprs);
}
if (shader->epilog) {
shader->config.num_sgprs = MAX2(shader->config.num_sgprs,
shader->epilog->config.num_sgprs);
shader->config.num_vgprs = MAX2(shader->config.num_vgprs,
shader->epilog->config.num_vgprs);
}
}
- si_fix_num_sgprs(shader);
+ si_fix_resource_usage(sscreen, shader);
si_shader_dump(sscreen, shader, debug, sel->info.processor,
stderr);
/* Upload. */
r = si_shader_binary_upload(sscreen, shader);
if (r) {
fprintf(stderr, "LLVM failed to upload shader\n");
return r;
}
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index d4bc47b..129e571 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -540,20 +540,22 @@ int si_compile_llvm(struct si_screen *sscreen,
struct pipe_debug_callback *debug,
unsigned processor,
const char *name);
void si_shader_destroy(struct si_shader *shader);
unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index);
unsigned si_shader_io_get_unique_index2(unsigned name, unsigned index);
int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader);
void si_shader_dump(struct si_screen *sscreen, struct si_shader *shader,
struct pipe_debug_callback *debug, unsigned processor,
FILE *f);
+void si_multiwave_lds_size_workaround(struct si_screen *sscreen,
+ unsigned *lds_size);
void si_shader_apply_scratch_relocs(struct si_context *sctx,
struct si_shader *shader,
struct si_shader_config *config,
uint64_t scratch_va);
void si_shader_binary_read_config(struct radeon_shader_binary *binary,
struct si_shader_config *conf,
unsigned symbol_offset);
unsigned si_get_spi_shader_z_format(bool writes_z, bool writes_stencil,
bool writes_samplemask);
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index e904164..10073ef 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -169,25 +169,27 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
sctx->last_num_patches = *num_patches;
output_patch0_offset = input_patch_size * *num_patches;
perpatch_output_offset = output_patch0_offset + pervertex_output_patch_size;
lds_size = output_patch0_offset + output_patch_size * *num_patches;
ls_rsrc2 = ls->current->config.rsrc2;
if (sctx->b.chip_class >= CIK) {
assert(lds_size <= 65536);
- ls_rsrc2 |= S_00B52C_LDS_SIZE(align(lds_size, 512) / 512);
+ lds_size = align(lds_size, 512) / 512;
} else {
assert(lds_size <= 32768);
- ls_rsrc2 |= S_00B52C_LDS_SIZE(align(lds_size, 256) / 256);
+ lds_size = align(lds_size, 256) / 256;
}
+ si_multiwave_lds_size_workaround(sctx->screen, &lds_size);
+ ls_rsrc2 |= S_00B52C_LDS_SIZE(lds_size);
/* Due to a hw bug, RSRC2_LS must be written twice with another
* LS register written in between. */
if (sctx->b.chip_class == CIK && sctx->b.family != CHIP_HAWAII)
radeon_set_sh_reg(cs, R_00B52C_SPI_SHADER_PGM_RSRC2_LS, ls_rsrc2);
radeon_set_sh_reg_seq(cs, R_00B528_SPI_SHADER_PGM_RSRC1_LS, 2);
radeon_emit(cs, ls->current->config.rsrc1);
radeon_emit(cs, ls_rsrc2);
/* Compute userdata SGPRs. */
--
2.7.4
More information about the mesa-dev
mailing list