[Mesa-dev] [PATCH 2/7] radeonsi: disable the patch ID workaround on SI when the patch ID isn't used (v2)
Marek Olšák
maraeo at gmail.com
Wed Jun 7 19:50:54 UTC 2017
From: Marek Olšák <marek.olsak at amd.com>
The workaround causes a massive performance decrease on 1-SE parts.
(Cape Verde, Hainan, Oland)
The performance regression is already part of 17.0 and 17.1.
v2: check tess_uses_prim_id
Cc: 17.0 17.1 <mesa-stable at lists.freedesktop.org>
---
src/gallium/drivers/radeonsi/si_pipe.h | 1 +
src/gallium/drivers/radeonsi/si_state_draw.c | 35 ++++++++++++++++------------
2 files changed, 21 insertions(+), 15 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 108929c..5559946 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -364,20 +364,21 @@ struct si_context {
unsigned spi_tmpring_size;
struct r600_resource *compute_scratch_buffer;
/* Emitted derived tessellation state. */
/* Local shader (VS), or HS if LS-HS are merged. */
struct si_shader *last_ls;
struct si_shader_selector *last_tcs;
int last_num_tcs_input_cp;
int last_tes_sh_base;
+ bool last_tess_uses_primid;
unsigned last_num_patches;
/* Debug state. */
bool is_debug;
struct radeon_saved_cs last_gfx;
struct r600_resource *last_trace_buf;
struct r600_resource *trace_buf;
unsigned trace_id;
uint64_t dmesg_timestamp;
unsigned apitrace_call_number;
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index cd069e3..8508259 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -95,20 +95,23 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
const struct pipe_draw_info *info,
unsigned *num_patches)
{
struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
struct si_shader *ls_current;
struct si_shader_selector *ls;
/* The TES pointer will only be used for sctx->last_tcs.
* It would be wrong to think that TCS = TES. */
struct si_shader_selector *tcs =
sctx->tcs_shader.cso ? sctx->tcs_shader.cso : sctx->tes_shader.cso;
+ unsigned tess_uses_primid = sctx->ia_multi_vgt_param_key.u.tess_uses_prim_id;
+ bool has_primid_instancing_bug = sctx->b.chip_class == SI &&
+ sctx->b.screen->info.max_se == 1;
unsigned tes_sh_base = sctx->shader_userdata.sh_base[PIPE_SHADER_TESS_EVAL];
unsigned num_tcs_input_cp = info->vertices_per_patch;
unsigned num_tcs_output_cp, num_tcs_inputs, num_tcs_outputs;
unsigned num_tcs_patch_outputs;
unsigned input_vertex_size, output_vertex_size, pervertex_output_patch_size;
unsigned input_patch_size, output_patch_size, output_patch0_offset;
unsigned perpatch_output_offset, lds_size;
unsigned tcs_in_layout, tcs_out_layout, tcs_out_offsets;
unsigned offchip_layout, hardware_lds_size, ls_hs_config;
@@ -121,29 +124,32 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
ls = ls_current->key.part.tcs.ls;
} else {
ls_current = sctx->vs_shader.current;
ls = sctx->vs_shader.cso;
}
if (sctx->last_ls == ls_current &&
sctx->last_tcs == tcs &&
sctx->last_tes_sh_base == tes_sh_base &&
- sctx->last_num_tcs_input_cp == num_tcs_input_cp) {
+ sctx->last_num_tcs_input_cp == num_tcs_input_cp &&
+ (!has_primid_instancing_bug ||
+ (sctx->last_tess_uses_primid == tess_uses_primid))) {
*num_patches = sctx->last_num_patches;
return;
}
sctx->last_ls = ls_current;
sctx->last_tcs = tcs;
sctx->last_tes_sh_base = tes_sh_base;
sctx->last_num_tcs_input_cp = num_tcs_input_cp;
+ sctx->last_tess_uses_primid = tess_uses_primid;
/* This calculates how shader inputs and outputs among VS, TCS, and TES
* are laid out in LDS. */
num_tcs_inputs = util_last_bit64(ls->outputs_written);
if (sctx->tcs_shader.cso) {
num_tcs_outputs = util_last_bit64(tcs->outputs_written);
num_tcs_output_cp = tcs->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT];
num_tcs_patch_outputs = util_last_bit64(tcs->patch_outputs_written);
} else {
@@ -187,36 +193,35 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
* specific value is taken from the proprietary driver.
*/
*num_patches = MIN2(*num_patches, 40);
if (sctx->b.chip_class == SI) {
/* SI bug workaround, related to power management. Limit LS-HS
* threadgroups to only one wave.
*/
unsigned one_wave = 64 / MAX2(num_tcs_input_cp, num_tcs_output_cp);
*num_patches = MIN2(*num_patches, one_wave);
-
- if (sctx->screen->b.info.max_se == 1) {
- /* The VGT HS block increments the patch ID unconditionally
- * within a single threadgroup. This results in incorrect
- * patch IDs when instanced draws are used.
- *
- * The intended solution is to restrict threadgroups to
- * a single instance by setting SWITCH_ON_EOI, which
- * should cause IA to split instances up. However, this
- * doesn't work correctly on SI when there is no other
- * SE to switch to.
- */
- *num_patches = 1;
- }
}
+ /* The VGT HS block increments the patch ID unconditionally
+ * within a single threadgroup. This results in incorrect
+ * patch IDs when instanced draws are used.
+ *
+ * The intended solution is to restrict threadgroups to
+ * a single instance by setting SWITCH_ON_EOI, which
+ * should cause IA to split instances up. However, this
+ * doesn't work correctly on SI when there is no other
+ * SE to switch to.
+ */
+ if (has_primid_instancing_bug)
+ *num_patches = 1;
+
sctx->last_num_patches = *num_patches;
output_patch0_offset = input_patch_size * *num_patches;
perpatch_output_offset = output_patch0_offset + pervertex_output_patch_size;
/* Compute userdata SGPRs. */
assert(((input_vertex_size / 4) & ~0xff) == 0);
assert(((output_vertex_size / 4) & ~0xff) == 0);
assert(((input_patch_size / 4) & ~0x1fff) == 0);
assert(((output_patch_size / 4) & ~0x1fff) == 0);
--
2.7.4
More information about the mesa-dev
mailing list