Mesa (master): radeonsi: kill point size VS output if it's not used by the rasterizer
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Mon Sep 7 11:38:27 UTC 2020
Module: Mesa
Branch: master
Commit: c56fbed99b4aeb22cec19dc83d75aba79f9fe696
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=c56fbed99b4aeb22cec19dc83d75aba79f9fe696
Author: Marek Olšák <marek.olsak at amd.com>
Date: Sun Sep 6 01:22:01 2020 -0400
radeonsi: kill point size VS output if it's not used by the rasterizer
Fixed-func shaders can contain the output, because their generator
doesn't take the current primitive type into account.
Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer at amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6620>
---
src/gallium/drivers/radeonsi/si_shader.h | 1 +
src/gallium/drivers/radeonsi/si_shader_llvm_vs.c | 7 ++++---
src/gallium/drivers/radeonsi/si_state.c | 6 +++++-
src/gallium/drivers/radeonsi/si_state.h | 1 +
src/gallium/drivers/radeonsi/si_state_shaders.c | 25 ++++++++++++++++++------
5 files changed, 30 insertions(+), 10 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index 805a8b1e87a..d26f36a4388 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -637,6 +637,7 @@ struct si_shader_key {
struct {
/* For HW VS (it can be VS, TES, GS) */
uint64_t kill_outputs; /* "get_unique_index" bits */
+ unsigned kill_pointsize : 1;
unsigned clip_disable : 1;
/* For NGG VS and TES. */
diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c b/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c
index daa992b42d0..96313d11175 100644
--- a/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c
+++ b/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c
@@ -593,12 +593,13 @@ void si_llvm_build_vs_exports(struct si_shader_context *ctx,
pos_args[0].out[3] = ctx->ac.f32_1; /* W */
}
+ bool writes_psize = shader->selector->info.writes_psize && !shader->key.opt.kill_pointsize;
bool pos_writes_edgeflag = shader->selector->info.writes_edgeflag && !shader->key.as_ngg;
/* Write the misc vector (point size, edgeflag, layer, viewport). */
- if (shader->selector->info.writes_psize || pos_writes_edgeflag ||
+ if (writes_psize || pos_writes_edgeflag ||
shader->selector->info.writes_viewport_index || shader->selector->info.writes_layer) {
- pos_args[1].enabled_channels = shader->selector->info.writes_psize |
+ pos_args[1].enabled_channels = writes_psize |
(pos_writes_edgeflag << 1) |
(shader->selector->info.writes_layer << 2);
@@ -611,7 +612,7 @@ void si_llvm_build_vs_exports(struct si_shader_context *ctx,
pos_args[1].out[2] = ctx->ac.f32_0; /* Z */
pos_args[1].out[3] = ctx->ac.f32_0; /* W */
- if (shader->selector->info.writes_psize)
+ if (writes_psize)
pos_args[1].out[0] = psize_value;
if (pos_writes_edgeflag) {
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 4c2e0c7a6c1..75507a30cc4 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -871,6 +871,9 @@ static void *si_create_rs_state(struct pipe_context *ctx, const struct pipe_rast
rs->polygon_mode_is_lines =
(state->fill_front == PIPE_POLYGON_MODE_LINE && !(state->cull_face & PIPE_FACE_FRONT)) ||
(state->fill_back == PIPE_POLYGON_MODE_LINE && !(state->cull_face & PIPE_FACE_BACK));
+ rs->polygon_mode_is_points =
+ (state->fill_front == PIPE_POLYGON_MODE_POINT && !(state->cull_face & PIPE_FACE_FRONT)) ||
+ (state->fill_back == PIPE_POLYGON_MODE_POINT && !(state->cull_face & PIPE_FACE_BACK));
rs->pa_sc_line_stipple = state->line_stipple_enable
? S_028A0C_LINE_PATTERN(state->line_stipple_pattern) |
S_028A0C_REPEAT_COUNT(state->line_stipple_factor)
@@ -1020,7 +1023,8 @@ static void si_bind_rs_state(struct pipe_context *ctx, void *state)
old_rs->poly_stipple_enable != rs->poly_stipple_enable ||
old_rs->poly_smooth != rs->poly_smooth || old_rs->line_smooth != rs->line_smooth ||
old_rs->clamp_fragment_color != rs->clamp_fragment_color ||
- old_rs->force_persample_interp != rs->force_persample_interp)
+ old_rs->force_persample_interp != rs->force_persample_interp ||
+ old_rs->polygon_mode_is_points != rs->polygon_mode_is_points)
sctx->do_update_shaders = true;
}
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index bb7a73c938e..4d42a40d517 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -95,6 +95,7 @@ struct si_state_rasterizer {
unsigned provoking_vertex_first : 1;
unsigned polygon_mode_enabled : 1;
unsigned polygon_mode_is_lines : 1;
+ unsigned polygon_mode_is_points : 1;
};
struct si_dsa_stencil_ref_part {
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index df89c9dfe6b..9e1f088e16f 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -1039,11 +1039,17 @@ unsigned si_get_input_prim(const struct si_shader_selector *gs)
return PIPE_PRIM_TRIANGLES; /* worst case for all callers */
}
-static unsigned si_get_vs_out_cntl(const struct si_shader_selector *sel, bool ngg)
+static unsigned si_get_vs_out_cntl(const struct si_shader_selector *sel,
+ const struct si_shader *shader, bool ngg)
{
- bool misc_vec_ena = sel->info.writes_psize || (sel->info.writes_edgeflag && !ngg) ||
+ bool writes_psize = sel->info.writes_psize;
+
+ if (shader)
+ writes_psize &= !shader->key.opt.kill_pointsize;
+
+ bool misc_vec_ena = writes_psize || (sel->info.writes_edgeflag && !ngg) ||
sel->info.writes_layer || sel->info.writes_viewport_index;
- return S_02881C_USE_VTX_POINT_SIZE(sel->info.writes_psize) |
+ return S_02881C_USE_VTX_POINT_SIZE(writes_psize) |
S_02881C_USE_VTX_EDGE_FLAG(sel->info.writes_edgeflag && !ngg) |
S_02881C_USE_VTX_RENDER_TARGET_INDX(sel->info.writes_layer) |
S_02881C_USE_VTX_VIEWPORT_INDX(sel->info.writes_viewport_index) |
@@ -1219,7 +1225,7 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
S_028838_INDEX_BUF_EDGE_FLAG_ENA(gs_stage == MESA_SHADER_VERTEX) |
/* Reuse for NGG. */
S_028838_VERTEX_REUSE_DEPTH(sscreen->info.chip_class >= GFX10_3 ? 30 : 0);
- shader->pa_cl_vs_out_cntl = si_get_vs_out_cntl(gs_sel, true);
+ shader->pa_cl_vs_out_cntl = si_get_vs_out_cntl(shader->selector, shader, true);
/* Oversubscribe PC. This improves performance when there are too many varyings. */
float oversub_pc_factor = 0.25;
@@ -1425,7 +1431,7 @@ static void si_shader_vs(struct si_screen *sscreen, struct si_shader *shader,
: V_02870C_SPI_SHADER_NONE);
shader->ctx_reg.vs.ge_pc_alloc = S_030980_OVERSUB_EN(sscreen->info.use_late_alloc) |
S_030980_NUM_PC_LINES(sscreen->info.pc_lines / 4 - 1);
- shader->pa_cl_vs_out_cntl = si_get_vs_out_cntl(shader->selector, false);
+ shader->pa_cl_vs_out_cntl = si_get_vs_out_cntl(shader->selector, shader, false);
oc_lds_en = shader->selector->info.stage == MESA_SHADER_TESS_EVAL ? 1 : 0;
@@ -1789,6 +1795,13 @@ static void si_shader_selector_key_hw_vs(struct si_context *sctx, struct si_shad
if (sctx->ps_shader.cso && sctx->ps_shader.cso->info.uses_primid)
key->mono.u.vs_export_prim_id = 1;
+
+ /* We need PKT3_CONTEXT_REG_RMW, which we currently only use on GFX10+. */
+ if (sctx->chip_class >= GFX10 &&
+ vs->info.writes_psize &&
+ sctx->current_rast_prim != PIPE_PRIM_POINTS &&
+ !sctx->queued.named.rasterizer->polygon_mode_is_points)
+ key->opt.kill_pointsize = 1;
}
/* Compute the key for the hw shader variant */
@@ -2743,7 +2756,7 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
/* PA_CL_VS_OUT_CNTL */
if (sctx->chip_class <= GFX9)
- sel->pa_cl_vs_out_cntl = si_get_vs_out_cntl(sel, false);
+ sel->pa_cl_vs_out_cntl = si_get_vs_out_cntl(sel, NULL, false);
sel->clipdist_mask = sel->info.writes_clipvertex ? SIX_BITS :
u_bit_consecutive(0, sel->info.base.clip_distance_array_size);
More information about the mesa-commit
mailing list