Mesa (main): radeonsi: remove the use of PKT3_CONTEXT_REG_RMW
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Wed Oct 13 10:44:16 UTC 2021
Module: Mesa
Branch: main
Commit: 8f9945a75bbb569e6a745f03b027877a9cda8789
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=8f9945a75bbb569e6a745f03b027877a9cda8789
Author: Arvind Yadav <arvind.yadav at amd.com>
Date: Thu Sep 30 17:52:02 2021 +0530
radeonsi: remove the use of PKT3_CONTEXT_REG_RMW
This patch removes the use of PKT3_CONTEXT_REG_RMW from radeonsi
and avoids creating multiple command buffer (PM4 packet) entries for R_02881C_PA_CL_VS_OUT_CNTL.
Reviewed-by: Marek Olšák <marek.olsak at amd.com>
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer at amd.com>
Signed-off-by: Arvind Yadav <arvind.yadav at amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12789>
---
src/gallium/drivers/radeonsi/si_build_pm4.h | 22 ----------------------
src/gallium/drivers/radeonsi/si_gfx_cs.c | 3 +--
src/gallium/drivers/radeonsi/si_shader.h | 1 -
src/gallium/drivers/radeonsi/si_state.c | 11 ++---------
src/gallium/drivers/radeonsi/si_state.h | 9 +--------
src/gallium/drivers/radeonsi/si_state_draw.cpp | 6 +++++-
src/gallium/drivers/radeonsi/si_state_shaders.c | 23 ++++++-----------------
7 files changed, 15 insertions(+), 60 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_build_pm4.h b/src/gallium/drivers/radeonsi/si_build_pm4.h
index c32f3caa11d..66589d6a0b3 100644
--- a/src/gallium/drivers/radeonsi/si_build_pm4.h
+++ b/src/gallium/drivers/radeonsi/si_build_pm4.h
@@ -152,28 +152,6 @@
radeon_emit(value); \
} while (0)
-#define radeon_set_context_reg_rmw(reg, value, mask) do { \
- SI_CHECK_SHADOWED_REGS(reg, 1); \
- assert((reg) >= SI_CONTEXT_REG_OFFSET); \
- radeon_emit(PKT3(PKT3_CONTEXT_REG_RMW, 2, 0)); \
- radeon_emit(((reg) - SI_CONTEXT_REG_OFFSET) >> 2); \
- radeon_emit(mask); \
- radeon_emit(value); \
-} while (0)
-
-/* Emit PKT3_CONTEXT_REG_RMW if the register value is different. */
-#define radeon_opt_set_context_reg_rmw(sctx, offset, reg, val, mask) do { \
- unsigned __value = (val); \
- assert((__value & ~mask) == 0); \
- __value &= mask; \
- if (((sctx->tracked_regs.reg_saved >> (reg)) & 0x1) != 0x1 || \
- sctx->tracked_regs.reg_value[reg] != __value) { \
- radeon_set_context_reg_rmw(offset, __value, mask); \
- sctx->tracked_regs.reg_saved |= 0x1ull << (reg); \
- sctx->tracked_regs.reg_value[reg] = __value; \
- } \
-} while (0)
-
/* Emit PKT3_SET_CONTEXT_REG if the register value is different. */
#define radeon_opt_set_context_reg(sctx, offset, reg, val) do { \
unsigned __value = val; \
diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c
index 9cb7cd0f813..e38b18f71fa 100644
--- a/src/gallium/drivers/radeonsi/si_gfx_cs.c
+++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c
@@ -247,8 +247,7 @@ void si_set_tracked_regs_to_clear_state(struct si_context *ctx)
ctx->tracked_regs.reg_value[SI_TRACKED_PA_SC_MODE_CNTL_1] = 0x00000000;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_SU_PRIM_FILTER_CNTL] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_SU_SMALL_PRIM_FILTER_CNTL] = 0x00000000;
- ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_VS_OUT_CNTL__VS] = 0x00000000;
- ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_VS_OUT_CNTL__CL] = 0x00000000;
+ ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_VS_OUT_CNTL] = 0x00000000;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_CLIP_CNTL] = 0x00090000;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_SC_BINNER_CNTL_0] = 0x00000003;
ctx->tracked_regs.reg_value[SI_TRACKED_DB_VRS_OVERRIDE_CNTL] = 0x00000000;
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index 4072a6c028d..d6dbb13ed55 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -462,7 +462,6 @@ struct si_shader_selector {
ubyte cs_num_images_in_user_sgprs;
ubyte num_vs_inputs;
ubyte num_vbos_in_user_sgprs;
- unsigned pa_cl_vs_out_cntl;
unsigned ngg_cull_vert_threshold; /* UINT32_MAX = disabled */
ubyte clipdist_mask;
ubyte culldist_mask;
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 2bcd16f189c..032db20c206 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -856,15 +856,8 @@ static void si_emit_clip_regs(struct si_context *sctx)
clipdist_mask | (culldist_mask << 8);
radeon_begin(&sctx->gfx_cs);
-
- if (sctx->chip_class >= GFX10) {
- radeon_opt_set_context_reg_rmw(sctx, R_02881C_PA_CL_VS_OUT_CNTL,
- SI_TRACKED_PA_CL_VS_OUT_CNTL__CL, pa_cl_cntl,
- ~SI_TRACKED_PA_CL_VS_OUT_CNTL__VS_MASK);
- } else {
- radeon_opt_set_context_reg(sctx, R_02881C_PA_CL_VS_OUT_CNTL, SI_TRACKED_PA_CL_VS_OUT_CNTL__CL,
- vs_sel->pa_cl_vs_out_cntl | pa_cl_cntl);
- }
+ radeon_opt_set_context_reg(sctx, R_02881C_PA_CL_VS_OUT_CNTL, SI_TRACKED_PA_CL_VS_OUT_CNTL,
+ pa_cl_cntl | vs->pa_cl_vs_out_cntl);
radeon_opt_set_context_reg(sctx, R_028810_PA_CL_CLIP_CNTL, SI_TRACKED_PA_CL_CLIP_CNTL,
rs->pa_cl_clip_cntl | ucp_mask | S_028810_CLIP_DISABLE(window_space));
radeon_end_update_context_roll(sctx);
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 931230260a4..a6daa158b65 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -249,12 +249,6 @@ struct si_shader_data {
uint32_t sh_base[SI_NUM_SHADERS];
};
-#define SI_TRACKED_PA_CL_VS_OUT_CNTL__VS_MASK \
- (S_02881C_USE_VTX_POINT_SIZE(1) | S_02881C_USE_VTX_EDGE_FLAG(1) | \
- S_02881C_USE_VTX_RENDER_TARGET_INDX(1) | S_02881C_USE_VTX_VIEWPORT_INDX(1) | \
- S_02881C_VS_OUT_MISC_VEC_ENA(1) | S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(1) | \
- S_02881C_USE_VTX_VRS_RATE(1))
-
/* The list of registers whose emitted values are remembered by si_context. */
enum si_tracked_reg
{
@@ -280,8 +274,7 @@ enum si_tracked_reg
SI_TRACKED_PA_SU_PRIM_FILTER_CNTL,
SI_TRACKED_PA_SU_SMALL_PRIM_FILTER_CNTL,
- SI_TRACKED_PA_CL_VS_OUT_CNTL__VS, /* set with SI_TRACKED_PA_CL_VS_OUT_CNTL__VS_MASK*/
- SI_TRACKED_PA_CL_VS_OUT_CNTL__CL, /* set with ~SI_TRACKED_PA_CL_VS_OUT_CNTL__VS_MASK */
+ SI_TRACKED_PA_CL_VS_OUT_CNTL,
SI_TRACKED_PA_CL_CLIP_CNTL,
SI_TRACKED_PA_SC_BINNER_CNTL_0,
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.cpp b/src/gallium/drivers/radeonsi/si_state_draw.cpp
index cc824c6b891..0e1994808ba 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.cpp
+++ b/src/gallium/drivers/radeonsi/si_state_draw.cpp
@@ -111,6 +111,7 @@ static bool si_update_shaders(struct si_context *sctx)
struct pipe_context *ctx = (struct pipe_context *)sctx;
struct si_shader *old_vs = si_get_vs_inline(sctx, HAS_TESS, HAS_GS)->current;
unsigned old_kill_clip_distances = old_vs ? old_vs->key.opt.kill_clip_distances : 0;
+ unsigned old_pa_cl_vs_out_cntl = old_vs ? old_vs->pa_cl_vs_out_cntl : 0;
struct si_shader *old_ps = sctx->shader.ps.current;
unsigned old_spi_shader_col_format =
old_ps ? old_ps->key.part.ps.epilog.spi_shader_col_format : 0;
@@ -241,7 +242,10 @@ static bool si_update_shaders(struct si_context *sctx)
*pm4 = si_build_vgt_shader_config(sctx->screen, key);
si_pm4_bind_state(sctx, vgt_shader_config, *pm4);
- if (old_kill_clip_distances != si_get_vs_inline(sctx, HAS_TESS, HAS_GS)->current->key.opt.kill_clip_distances)
+ if (old_kill_clip_distances !=
+ si_get_vs_inline(sctx, HAS_TESS, HAS_GS)->current->key.opt.kill_clip_distances ||
+ old_pa_cl_vs_out_cntl !=
+ si_get_vs_inline(sctx, HAS_TESS, HAS_GS)->current->pa_cl_vs_out_cntl)
si_mark_atom_dirty(sctx, &sctx->atoms.s.clip_regs);
r = si_shader_select(ctx, &sctx->shader.ps);
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 3589a0ca1d0..37c5a4aec4a 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -879,6 +879,9 @@ static void si_shader_gs(struct si_screen *sscreen, struct si_shader *shader)
shader->ctx_reg.gs.vgt_gs_instance_cnt =
S_028B90_CNT(MIN2(gs_num_invocations, 127)) | S_028B90_ENABLE(gs_num_invocations > 0);
+ /* Copy over fields from the GS copy shader to make them easily accessible from GS. */
+ shader->pa_cl_vs_out_cntl = sel->gs_copy_shader->pa_cl_vs_out_cntl;
+
va = shader->bo->gpu_address;
if (sscreen->info.chip_class >= GFX9) {
@@ -1022,9 +1025,6 @@ static void gfx10_emit_shader_ngg_tail(struct si_context *sctx, struct si_shader
radeon_opt_set_context_reg(sctx, R_028838_PA_CL_NGG_CNTL, SI_TRACKED_PA_CL_NGG_CNTL,
shader->ctx_reg.ngg.pa_cl_ngg_cntl);
- radeon_opt_set_context_reg_rmw(sctx, R_02881C_PA_CL_VS_OUT_CNTL,
- SI_TRACKED_PA_CL_VS_OUT_CNTL__VS, shader->pa_cl_vs_out_cntl,
- SI_TRACKED_PA_CL_VS_OUT_CNTL__VS_MASK);
radeon_end_update_context_roll(sctx);
/* These don't cause a context roll. */
@@ -1388,11 +1388,6 @@ static void si_emit_shader_vs(struct si_context *sctx)
S_028A44_GS_INST_PRIMS_IN_SUBGRP(126));
}
- if (sctx->chip_class >= GFX10) {
- radeon_opt_set_context_reg_rmw(sctx, R_02881C_PA_CL_VS_OUT_CNTL,
- SI_TRACKED_PA_CL_VS_OUT_CNTL__VS, shader->pa_cl_vs_out_cntl,
- SI_TRACKED_PA_CL_VS_OUT_CNTL__VS_MASK);
- }
radeon_end_update_context_roll(sctx);
/* GE_PC_ALLOC is not a context register, so it doesn't cause a context roll. */
@@ -1920,9 +1915,7 @@ static void si_get_vs_key_outputs(struct si_context *sctx, struct si_shader_sele
key->mono.u.vs_export_prim_id = 0;
}
- /* We need PKT3_CONTEXT_REG_RMW, which we currently only use on GFX10+. */
- key->opt.kill_pointsize = sctx->chip_class >= GFX10 &&
- vs->info.writes_psize &&
+ key->opt.kill_pointsize = vs->info.writes_psize &&
sctx->current_rast_prim != PIPE_PRIM_POINTS &&
!sctx->queued.named.rasterizer->polygon_mode_is_points;
}
@@ -3015,10 +3008,6 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
}
}
- /* PA_CL_VS_OUT_CNTL */
- if (sctx->chip_class <= GFX9)
- sel->pa_cl_vs_out_cntl = si_get_vs_out_cntl(sel, NULL, false);
-
sel->clipdist_mask = sel->info.writes_clipvertex ? SIX_BITS :
u_bit_consecutive(0, sel->info.base.clip_distance_array_size);
sel->culldist_mask = u_bit_consecutive(0, sel->info.base.cull_distance_array_size) <<
@@ -3126,11 +3115,11 @@ static void si_update_clip_regs(struct si_context *sctx, struct si_shader_select
(!old_hw_vs ||
(old_hw_vs->info.stage == MESA_SHADER_VERTEX && old_hw_vs->info.base.vs.window_space_position) !=
(next_hw_vs->info.stage == MESA_SHADER_VERTEX && next_hw_vs->info.base.vs.window_space_position) ||
- old_hw_vs->pa_cl_vs_out_cntl != next_hw_vs->pa_cl_vs_out_cntl ||
old_hw_vs->clipdist_mask != next_hw_vs->clipdist_mask ||
old_hw_vs->culldist_mask != next_hw_vs->culldist_mask || !old_hw_vs_variant ||
!next_hw_vs_variant ||
- old_hw_vs_variant->key.opt.kill_clip_distances != next_hw_vs_variant->key.opt.kill_clip_distances))
+ old_hw_vs_variant->key.opt.kill_clip_distances != next_hw_vs_variant->key.opt.kill_clip_distances ||
+ old_hw_vs_variant->pa_cl_vs_out_cntl != next_hw_vs_variant->pa_cl_vs_out_cntl))
si_mark_atom_dirty(sctx, &sctx->atoms.s.clip_regs);
}
More information about the mesa-commit
mailing list