Mesa (main): radeonsi: don't use GS SGPR6 for the small prim cull info
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Tue Nov 16 20:00:21 UTC 2021
Module: Mesa
Branch: main
Commit: ba6d389fa7a0ac512cb9d4cdd21efde990f041b1
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=ba6d389fa7a0ac512cb9d4cdd21efde990f041b1
Author: Marek Olšák <marek.olsak at amd.com>
Date: Sat Nov 6 14:08:53 2021 -0400
radeonsi: don't use GS SGPR6 for the small prim cull info
Use a user SGPR instead of GS SGPR6. This will be needed in the future.
Also don't upload small_prim_precision because it's passed via
VS_STATE_BITS.
Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer at amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13811>
---
src/gallium/drivers/radeonsi/gfx10_shader_ngg.c | 2 +-
src/gallium/drivers/radeonsi/si_blit.c | 4 ++++
src/gallium/drivers/radeonsi/si_shader.c | 14 +++++++-------
src/gallium/drivers/radeonsi/si_shader.h | 4 +++-
src/gallium/drivers/radeonsi/si_state_draw.cpp | 2 +-
src/gallium/drivers/radeonsi/si_state_viewport.c | 9 +++++----
6 files changed, 21 insertions(+), 14 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
index 17a3747db29..6d3bc0bba3a 100644
--- a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
+++ b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
@@ -1319,7 +1319,7 @@ void gfx10_emit_ngg_culling_epilogue(struct ac_shader_abi *abi)
ret = si_insert_input_ptr(ctx, ret, ctx->args.base_vertex, 8 + SI_SGPR_BASE_VERTEX);
ret = si_insert_input_ptr(ctx, ret, ctx->args.draw_id, 8 + SI_SGPR_DRAWID);
ret = si_insert_input_ptr(ctx, ret, ctx->args.start_instance, 8 + SI_SGPR_START_INSTANCE);
- ret = si_insert_input_ptr(ctx, ret, ctx->args.vertex_buffers, 8 + SI_VS_NUM_USER_SGPR);
+ ret = si_insert_input_ptr(ctx, ret, ctx->args.vertex_buffers, 8 + GFX9_GS_NUM_USER_SGPR);
for (unsigned i = 0; i < shader->selector->num_vbos_in_user_sgprs; i++) {
ret = si_insert_input_v4i32(ctx, ret, ctx->vb_descriptors[i],
diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
index c2c96d0dfbe..fd18d8ff4ac 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -99,6 +99,10 @@ void si_blitter_end(struct si_context *sctx)
* non-global VS user SGPRs. */
sctx->shader_pointers_dirty |= SI_DESCS_SHADER_MASK(VERTEX);
+ /* Reset SI_SGPR_SMALL_PRIM_CULL_INFO: */
+ if (sctx->screen->use_ngg_culling)
+ si_mark_atom_dirty(sctx, &sctx->atoms.s.ngg_cull_state);
+
unsigned num_vbos_in_user_sgprs = si_num_vbos_in_user_sgprs(sctx->screen);
sctx->vertex_buffer_pointer_dirty = sctx->vb_descriptors_buffer != NULL &&
sctx->num_vertex_elements >
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 5fb4139e07c..51807271d32 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -524,10 +524,8 @@ void si_init_shader_args(struct si_shader_context *ctx, bool ngg_cull_shader)
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.merged_wave_info);
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.tess_offchip_offset);
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.scratch_offset);
- ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_CONST_DESC_PTR,
- &ctx->small_prim_cull_info); /* SPI_SHADER_PGM_LO_GS << 8 */
- ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT,
- NULL); /* unused (SPI_SHADER_PGM_LO/HI_GS >> 24) */
+ ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); /* unused */
+ ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); /* unused */
declare_global_desc_pointers(ctx);
if (ctx->stage != MESA_SHADER_VERTEX || !shader->selector->info.base.vs.blit_sgprs_amd) {
@@ -543,19 +541,21 @@ void si_init_shader_args(struct si_shader_context *ctx, bool ngg_cull_shader)
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.base_vertex);
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.draw_id);
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.start_instance);
+ ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_CONST_DESC_PTR, &ctx->small_prim_cull_info);
+ declare_vb_descriptor_input_sgprs(ctx);
}
} else {
+ /* TES or GS */
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->vs_state_bits);
if (ctx->stage == MESA_SHADER_TESS_EVAL) {
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_offchip_layout);
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tes_offchip_addr);
+ ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); /* unused */
+ ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_CONST_DESC_PTR, &ctx->small_prim_cull_info);
}
}
- if (ctx->stage == MESA_SHADER_VERTEX)
- declare_vb_descriptor_input_sgprs(ctx);
-
/* VGPRs (first GS, then VS/TES) */
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.gs_vtx_offset[0]);
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &ctx->args.gs_vtx_offset[1]);
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index 887cdc1e7b5..d6e1720278a 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -211,9 +211,11 @@ enum
/* GS limits */
GFX6_GS_NUM_USER_SGPR = SI_NUM_RESOURCE_SGPRS,
- GFX9_GS_NUM_USER_SGPR = MAX2(SI_VS_NUM_USER_SGPR, SI_TES_NUM_USER_SGPR),
SI_GSCOPY_NUM_USER_SGPR = SI_NUM_VS_STATE_RESOURCE_SGPRS,
+ GFX9_SGPR_SMALL_PRIM_CULL_INFO = MAX2(SI_VS_NUM_USER_SGPR, SI_TES_NUM_USER_SGPR),
+ GFX9_GS_NUM_USER_SGPR,
+
/* PS only */
SI_SGPR_ALPHA_REF = SI_NUM_RESOURCE_SGPRS,
SI_PS_NUM_USER_SGPR,
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.cpp b/src/gallium/drivers/radeonsi/si_state_draw.cpp
index 84a189be71e..2add72f5623 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.cpp
+++ b/src/gallium/drivers/radeonsi/si_state_draw.cpp
@@ -1862,7 +1862,7 @@ static bool si_upload_and_prefetch_VB_descriptors(struct si_context *sctx,
if (GFX_VERSION >= GFX9) {
if (HAS_TESS)
sh_dw_offset = GFX9_TCS_NUM_USER_SGPR;
- else if (HAS_GS)
+ else if (HAS_GS || NGG)
sh_dw_offset = GFX9_GS_NUM_USER_SGPR;
}
diff --git a/src/gallium/drivers/radeonsi/si_state_viewport.c b/src/gallium/drivers/radeonsi/si_state_viewport.c
index 7db69b9df0d..8894732a7a0 100644
--- a/src/gallium/drivers/radeonsi/si_state_viewport.c
+++ b/src/gallium/drivers/radeonsi/si_state_viewport.c
@@ -105,6 +105,7 @@ static void si_emit_cull_state(struct si_context *sctx)
{
assert(sctx->screen->use_ngg_culling);
+ const unsigned upload_size = offsetof(struct si_small_prim_cull_info, small_prim_precision);
struct si_small_prim_cull_info info;
si_get_small_prim_cull_info(sctx, &info);
@@ -112,8 +113,8 @@ static void si_emit_cull_state(struct si_context *sctx)
memcmp(&info, &sctx->last_small_prim_cull_info, sizeof(info))) {
unsigned offset = 0;
- /* Align to 256, because the address is shifted by 8 bits. */
- u_upload_data(sctx->b.const_uploader, 0, sizeof(info), 256, &info, &offset,
+ u_upload_data(sctx->b.const_uploader, 0, upload_size,
+ si_optimal_tcc_alignment(sctx, upload_size), &info, &offset,
(struct pipe_resource **)&sctx->small_prim_cull_info_buf);
sctx->small_prim_cull_info_address = sctx->small_prim_cull_info_buf->gpu_address + offset;
@@ -124,8 +125,8 @@ static void si_emit_cull_state(struct si_context *sctx)
radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, sctx->small_prim_cull_info_buf,
RADEON_USAGE_READ | RADEON_PRIO_CONST_BUFFER);
radeon_begin(&sctx->gfx_cs);
- radeon_set_sh_reg(R_00B220_SPI_SHADER_PGM_LO_GS,
- sctx->small_prim_cull_info_address >> 8);
+ radeon_set_sh_reg(R_00B230_SPI_SHADER_USER_DATA_GS_0 + GFX9_SGPR_SMALL_PRIM_CULL_INFO * 4,
+ sctx->small_prim_cull_info_address);
radeon_end();
/* Set VS_STATE.SMALL_PRIM_PRECISION for NGG culling.
More information about the mesa-commit
mailing list