Mesa (main): radeonsi: merge all preamble states into one
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Tue May 17 10:58:38 UTC 2022
Module: Mesa
Branch: main
Commit: 32c7805ccca331f726da684a4e74f7d1138daa3d
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=32c7805ccca331f726da684a4e74f7d1138daa3d
Author: Marek Olšák <marek.olsak at amd.com>
Date: Sat May 14 10:23:05 2022 -0400
radeonsi: merge all preamble states into one
Tess registers are appended. GS registers are appended, or overwritten in place
if they are already set. There are separate TMZ and non-TMZ preambles.
The preamble will be passed to the kernel as an IB that is executed only on a
context switch.
Reviewed-by: Mihai Preda <mhpreda at gmail.com>
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer at amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16509>
---
src/gallium/drivers/radeonsi/si_debug.c | 4 -
src/gallium/drivers/radeonsi/si_gfx_cs.c | 7 +-
src/gallium/drivers/radeonsi/si_pipe.c | 9 +-
src/gallium/drivers/radeonsi/si_pipe.h | 8 +-
src/gallium/drivers/radeonsi/si_state.c | 4 +
src/gallium/drivers/radeonsi/si_state_shaders.cpp | 134 +++++++++++-----------
6 files changed, 82 insertions(+), 84 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_debug.c b/src/gallium/drivers/radeonsi/si_debug.c
index c53d2f589b9..85f5667f2a7 100644
--- a/src/gallium/drivers/radeonsi/si_debug.c
+++ b/src/gallium/drivers/radeonsi/si_debug.c
@@ -416,10 +416,6 @@ static void si_log_chunk_type_cs_print(void *data, FILE *f)
if (ctx->cs_preamble_state)
ac_parse_ib(f, ctx->cs_preamble_state->pm4, ctx->cs_preamble_state->ndw, NULL, 0,
"IB2: Init config", ctx->gfx_level, NULL, NULL);
-
- if (ctx->cs_preamble_gs_rings)
- ac_parse_ib(f, ctx->cs_preamble_gs_rings->pm4, ctx->cs_preamble_gs_rings->ndw, NULL, 0,
- "IB2: Init GS rings", ctx->gfx_level, NULL, NULL);
}
if (scs->flushed) {
diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c
index 7cc718fd452..6bebfffccb9 100644
--- a/src/gallium/drivers/radeonsi/si_gfx_cs.c
+++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c
@@ -437,12 +437,7 @@ void si_begin_new_gfx_cs(struct si_context *ctx, bool first_cs)
/* The CS initialization should be emitted before everything else. */
if (ctx->cs_preamble_state)
- si_pm4_emit(ctx, ctx->cs_preamble_state);
- if (ctx->cs_preamble_tess_rings)
- si_pm4_emit(ctx, unlikely(is_secure) ? ctx->cs_preamble_tess_rings_tmz :
- ctx->cs_preamble_tess_rings);
- if (ctx->cs_preamble_gs_rings)
- si_pm4_emit(ctx, ctx->cs_preamble_gs_rings);
+ si_pm4_emit(ctx, unlikely(is_secure) ? ctx->cs_preamble_state_tmz : ctx->cs_preamble_state);
if (ctx->queued.named.ls)
ctx->prefetch_L2_mask |= SI_PREFETCH_LS;
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 383056f03fe..589af37dd85 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -222,12 +222,9 @@ static void si_destroy_context(struct pipe_context *context)
if (sctx->cs_preamble_state)
si_pm4_free_state(sctx, sctx->cs_preamble_state, ~0);
- if (sctx->cs_preamble_tess_rings)
- si_pm4_free_state(sctx, sctx->cs_preamble_tess_rings, ~0);
- if (sctx->cs_preamble_tess_rings_tmz)
- si_pm4_free_state(sctx, sctx->cs_preamble_tess_rings_tmz, ~0);
- if (sctx->cs_preamble_gs_rings)
- si_pm4_free_state(sctx, sctx->cs_preamble_gs_rings, ~0);
+ if (sctx->cs_preamble_state_tmz)
+ si_pm4_free_state(sctx, sctx->cs_preamble_state_tmz, ~0);
+
for (i = 0; i < ARRAY_SIZE(sctx->vgt_shader_config); i++)
si_pm4_free_state(sctx, sctx->vgt_shader_config[i], SI_STATE_IDX(vgt_shader_config));
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index a492faec8f6..8dcdb6a0d99 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -1051,10 +1051,12 @@ struct si_context {
/* Precomputed states. */
struct si_pm4_state *cs_preamble_state;
- struct si_pm4_state *cs_preamble_tess_rings;
- struct si_pm4_state *cs_preamble_tess_rings_tmz;
- struct si_pm4_state *cs_preamble_gs_rings;
+ struct si_pm4_state *cs_preamble_state_tmz;
+ uint16_t gs_ring_state_dw_offset;
+ uint16_t gs_ring_state_dw_offset_tmz;
bool cs_preamble_has_vgt_flush;
+ bool cs_preamble_has_vgt_flush_tmz;
+
struct si_pm4_state *vgt_shader_config[SI_NUM_VGT_STAGES_STATES];
/* shaders */
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index aea74b7f1e9..ee19553f370 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -5905,4 +5905,8 @@ void si_init_cs_preamble_state(struct si_context *sctx, bool uses_reg_shadowing)
}
sctx->cs_preamble_state = pm4;
+
+ /* Make a copy of the preamble for TMZ. */
+ sctx->cs_preamble_state_tmz = (struct si_pm4_state *)CALLOC_STRUCT(si_cs_preamble);
+ memcpy(sctx->cs_preamble_state_tmz, sctx->cs_preamble_state, sizeof(struct si_cs_preamble));
}
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.cpp b/src/gallium/drivers/radeonsi/si_state_shaders.cpp
index 8d70cfcba90..4e824f39d28 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.cpp
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.cpp
@@ -3666,22 +3666,27 @@ static void si_delete_shader_selector(struct pipe_context *ctx, void *state)
/**
* Writing CONFIG or UCONFIG VGT registers requires VGT_FLUSH before that.
*/
-static void si_cs_preamble_add_vgt_flush(struct si_context *sctx)
+static void si_cs_preamble_add_vgt_flush(struct si_context *sctx, bool tmz)
{
+ struct si_pm4_state *pm4 = tmz ? sctx->cs_preamble_state_tmz : sctx->cs_preamble_state;
+ bool *has_vgt_flush = tmz ? &sctx->cs_preamble_has_vgt_flush_tmz :
+ &sctx->cs_preamble_has_vgt_flush;
+
/* We shouldn't get here if registers are shadowed. */
assert(!sctx->shadowed_regs);
- if (sctx->cs_preamble_has_vgt_flush)
+ if (*has_vgt_flush)
return;
/* Done by Vulkan before VGT_FLUSH. */
- si_pm4_cmd_add(sctx->cs_preamble_state, PKT3(PKT3_EVENT_WRITE, 0, 0));
- si_pm4_cmd_add(sctx->cs_preamble_state, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
+ si_pm4_cmd_add(pm4, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ si_pm4_cmd_add(pm4, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
/* VGT_FLUSH is required even if VGT is idle. It resets VGT pointers. */
- si_pm4_cmd_add(sctx->cs_preamble_state, PKT3(PKT3_EVENT_WRITE, 0, 0));
- si_pm4_cmd_add(sctx->cs_preamble_state, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
- sctx->cs_preamble_has_vgt_flush = true;
+ si_pm4_cmd_add(pm4, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ si_pm4_cmd_add(pm4, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
+
+ *has_vgt_flush = true;
}
/**
@@ -3709,7 +3714,6 @@ bool si_update_gs_ring_buffers(struct si_context *sctx)
struct si_shader_selector *es =
sctx->shader.tes.cso ? sctx->shader.tes.cso : sctx->shader.vs.cso;
struct si_shader_selector *gs = sctx->shader.gs.cso;
- struct si_pm4_state *pm4;
/* Chip constants. */
unsigned num_se = sctx->screen->info.max_se;
@@ -3811,31 +3815,42 @@ bool si_update_gs_ring_buffers(struct si_context *sctx)
}
/* The codepath without register shadowing. */
- /* Create the "cs_preamble_gs_rings" state. */
- pm4 = CALLOC_STRUCT(si_pm4_state);
- if (!pm4)
- return false;
+ for (unsigned tmz = 0; tmz <= 1; tmz++) {
+ struct si_pm4_state *pm4 = tmz ? sctx->cs_preamble_state_tmz : sctx->cs_preamble_state;
+ uint16_t *gs_ring_state_dw_offset = tmz ? &sctx->gs_ring_state_dw_offset_tmz :
+ &sctx->gs_ring_state_dw_offset;
+ unsigned old_ndw = 0;
- if (sctx->gfx_level >= GFX7) {
- if (sctx->esgs_ring) {
- assert(sctx->gfx_level <= GFX8);
- si_pm4_set_reg(pm4, R_030900_VGT_ESGS_RING_SIZE, sctx->esgs_ring->width0 / 256);
+ si_cs_preamble_add_vgt_flush(sctx, tmz);
+
+ if (!*gs_ring_state_dw_offset) {
+ /* We are here for the first time. The packets will be added. */
+ *gs_ring_state_dw_offset = pm4->ndw;
+ } else {
+ /* We have been here before. Overwrite the previous packets. */
+ old_ndw = pm4->ndw;
+ pm4->ndw = *gs_ring_state_dw_offset;
}
- if (sctx->gsvs_ring)
- si_pm4_set_reg(pm4, R_030904_VGT_GSVS_RING_SIZE, sctx->gsvs_ring->width0 / 256);
- } else {
- if (sctx->esgs_ring)
- si_pm4_set_reg(pm4, R_0088C8_VGT_ESGS_RING_SIZE, sctx->esgs_ring->width0 / 256);
- if (sctx->gsvs_ring)
- si_pm4_set_reg(pm4, R_0088CC_VGT_GSVS_RING_SIZE, sctx->gsvs_ring->width0 / 256);
- }
- /* Set the state. */
- if (sctx->cs_preamble_gs_rings)
- si_pm4_free_state(sctx, sctx->cs_preamble_gs_rings, ~0);
- sctx->cs_preamble_gs_rings = pm4;
+ if (sctx->gfx_level >= GFX7) {
+ if (sctx->esgs_ring) {
+ assert(sctx->gfx_level <= GFX8);
+ si_pm4_set_reg(pm4, R_030900_VGT_ESGS_RING_SIZE, sctx->esgs_ring->width0 / 256);
+ }
+ if (sctx->gsvs_ring)
+ si_pm4_set_reg(pm4, R_030904_VGT_GSVS_RING_SIZE, sctx->gsvs_ring->width0 / 256);
+ } else {
+ if (sctx->esgs_ring)
+ si_pm4_set_reg(pm4, R_0088C8_VGT_ESGS_RING_SIZE, sctx->esgs_ring->width0 / 256);
+ if (sctx->gsvs_ring)
+ si_pm4_set_reg(pm4, R_0088CC_VGT_GSVS_RING_SIZE, sctx->gsvs_ring->width0 / 256);
+ }
- si_cs_preamble_add_vgt_flush(sctx);
+ if (old_ndw) {
+ pm4->ndw = old_ndw;
+ pm4->last_opcode = 255; /* invalid opcode (we don't save the last opcode) */
+ }
+ }
/* Flush the context to re-emit both cs_preamble states. */
sctx->initial_gfx_cs_size = 0; /* force flush */
@@ -4081,42 +4096,31 @@ void si_init_tess_factor_ring(struct si_context *sctx)
return;
}
- /* The codepath without register shadowing. */
- si_cs_preamble_add_vgt_flush(sctx);
-
- /* Append these registers to the init config state. */
- if (sctx->gfx_level >= GFX7) {
- si_pm4_set_reg(sctx->cs_preamble_state, R_030938_VGT_TF_RING_SIZE,
- S_030938_SIZE(tf_ring_size_field));
- si_pm4_set_reg(sctx->cs_preamble_state, R_030940_VGT_TF_MEMORY_BASE, factor_va >> 8);
- if (sctx->gfx_level >= GFX10)
- si_pm4_set_reg(sctx->cs_preamble_state, R_030984_VGT_TF_MEMORY_BASE_HI,
- S_030984_BASE_HI(factor_va >> 40));
- else if (sctx->gfx_level == GFX9)
- si_pm4_set_reg(sctx->cs_preamble_state, R_030944_VGT_TF_MEMORY_BASE_HI,
- S_030944_BASE_HI(factor_va >> 40));
- si_pm4_set_reg(sctx->cs_preamble_state, R_03093C_VGT_HS_OFFCHIP_PARAM,
- sctx->screen->hs.hs_offchip_param);
- } else {
- struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
-
- si_pm4_set_reg(pm4, R_008988_VGT_TF_RING_SIZE,
- S_008988_SIZE(tf_ring_size_field));
- si_pm4_set_reg(pm4, R_0089B8_VGT_TF_MEMORY_BASE, factor_va >> 8);
- si_pm4_set_reg(pm4, R_0089B0_VGT_HS_OFFCHIP_PARAM,
- sctx->screen->hs.hs_offchip_param);
- sctx->cs_preamble_tess_rings = pm4;
-
- if (sctx->screen->info.has_tmz_support) {
- pm4 = CALLOC_STRUCT(si_pm4_state);
- uint64_t factor_va_tmz =
- si_resource(sctx->tess_rings_tmz)->gpu_address + sctx->screen->hs.tess_offchip_ring_size;
- si_pm4_set_reg(pm4, R_008988_VGT_TF_RING_SIZE,
- S_008988_SIZE(tf_ring_size_field));
- si_pm4_set_reg(pm4, R_0089B8_VGT_TF_MEMORY_BASE, factor_va_tmz >> 8);
- si_pm4_set_reg(pm4, R_0089B0_VGT_HS_OFFCHIP_PARAM,
- sctx->screen->hs.hs_offchip_param);
- sctx->cs_preamble_tess_rings_tmz = pm4;
+ /* The codepath without register shadowing is below. */
+ /* Add these registers to cs_preamble_state. */
+ for (unsigned tmz = 0; tmz <= 1; tmz++) {
+ struct si_pm4_state *pm4 = tmz ? sctx->cs_preamble_state_tmz : sctx->cs_preamble_state;
+ struct pipe_resource *tf_ring = tmz ? sctx->tess_rings_tmz : sctx->tess_rings;
+
+ if (!tf_ring)
+ continue; /* TMZ not supported */
+
+ uint64_t va = si_resource(tf_ring)->gpu_address + sctx->screen->hs.tess_offchip_ring_size;
+
+ si_cs_preamble_add_vgt_flush(sctx, tmz);
+
+ if (sctx->gfx_level >= GFX7) {
+ si_pm4_set_reg(pm4, R_030938_VGT_TF_RING_SIZE, S_030938_SIZE(tf_ring_size_field));
+ si_pm4_set_reg(pm4, R_03093C_VGT_HS_OFFCHIP_PARAM, sctx->screen->hs.hs_offchip_param);
+ si_pm4_set_reg(pm4, R_030940_VGT_TF_MEMORY_BASE, va >> 8);
+ if (sctx->gfx_level >= GFX10)
+ si_pm4_set_reg(pm4, R_030984_VGT_TF_MEMORY_BASE_HI, S_030984_BASE_HI(va >> 40));
+ else if (sctx->gfx_level == GFX9)
+ si_pm4_set_reg(pm4, R_030944_VGT_TF_MEMORY_BASE_HI, S_030944_BASE_HI(va >> 40));
+ } else {
+ si_pm4_set_reg(pm4, R_008988_VGT_TF_RING_SIZE, S_008988_SIZE(tf_ring_size_field));
+ si_pm4_set_reg(pm4, R_0089B8_VGT_TF_MEMORY_BASE, factor_va >> 8);
+ si_pm4_set_reg(pm4, R_0089B0_VGT_HS_OFFCHIP_PARAM, sctx->screen->hs.hs_offchip_param);
}
}
More information about the mesa-commit
mailing list