[Mesa-dev] [PATCH 1/6] radeonsi: emit_db_render_state packets optimization
Sonny Jiang
sonny.jiang at amd.com
Thu Jun 7 16:13:48 UTC 2018
Remembering latest states of registers to eliminate redunant SET_CONTEXT_REG packets
Signed-off-by: Sonny Jiang <sonny.jiang at amd.com>
---
src/gallium/drivers/radeonsi/si_build_pm4.h | 43 +++++++++++++++++++++
src/gallium/drivers/radeonsi/si_gfx_cs.c | 3 ++
src/gallium/drivers/radeonsi/si_pipe.h | 2 +
src/gallium/drivers/radeonsi/si_state.c | 60 +++++++++++++++--------------
src/gallium/drivers/radeonsi/si_state.h | 16 ++++++++
5 files changed, 95 insertions(+), 29 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_build_pm4.h b/src/gallium/drivers/radeonsi/si_build_pm4.h
index 22f5558..45d943f 100644
--- a/src/gallium/drivers/radeonsi/si_build_pm4.h
+++ b/src/gallium/drivers/radeonsi/si_build_pm4.h
@@ -110,4 +110,47 @@ static inline void radeon_set_uconfig_reg_idx(struct radeon_winsys_cs *cs,
radeon_emit(cs, value);
}
+/* Emit PKT3_SET_CONTEXT_REG if the register value is different. */
+static inline void radeon_opt_set_context_reg(struct si_context *sctx, unsigned offset,
+ enum si_tracked_reg reg, unsigned value)
+{
+ struct radeon_winsys_cs *cs = sctx->gfx_cs;
+
+ if (!(sctx->tracked_regs.reg_saved & (1 << reg)) ||
+ sctx->tracked_regs.reg_value[reg] != value ) {
+
+ radeon_set_context_reg(cs, offset, value);
+
+ sctx->tracked_regs.reg_saved |= 1 << reg;
+ sctx->tracked_regs.reg_value[reg] = value;
+ }
+}
+
+/**
+ * Set 2 consecutive registers if any registers value is different.
+ * @param offset starting register offset
+ * @param value1 is written to first register
+ * @param value2 is written to second register
+ */
+static inline void radeon_opt_set_context_reg2(struct si_context *sctx, unsigned offset,
+ enum si_tracked_reg reg, unsigned value1,
+ unsigned value2)
+{
+ struct radeon_winsys_cs *cs = sctx->gfx_cs;
+
+ if (!(sctx->tracked_regs.reg_saved & (1 << reg)) ||
+ !(sctx->tracked_regs.reg_saved & (1 << (reg + 1))) ||
+ sctx->tracked_regs.reg_value[reg] != value1 ||
+ sctx->tracked_regs.reg_value[reg+1] != value2 ) {
+
+ radeon_set_context_reg_seq(cs, offset, 2);
+ radeon_emit(cs, value1);
+ radeon_emit(cs, value2);
+
+ sctx->tracked_regs.reg_value[reg] = value1;
+ sctx->tracked_regs.reg_value[reg+1] = value2;
+ sctx->tracked_regs.reg_saved |= 3 << reg;
+ }
+}
+
#endif
diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c
index ec74c1b..0b9a020 100644
--- a/src/gallium/drivers/radeonsi/si_gfx_cs.c
+++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c
@@ -321,4 +321,7 @@ void si_begin_new_gfx_cs(struct si_context *ctx)
ctx->last_num_tcs_input_cp = -1;
ctx->cs_shader_state.initialized = false;
+
+ /* Set all saved registers state to unknown */
+ ctx->tracked_regs.reg_saved = 0;
}
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 5d1671f..82a8263 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -1033,6 +1033,8 @@ struct si_context {
void (*dma_clear_buffer)(struct si_context *sctx, struct pipe_resource *dst,
uint64_t offset, uint64_t size, unsigned value);
+
+ struct si_tracked_regs tracked_regs;
};
/* cik_sdma.c */
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 3a7e928..c95b929 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -1343,28 +1343,25 @@ void si_save_qbo_state(struct si_context *sctx, struct si_qbo_state *st)
static void si_emit_db_render_state(struct si_context *sctx)
{
- struct radeon_winsys_cs *cs = sctx->gfx_cs;
struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
- unsigned db_shader_control;
-
- radeon_set_context_reg_seq(cs, R_028000_DB_RENDER_CONTROL, 2);
+ unsigned db_shader_control, db_render_control, db_count_control;
/* DB_RENDER_CONTROL */
if (sctx->dbcb_depth_copy_enabled ||
sctx->dbcb_stencil_copy_enabled) {
- radeon_emit(cs,
- S_028000_DEPTH_COPY(sctx->dbcb_depth_copy_enabled) |
- S_028000_STENCIL_COPY(sctx->dbcb_stencil_copy_enabled) |
- S_028000_COPY_CENTROID(1) |
- S_028000_COPY_SAMPLE(sctx->dbcb_copy_sample));
+ db_render_control =
+ S_028000_DEPTH_COPY(sctx->dbcb_depth_copy_enabled) |
+ S_028000_STENCIL_COPY(sctx->dbcb_stencil_copy_enabled) |
+ S_028000_COPY_CENTROID(1) |
+ S_028000_COPY_SAMPLE(sctx->dbcb_copy_sample);
} else if (sctx->db_flush_depth_inplace || sctx->db_flush_stencil_inplace) {
- radeon_emit(cs,
- S_028000_DEPTH_COMPRESS_DISABLE(sctx->db_flush_depth_inplace) |
- S_028000_STENCIL_COMPRESS_DISABLE(sctx->db_flush_stencil_inplace));
+ db_render_control =
+ S_028000_DEPTH_COMPRESS_DISABLE(sctx->db_flush_depth_inplace) |
+ S_028000_STENCIL_COMPRESS_DISABLE(sctx->db_flush_stencil_inplace);
} else {
- radeon_emit(cs,
- S_028000_DEPTH_CLEAR_ENABLE(sctx->db_depth_clear) |
- S_028000_STENCIL_CLEAR_ENABLE(sctx->db_stencil_clear));
+ db_render_control =
+ S_028000_DEPTH_CLEAR_ENABLE(sctx->db_depth_clear) |
+ S_028000_STENCIL_CLEAR_ENABLE(sctx->db_stencil_clear);
}
/* DB_COUNT_CONTROL (occlusion queries) */
@@ -1373,28 +1370,33 @@ static void si_emit_db_render_state(struct si_context *sctx)
bool perfect = sctx->num_perfect_occlusion_queries > 0;
if (sctx->chip_class >= CIK) {
- radeon_emit(cs,
- S_028004_PERFECT_ZPASS_COUNTS(perfect) |
- S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples) |
- S_028004_ZPASS_ENABLE(1) |
- S_028004_SLICE_EVEN_ENABLE(1) |
- S_028004_SLICE_ODD_ENABLE(1));
+ db_count_control =
+ S_028004_PERFECT_ZPASS_COUNTS(perfect) |
+ S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples) |
+ S_028004_ZPASS_ENABLE(1) |
+ S_028004_SLICE_EVEN_ENABLE(1) |
+ S_028004_SLICE_ODD_ENABLE(1);
} else {
- radeon_emit(cs,
- S_028004_PERFECT_ZPASS_COUNTS(perfect) |
- S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples));
+ db_count_control =
+ S_028004_PERFECT_ZPASS_COUNTS(perfect) |
+ S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples);
}
} else {
/* Disable occlusion queries. */
if (sctx->chip_class >= CIK) {
- radeon_emit(cs, 0);
+ db_count_control = 0;
} else {
- radeon_emit(cs, S_028004_ZPASS_INCREMENT_DISABLE(1));
+ db_count_control = S_028004_ZPASS_INCREMENT_DISABLE(1);
}
}
+ radeon_opt_set_context_reg2(sctx, R_028000_DB_RENDER_CONTROL,
+ SI_TRACKED_DB_RENDER_CONTROL, db_render_control,
+ db_count_control);
+
/* DB_RENDER_OVERRIDE2 */
- radeon_set_context_reg(cs, R_028010_DB_RENDER_OVERRIDE2,
+ radeon_opt_set_context_reg(sctx, R_028010_DB_RENDER_OVERRIDE2,
+ SI_TRACKED_DB_RENDER_OVERRIDE2,
S_028010_DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION(sctx->db_depth_disable_expclear) |
S_028010_DISABLE_SMEM_EXPCLEAR_OPTIMIZATION(sctx->db_stencil_disable_expclear) |
S_028010_DECOMPRESS_Z_ON_FLUSH(sctx->framebuffer.nr_samples >= 4));
@@ -1415,8 +1417,8 @@ static void si_emit_db_render_state(struct si_context *sctx)
!sctx->screen->rbplus_allowed)
db_shader_control |= S_02880C_DUAL_QUAD_DISABLE(1);
- radeon_set_context_reg(cs, R_02880C_DB_SHADER_CONTROL,
- db_shader_control);
+ radeon_opt_set_context_reg(sctx, R_02880C_DB_SHADER_CONTROL,
+ SI_TRACKED_DB_SHADER_CONTROL, db_shader_control);
}
/*
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index d235f31..fb5f721 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -206,6 +206,22 @@ struct si_shader_data {
uint32_t sh_base[SI_NUM_SHADERS];
};
+/* The list of registers whose emitted values are remembered by si_context. */
+enum si_tracked_reg {
+ SI_TRACKED_DB_RENDER_CONTROL, /* 2 consecutive registers */
+ SI_TRACKED_DB_COUNT_CONTROL,
+
+ SI_TRACKED_DB_RENDER_OVERRIDE2,
+ SI_TRACKED_DB_SHADER_CONTROL,
+
+ SI_NUM_TRACKED_REGS,
+};
+
+struct si_tracked_regs {
+ uint32_t reg_saved;
+ uint32_t reg_value[SI_NUM_TRACKED_REGS];
+};
+
/* Private read-write buffer slots. */
enum {
SI_ES_RING_ESGS,
--
2.7.4
More information about the mesa-dev
mailing list