[Mesa-dev] [PATCH 1/6] radeonsi: emit_db_render_state packets optimization

Sonny Jiang sonny.jiang at amd.com
Thu Jun 7 16:13:48 UTC 2018


Remember the latest states of registers to eliminate redundant SET_CONTEXT_REG packets.

Signed-off-by: Sonny Jiang <sonny.jiang at amd.com>
---
 src/gallium/drivers/radeonsi/si_build_pm4.h | 43 +++++++++++++++++++++
 src/gallium/drivers/radeonsi/si_gfx_cs.c    |  3 ++
 src/gallium/drivers/radeonsi/si_pipe.h      |  2 +
 src/gallium/drivers/radeonsi/si_state.c     | 60 +++++++++++++++--------------
 src/gallium/drivers/radeonsi/si_state.h     | 16 ++++++++
 5 files changed, 95 insertions(+), 29 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_build_pm4.h b/src/gallium/drivers/radeonsi/si_build_pm4.h
index 22f5558..45d943f 100644
--- a/src/gallium/drivers/radeonsi/si_build_pm4.h
+++ b/src/gallium/drivers/radeonsi/si_build_pm4.h
@@ -110,4 +110,47 @@ static inline void radeon_set_uconfig_reg_idx(struct radeon_winsys_cs *cs,
 	radeon_emit(cs, value);
 }
 
+/* Emit PKT3_SET_CONTEXT_REG for "offset" only if tracked slot "reg" has no saved
+ * value or its saved value differs from "value"; then record "value" as saved. */
+static inline void radeon_opt_set_context_reg(struct si_context *sctx, unsigned offset,
+					      enum si_tracked_reg reg, unsigned value)
+{
+	struct radeon_winsys_cs *cs = sctx->gfx_cs;
+
+	/* 1u, not 1: shifting a signed 1 into bit 31 is undefined behavior. */
+	if (!(sctx->tracked_regs.reg_saved & (1u << reg)) ||
+	    sctx->tracked_regs.reg_value[reg] != value) {
+		radeon_set_context_reg(cs, offset, value);
+		sctx->tracked_regs.reg_saved |= 1u << reg;
+		sctx->tracked_regs.reg_value[reg] = value;
+	}
+}
+
+/**
+ * Set 2 consecutive registers if either one is untracked or would change.
+ * @param offset        starting register offset
+ * @param reg           tracked slot of the first register; the second uses reg + 1
+ * @param value1        is written to first register
+ * @param value2        is written to second register
+ */
+static inline void radeon_opt_set_context_reg2(struct si_context *sctx, unsigned offset,
+					       enum si_tracked_reg reg, unsigned value1,
+					       unsigned value2)
+{
+	struct radeon_winsys_cs *cs = sctx->gfx_cs;
+	/* Unsigned on purpose: a signed "3 << reg" is undefined once it reaches bit 31. */
+	const unsigned pair_mask = 3u << reg;
+
+	if ((sctx->tracked_regs.reg_saved & pair_mask) != pair_mask ||
+	    sctx->tracked_regs.reg_value[reg] != value1 ||
+	    sctx->tracked_regs.reg_value[reg + 1] != value2) {
+		radeon_set_context_reg_seq(cs, offset, 2);
+		radeon_emit(cs, value1);
+		radeon_emit(cs, value2);
+		sctx->tracked_regs.reg_value[reg] = value1;
+		sctx->tracked_regs.reg_value[reg + 1] = value2;
+		sctx->tracked_regs.reg_saved |= pair_mask;
+	}
+}
+
 #endif
diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c
index ec74c1b..0b9a020 100644
--- a/src/gallium/drivers/radeonsi/si_gfx_cs.c
+++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c
@@ -321,4 +321,7 @@ void si_begin_new_gfx_cs(struct si_context *ctx)
 	ctx->last_num_tcs_input_cp = -1;
 
 	ctx->cs_shader_state.initialized = false;
+
+	/* Set all saved registers state to unknown */
+	ctx->tracked_regs.reg_saved = 0;
 }
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 5d1671f..82a8263 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -1033,6 +1033,8 @@ struct si_context {
 
 	void (*dma_clear_buffer)(struct si_context *sctx, struct pipe_resource *dst,
 				 uint64_t offset, uint64_t size, unsigned value);
+
+	struct si_tracked_regs			tracked_regs;
 };
 
 /* cik_sdma.c */
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 3a7e928..c95b929 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -1343,28 +1343,25 @@ void si_save_qbo_state(struct si_context *sctx, struct si_qbo_state *st)
 
 static void si_emit_db_render_state(struct si_context *sctx)
 {
-	struct radeon_winsys_cs *cs = sctx->gfx_cs;
 	struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
-	unsigned db_shader_control;
-
-	radeon_set_context_reg_seq(cs, R_028000_DB_RENDER_CONTROL, 2);
+	unsigned db_shader_control, db_render_control, db_count_control;
 
 	/* DB_RENDER_CONTROL */
 	if (sctx->dbcb_depth_copy_enabled ||
 	    sctx->dbcb_stencil_copy_enabled) {
-		radeon_emit(cs,
-			    S_028000_DEPTH_COPY(sctx->dbcb_depth_copy_enabled) |
-			    S_028000_STENCIL_COPY(sctx->dbcb_stencil_copy_enabled) |
-			    S_028000_COPY_CENTROID(1) |
-			    S_028000_COPY_SAMPLE(sctx->dbcb_copy_sample));
+		db_render_control =
+			S_028000_DEPTH_COPY(sctx->dbcb_depth_copy_enabled) |
+			S_028000_STENCIL_COPY(sctx->dbcb_stencil_copy_enabled) |
+			S_028000_COPY_CENTROID(1) |
+			S_028000_COPY_SAMPLE(sctx->dbcb_copy_sample);
 	} else if (sctx->db_flush_depth_inplace || sctx->db_flush_stencil_inplace) {
-		radeon_emit(cs,
-			    S_028000_DEPTH_COMPRESS_DISABLE(sctx->db_flush_depth_inplace) |
-			    S_028000_STENCIL_COMPRESS_DISABLE(sctx->db_flush_stencil_inplace));
+		db_render_control =
+			S_028000_DEPTH_COMPRESS_DISABLE(sctx->db_flush_depth_inplace) |
+			S_028000_STENCIL_COMPRESS_DISABLE(sctx->db_flush_stencil_inplace);
 	} else {
-		radeon_emit(cs,
-			    S_028000_DEPTH_CLEAR_ENABLE(sctx->db_depth_clear) |
-			    S_028000_STENCIL_CLEAR_ENABLE(sctx->db_stencil_clear));
+		db_render_control =
+			S_028000_DEPTH_CLEAR_ENABLE(sctx->db_depth_clear) |
+			S_028000_STENCIL_CLEAR_ENABLE(sctx->db_stencil_clear);
 	}
 
 	/* DB_COUNT_CONTROL (occlusion queries) */
@@ -1373,28 +1370,33 @@ static void si_emit_db_render_state(struct si_context *sctx)
 		bool perfect = sctx->num_perfect_occlusion_queries > 0;
 
 		if (sctx->chip_class >= CIK) {
-			radeon_emit(cs,
-				    S_028004_PERFECT_ZPASS_COUNTS(perfect) |
-				    S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples) |
-				    S_028004_ZPASS_ENABLE(1) |
-				    S_028004_SLICE_EVEN_ENABLE(1) |
-				    S_028004_SLICE_ODD_ENABLE(1));
+			db_count_control =
+				S_028004_PERFECT_ZPASS_COUNTS(perfect) |
+				S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples) |
+				S_028004_ZPASS_ENABLE(1) |
+				S_028004_SLICE_EVEN_ENABLE(1) |
+				S_028004_SLICE_ODD_ENABLE(1);
 		} else {
-			radeon_emit(cs,
-				    S_028004_PERFECT_ZPASS_COUNTS(perfect) |
-				    S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples));
+			db_count_control =
+				S_028004_PERFECT_ZPASS_COUNTS(perfect) |
+				S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples);
 		}
 	} else {
 		/* Disable occlusion queries. */
 		if (sctx->chip_class >= CIK) {
-			radeon_emit(cs, 0);
+			db_count_control = 0;
 		} else {
-			radeon_emit(cs, S_028004_ZPASS_INCREMENT_DISABLE(1));
+			db_count_control = S_028004_ZPASS_INCREMENT_DISABLE(1);
 		}
 	}
 
+	radeon_opt_set_context_reg2(sctx, R_028000_DB_RENDER_CONTROL,
+				    SI_TRACKED_DB_RENDER_CONTROL, db_render_control,
+				    db_count_control);
+
 	/* DB_RENDER_OVERRIDE2 */
-	radeon_set_context_reg(cs, R_028010_DB_RENDER_OVERRIDE2,
+	radeon_opt_set_context_reg(sctx,  R_028010_DB_RENDER_OVERRIDE2,
+		SI_TRACKED_DB_RENDER_OVERRIDE2,
 		S_028010_DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION(sctx->db_depth_disable_expclear) |
 		S_028010_DISABLE_SMEM_EXPCLEAR_OPTIMIZATION(sctx->db_stencil_disable_expclear) |
 		S_028010_DECOMPRESS_Z_ON_FLUSH(sctx->framebuffer.nr_samples >= 4));
@@ -1415,8 +1417,8 @@ static void si_emit_db_render_state(struct si_context *sctx)
 	    !sctx->screen->rbplus_allowed)
 		db_shader_control |= S_02880C_DUAL_QUAD_DISABLE(1);
 
-	radeon_set_context_reg(cs, R_02880C_DB_SHADER_CONTROL,
-			       db_shader_control);
+	radeon_opt_set_context_reg(sctx, R_02880C_DB_SHADER_CONTROL,
+				   SI_TRACKED_DB_SHADER_CONTROL, db_shader_control);
 }
 
 /*
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index d235f31..fb5f721 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -206,6 +206,22 @@ struct si_shader_data {
 	uint32_t		sh_base[SI_NUM_SHADERS];
 };
 
+/* Registers whose emitted values si_context remembers; each enumerator is a bit index in reg_saved. */
+enum si_tracked_reg {
+	SI_TRACKED_DB_RENDER_CONTROL, /* first of 2 consecutive registers set via radeon_opt_set_context_reg2 */
+	SI_TRACKED_DB_COUNT_CONTROL, /* must directly follow DB_RENDER_CONTROL: addressed as reg + 1 */
+
+	SI_TRACKED_DB_RENDER_OVERRIDE2, /* tracked on its own via radeon_opt_set_context_reg */
+	SI_TRACKED_DB_SHADER_CONTROL, /* tracked on its own via radeon_opt_set_context_reg */
+
+	SI_NUM_TRACKED_REGS, /* number of tracked registers; sizes si_tracked_regs.reg_value */
+};
+
+struct si_tracked_regs { /* cache of emitted context register values, stored in si_context */
+	uint32_t		reg_saved; /* bit N set: reg_value[N] is valid; zeroed by si_begin_new_gfx_cs */
+	uint32_t		reg_value[SI_NUM_TRACKED_REGS]; /* last value emitted, indexed by enum si_tracked_reg */
+};
+
 /* Private read-write buffer slots. */
 enum {
 	SI_ES_RING_ESGS,
-- 
2.7.4



More information about the mesa-dev mailing list