[Mesa-dev] [PATCH 5/5] radeonsi: implement fast stencil clear

Marek Olšák maraeo at gmail.com
Thu Dec 10 05:10:21 PST 2015


From: Marek Olšák <marek.olsak at amd.com>

---
 src/gallium/drivers/radeon/r600_pipe_common.h |  2 ++
 src/gallium/drivers/radeonsi/si_blit.c        | 46 ++++++++++++++++++++-------
 src/gallium/drivers/radeonsi/si_pipe.h        |  2 ++
 src/gallium/drivers/radeonsi/si_state.c       | 26 ++++++++-------
 4 files changed, 53 insertions(+), 23 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index a191fd0..c3933b1d 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -221,6 +221,8 @@ struct r600_texture {
 	struct r600_resource		*htile_buffer;
 	bool				depth_cleared; /* if it was cleared at least once */
 	float				depth_clear_value;
+	bool				stencil_cleared; /* if it was cleared at least once */
+	uint8_t				stencil_clear_value;
 
 	bool				non_disp_tiling; /* R600-Cayman only */
 };
diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
index 13d8e6f..75a9d56 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -377,22 +377,39 @@ static void si_clear(struct pipe_context *ctx, unsigned buffers,
 		}
 	}
 
-	if (buffers & PIPE_CLEAR_DEPTH &&
-	    zstex && zstex->htile_buffer &&
+	if (zstex && zstex->htile_buffer &&
 	    zsbuf->u.tex.level == 0 &&
 	    zsbuf->u.tex.first_layer == 0 &&
 	    zsbuf->u.tex.last_layer == util_max_layer(&zstex->resource.b.b, 0)) {
-		/* Need to disable EXPCLEAR temporarily if clearing
-		 * to a new value. */
-		if (zstex->depth_cleared && zstex->depth_clear_value != depth) {
-			sctx->db_depth_disable_expclear = true;
+		if (buffers & PIPE_CLEAR_DEPTH) {
+			/* Need to disable EXPCLEAR temporarily if clearing
+			 * to a new value. */
+			if (zstex->depth_cleared && zstex->depth_clear_value != depth) {
+				sctx->db_depth_disable_expclear = true;
+			}
+
+			zstex->depth_clear_value = depth;
+			sctx->framebuffer.dirty_zsbuf = true;
+			si_mark_atom_dirty(sctx, &sctx->framebuffer.atom); /* updates DB_DEPTH_CLEAR */
+			sctx->db_depth_clear = true;
+			si_mark_atom_dirty(sctx, &sctx->db_render_state);
 		}
 
-		zstex->depth_clear_value = depth;
-		sctx->framebuffer.dirty_zsbuf = true;
-		si_mark_atom_dirty(sctx, &sctx->framebuffer.atom); /* updates DB_DEPTH_CLEAR */
-		sctx->db_depth_clear = true;
-		si_mark_atom_dirty(sctx, &sctx->db_render_state);
+		if (buffers & PIPE_CLEAR_STENCIL) {
+			stencil &= 0xff;
+
+			/* Need to disable EXPCLEAR temporarily if clearing
+			 * to a new value. */
+			if (zstex->stencil_cleared && zstex->stencil_clear_value != stencil) {
+				sctx->db_stencil_disable_expclear = true;
+			}
+
+			zstex->stencil_clear_value = stencil;
+			sctx->framebuffer.dirty_zsbuf = true;
+			si_mark_atom_dirty(sctx, &sctx->framebuffer.atom); /* updates DB_STENCIL_CLEAR */
+			sctx->db_stencil_clear = true;
+			si_mark_atom_dirty(sctx, &sctx->db_render_state);
+		}
 	}
 
 	si_blitter_begin(ctx, SI_CLEAR);
@@ -407,6 +424,13 @@ static void si_clear(struct pipe_context *ctx, unsigned buffers,
 		zstex->depth_cleared = true;
 		si_mark_atom_dirty(sctx, &sctx->db_render_state);
 	}
+
+	if (sctx->db_stencil_clear) {
+		sctx->db_stencil_clear = false;
+		sctx->db_stencil_disable_expclear = false;
+		zstex->stencil_cleared = true;
+		si_mark_atom_dirty(sctx, &sctx->db_render_state);
+	}
 }
 
 static void si_clear_render_target(struct pipe_context *ctx,
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 834c358..65c7e19 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -253,6 +253,8 @@ struct si_context {
 	bool			db_flush_stencil_inplace;
 	bool			db_depth_clear;
 	bool			db_depth_disable_expclear;
+	bool			db_stencil_clear;
+	bool			db_stencil_disable_expclear;
 	unsigned		ps_db_shader_control;
 
 	/* Emitted draw state. */
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 9038704..3e6078a 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -1090,10 +1090,10 @@ static void si_emit_db_render_state(struct si_context *sctx, struct r600_atom *s
 		radeon_emit(cs,
 			    S_028000_DEPTH_COMPRESS_DISABLE(sctx->db_flush_depth_inplace) |
 			    S_028000_STENCIL_COMPRESS_DISABLE(sctx->db_flush_stencil_inplace));
-	} else if (sctx->db_depth_clear) {
-		radeon_emit(cs, S_028000_DEPTH_CLEAR_ENABLE(1));
 	} else {
-		radeon_emit(cs, 0);
+		radeon_emit(cs,
+			    S_028000_DEPTH_CLEAR_ENABLE(sctx->db_depth_clear) |
+			    S_028000_STENCIL_CLEAR_ENABLE(sctx->db_stencil_clear));
 	}
 
 	/* DB_COUNT_CONTROL (occlusion queries) */
@@ -1120,12 +1120,9 @@ static void si_emit_db_render_state(struct si_context *sctx, struct r600_atom *s
 	}
 
 	/* DB_RENDER_OVERRIDE2 */
-	if (sctx->db_depth_disable_expclear) {
-		radeon_set_context_reg(cs, R_028010_DB_RENDER_OVERRIDE2,
-			S_028010_DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION(1));
-	} else {
-		radeon_set_context_reg(cs, R_028010_DB_RENDER_OVERRIDE2, 0);
-	}
+	radeon_set_context_reg(cs, R_028010_DB_RENDER_OVERRIDE2,
+		S_028010_DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION(sctx->db_depth_disable_expclear) |
+		S_028010_DISABLE_SMEM_EXPCLEAR_OPTIMIZATION(sctx->db_stencil_disable_expclear));
 
 	db_shader_control = S_02880C_ALPHA_TO_MASK_DISABLE(sctx->framebuffer.cb0_is_integer) |
 		            sctx->ps_db_shader_control;
@@ -2240,7 +2237,10 @@ static void si_init_depth_surface(struct si_context *sctx,
 		z_info |= S_028040_TILE_SURFACE_ENABLE(1) |
 			  S_028040_ALLOW_EXPCLEAR(1);
 
-		if (!(rtex->surface.flags & RADEON_SURF_SBUFFER))
+		if (rtex->surface.flags & RADEON_SURF_SBUFFER)
+			s_info |= S_028044_ALLOW_EXPCLEAR(1);
+		else
+			/* Use all of the htile_buffer for depth if there's no stencil. */
 			s_info |= S_028044_TILE_STENCIL_DISABLE(1);
 
 		uint64_t va = rtex->htile_buffer->gpu_address;
@@ -2509,8 +2509,11 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
 		radeon_emit(cs, zb->db_depth_size);	/* R_028058_DB_DEPTH_SIZE */
 		radeon_emit(cs, zb->db_depth_slice);	/* R_02805C_DB_DEPTH_SLICE */
 
+		radeon_set_context_reg_seq(cs, R_028028_DB_STENCIL_CLEAR, 2);
+		radeon_emit(cs, rtex->stencil_clear_value); /* R_028028_DB_STENCIL_CLEAR */
+		radeon_emit(cs, fui(rtex->depth_clear_value)); /* R_02802C_DB_DEPTH_CLEAR */
+
 		radeon_set_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, zb->db_htile_surface);
-		radeon_set_context_reg(cs, R_02802C_DB_DEPTH_CLEAR, fui(rtex->depth_clear_value));
 		radeon_set_context_reg(cs, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
 				       zb->pa_su_poly_offset_db_fmt_cntl);
 	} else if (sctx->framebuffer.dirty_zsbuf) {
@@ -3606,7 +3609,6 @@ static void si_init_config(struct si_context *sctx)
 	si_pm4_set_reg(pm4, R_028BEC_PA_CL_GB_VERT_DISC_ADJ, fui(1.0));
 	si_pm4_set_reg(pm4, R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ, fui(1.0));
 	si_pm4_set_reg(pm4, R_028BF4_PA_CL_GB_HORZ_DISC_ADJ, fui(1.0));
-	si_pm4_set_reg(pm4, R_028028_DB_STENCIL_CLEAR, 0);
 	si_pm4_set_reg(pm4, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0);
 	si_pm4_set_reg(pm4, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0);
 	si_pm4_set_reg(pm4, R_028AC8_DB_PRELOAD_CONTROL, 0x0);
-- 
2.1.4



More information about the mesa-dev mailing list