Mesa (master): freedreno/a6xx: re-work LRZ state tracking

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Thu Jun 4 02:49:40 UTC 2020


Module: Mesa
Branch: master
Commit: 07887c9f34c664c4e87008b9d9b76dc06a2d7c1b
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=07887c9f34c664c4e87008b9d9b76dc06a2d7c1b

Author: Rob Clark <robdclark at chromium.org>
Date:   Wed Jun  3 10:06:58 2020 -0700

freedreno/a6xx: re-work LRZ state tracking

In particular, properly detect reversal of depth-test direction.
With that we can remove a lot of cases where we were unnecessarily
invalidating LRZ, which was simply papering over the direction-
reversal issue in deqp.

Signed-off-by: Rob Clark <robdclark at chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5298>

---

 src/gallium/drivers/freedreno/a6xx/fd6_blend.c     |  17 ++--
 src/gallium/drivers/freedreno/a6xx/fd6_blend.h     |   2 +-
 src/gallium/drivers/freedreno/a6xx/fd6_context.h   |  14 +--
 src/gallium/drivers/freedreno/a6xx/fd6_draw.c      |   6 +-
 src/gallium/drivers/freedreno/a6xx/fd6_emit.c      | 100 +++++++++++++++------
 src/gallium/drivers/freedreno/a6xx/fd6_emit.h      |   6 --
 src/gallium/drivers/freedreno/a6xx/fd6_gmem.c      |   2 +-
 src/gallium/drivers/freedreno/a6xx/fd6_zsa.c       |  86 +++++++++++-------
 src/gallium/drivers/freedreno/a6xx/fd6_zsa.h       |   7 +-
 src/gallium/drivers/freedreno/freedreno_resource.h |   9 ++
 10 files changed, 162 insertions(+), 87 deletions(-)

diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_blend.c b/src/gallium/drivers/freedreno/a6xx/fd6_blend.c
index 16e2bfaf1b8..3d4b1005524 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_blend.c
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_blend.c
@@ -148,11 +148,6 @@ fd6_blend_state_create(struct pipe_context *pctx,
 		const struct pipe_blend_state *cso)
 {
 	struct fd6_blend_stateobj *so;
-	bool reads_dest = false;
-
-	if (cso->logicop_enable) {
-		reads_dest = util_logicop_reads_dest(cso->logicop_func);
-	}
 
 	so = rzalloc_size(NULL, sizeof(*so));
 	if (!so)
@@ -160,21 +155,21 @@ fd6_blend_state_create(struct pipe_context *pctx,
 
 	so->base = *cso;
 	so->ctx = fd_context(pctx);
-	so->lrz_write = true;  /* unless blend enabled for any MRT */
+
+	if (cso->logicop_enable) {
+		so->reads_dest |= util_logicop_reads_dest(cso->logicop_func);
+	}
 
 	unsigned nr = cso->independent_blend_enable ? cso->max_rt : 0;
 	for (unsigned i = 0; i <= nr; i++) {
 		const struct pipe_rt_blend_state *rt = &cso->rt[i];
 
+		so->reads_dest |= rt->blend_enable;
 		if (rt->blend_enable) {
-			so->lrz_write = false;
+			so->reads_dest = true;
 		}
 	}
 
-	if (reads_dest) {
-		so->lrz_write = false;
-	}
-
 	util_dynarray_init(&so->variants, so);
 
 	return so;
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_blend.h b/src/gallium/drivers/freedreno/a6xx/fd6_blend.h
index 09c4609f35a..ef7dde8cf65 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_blend.h
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_blend.h
@@ -48,7 +48,7 @@ struct fd6_blend_stateobj {
 	struct pipe_blend_state base;
 
 	struct fd_context *ctx;
-	bool lrz_write;
+	bool reads_dest;
 	struct util_dynarray variants;
 };
 
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_context.h b/src/gallium/drivers/freedreno/a6xx/fd6_context.h
index c890cdd8423..bb48ecea784 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_context.h
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_context.h
@@ -31,11 +31,19 @@
 #include "util/u_upload_mgr.h"
 
 #include "freedreno_context.h"
+#include "freedreno_resource.h"
 
 #include "ir3/ir3_shader.h"
 
 #include "a6xx.xml.h"
 
+struct fd6_lrz_state {
+	bool enable : 1;
+	bool write  : 1;
+	bool test   : 1;
+	enum fd_lrz_direction direction : 2;
+};
+
 struct fd6_context {
 	struct fd_context base;
 
@@ -106,15 +114,11 @@ struct fd6_context {
 		uint32_t SP_UNKNOWN_A0F8;
 	} magic;
 
-
 	struct {
 		/* previous binning/draw lrz state, which is a function of multiple
 		 * gallium stateobjs, but doesn't necessarily change as frequently:
 		 */
-		struct {
-			uint32_t gras_lrz_cntl;
-			uint32_t rb_lrz_cntl;
-		} lrz[2];
+		struct fd6_lrz_state lrz[2];
 	} last;
 };
 
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_draw.c b/src/gallium/drivers/freedreno/a6xx/fd6_draw.c
index dab1cf32cd9..232262765a3 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_draw.c
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_draw.c
@@ -217,11 +217,6 @@ fd6_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info,
 	ctx->stats.gs_regs += COND(emit.gs, ir3_shader_halfregs(emit.gs));
 	ctx->stats.fs_regs += ir3_shader_halfregs(emit.fs);
 
-	/* figure out whether we need to disable LRZ write for binning
-	 * pass using draw pass's fs:
-	 */
-	emit.no_lrz_write = emit.fs->writes_pos || emit.fs->no_earlyz || emit.fs->has_kill;
-
 	struct fd_ringbuffer *ring = ctx->batch->draw;
 
 	struct CP_DRAW_INDX_OFFSET_0 draw0 = {
@@ -493,6 +488,7 @@ fd6_clear(struct fd_context *ctx, unsigned buffers,
 		struct fd_resource *zsbuf = fd_resource(pfb->zsbuf->texture);
 		if (zsbuf->lrz && !is_z32(pfb->zsbuf->format)) {
 			zsbuf->lrz_valid = true;
+			zsbuf->lrz_direction = FD_LRZ_UNKNOWN;
 			fd6_clear_lrz(ctx->batch, zsbuf, depth);
 		}
 	}
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c
index cdc8dfb85bd..bd30d69dd37 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c
@@ -585,45 +585,95 @@ build_vbo_state(struct fd6_emit *emit, const struct ir3_shader_variant *vp)
 	return ring;
 }
 
-static struct fd_ringbuffer *
-build_lrz(struct fd6_emit *emit, bool binning_pass)
+/**
+ * Calculate normalized LRZ state based on zsa/prog/blend state, updating
+ * the zsbuf's lrz state as necessary to detect the cases where we need
+ * to invalidate lrz.
+ */
+static struct fd6_lrz_state
+compute_lrz_state(struct fd6_emit *emit)
 {
 	struct fd_context *ctx = emit->ctx;
+	struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
+	const struct ir3_shader_variant *fs = emit->fs;
+	struct fd6_lrz_state lrz;
+
 	struct fd6_blend_stateobj *blend = fd6_blend_stateobj(ctx->blend);
 	struct fd6_zsa_stateobj *zsa = fd6_zsa_stateobj(ctx->zsa);
-	struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
 	struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
-	uint32_t gras_lrz_cntl = zsa->gras_lrz_cntl;
-	uint32_t rb_lrz_cntl = zsa->rb_lrz_cntl;
 
-	if (zsa->invalidate_lrz) {
+	lrz = zsa->lrz;
+
+	/* normalize lrz state: */
+	if (blend->reads_dest || fs->writes_pos || fs->no_earlyz || fs->has_kill) {
+		lrz.write = false;
+	}
+
+	/* if we change depthfunc direction, bail out on using LRZ.  The
+	 * LRZ buffer encodes a min/max depth value per block, but if
+	 * we switch from GT/GE <-> LT/LE, those values cannot be
+	 * interpreted properly.
+	 */
+	if (zsa->base.depth.enabled &&
+			(rsc->lrz_direction != FD_LRZ_UNKNOWN) &&
+			(rsc->lrz_direction != lrz.direction)) {
 		rsc->lrz_valid = false;
-		gras_lrz_cntl = 0;
-		rb_lrz_cntl = 0;
-	} else if (emit->no_lrz_write || !rsc->lrz || !rsc->lrz_valid) {
-		gras_lrz_cntl = 0;
-		rb_lrz_cntl = 0;
-	} else if (binning_pass && blend->lrz_write && zsa->lrz_write) {
-		gras_lrz_cntl |= A6XX_GRAS_LRZ_CNTL_LRZ_WRITE;
 	}
 
+	if (zsa->invalidate_lrz || !rsc->lrz_valid) {
+		rsc->lrz_valid = false;
+		memset(&lrz, 0, sizeof(lrz));
+	}
+
+	if (fs->no_earlyz || fs->writes_pos) {
+		lrz.enable = false;
+		lrz.write = false;
+		lrz.test = false;
+	}
+
+	/* Once we start writing to the real depth buffer, we lock in the
+	 * direction for LRZ.. if we have to skip a LRZ write for any
+	 * reason, it is still safe to have LRZ until there is a direction
+	 * reversal.  Prior to the reversal, since we disabled LRZ writes
+	 * in the "unsafe" cases, this just means that the LRZ test may
+	 * not early-discard some things that end up not passing a later
+	 * test (ie. be overly concervative).  But once you have a reversal
+	 * of direction, it is possible to increase/decrease the z value
+	 * to the point where the overly-conservative test is incorrect.
+	 */
+	if (zsa->base.depth.writemask) {
+		rsc->lrz_direction = lrz.direction;
+	}
+
+	return lrz;
+}
+
+static struct fd_ringbuffer *
+build_lrz(struct fd6_emit *emit, bool binning_pass)
+{
+	struct fd_context *ctx = emit->ctx;
 	struct fd6_context *fd6_ctx = fd6_context(ctx);
-	if ((fd6_ctx->last.lrz[binning_pass].gras_lrz_cntl == gras_lrz_cntl) &&
-			(fd6_ctx->last.lrz[binning_pass].rb_lrz_cntl == rb_lrz_cntl) &&
-			!ctx->last.dirty)
+	struct fd6_lrz_state lrz = compute_lrz_state(emit);
+
+	/* If the LRZ state has not changed, we can skip the emit: */
+	if (!ctx->last.dirty &&
+			!memcmp(&fd6_ctx->last.lrz[binning_pass], &lrz, sizeof(lrz)))
 		return NULL;
 
-	fd6_ctx->last.lrz[binning_pass].gras_lrz_cntl = gras_lrz_cntl;
-	fd6_ctx->last.lrz[binning_pass].rb_lrz_cntl = rb_lrz_cntl;
+	fd6_ctx->last.lrz[binning_pass] = lrz;
 
 	struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(ctx->batch->submit,
-			16, FD_RINGBUFFER_STREAMING);
-
-	OUT_PKT4(ring, REG_A6XX_GRAS_LRZ_CNTL, 1);
-	OUT_RING(ring, gras_lrz_cntl);
-
-	OUT_PKT4(ring, REG_A6XX_RB_LRZ_CNTL, 1);
-	OUT_RING(ring, rb_lrz_cntl);
+			4*4, FD_RINGBUFFER_STREAMING);
+
+	OUT_REG(ring, A6XX_GRAS_LRZ_CNTL(
+			.enable        = lrz.enable,
+			.lrz_write     = lrz.write,
+			.greater       = lrz.direction == FD_LRZ_GREATER,
+			.z_test_enable = lrz.test,
+		));
+	OUT_REG(ring, A6XX_RB_LRZ_CNTL(
+			.enable = lrz.enable,
+		));
 
 	return ring;
 }
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.h b/src/gallium/drivers/freedreno/a6xx/fd6_emit.h
index 16e54047245..c66418b1644 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.h
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.h
@@ -95,12 +95,6 @@ struct fd6_emit {
 	bool no_decode_srgb;
 	bool primitive_restart;
 
-	/* in binning pass, we don't have real frag shader, so we
-	 * don't know if real draw disqualifies lrz write.  So just
-	 * figure that out up-front and stash it in the emit.
-	 */
-	bool no_lrz_write;
-
 	/* cached to avoid repeated lookups: */
 	const struct fd6_program_state *prog;
 
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c b/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c
index 790215dbc0b..829a4d57c5d 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c
@@ -1381,7 +1381,7 @@ fd6_emit_tile_fini(struct fd_batch *batch)
 		fd6_emit_ib(batch->gmem, batch->epilogue);
 
 	OUT_PKT4(ring, REG_A6XX_GRAS_LRZ_CNTL, 1);
-	OUT_RING(ring, A6XX_GRAS_LRZ_CNTL_ENABLE | A6XX_GRAS_LRZ_CNTL_FC_ENABLE);
+	OUT_RING(ring, A6XX_GRAS_LRZ_CNTL_ENABLE);
 
 	fd6_emit_lrz_flush(ring);
 
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_zsa.c b/src/gallium/drivers/freedreno/a6xx/fd6_zsa.c
index 6aa63b2e89f..d7c2000b424 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_zsa.c
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_zsa.c
@@ -34,34 +34,60 @@
 #include "fd6_context.h"
 #include "fd6_format.h"
 
-/* update lza state based on stencil/alpha-test func: */
+/* update lza state based on stencil-test func:
+ *
+ * Conceptually the order of the pipeline is:
+ *
+ *
+ *   FS -> Alpha-Test  ->  Stencil-Test  ->  Depth-Test
+ *                              |                |
+ *                       if wrmask != 0     if wrmask != 0
+ *                              |                |
+ *                              v                v
+ *                        Stencil-Write      Depth-Write
+ *
+ * Because Stencil-Test can have side effects (Stencil-Write) prior
+ * to depth test, in this case we potentially need to disable early
+ * lrz-test.  See:
+ *
+ * https://www.khronos.org/opengl/wiki/Per-Sample_Processing
+ */
 static void
-update_lrz_sa(struct fd6_zsa_stateobj *so, enum pipe_compare_func func)
+update_lrz_stencil(struct fd6_zsa_stateobj *so, enum pipe_compare_func func,
+		bool stencil_write)
 {
 	switch (func) {
 	case PIPE_FUNC_ALWAYS:
-		/* nothing to do for LRZ: */
+		/* nothing to do for LRZ, but for stencil test when stencil-
+		 * write is enabled, we need to disable lrz-test, since
+		 * conceptually stencil test and write happens before depth-
+		 * test:
+		 */
+		if (stencil_write) {
+			so->lrz.enable = false;
+			so->lrz.test = false;
+		}
 		break;
 	case PIPE_FUNC_NEVER:
 		/* fragment never passes, disable lrz_write for this draw: */
-		so->lrz_write = false;
+		so->lrz.write = false;
 		break;
 	default:
 		/* whether the fragment passes or not depends on result
 		 * of stencil test, which we cannot know when doing binning
 		 * pass:
-		 *
-		 * TODO we maybe don't have to invalidate_lrz, depending on
-		 * the depth/stencil func?  Ie. if there is an opaque surface
-		 * behind what is currently being drawn, we could just disable
-		 * lrz_write for a conservative but correct result?
 		 */
-		so->invalidate_lrz = true;
-		so->lrz_write = false;
+		so->lrz.write = false;
+		/* similarly to the PIPE_FUNC_ALWAY case, if there are side-
+		 * effects from stencil test we need to disable lrz-test.
+		 */
+		if (stencil_write) {
+			so->lrz.enable = false;
+			so->lrz.test = false;
+		}
 		break;
 	}
 }
-
 void *
 fd6_zsa_state_create(struct pipe_context *pctx,
 		const struct pipe_depth_stencil_alpha_state *cso)
@@ -83,35 +109,36 @@ fd6_zsa_state_create(struct pipe_context *pctx,
 			A6XX_RB_DEPTH_CNTL_Z_ENABLE |
 			A6XX_RB_DEPTH_CNTL_Z_TEST_ENABLE;
 
-		so->gras_lrz_cntl |= A6XX_GRAS_LRZ_CNTL_Z_TEST_ENABLE;
+		so->lrz.test = true;
 
 		if (cso->depth.writemask) {
-			so->lrz_write = true;
+			so->lrz.write = true;
 		}
 
 		switch (cso->depth.func) {
 		case PIPE_FUNC_LESS:
 		case PIPE_FUNC_LEQUAL:
-			so->gras_lrz_cntl |= A6XX_GRAS_LRZ_CNTL_ENABLE;
-			so->rb_lrz_cntl |= A6XX_RB_LRZ_CNTL_ENABLE;
+			so->lrz.enable = true;
+			so->lrz.direction = FD_LRZ_LESS;
 			break;
 
 		case PIPE_FUNC_GREATER:
 		case PIPE_FUNC_GEQUAL:
-			so->gras_lrz_cntl |= A6XX_GRAS_LRZ_CNTL_ENABLE | A6XX_GRAS_LRZ_CNTL_GREATER;
-			so->rb_lrz_cntl |= A6XX_RB_LRZ_CNTL_ENABLE;
+			so->lrz.enable = true;
+			so->lrz.direction = FD_LRZ_GREATER;
 			break;
 
 		case PIPE_FUNC_NEVER:
-			so->gras_lrz_cntl |= A6XX_GRAS_LRZ_CNTL_ENABLE;
-			so->rb_lrz_cntl |= A6XX_RB_LRZ_CNTL_ENABLE;
-			so->lrz_write = false;
+			so->lrz.enable = true;
+			so->lrz.write = false;
+			so->lrz.direction = FD_LRZ_LESS;
 			break;
 
+		/* TODO revisit these: */
 		case PIPE_FUNC_EQUAL:
 		case PIPE_FUNC_NOTEQUAL:
 		case PIPE_FUNC_ALWAYS:
-			so->lrz_write = false;
+			so->lrz.write = false;
 			so->invalidate_lrz = true;
 			break;
 		}
@@ -127,7 +154,7 @@ fd6_zsa_state_create(struct pipe_context *pctx,
 		 * stencil test we don't really know what the updates to the
 		 * depth buffer will be.
 		 */
-		update_lrz_sa(so, s->func);
+		update_lrz_stencil(so, s->func, !!s->writemask);
 
 		so->rb_stencil_control |=
 			A6XX_RB_STENCIL_CONTROL_STENCIL_READ |
@@ -143,7 +170,7 @@ fd6_zsa_state_create(struct pipe_context *pctx,
 		if (cso->stencil[1].enabled) {
 			const struct pipe_stencil_state *bs = &cso->stencil[1];
 
-			update_lrz_sa(so, bs->func);
+			update_lrz_stencil(so, bs->func, !!bs->writemask);
 
 			so->rb_stencil_control |=
 				A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF |
@@ -158,19 +185,18 @@ fd6_zsa_state_create(struct pipe_context *pctx,
 	}
 
 	if (cso->alpha.enabled) {
-		/* stencil test happens before depth test, so without performing
-		 * stencil test we don't really know what the updates to the
-		 * depth buffer will be.
+		/* Alpha test is functionally a conditional discard, so we can't
+		 * write LRZ before seeing if we end up discarding or not
 		 */
-		update_lrz_sa(so, cso->alpha.func);
+		if (cso->alpha.func != PIPE_FUNC_ALWAYS) {
+			so->lrz.write = false;
+		}
 
 		uint32_t ref = cso->alpha.ref_value * 255.0;
 		so->rb_alpha_control =
 			A6XX_RB_ALPHA_CONTROL_ALPHA_TEST |
 			A6XX_RB_ALPHA_CONTROL_ALPHA_REF(ref) |
 			A6XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC(cso->alpha.func);
-//		so->rb_depth_control |=
-//			A6XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE;
 	}
 
 	so->stateobj = fd_ringbuffer_new_object(ctx->pipe, 9 * 4);
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_zsa.h b/src/gallium/drivers/freedreno/a6xx/fd6_zsa.h
index f1c4f6f29b6..73bfa4fba06 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_zsa.h
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_zsa.h
@@ -34,6 +34,8 @@
 
 #include "freedreno_util.h"
 
+#include "fd6_context.h"
+
 struct fd6_zsa_stateobj {
 	struct pipe_depth_stencil_alpha_state base;
 
@@ -42,9 +44,8 @@ struct fd6_zsa_stateobj {
 	uint32_t rb_stencil_control;
 	uint32_t rb_stencilmask;
 	uint32_t rb_stencilwrmask;
-	uint32_t gras_lrz_cntl;
-	uint32_t rb_lrz_cntl;
-	bool lrz_write;
+
+	struct fd6_lrz_state lrz;
 	bool invalidate_lrz;
 
 	struct fd_ringbuffer *stateobj;
diff --git a/src/gallium/drivers/freedreno/freedreno_resource.h b/src/gallium/drivers/freedreno/freedreno_resource.h
index f71cd6a8617..10b7f40f36f 100644
--- a/src/gallium/drivers/freedreno/freedreno_resource.h
+++ b/src/gallium/drivers/freedreno/freedreno_resource.h
@@ -36,6 +36,14 @@
 #include "freedreno_util.h"
 #include "freedreno/fdl/freedreno_layout.h"
 
+enum fd_lrz_direction {
+	FD_LRZ_UNKNOWN,
+	/* Depth func less/less-than: */
+	FD_LRZ_LESS,
+	/* Depth func greater/greater-than: */
+	FD_LRZ_GREATER,
+};
+
 struct fd_resource {
 	struct pipe_resource base;
 	struct fd_bo *bo;
@@ -86,6 +94,7 @@ struct fd_resource {
 	 * fdl_layout
 	 */
 	bool lrz_valid : 1;
+	enum fd_lrz_direction lrz_direction : 2;
 	uint16_t lrz_width;  // for lrz clear, does this differ from lrz_pitch?
 	uint16_t lrz_height;
 	uint16_t lrz_pitch;



More information about the mesa-commit mailing list