Mesa (master): dri/nv10: Fake fast Z clears for pre-nv17 cards.

Francisco Jerez currojerez at kemper.freedesktop.org
Sun Oct 10 02:17:54 UTC 2010


Module: Mesa
Branch: master
Commit: e2acc7be2683fd3c295480724b02f5a497309cfd
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=e2acc7be2683fd3c295480724b02f5a497309cfd

Author: Francisco Jerez <currojerez at riseup.net>
Date:   Sun Oct 10 01:39:13 2010 +0200

dri/nv10: Fake fast Z clears for pre-nv17 cards.

---

 src/mesa/drivers/dri/nouveau/nv10_context.c   |  130 +++++++++++++++++++++----
 src/mesa/drivers/dri/nouveau/nv10_driver.h    |    6 +
 src/mesa/drivers/dri/nouveau/nv10_state_fb.c  |    8 ++
 src/mesa/drivers/dri/nouveau/nv10_state_tnl.c |    3 +
 4 files changed, 127 insertions(+), 20 deletions(-)

diff --git a/src/mesa/drivers/dri/nouveau/nv10_context.c b/src/mesa/drivers/dri/nouveau/nv10_context.c
index f0e2744..3d898fd 100644
--- a/src/mesa/drivers/dri/nouveau/nv10_context.c
+++ b/src/mesa/drivers/dri/nouveau/nv10_context.c
@@ -61,39 +61,129 @@ use_fast_zclear(GLcontext *ctx, GLbitfield buffers)
 		fb->_Ymax == fb->Height && fb->_Ymin == 0;
 }
 
+GLboolean
+nv10_use_viewport_zclear(GLcontext *ctx)
+{
+	struct nouveau_context *nctx = to_nouveau_context(ctx);
+	struct gl_framebuffer *fb = ctx->DrawBuffer;
+
+	return context_chipset(ctx) < 0x17 &&
+		!nctx->hierz.clear_blocked && fb->_DepthBuffer &&
+		(_mesa_get_format_bits(fb->_DepthBuffer->Format,
+				       GL_DEPTH_BITS) >= 24);
+}
+
+float
+nv10_transform_depth(GLcontext *ctx, float z)
+{
+	struct nouveau_context *nctx = to_nouveau_context(ctx);
+
+	if (nv10_use_viewport_zclear(ctx))
+		return 2097152.0 * (z + (nctx->hierz.clear_seq & 7));
+	else
+		return ctx->DrawBuffer->_DepthMaxF * z;
+}
+
 static void
-nv10_clear(GLcontext *ctx, GLbitfield buffers)
+nv10_zclear(GLcontext *ctx, GLbitfield *buffers)
+{
+	/*
+	 * Pre-nv17 cards don't have native support for fast Z clears,
+	 * but in some cases we can still "clear" the Z buffer without
+	 * actually blitting to it if we're willing to sacrifice a few
+	 * bits of depth precision.
+	 *
+	 * Each time a clear is requested we modify the viewport
+	 * transform in such a way that the old contents of the depth
+	 * buffer are clamped to the requested clear value when
+	 * they're read by the GPU.
+	 */
+	struct nouveau_context *nctx = to_nouveau_context(ctx);
+	struct gl_framebuffer *fb = ctx->DrawBuffer;
+	struct nouveau_framebuffer *nfb = to_nouveau_framebuffer(fb);
+	struct nouveau_surface *s = &to_nouveau_renderbuffer(
+		fb->_DepthBuffer->Wrapped)->surface;
+
+	if (nv10_use_viewport_zclear(ctx)) {
+		int x, y, w, h;
+		float z = ctx->Depth.Clear;
+		uint32_t value = pack_zs_f(s->format, z, 0);
+
+		get_scissors(fb, &x, &y, &w, &h);
+		*buffers &= ~BUFFER_BIT_DEPTH;
+
+		if (use_fast_zclear(ctx, *buffers)) {
+			if (nfb->hierz.clear_value != value) {
+				/* Don't fast clear if we're changing
+				 * the depth value. */
+				nfb->hierz.clear_value = value;
+
+			} else if (z == 0.0) {
+				nctx->hierz.clear_seq++;
+				context_dirty(ctx, ZCLEAR);
+
+				if ((nctx->hierz.clear_seq & 7) != 0 &&
+				    nctx->hierz.clear_seq != 1)
+					/* We didn't wrap around -- no need to
+					 * clear the depth buffer for real. */
+					return;
+
+			} else if (z == 1.0) {
+				nctx->hierz.clear_seq--;
+				context_dirty(ctx, ZCLEAR);
+
+				if ((nctx->hierz.clear_seq & 7) != 7)
+					/* No wrap around */
+					return;
+			}
+		}
+
+		value = pack_zs_f(s->format,
+				  (z + (nctx->hierz.clear_seq & 7)) / 8, 0);
+		context_drv(ctx)->surface_fill(ctx, s, ~0, value, x, y, w, h);
+	}
+}
+
+static void
+nv17_zclear(GLcontext *ctx, GLbitfield *buffers)
 {
 	struct nouveau_context *nctx = to_nouveau_context(ctx);
 	struct nouveau_channel *chan = context_chan(ctx);
 	struct nouveau_grobj *celsius = context_eng3d(ctx);
 	struct nouveau_framebuffer *nfb = to_nouveau_framebuffer(
 		ctx->DrawBuffer);
+	struct nouveau_surface *s = &to_nouveau_renderbuffer(
+		nfb->base._DepthBuffer->Wrapped)->surface;
 
-	nouveau_validate_framebuffer(ctx);
+	/* Clear the hierarchical depth buffer */
+	BEGIN_RING(chan, celsius, NV17TCL_LMA_DEPTH_FILL_VALUE, 1);
+	OUT_RING(chan, pack_zs_f(s->format, ctx->Depth.Clear, 0));
+	BEGIN_RING(chan, celsius, NV17TCL_LMA_DEPTH_BUFFER_CLEAR, 1);
+	OUT_RING(chan, 1);
 
-	if ((buffers & BUFFER_BIT_DEPTH) &&
-	    ctx->Depth.Mask && nfb->hierz.bo) {
-		struct nouveau_surface *s = &to_nouveau_renderbuffer(
-			nfb->base._DepthBuffer->Wrapped)->surface;
+	/* Mark the depth buffer as cleared */
+	if (use_fast_zclear(ctx, *buffers)) {
+		if (nctx->hierz.clear_seq)
+			*buffers &= ~BUFFER_BIT_DEPTH;
 
-		/* Clear the hierarchical depth buffer */
-		BEGIN_RING(chan, celsius, NV17TCL_LMA_DEPTH_FILL_VALUE, 1);
-		OUT_RING(chan, pack_zs_f(s->format, ctx->Depth.Clear, 0));
-		BEGIN_RING(chan, celsius, NV17TCL_LMA_DEPTH_BUFFER_CLEAR, 1);
-		OUT_RING(chan, 1);
+		nfb->hierz.clear_value =
+			pack_zs_f(s->format, ctx->Depth.Clear, 0);
+		nctx->hierz.clear_seq++;
 
-		/* Mark the depth buffer as cleared */
-		if (use_fast_zclear(ctx, buffers)) {
-			if (nctx->hierz.clear_seq)
-				buffers &= ~BUFFER_BIT_DEPTH;
+		context_dirty(ctx, ZCLEAR);
+	}
+}
 
-			nfb->hierz.clear_value =
-				pack_zs_f(s->format, ctx->Depth.Clear, 0);
-			nctx->hierz.clear_seq++;
+static void
+nv10_clear(GLcontext *ctx, GLbitfield buffers)
+{
+	nouveau_validate_framebuffer(ctx);
 
-			context_dirty(ctx, ZCLEAR);
-		}
+	if ((buffers & BUFFER_BIT_DEPTH) && ctx->Depth.Mask) {
+		if (context_chipset(ctx) >= 0x17)
+			nv17_zclear(ctx, &buffers);
+		else
+			nv10_zclear(ctx, &buffers);
 	}
 
 	nouveau_clear(ctx, buffers);
diff --git a/src/mesa/drivers/dri/nouveau/nv10_driver.h b/src/mesa/drivers/dri/nouveau/nv10_driver.h
index 340ba05..61dceab 100644
--- a/src/mesa/drivers/dri/nouveau/nv10_driver.h
+++ b/src/mesa/drivers/dri/nouveau/nv10_driver.h
@@ -37,6 +37,12 @@ enum {
 /* nv10_context.c */
 extern const struct nouveau_driver nv10_driver;
 
+GLboolean
+nv10_use_viewport_zclear(GLcontext *ctx);
+
+float
+nv10_transform_depth(GLcontext *ctx, float z);
+
 /* nv10_render.c */
 void
 nv10_render_init(GLcontext *ctx);
diff --git a/src/mesa/drivers/dri/nouveau/nv10_state_fb.c b/src/mesa/drivers/dri/nouveau/nv10_state_fb.c
index 98eb0e8..f9f3eba 100644
--- a/src/mesa/drivers/dri/nouveau/nv10_state_fb.c
+++ b/src/mesa/drivers/dri/nouveau/nv10_state_fb.c
@@ -172,12 +172,15 @@ nv10_emit_viewport(GLcontext *ctx, int emit)
 {
 	struct nouveau_channel *chan = context_chan(ctx);
 	struct nouveau_grobj *celsius = context_eng3d(ctx);
+	struct gl_viewport_attrib *vp = &ctx->Viewport;
 	struct gl_framebuffer *fb = ctx->DrawBuffer;
 	float a[4] = {};
 
 	get_viewport_translate(ctx, a);
 	a[0] -= 2048;
 	a[1] -= 2048;
+	if (nv10_use_viewport_zclear(ctx))
+		a[2] = nv10_transform_depth(ctx, (vp->Far + vp->Near) / 2);
 
 	BEGIN_RING(chan, celsius, NV10TCL_VIEWPORT_TRANSLATE_X, 4);
 	OUT_RINGp(chan, a, 4);
@@ -204,5 +207,10 @@ nv10_emit_zclear(GLcontext *ctx, int emit)
 		OUT_RING(chan, nctx->hierz.clear_blocked ? 0 : 1);
 		OUT_RING(chan, nfb->hierz.clear_value |
 			 (nctx->hierz.clear_seq & 0xff));
+	} else {
+		BEGIN_RING(chan, celsius, NV10TCL_DEPTH_RANGE_NEAR, 2);
+		OUT_RINGf(chan, nv10_transform_depth(ctx, 0));
+		OUT_RINGf(chan, nv10_transform_depth(ctx, 1));
+		context_dirty(ctx, VIEWPORT);
 	}
 }
diff --git a/src/mesa/drivers/dri/nouveau/nv10_state_tnl.c b/src/mesa/drivers/dri/nouveau/nv10_state_tnl.c
index 0e592a1..6b2ede8 100644
--- a/src/mesa/drivers/dri/nouveau/nv10_state_tnl.c
+++ b/src/mesa/drivers/dri/nouveau/nv10_state_tnl.c
@@ -479,6 +479,9 @@ nv10_emit_projection(GLcontext *ctx, int emit)
 	_math_matrix_ctr(&m);
 	get_viewport_scale(ctx, m.m);
 
+	if (nv10_use_viewport_zclear(ctx))
+		m.m[MAT_SZ] /= 8;
+
 	if (nctx->fallback == HWTNL)
 		_math_matrix_mul_matrix(&m, &m, &ctx->_ModelProjectMatrix);
 




More information about the mesa-commit mailing list