[Freedreno] [PATCH 4/4] freedreno/a3xx: add support for S8 and Z32F_S8

Ilia Mirkin imirkin at alum.mit.edu
Sat Apr 25 12:00:25 PDT 2015


Enables ARB_depth_buffer_float. The hardware has no sampling support for
interleaved Z32F_S8, so depth and stencil are stored as two separate
textures, one as Z32F and the other as S8. As a result, we need a lot of
additional logic for GMEM restores and transfers.

Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu>
---
 docs/relnotes/10.6.0.html                          |   1 +
 src/gallium/drivers/freedreno/a3xx/fd3_emit.c      |  13 ++-
 src/gallium/drivers/freedreno/a3xx/fd3_format.c    |   7 ++
 src/gallium/drivers/freedreno/a3xx/fd3_gmem.c      |  80 ++++++++++----
 src/gallium/drivers/freedreno/freedreno_context.c  |   8 +-
 src/gallium/drivers/freedreno/freedreno_draw.c     |  13 ++-
 src/gallium/drivers/freedreno/freedreno_gmem.c     |  29 +++--
 src/gallium/drivers/freedreno/freedreno_gmem.h     |   4 +-
 src/gallium/drivers/freedreno/freedreno_resource.c | 122 +++++++++++++++++++--
 src/gallium/drivers/freedreno/freedreno_resource.h |   3 +
 10 files changed, 236 insertions(+), 44 deletions(-)

diff --git a/docs/relnotes/10.6.0.html b/docs/relnotes/10.6.0.html
index 25b3659..b82004d 100644
--- a/docs/relnotes/10.6.0.html
+++ b/docs/relnotes/10.6.0.html
@@ -46,6 +46,7 @@ Note: some of the new features are only available with certain drivers.
 <ul>
 <li>GL_AMD_pinned_memory on r600, radeonsi</li>
 <li>GL_ARB_clip_control on i965</li>
+<li>GL_ARB_depth_buffer_float on freedreno</li>
 <li>GL_ARB_depth_clamp on freedreno</li>
 <li>GL_ARB_draw_indirect, GL_ARB_multi_draw_indirect on r600</li>
 <li>GL_ARB_draw_instanced on freedreno</li>
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
index ee473e6..af08696 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
@@ -383,9 +383,17 @@ fd3_emit_gmem_restore_tex(struct fd_ringbuffer *ring,
 		}
 
 		struct fd_resource *rsc = fd_resource(psurf[i]->texture);
+		enum pipe_format format = fd3_gmem_restore_format(psurf[i]->format);
+		/* The restore blit_zs shader expects stencil in sampler 0, and depth
+		 * in sampler 1
+		 */
+		if (rsc->stencil && i == 0) {
+			rsc = rsc->stencil;
+			format = fd3_gmem_restore_format(rsc->base.b.format);
+		}
+
 		unsigned lvl = psurf[i]->u.tex.level;
 		struct fd_resource_slice *slice = fd_resource_slice(rsc, lvl);
-		enum pipe_format format = fd3_gmem_restore_format(psurf[i]->format);
 
 		debug_assert(psurf[i]->u.tex.first_layer == psurf[i]->u.tex.last_layer);
 
@@ -412,6 +420,9 @@ fd3_emit_gmem_restore_tex(struct fd_ringbuffer *ring,
 	for (i = 0; i < bufs; i++) {
 		if (psurf[i]) {
 			struct fd_resource *rsc = fd_resource(psurf[i]->texture);
+			/* Matches above logic for blit_zs shader */
+			if (rsc->stencil && i == 0)
+				rsc = rsc->stencil;
 			unsigned lvl = psurf[i]->u.tex.level;
 			uint32_t offset = fd_resource_offset(rsc, lvl, psurf[i]->u.tex.first_layer);
 			OUT_RELOC(ring, rsc->bo, offset, 0, 0);
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_format.c b/src/gallium/drivers/freedreno/a3xx/fd3_format.c
index 76cb318..ec87aa9 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_format.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_format.c
@@ -91,6 +91,8 @@ static struct fd3_format formats[PIPE_FORMAT_COUNT] = {
 	_T(I8_UINT,    8_UINT,  NONE,     WZYX),
 	_T(I8_SINT,    8_SINT,  NONE,     WZYX),
 
+	_T(S8_UINT,    8_UINT,  R8_UNORM, WZYX),
+
 	/* 16-bit */
 	VT(R16_UNORM,   16_UNORM, NONE,     WZYX),
 	VT(R16_SNORM,   16_SNORM, NONE,     WZYX),
@@ -196,6 +198,7 @@ static struct fd3_format formats[PIPE_FORMAT_COUNT] = {
 	_T(Z24X8_UNORM,       X8Z24_UNORM, R8G8B8A8_UNORM, WZYX),
 	_T(Z24_UNORM_S8_UINT, X8Z24_UNORM, R8G8B8A8_UNORM, WZYX),
 	_T(Z32_FLOAT,         Z32_FLOAT,   R8G8B8A8_UNORM, WZYX),
+	_T(Z32_FLOAT_S8X24_UINT, Z32_FLOAT,R8G8B8A8_UNORM, WZYX),
 
 	/* 48-bit */
 	V_(R16G16B16_UNORM,   16_16_16_UNORM, NONE, WZYX),
@@ -296,6 +299,8 @@ fd3_pipe2swap(enum pipe_format format)
 enum a3xx_tex_fetchsize
 fd3_pipe2fetchsize(enum pipe_format format)
 {
+	if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)
+		format = PIPE_FORMAT_Z32_FLOAT;
 	switch (util_format_get_blocksizebits(format)) {
 	case 8: return TFETCH_1_BYTE;
 	case 16: return TFETCH_2_BYTE;
@@ -324,6 +329,8 @@ fd3_gmem_restore_format(enum pipe_format format)
 		return PIPE_FORMAT_R8G8B8A8_UNORM;
 	case PIPE_FORMAT_Z16_UNORM:
 		return PIPE_FORMAT_R8G8_UNORM;
+	case PIPE_FORMAT_S8_UINT:
+		return PIPE_FORMAT_R8_UNORM;
 	default:
 		return format;
 	}
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
index d76acb2..7d39757 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
@@ -72,12 +72,20 @@ emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,
 			struct pipe_surface *psurf = bufs[i];
 
 			rsc = fd_resource(psurf->texture);
-			slice = fd_resource_slice(rsc, psurf->u.tex.level);
-			format = fd3_pipe2color(psurf->format);
-			swap = fd3_pipe2swap(psurf->format);
 			pformat = psurf->format;
+			/* In case we're drawing to Z32F_S8, the "color" actually goes to
+			 * the stencil
+			 */
+			if (rsc->stencil) {
+				rsc = rsc->stencil;
+				pformat = rsc->base.b.format;
+				bases++;
+			}
+			slice = fd_resource_slice(rsc, psurf->u.tex.level);
+			format = fd3_pipe2color(pformat);
+			swap = fd3_pipe2swap(pformat);
 			if (decode_srgb)
-				srgb = util_format_is_srgb(psurf->format);
+				srgb = util_format_is_srgb(pformat);
 			else
 				pformat = util_format_linear(pformat);
 
@@ -299,12 +307,17 @@ emit_binning_workaround(struct fd_context *ctx)
 
 static void
 emit_gmem2mem_surf(struct fd_context *ctx,
-		enum adreno_rb_copy_control_mode mode,
-		uint32_t base, struct pipe_surface *psurf)
+				   enum adreno_rb_copy_control_mode mode,
+				   bool stencil,
+				   uint32_t base, struct pipe_surface *psurf)
 {
 	struct fd_ringbuffer *ring = ctx->ring;
 	struct fd_resource *rsc = fd_resource(psurf->texture);
 	enum pipe_format format = psurf->format;
+	if (stencil) {
+		rsc = rsc->stencil;
+		format = rsc->base.b.format;
+	}
 	struct fd_resource_slice *slice = fd_resource_slice(rsc, psurf->u.tex.level);
 	uint32_t offset = fd_resource_offset(rsc, psurf->u.tex.level,
 			psurf->u.tex.first_layer);
@@ -322,10 +335,10 @@ emit_gmem2mem_surf(struct fd_context *ctx,
 	OUT_RELOCW(ring, rsc->bo, offset, 0, -1);    /* RB_COPY_DEST_BASE */
 	OUT_RING(ring, A3XX_RB_COPY_DEST_PITCH_PITCH(slice->pitch * rsc->cpp));
 	OUT_RING(ring, A3XX_RB_COPY_DEST_INFO_TILE(LINEAR) |
-			A3XX_RB_COPY_DEST_INFO_FORMAT(fd3_pipe2color(psurf->format)) |
+			A3XX_RB_COPY_DEST_INFO_FORMAT(fd3_pipe2color(format)) |
 			A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE(0xf) |
 			A3XX_RB_COPY_DEST_INFO_ENDIAN(ENDIAN_NONE) |
-			A3XX_RB_COPY_DEST_INFO_SWAP(fd3_pipe2swap(psurf->format)));
+			A3XX_RB_COPY_DEST_INFO_SWAP(fd3_pipe2swap(format)));
 
 	fd_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
 			DI_SRC_SEL_AUTO_INDEX, 2, 0, INDEX_SIZE_IGN, 0, 0, NULL);
@@ -421,9 +434,15 @@ fd3_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile)
 	fd3_program_emit(ring, &emit, 0, NULL);
 	fd3_emit_vertex_bufs(ring, &emit);
 
-	if (ctx->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL))
-		emit_gmem2mem_surf(ctx, RB_COPY_DEPTH_STENCIL,
-						   ctx->gmem.zsbuf_base, pfb->zsbuf);
+	if (ctx->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
+		struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
+		if (!rsc->stencil || ctx->resolve & FD_BUFFER_DEPTH)
+			emit_gmem2mem_surf(ctx, RB_COPY_DEPTH_STENCIL, false,
+							   ctx->gmem.zsbuf_base[0], pfb->zsbuf);
+		if (rsc->stencil && ctx->resolve & FD_BUFFER_STENCIL)
+			emit_gmem2mem_surf(ctx, RB_COPY_DEPTH_STENCIL, true,
+							   ctx->gmem.zsbuf_base[1], pfb->zsbuf);
+	}
 
 	if (ctx->resolve & FD_BUFFER_COLOR) {
 		for (i = 0; i < pfb->nr_cbufs; i++) {
@@ -431,7 +450,7 @@ fd3_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile)
 				continue;
 			if (!(ctx->resolve & (PIPE_CLEAR_COLOR0 << i)))
 				continue;
-			emit_gmem2mem_surf(ctx, RB_COPY_RESOLVE,
+			emit_gmem2mem_surf(ctx, RB_COPY_RESOLVE, false,
 							   ctx->gmem.cbuf_base[i], pfb->cbufs[i]);
 		}
 	}
@@ -454,6 +473,7 @@ emit_mem2gmem_surf(struct fd_context *ctx, uint32_t bases[],
 		struct pipe_surface **psurf, uint32_t bufs, uint32_t bin_w)
 {
 	struct fd_ringbuffer *ring = ctx->ring;
+	struct pipe_surface *zsbufs[2];
 
 	assert(bufs > 0);
 
@@ -464,7 +484,8 @@ emit_mem2gmem_surf(struct fd_context *ctx, uint32_t bases[],
 
 	emit_mrt(ring, bufs, psurf, bases, bin_w, false);
 
-	if (psurf[0] && psurf[0]->format == PIPE_FORMAT_Z32_FLOAT) {
+	if (psurf[0] && (psurf[0]->format == PIPE_FORMAT_Z32_FLOAT ||
+					 psurf[0]->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)) {
 		/* Depth is stored as unorm in gmem, so we have to write it in using a
 		 * special blit shader which writes depth.
 		 */
@@ -480,8 +501,18 @@ emit_mem2gmem_surf(struct fd_context *ctx, uint32_t bases[],
 				 A3XX_RB_DEPTH_INFO_DEPTH_FORMAT(DEPTHX_32));
 		OUT_RING(ring, A3XX_RB_DEPTH_PITCH(4 * ctx->gmem.bin_w));
 
-		OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(0), 1);
-		OUT_RING(ring, 0);
+		if (psurf[0]->format == PIPE_FORMAT_Z32_FLOAT) {
+			OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(0), 1);
+			OUT_RING(ring, 0);
+		} else {
+			/* The gmem_restore_tex logic will put the first buffer's stencil
+			 * as color. Supply it with the proper information to make that
+			 * happen.
+			 */
+			zsbufs[0] = zsbufs[1] = psurf[0];
+			psurf = zsbufs;
+			bufs = 2;
+		}
 	} else {
 		OUT_PKT0(ring, REG_A3XX_SP_FS_OUTPUT_REG, 1);
 		OUT_RING(ring, A3XX_SP_FS_OUTPUT_REG_MRT(bufs - 1));
@@ -509,7 +540,7 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile)
 				.half_precision = (fd3_half_precision(pfb->cbufs[0]) &&
 								   fd3_half_precision(pfb->cbufs[1]) &&
 								   fd3_half_precision(pfb->cbufs[2]) &&
-								   fd3_half_precision(pfb->cbufs[3])),
+								   fd3_half_precision(pfb->cbufs[3]))
 			},
 	};
 	float x0, y0, x1, y1;
@@ -592,6 +623,10 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile)
 			A3XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) |
 			A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));
 
+	OUT_PKT0(ring, REG_A3XX_RB_STENCIL_INFO, 2);
+	OUT_RING(ring, 0); /* RB_STENCIL_INFO */
+	OUT_RING(ring, 0); /* RB_STENCIL_PITCH */
+
 	OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
 	OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
 			A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
@@ -640,7 +675,7 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile)
 			emit.key.half_precision = false;
 		}
 		fd3_program_emit(ring, &emit, 1, &pfb->zsbuf);
-		emit_mem2gmem_surf(ctx, &gmem->zsbuf_base, &pfb->zsbuf, 1, bin_w);
+		emit_mem2gmem_surf(ctx, gmem->zsbuf_base, &pfb->zsbuf, 1, bin_w);
 	}
 
 	OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
@@ -950,14 +985,19 @@ fd3_emit_tile_renderprep(struct fd_context *ctx, struct fd_tile *tile)
 	uint32_t reg;
 
 	OUT_PKT0(ring, REG_A3XX_RB_DEPTH_INFO, 2);
-	reg = A3XX_RB_DEPTH_INFO_DEPTH_BASE(gmem->zsbuf_base);
+	reg = A3XX_RB_DEPTH_INFO_DEPTH_BASE(gmem->zsbuf_base[0]);
 	if (pfb->zsbuf) {
 		reg |= A3XX_RB_DEPTH_INFO_DEPTH_FORMAT(fd_pipe2depth(pfb->zsbuf->format));
 	}
 	OUT_RING(ring, reg);
 	if (pfb->zsbuf) {
-		uint32_t cpp = util_format_get_blocksize(pfb->zsbuf->format);
-		OUT_RING(ring, A3XX_RB_DEPTH_PITCH(cpp * gmem->bin_w));
+		struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
+		OUT_RING(ring, A3XX_RB_DEPTH_PITCH(rsc->cpp * gmem->bin_w));
+		if (rsc->stencil) {
+			OUT_PKT0(ring, REG_A3XX_RB_STENCIL_INFO, 2);
+			OUT_RING(ring, A3XX_RB_STENCIL_INFO_STENCIL_BASE(gmem->zsbuf_base[1]));
+			OUT_RING(ring, A3XX_RB_STENCIL_PITCH(rsc->stencil->cpp * gmem->bin_w));
+		}
 	} else {
 		OUT_RING(ring, 0x00000000);
 	}
diff --git a/src/gallium/drivers/freedreno/freedreno_context.c b/src/gallium/drivers/freedreno/freedreno_context.c
index 5d92da4..668ef36 100644
--- a/src/gallium/drivers/freedreno/freedreno_context.c
+++ b/src/gallium/drivers/freedreno/freedreno_context.c
@@ -121,8 +121,12 @@ fd_context_render(struct pipe_context *pctx)
 	for (i = 0; i < pfb->nr_cbufs; i++)
 		if (pfb->cbufs[i])
 			fd_resource(pfb->cbufs[i]->texture)->dirty = false;
-	if (pfb->zsbuf)
-		fd_resource(pfb->zsbuf->texture)->dirty = false;
+	if (pfb->zsbuf) {
+		rsc = fd_resource(pfb->zsbuf->texture);
+		rsc->dirty = false;
+		if (rsc->stencil)
+			rsc->stencil->dirty = false;
+	}
 
 	/* go through all the used resources and clear their reading flag */
 	LIST_FOR_EACH_ENTRY_SAFE(rsc, rsc_tmp, &ctx->used_resources, list) {
diff --git a/src/gallium/drivers/freedreno/freedreno_draw.c b/src/gallium/drivers/freedreno/freedreno_draw.c
index fed3e64..c9e317c 100644
--- a/src/gallium/drivers/freedreno/freedreno_draw.c
+++ b/src/gallium/drivers/freedreno/freedreno_draw.c
@@ -88,8 +88,12 @@ fd_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
 	}
 
 	if (fd_stencil_enabled(ctx)) {
+		struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
 		buffers |= FD_BUFFER_STENCIL;
-		fd_resource(pfb->zsbuf->texture)->dirty = true;
+		if (rsc->stencil)
+			rsc->stencil->dirty = true;
+		else
+			rsc->dirty = true;
 		ctx->gmem_reason |= FD_GMEM_STENCIL_ENABLED;
 	}
 
@@ -215,7 +219,12 @@ fd_clear(struct pipe_context *pctx, unsigned buffers,
 				fd_resource(pfb->cbufs[i]->texture)->dirty = true;
 
 	if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
-		fd_resource(pfb->zsbuf->texture)->dirty = true;
+		struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
+		if (rsc->stencil && buffers & PIPE_CLEAR_STENCIL)
+			rsc->stencil->dirty = true;
+		if (!rsc->stencil || buffers & PIPE_CLEAR_DEPTH)
+			rsc->dirty = true;
+
 		ctx->gmem_reason |= FD_GMEM_CLEARS_DEPTH_STENCIL;
 	}
 
diff --git a/src/gallium/drivers/freedreno/freedreno_gmem.c b/src/gallium/drivers/freedreno/freedreno_gmem.c
index 473d2b8..11a1b62 100644
--- a/src/gallium/drivers/freedreno/freedreno_gmem.c
+++ b/src/gallium/drivers/freedreno/freedreno_gmem.c
@@ -77,7 +77,7 @@ static uint32_t bin_width(struct fd_context *ctx)
 }
 
 static uint32_t
-total_size(uint8_t cbuf_cpp[], uint8_t zsbuf_cpp,
+total_size(uint8_t cbuf_cpp[], uint8_t zsbuf_cpp[2],
 		   uint32_t bin_w, uint32_t bin_h, struct fd_gmem_stateobj *gmem)
 {
 	uint32_t total = 0, i;
@@ -89,9 +89,14 @@ total_size(uint8_t cbuf_cpp[], uint8_t zsbuf_cpp,
 		}
 	}
 
-	if (zsbuf_cpp) {
-		gmem->zsbuf_base = align(total, 0x4000);
-		total = gmem->zsbuf_base + zsbuf_cpp * bin_w * bin_h;
+	if (zsbuf_cpp[0]) {
+		gmem->zsbuf_base[0] = align(total, 0x4000);
+		total = gmem->zsbuf_base[0] + zsbuf_cpp[0] * bin_w * bin_h;
+	}
+
+	if (zsbuf_cpp[1]) {
+		gmem->zsbuf_base[1] = align(total, 0x4000);
+		total = gmem->zsbuf_base[1] + zsbuf_cpp[1] * bin_w * bin_h;
 	}
 
 	return total;
@@ -108,13 +113,17 @@ calculate_tiles(struct fd_context *ctx)
 	uint32_t nbins_x = 1, nbins_y = 1;
 	uint32_t bin_w, bin_h;
 	uint32_t max_width = bin_width(ctx);
-	uint8_t cbuf_cpp[4] = {0}, zsbuf_cpp = 0;
+	uint8_t cbuf_cpp[4] = {0}, zsbuf_cpp[2] = {0};
 	uint32_t i, j, t, xoff, yoff;
 	uint32_t tpp_x, tpp_y;
 	bool has_zs = !!(ctx->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL));
 
-	if (has_zs)
-		zsbuf_cpp = util_format_get_blocksize(pfb->zsbuf->format);
+	if (has_zs) {
+		struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
+		zsbuf_cpp[0] = rsc->cpp;
+		if (rsc->stencil)
+			zsbuf_cpp[1] = rsc->stencil->cpp;
+	}
 	for (i = 0; i < pfb->nr_cbufs; i++) {
 		if (pfb->cbufs[i])
 			cbuf_cpp[i] = util_format_get_blocksize(pfb->cbufs[i]->format);
@@ -122,7 +131,7 @@ calculate_tiles(struct fd_context *ctx)
 			cbuf_cpp[i] = 4;
 	}
 
-	if (gmem->zsbuf_cpp == zsbuf_cpp &&
+	if (!memcmp(gmem->zsbuf_cpp, zsbuf_cpp, sizeof(zsbuf_cpp)) &&
 		!memcmp(gmem->cbuf_cpp, cbuf_cpp, sizeof(cbuf_cpp)) &&
 		!memcmp(&gmem->scissor, scissor, sizeof(gmem->scissor))) {
 		/* everything is up-to-date */
@@ -156,7 +165,7 @@ calculate_tiles(struct fd_context *ctx)
 	 * constraints:
 	 */
 	DBG("binning input: cbuf cpp: %d %d %d %d, zsbuf cpp: %d; %dx%d",
-		cbuf_cpp[0], cbuf_cpp[1], cbuf_cpp[2], cbuf_cpp[3], zsbuf_cpp,
+		cbuf_cpp[0], cbuf_cpp[1], cbuf_cpp[2], cbuf_cpp[3], zsbuf_cpp[0],
 		width, height);
 	while (total_size(cbuf_cpp, zsbuf_cpp, bin_w, bin_h, gmem) > gmem_size) {
 		if (bin_w > bin_h) {
@@ -172,7 +181,7 @@ calculate_tiles(struct fd_context *ctx)
 
 	gmem->scissor = *scissor;
 	memcpy(gmem->cbuf_cpp, cbuf_cpp, sizeof(cbuf_cpp));
-	gmem->zsbuf_cpp = zsbuf_cpp;
+	memcpy(gmem->zsbuf_cpp, zsbuf_cpp, sizeof(zsbuf_cpp));
 	gmem->bin_h = bin_h;
 	gmem->bin_w = bin_w;
 	gmem->nbins_x = nbins_x;
diff --git a/src/gallium/drivers/freedreno/freedreno_gmem.h b/src/gallium/drivers/freedreno/freedreno_gmem.h
index 81f9b6a..5867235 100644
--- a/src/gallium/drivers/freedreno/freedreno_gmem.h
+++ b/src/gallium/drivers/freedreno/freedreno_gmem.h
@@ -48,9 +48,9 @@ struct fd_tile {
 struct fd_gmem_stateobj {
 	struct pipe_scissor_state scissor;
 	uint32_t cbuf_base[4];
-	uint32_t zsbuf_base;
+	uint32_t zsbuf_base[2];
 	uint8_t cbuf_cpp[4];
-	uint8_t zsbuf_cpp;
+	uint8_t zsbuf_cpp[2];
 	uint16_t bin_h, nbins_y;
 	uint16_t bin_w, nbins_x;
 	uint16_t minx, miny;
diff --git a/src/gallium/drivers/freedreno/freedreno_resource.c b/src/gallium/drivers/freedreno/freedreno_resource.c
index e8da68e..95f79df 100644
--- a/src/gallium/drivers/freedreno/freedreno_resource.c
+++ b/src/gallium/drivers/freedreno/freedreno_resource.c
@@ -27,6 +27,7 @@
  */
 
 #include "util/u_format.h"
+#include "util/u_format_zs.h"
 #include "util/u_inlines.h"
 #include "util/u_transfer.h"
 #include "util/u_string.h"
@@ -101,16 +102,51 @@ realloc_bo(struct fd_resource *rsc, uint32_t size)
 	util_range_set_empty(&rsc->valid_buffer_range);
 }
 
+/* Currently this is only used for flushing Z32_S8 texture transfers, but
+ * eventually it should handle everything.
+ */
+static void
+fd_resource_flush(struct fd_transfer *trans, const struct pipe_box *box)
+{
+	struct fd_resource *rsc = fd_resource(trans->base.resource);
+	struct fd_resource_slice *slice = fd_resource_slice(rsc, trans->base.level);
+	struct fd_resource_slice *sslice = fd_resource_slice(rsc->stencil, trans->base.level);
+	enum pipe_format format = trans->base.resource->format;
+
+	float *depth = fd_bo_map(rsc->bo) + slice->offset +
+		(trans->base.box.y + box->y) * slice->pitch * 4 + (trans->base.box.x + box->x) * 4;
+	uint8_t *stencil = fd_bo_map(rsc->stencil->bo) + sslice->offset +
+		(trans->base.box.y + box->y) * sslice->pitch + trans->base.box.x + box->x;
+
+	assert(format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT ||
+		   format == PIPE_FORMAT_X32_S8X24_UINT);
+
+	if (format != PIPE_FORMAT_X32_S8X24_UINT)
+		util_format_z32_float_s8x24_uint_unpack_z_float(
+				depth, slice->pitch * 4,
+				trans->staging, trans->base.stride,
+				box->width, box->height);
+
+	util_format_z32_float_s8x24_uint_unpack_s_8uint(
+			stencil, sslice->pitch,
+			trans->staging, trans->base.stride,
+			box->width, box->height);
+}
+
 static void fd_resource_transfer_flush_region(struct pipe_context *pctx,
 		struct pipe_transfer *ptrans,
 		const struct pipe_box *box)
 {
 	struct fd_resource *rsc = fd_resource(ptrans->resource);
+	struct fd_transfer *trans = fd_transfer(ptrans);
 
 	if (ptrans->resource->target == PIPE_BUFFER)
 		util_range_add(&rsc->valid_buffer_range,
 					   ptrans->box.x + box->x,
 					   ptrans->box.x + box->x + box->width);
+
+	if (trans->staging)
+		fd_resource_flush(trans, box);
 }
 
 static void
@@ -119,8 +155,19 @@ fd_resource_transfer_unmap(struct pipe_context *pctx,
 {
 	struct fd_context *ctx = fd_context(pctx);
 	struct fd_resource *rsc = fd_resource(ptrans->resource);
-	if (!(ptrans->usage & PIPE_TRANSFER_UNSYNCHRONIZED))
+	struct fd_transfer *trans = fd_transfer(ptrans);
+
+	if (trans->staging && !(ptrans->usage & PIPE_TRANSFER_FLUSH_EXPLICIT)) {
+		struct pipe_box box;
+		u_box_2d(0, 0, ptrans->box.width, ptrans->box.height, &box);
+		fd_resource_flush(trans, &box);
+	}
+
+	if (!(ptrans->usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
 		fd_bo_cpu_fini(rsc->bo);
+		if (rsc->stencil)
+			fd_bo_cpu_fini(rsc->stencil->bo);
+	}
 
 	util_range_add(&rsc->valid_buffer_range,
 				   ptrans->box.x,
@@ -128,6 +175,9 @@ fd_resource_transfer_unmap(struct pipe_context *pctx,
 
 	pipe_resource_reference(&ptrans->resource, NULL);
 	util_slab_free(&ctx->transfer_pool, ptrans);
+
+	if (trans->staging)
+		free(trans->staging);
 }
 
 static void *
@@ -148,7 +198,8 @@ fd_resource_transfer_map(struct pipe_context *pctx,
 	char *buf;
 	int ret = 0;
 
-	DBG("prsc=%p, level=%u, usage=%x", prsc, level, usage);
+	DBG("prsc=%p, level=%u, usage=%x, box=%dx%d+%d,%d", prsc, level, usage,
+		box->width, box->height, box->x, box->y);
 
 	ptrans = util_slab_alloc(&ctx->transfer_pool);
 	if (!ptrans)
@@ -173,6 +224,8 @@ fd_resource_transfer_map(struct pipe_context *pctx,
 
 	if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) {
 		realloc_bo(rsc, fd_bo_size(rsc->bo));
+		if (rsc->stencil)
+			realloc_bo(rsc->stencil, fd_bo_size(rsc->stencil->bo));
 		fd_invalidate_resource(ctx, prsc);
 	} else if ((usage & PIPE_TRANSFER_WRITE) &&
 			   prsc->target == PIPE_BUFFER &&
@@ -185,7 +238,7 @@ fd_resource_transfer_map(struct pipe_context *pctx,
 		/* If the GPU is writing to the resource, or if it is reading from the
 		 * resource and we're trying to write to it, flush the renders.
 		 */
-		if (rsc->dirty ||
+		if (rsc->dirty || (rsc->stencil && rsc->stencil->dirty) ||
 			((ptrans->usage & PIPE_TRANSFER_WRITE) && rsc->reading))
 			fd_context_render(pctx);
 
@@ -204,8 +257,6 @@ fd_resource_transfer_map(struct pipe_context *pctx,
 		return NULL;
 	}
 
-	*pptrans = ptrans;
-
 	if (rsc->layer_first) {
 		offset = slice->offset +
 			box->y / util_format_get_blockheight(format) * ptrans->stride +
@@ -218,6 +269,47 @@ fd_resource_transfer_map(struct pipe_context *pctx,
 			box->z * slice->size0;
 	}
 
+	if (prsc->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT ||
+		prsc->format == PIPE_FORMAT_X32_S8X24_UINT) {
+		trans->base.stride = trans->base.box.width * rsc->cpp * 2;
+		trans->staging = malloc(trans->base.stride * trans->base.box.height);
+		if (!trans->staging)
+			goto fail;
+
+		/* if we're not discarding the whole range (or resource), we must copy
+		 * the real data in.
+		 */
+		if (!(usage & (PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE |
+					   PIPE_TRANSFER_DISCARD_RANGE))) {
+			struct fd_resource_slice *sslice =
+				fd_resource_slice(rsc->stencil, level);
+			void *sbuf = fd_bo_map(rsc->stencil->bo);
+			if (!sbuf)
+				goto fail;
+
+			float *depth = (float *)(buf + slice->offset +
+				box->y * slice->pitch * 4 + box->x * 4);
+			uint8_t *stencil = sbuf + sslice->offset +
+				box->y * sslice->pitch + box->x;
+
+			if (format != PIPE_FORMAT_X32_S8X24_UINT)
+				util_format_z32_float_s8x24_uint_pack_z_float(
+						trans->staging, trans->base.stride,
+						depth, slice->pitch * 4,
+						box->width, box->height);
+
+			util_format_z32_float_s8x24_uint_pack_s_8uint(
+					trans->staging, trans->base.stride,
+					stencil, sslice->pitch,
+					box->width, box->height);
+		}
+
+		buf = trans->staging;
+		offset = 0;
+	}
+
+	*pptrans = ptrans;
+
 	return buf + offset;
 
 fail:
@@ -347,7 +439,10 @@ fd_resource_create(struct pipe_screen *pscreen,
 	util_range_init(&rsc->valid_buffer_range);
 
 	rsc->base.vtbl = &fd_resource_vtbl;
-	rsc->cpp = util_format_get_blocksize(tmpl->format);
+	if (tmpl->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)
+		rsc->cpp = util_format_get_blocksize(PIPE_FORMAT_Z32_FLOAT);
+	else
+		rsc->cpp = util_format_get_blocksize(tmpl->format);
 
 	assert(rsc->cpp);
 
@@ -374,6 +469,19 @@ fd_resource_create(struct pipe_screen *pscreen,
 	if (!rsc->bo)
 		goto fail;
 
+	/* There is no native Z32F_S8 sampling or rendering format, so this must
+	 * be emulated via two separate textures. The depth texture still keeps
+	 * its Z32F_S8 format though, and we also keep a reference to a separate
+	 * S8 texture.
+	 */
+	if (tmpl->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) {
+		struct pipe_resource stencil = *tmpl;
+		stencil.format = PIPE_FORMAT_S8_UINT;
+		rsc->stencil = fd_resource(fd_resource_create(pscreen, &stencil));
+		if (!rsc->stencil)
+			goto fail;
+	}
+
 	return prsc;
 fail:
 	fd_resource_destroy(pscreen, prsc);
@@ -567,7 +675,7 @@ fd_flush_resource(struct pipe_context *pctx, struct pipe_resource *prsc)
 {
 	struct fd_resource *rsc = fd_resource(prsc);
 
-	if (rsc->dirty)
+	if (rsc->dirty || (rsc->stencil && rsc->stencil->dirty))
 		fd_context_render(pctx);
 }
 
diff --git a/src/gallium/drivers/freedreno/freedreno_resource.h b/src/gallium/drivers/freedreno/freedreno_resource.h
index f80acb1..fdf3b8c 100644
--- a/src/gallium/drivers/freedreno/freedreno_resource.h
+++ b/src/gallium/drivers/freedreno/freedreno_resource.h
@@ -72,6 +72,9 @@ struct fd_resource {
 	/* buffer range that has been initialized */
 	struct util_range valid_buffer_range;
 
+	/* reference to the resource holding stencil data for a z32_s8 texture */
+	struct fd_resource *stencil;
+
 	struct list_head list;
 };
 
-- 
2.0.5



More information about the Freedreno mailing list