[Mesa-dev] [PATCH 3/3] radeonsi: More assorted depth/stencil changes ported from r600g.

Michel Dänzer michel at daenzer.net
Thu Jan 17 10:51:45 PST 2013


From: Michel Dänzer <michel.daenzer at amd.com>

[ Squashed port of the following r600g commits: - Michel Dänzer ]

commit 428e37c2da420f7dc14a2ea265f2387270f9bee1
Author: Marek Olšák <maraeo at gmail.com>
Date:   Tue Oct 2 22:02:54 2012 +0200

    r600g: add in-place DB decompression and texturing with DB tiling

    The decompression is done in-place and only the compressed tiles are
    decompressed. Note: R6xx-R7xx can do that only with Z16 and Z32F.

    The texture unit is programmed to use non-displayable tiling and depth
    ordering of samples, so that it can fetch the texture in the native DB format.

    The latest version of the libdrm surface allocator is required for stencil
    texturing to work. The old one didn't create the mipmap tree correctly.
    We need a separate mipmap tree for stencil, because the stencil mipmap
    offsets are not really depth offsets/4.

    There are still some known bugs, but this should save some memory and it also
    improves performance a little bit in Lightsmark (especially with low
    resolutions; tested with Radeon HD 5000).

    The DB->CB copy is still used for transfers.

commit e2f623f1d6da9bc987582ff68d0471061ae44030
Author: Marek Olšák <maraeo at gmail.com>
Date:   Sat Jul 28 13:55:59 2012 +0200

    r600g: don't decompress depth or stencil if there isn't any

commit 43e226b6efb77db2247741cc2057d9625a2cfa05
Author: Marek Olšák <maraeo at gmail.com>
Date:   Wed Jul 18 00:32:50 2012 +0200

    r600g: optimize uploading depth textures

    Make it only copy the portion of a depth texture being uploaded and
    not the whole 2D layer.

    There is also a little code cleanup.

commit b242adbe5cfa165b252064a1ea36f802d8251ef1
Author: Marek Olšák <maraeo at gmail.com>
Date:   Wed Jul 18 00:17:46 2012 +0200

    r600g: remove needless wrapper r600_texture_depth_flush

commit 611dd529425281d73f1f0ad2000362d4a5525a25
Author: Marek Olšák <maraeo at gmail.com>
Date:   Wed Jul 18 00:05:14 2012 +0200

    r600g: init_flushed_depth_texture should be able to report errors

commit 80755ff56317446a8c89e611edc1fdf320d6779b
Author: Marek Olšák <maraeo at gmail.com>
Date:   Sat Jul 14 17:06:27 2012 +0200

    r600g: properly track which textures are depth

    This fixes the issue with have_depth_texture never being set to false.

commit fe1fd675565231b49d3ac53d0b4bec39d8bc6781
Author: Marek Olšák <maraeo at gmail.com>
Date:   Sun Jul 8 03:10:37 2012 +0200

    r600g: don't flush depth textures set as colorbuffers

    The only case in which a depth buffer can be set as a color buffer is when flushing.

    That wasn't always the case, but now this code isn't required anymore.

commit 5a17d8318ec2c20bf86275044dc8f715105a88e7
Author: Marek Olšák <maraeo at gmail.com>
Date:   Sun Jul 8 02:14:18 2012 +0200

    r600g: flush depth textures bound to vertex shaders

    This was missing/broken. There are also minor code cleanups.

commit dee58f94af833906863b0ff2955b20f3ab407e63
Author: Marek Olšák <maraeo at gmail.com>
Date:   Sun Jul 8 01:54:24 2012 +0200

    r600g: do fine-grained depth texture flushing

    - maintain a mask of which mipmap levels are dirty (instead of one big flag)
    - only flush what was requested at a given point and not the whole resource
      (most often only one level and one layer has to be flushed)

Signed-off-by: Michel Dänzer <michel.daenzer at amd.com>
---
 src/gallium/drivers/radeonsi/r600_blit.c     |  145 ++++++++++++++++++--------
 src/gallium/drivers/radeonsi/r600_resource.h |    7 +-
 src/gallium/drivers/radeonsi/r600_texture.c  |  106 +++++++++++--------
 src/gallium/drivers/radeonsi/radeonsi_pipe.c |    5 +-
 src/gallium/drivers/radeonsi/radeonsi_pipe.h |   14 ++-
 src/gallium/drivers/radeonsi/si_state.c      |   91 +++++++++-------
 src/gallium/drivers/radeonsi/si_state_draw.c |   19 ++--
 7 files changed, 251 insertions(+), 136 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/r600_blit.c b/src/gallium/drivers/radeonsi/r600_blit.c
index d600962..b7aedb1 100644
--- a/src/gallium/drivers/radeonsi/r600_blit.c
+++ b/src/gallium/drivers/radeonsi/r600_blit.c
@@ -98,39 +98,63 @@ static void r600_blitter_end(struct pipe_context *ctx)
 	r600_context_queries_resume(rctx);
 }
 
-static unsigned u_num_layers(struct pipe_resource *r, unsigned level)
+static unsigned u_max_layer(struct pipe_resource *r, unsigned level)
 {
 	switch (r->target) {
 	case PIPE_TEXTURE_CUBE:
-		return 6;
+		return 6 - 1;
 	case PIPE_TEXTURE_3D:
-		return u_minify(r->depth0, level);
+		return u_minify(r->depth0, level) - 1;
 	case PIPE_TEXTURE_1D_ARRAY:
-		return r->array_size;
 	case PIPE_TEXTURE_2D_ARRAY:
-		return r->array_size;
+		return r->array_size - 1;
 	default:
-		return 1;
+		return 0;
 	}
 }
 
 void si_blit_uncompress_depth(struct pipe_context *ctx,
 		struct r600_resource_texture *texture,
-		struct r600_resource_texture *staging)
+		struct r600_resource_texture *staging,
+		unsigned first_level, unsigned last_level,
+		unsigned first_layer, unsigned last_layer)
 {
 	struct r600_context *rctx = (struct r600_context *)ctx;
-	unsigned layer, level;
+	unsigned layer, level, checked_last_layer, max_layer;
 	float depth = 1.0f;
+	const struct util_format_description *desc;
+	void *custom_dsa;
 	struct r600_resource_texture *flushed_depth_texture = staging ?
 			staging : texture->flushed_depth_texture;
 
-	if (!staging && !texture->dirty_db)
+	if (!staging && !texture->dirty_db_mask)
 		return;
 
-	for (level = 0; level <= texture->resource.b.b.last_level; level++) {
-		unsigned num_layers = u_num_layers(&texture->resource.b.b, level);
+	desc = util_format_description(flushed_depth_texture->resource.b.b.format);
+	switch (util_format_has_depth(desc) | util_format_has_stencil(desc) << 1) {
+	default:
+		assert(!"No depth or stencil to uncompress");
+	case 3:
+		custom_dsa = rctx->custom_dsa_flush_depth_stencil;
+		break;
+	case 2:
+		custom_dsa = rctx->custom_dsa_flush_stencil;
+		break;
+	case 1:
+		custom_dsa = rctx->custom_dsa_flush_depth;
+		break;
+	}
+
+	for (level = first_level; level <= last_level; level++) {
+		if (!staging && !(texture->dirty_db_mask & (1 << level)))
+			continue;
 
-		for (layer = 0; layer < num_layers; layer++) {
+		/* The smaller the mipmap level, the fewer layers there are
+		 * as far as 3D textures are concerned. */
+		max_layer = u_max_layer(&texture->resource.b.b, level);
+		checked_last_layer = last_layer < max_layer ? last_layer : max_layer;
+
+		for (layer = first_layer; layer <= checked_last_layer; layer++) {
 			struct pipe_surface *zsurf, *cbsurf, surf_tmpl;
 
 			surf_tmpl.format = texture->real_format;
@@ -145,53 +169,84 @@ void si_blit_uncompress_depth(struct pipe_context *ctx,
 					(struct pipe_resource*)flushed_depth_texture, &surf_tmpl);
 
 			r600_blitter_begin(ctx, R600_DECOMPRESS);
-			util_blitter_custom_depth_stencil(rctx->blitter, zsurf, cbsurf, ~0, rctx->custom_dsa_flush, depth);
+			util_blitter_custom_depth_stencil(rctx->blitter, zsurf, cbsurf, ~0, custom_dsa, depth);
 			r600_blitter_end(ctx);
 
 			pipe_surface_reference(&zsurf, NULL);
 			pipe_surface_reference(&cbsurf, NULL);
 		}
-	}
 
-	if (!staging)
-		texture->dirty_db = FALSE;
+		/* The texture will always be dirty if some layers aren't flushed.
+		 * I don't think this case can occur though. */
+		if (!staging && first_layer == 0 && last_layer == max_layer) {
+			texture->dirty_db_mask &= ~(1 << level);
+		}
+	}
 }
 
-void si_flush_depth_textures(struct r600_context *rctx)
+static void si_blit_decompress_depth_in_place(struct r600_context *rctx,
+                                              struct r600_resource_texture *texture,
+                                              unsigned first_level, unsigned last_level,
+                                              unsigned first_layer, unsigned last_layer)
 {
-	unsigned int i;
+	struct pipe_surface *zsurf, surf_tmpl = {{0}};
+	unsigned layer, max_layer, checked_last_layer, level;
 
-	/* FIXME: This handles fragment shader textures only. */
+	surf_tmpl.format = texture->resource.b.b.format;
 
-	for (i = 0; i < rctx->ps_samplers.n_views; ++i) {
-		struct si_pipe_sampler_view *view;
-		struct r600_resource_texture *tex;
+	for (level = first_level; level <= last_level; level++) {
+		if (!(texture->dirty_db_mask & (1 << level)))
+			continue;
 
-		view = rctx->ps_samplers.views[i];
-		if (!view) continue;
+		surf_tmpl.u.tex.level = level;
 
-		tex = (struct r600_resource_texture *)view->base.texture;
-		if (!tex->is_depth)
-			continue;
+		/* The smaller the mipmap level, the fewer layers there are
+		 * as far as 3D textures are concerned. */
+		max_layer = u_max_layer(&texture->resource.b.b, level);
+		checked_last_layer = last_layer < max_layer ? last_layer : max_layer;
 
-		if (tex->is_flushing_texture)
-			continue;
+		for (layer = first_layer; layer <= checked_last_layer; layer++) {
+			surf_tmpl.u.tex.first_layer = layer;
+			surf_tmpl.u.tex.last_layer = layer;
+
+			zsurf = rctx->context.create_surface(&rctx->context, &texture->resource.b.b, &surf_tmpl);
 
-		si_blit_uncompress_depth(&rctx->context, tex, NULL);
+			r600_blitter_begin(&rctx->context, R600_DECOMPRESS);
+			util_blitter_custom_depth_stencil(rctx->blitter, zsurf, NULL, ~0,
+							  rctx->custom_dsa_flush_inplace,
+							  1.0f);
+			r600_blitter_end(&rctx->context);
+
+			pipe_surface_reference(&zsurf, NULL);
+		}
+
+		/* The texture will always be dirty if some layers aren't flushed.
+		 * I don't think this case occurs often though. */
+		if (first_layer == 0 && last_layer == max_layer) {
+			texture->dirty_db_mask &= ~(1 << level);
+		}
 	}
+}
+
+void si_flush_depth_textures(struct r600_context *rctx,
+			     struct r600_textures_info *textures)
+{
+	unsigned i;
 
-	/* also check CB here */
-	for (i = 0; i < rctx->framebuffer.nr_cbufs; i++) {
+	for (i = 0; i < textures->n_views; ++i) {
+		struct pipe_sampler_view *view;
 		struct r600_resource_texture *tex;
-		tex = (struct r600_resource_texture *)rctx->framebuffer.cbufs[i]->texture;
 
-		if (!tex->is_depth)
-			continue;
+		view = &textures->views[i]->base;
+		if (!view) continue;
 
-		if (tex->is_flushing_texture)
+		tex = (struct r600_resource_texture *)view->texture;
+		if (!tex->is_depth || tex->is_flushing_texture)
 			continue;
 
-		si_blit_uncompress_depth(&rctx->context, tex, NULL);
+		si_blit_decompress_depth_in_place(rctx, tex,
+						  view->u.tex.first_level, view->u.tex.last_level,
+						  0, u_max_layer(&tex->resource.b.b, view->u.tex.first_level));
 	}
 }
 
@@ -322,8 +377,12 @@ static void r600_resource_copy_region(struct pipe_context *ctx,
 		return;
 	}
 
-	if (rsrc->is_depth && !rsrc->is_flushing_texture)
-		r600_texture_depth_flush(ctx, src, NULL);
+	/* This must be done before entering u_blitter to avoid recursion. */
+	if (rsrc->is_depth && !rsrc->is_flushing_texture) {
+		si_blit_decompress_depth_in_place(rctx, rsrc,
+						  src_level, src_level,
+						  src_box->z, src_box->z + src_box->depth - 1);
+	}
 
 	restore_orig[0] = restore_orig[1] = FALSE;
 
@@ -376,8 +435,12 @@ static void si_blit(struct pipe_context *ctx,
 		return;
 	}
 
-	if (rsrc->is_depth && !rsrc->is_flushing_texture)
-		r600_texture_depth_flush(ctx, info->src.resource, NULL);
+	if (rsrc->is_depth && !rsrc->is_flushing_texture) {
+		si_blit_decompress_depth_in_place(rctx, rsrc,
+						  info->src.level, info->src.level,
+						  info->src.box.z,
+						  info->src.box.z + info->src.box.depth - 1);
+	}
 
 	r600_blitter_begin(ctx, R600_BLIT);
 	util_blitter_blit(rctx->blitter, info);
diff --git a/src/gallium/drivers/radeonsi/r600_resource.h b/src/gallium/drivers/radeonsi/r600_resource.h
index 8d80935..f7b60ed 100644
--- a/src/gallium/drivers/radeonsi/r600_resource.h
+++ b/src/gallium/drivers/radeonsi/r600_resource.h
@@ -49,7 +49,7 @@ struct r600_resource_texture {
 
 	unsigned			pitch_override;
 	unsigned			is_depth;
-	unsigned			dirty_db;
+	unsigned			dirty_db_mask; /* each bit says if that miplevel is dirty */
 	struct r600_resource_texture	*flushed_depth_texture;
 	boolean				is_flushing_texture;
 	struct radeon_surface		surface;
@@ -68,12 +68,9 @@ struct pipe_resource *si_texture_from_handle(struct pipe_screen *screen,
 					     const struct pipe_resource *base,
 					     struct winsys_handle *whandle);
 
-void r600_init_flushed_depth_texture(struct pipe_context *ctx,
+bool r600_init_flushed_depth_texture(struct pipe_context *ctx,
 				     struct pipe_resource *texture,
 				     struct r600_resource_texture **staging);
-void r600_texture_depth_flush(struct pipe_context *ctx,
-			      struct pipe_resource *texture,
-			      struct r600_resource_texture **staging);
 
 
 struct r600_context;
diff --git a/src/gallium/drivers/radeonsi/r600_texture.c b/src/gallium/drivers/radeonsi/r600_texture.c
index b790d8d..2bbf2eb 100644
--- a/src/gallium/drivers/radeonsi/r600_texture.c
+++ b/src/gallium/drivers/radeonsi/r600_texture.c
@@ -331,13 +331,15 @@ static void *si_texture_transfer_map(struct pipe_context *ctx,
 		*/
 		struct r600_resource_texture *staging_depth;
 
-		r600_texture_depth_flush(ctx, texture, &staging_depth);
-		if (!staging_depth) {
+		if (!r600_init_flushed_depth_texture(ctx, texture, &staging_depth)) {
 			R600_ERR("failed to create temporary texture to hold untiled copy\n");
 			pipe_resource_reference(&trans->transfer.resource, NULL);
 			FREE(trans);
 			return NULL;
 		}
+		si_blit_uncompress_depth(ctx, rtex, staging_depth,
+					 level, level,
+					 box->z, box->z + box->depth - 1);
 		trans->transfer.stride = staging_depth->surface.level[level].pitch_bytes;
 		trans->offset = r600_texture_get_offset(staging_depth, level, box->z);
 
@@ -424,22 +426,13 @@ static void si_texture_transfer_unmap(struct pipe_context *ctx,
 	}
 	rctx->ws->buffer_unmap(buf);
 
-	if (rtex->is_depth) {
-		if ((transfer->usage & PIPE_TRANSFER_WRITE) && rtransfer->staging) {
-			struct pipe_box sbox;
-
-			sbox.x = sbox.y = sbox.z = 0;
-			sbox.width = texture->width0;
-			sbox.height = texture->height0;
-			/* XXX that might be wrong */
-			sbox.depth = 1;
-
-			ctx->resource_copy_region(ctx, texture, 0, 0, 0, 0,
-						  &si_resource(rtransfer->staging)->b.b, 0,
-						  &sbox);
-		}
-	} else if (rtransfer->staging) {
-		if (transfer->usage & PIPE_TRANSFER_WRITE) {
+	if ((transfer->usage & PIPE_TRANSFER_WRITE) && rtransfer->staging) {
+		if (rtex->is_depth) {
+			ctx->resource_copy_region(ctx, texture, transfer->level,
+						  transfer->box.x, transfer->box.y, transfer->box.z,
+						  &si_resource(rtransfer->staging)->b.b, transfer->level,
+						  &transfer->box);
+		} else {
 			r600_copy_from_staging_texture(ctx, rtransfer);
 		}
 	}
@@ -461,6 +454,8 @@ static const struct u_resource_vtbl r600_texture_vtbl =
 	NULL	/* transfer_inline_write */
 };
 
+DEBUG_GET_ONCE_BOOL_OPTION(print_texdepth, "RADEON_PRINT_TEXDEPTH", FALSE);
+
 static struct r600_resource_texture *
 r600_texture_create_object(struct pipe_screen *screen,
 			   const struct pipe_resource *base,
@@ -515,6 +510,51 @@ r600_texture_create_object(struct pipe_screen *screen,
 		resource->domains = RADEON_DOMAIN_GTT | RADEON_DOMAIN_VRAM;
 	}
 
+	if (debug_get_option_print_texdepth() && rtex->is_depth) {
+		printf("Texture: npix_x=%u, npix_y=%u, npix_z=%u, blk_w=%u, "
+		       "blk_h=%u, blk_d=%u, array_size=%u, last_level=%u, "
+		       "bpe=%u, nsamples=%u, flags=%u\n",
+		       rtex->surface.npix_x, rtex->surface.npix_y,
+		       rtex->surface.npix_z, rtex->surface.blk_w,
+		       rtex->surface.blk_h, rtex->surface.blk_d,
+		       rtex->surface.array_size, rtex->surface.last_level,
+		       rtex->surface.bpe, rtex->surface.nsamples,
+		       rtex->surface.flags);
+		if (rtex->surface.flags & RADEON_SURF_ZBUFFER) {
+			for (int i = 0; i <= rtex->surface.last_level; i++) {
+				printf("  Z %i: offset=%llu, slice_size=%llu, npix_x=%u, "
+				       "npix_y=%u, npix_z=%u, nblk_x=%u, nblk_y=%u, "
+				       "nblk_z=%u, pitch_bytes=%u, mode=%u\n",
+				       i, rtex->surface.level[i].offset,
+				       rtex->surface.level[i].slice_size,
+				       rtex->surface.level[i].npix_x,
+				       rtex->surface.level[i].npix_y,
+				       rtex->surface.level[i].npix_z,
+				       rtex->surface.level[i].nblk_x,
+				       rtex->surface.level[i].nblk_y,
+				       rtex->surface.level[i].nblk_z,
+				       rtex->surface.level[i].pitch_bytes,
+				       rtex->surface.level[i].mode);
+			}
+		}
+		if (rtex->surface.flags & RADEON_SURF_SBUFFER) {
+			for (int i = 0; i <= rtex->surface.last_level; i++) {
+				printf("  S %i: offset=%llu, slice_size=%llu, npix_x=%u, "
+				       "npix_y=%u, npix_z=%u, nblk_x=%u, nblk_y=%u, "
+				       "nblk_z=%u, pitch_bytes=%u, mode=%u\n",
+				       i, rtex->surface.stencil_level[i].offset,
+				       rtex->surface.stencil_level[i].slice_size,
+				       rtex->surface.stencil_level[i].npix_x,
+				       rtex->surface.stencil_level[i].npix_y,
+				       rtex->surface.stencil_level[i].npix_z,
+				       rtex->surface.stencil_level[i].nblk_x,
+				       rtex->surface.stencil_level[i].nblk_y,
+				       rtex->surface.stencil_level[i].nblk_z,
+				       rtex->surface.stencil_level[i].pitch_bytes,
+				       rtex->surface.stencil_level[i].mode);
+			}
+		}
+	}
 	return rtex;
 }
 
@@ -622,7 +662,7 @@ struct pipe_resource *si_texture_from_handle(struct pipe_screen *screen,
 								  stride, 0, buf, FALSE, &surface);
 }
 
-void r600_init_flushed_depth_texture(struct pipe_context *ctx,
+bool r600_init_flushed_depth_texture(struct pipe_context *ctx,
 				     struct pipe_resource *texture,
 				     struct r600_resource_texture **staging)
 {
@@ -632,7 +672,7 @@ void r600_init_flushed_depth_texture(struct pipe_context *ctx,
 			staging : &rtex->flushed_depth_texture;
 
 	if (!staging && rtex->flushed_depth_texture)
-		return; /* it's ready */
+		return true; /* it's ready */
 
 	resource.target = texture->target;
 	resource.format = texture->format;
@@ -649,36 +689,16 @@ void r600_init_flushed_depth_texture(struct pipe_context *ctx,
 	if (staging)
 		resource.flags |= R600_RESOURCE_FLAG_TRANSFER;
 	else
-		rtex->dirty_db = TRUE;
+		rtex->dirty_db_mask = (1 << (resource.last_level+1)) - 1;
 
 	*flushed_depth_texture = (struct r600_resource_texture *)ctx->screen->resource_create(ctx->screen, &resource);
 	if (*flushed_depth_texture == NULL) {
 		R600_ERR("failed to create temporary texture to hold flushed depth\n");
-		return;
+		return false;
 	}
 
 	(*flushed_depth_texture)->is_flushing_texture = TRUE;
-}
-
-void r600_texture_depth_flush(struct pipe_context *ctx,
-			      struct pipe_resource *texture,
-			      struct r600_resource_texture **staging)
-{
-	struct r600_resource_texture *rtex = (struct r600_resource_texture*)texture;
-
-	r600_init_flushed_depth_texture(ctx, texture, staging);
-
-	if (staging) {
-		if (!*staging)
-			return; /* error */
-
-		si_blit_uncompress_depth(ctx, rtex, *staging);
-	} else {
-		if (!rtex->flushed_depth_texture)
-			return; /* error */
-
-		si_blit_uncompress_depth(ctx, rtex, NULL);
-	}
+	return true;
 }
 
 void si_init_surface_functions(struct r600_context *r600)
diff --git a/src/gallium/drivers/radeonsi/radeonsi_pipe.c b/src/gallium/drivers/radeonsi/radeonsi_pipe.c
index cbb3bc4..2f97609 100644
--- a/src/gallium/drivers/radeonsi/radeonsi_pipe.c
+++ b/src/gallium/drivers/radeonsi/radeonsi_pipe.c
@@ -179,7 +179,10 @@ static void r600_destroy_context(struct pipe_context *context)
 	if (rctx->dummy_pixel_shader) {
 		rctx->context.delete_fs_state(&rctx->context, rctx->dummy_pixel_shader);
 	}
-	rctx->context.delete_depth_stencil_alpha_state(&rctx->context, rctx->custom_dsa_flush);
+	rctx->context.delete_depth_stencil_alpha_state(&rctx->context, rctx->custom_dsa_flush_depth_stencil);
+	rctx->context.delete_depth_stencil_alpha_state(&rctx->context, rctx->custom_dsa_flush_depth);
+	rctx->context.delete_depth_stencil_alpha_state(&rctx->context, rctx->custom_dsa_flush_stencil);
+	rctx->context.delete_depth_stencil_alpha_state(&rctx->context, rctx->custom_dsa_flush_inplace);
 	util_unreference_framebuffer_state(&rctx->framebuffer);
 
 	util_blitter_destroy(rctx->blitter);
diff --git a/src/gallium/drivers/radeonsi/radeonsi_pipe.h b/src/gallium/drivers/radeonsi/radeonsi_pipe.h
index a0abdec..d0f04f4 100644
--- a/src/gallium/drivers/radeonsi/radeonsi_pipe.h
+++ b/src/gallium/drivers/radeonsi/radeonsi_pipe.h
@@ -87,6 +87,7 @@ struct r600_textures_info {
 	struct si_pipe_sampler_view	*views[NUM_TEX_UNITS];
 	struct si_pipe_sampler_state	*samplers[NUM_TEX_UNITS];
 	unsigned			n_views;
+	uint32_t			depth_texture_mask; /* which textures are depth */
 	unsigned			n_samplers;
 	bool				samplers_dirty;
 	bool				is_array_sampler[NUM_TEX_UNITS];
@@ -114,7 +115,10 @@ struct r600_context {
 	struct blitter_context		*blitter;
 	enum radeon_family		family;
 	enum chip_class			chip_class;
-	void				*custom_dsa_flush;
+	void				*custom_dsa_flush_depth_stencil;
+	void				*custom_dsa_flush_depth;
+	void				*custom_dsa_flush_stencil;
+	void				*custom_dsa_flush_inplace;
 	struct r600_screen		*screen;
 	struct radeon_winsys		*ws;
 	struct si_vertex_element	*vertex_elements;
@@ -141,7 +145,6 @@ struct r600_context {
 
 	struct u_upload_mgr	        *uploader;
 	struct util_slab_mempool	pool_transfers;
-	boolean				have_depth_texture, have_depth_fb;
 
 	unsigned default_ps_gprs, default_vs_gprs;
 
@@ -187,8 +190,11 @@ struct r600_context {
 void si_init_blit_functions(struct r600_context *rctx);
 void si_blit_uncompress_depth(struct pipe_context *ctx,
 		struct r600_resource_texture *texture,
-		struct r600_resource_texture *staging);
-void si_flush_depth_textures(struct r600_context *rctx);
+		struct r600_resource_texture *staging,
+		unsigned first_level, unsigned last_level,
+		unsigned first_layer, unsigned last_layer);
+void si_flush_depth_textures(struct r600_context *rctx,
+			     struct r600_textures_info *textures);
 
 /* r600_buffer.c */
 bool si_init_resource(struct r600_screen *rscreen,
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 3908b77..cd40e1a 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -650,7 +650,8 @@ static void si_delete_dsa_state(struct pipe_context *ctx, void *state)
 	si_pm4_delete_state(rctx, dsa, (struct si_state_dsa *)state);
 }
 
-static void *si_create_db_flush_dsa(struct r600_context *rctx)
+static void *si_create_db_flush_dsa(struct r600_context *rctx, bool copy_depth,
+				    bool copy_stencil)
 {
 	struct pipe_depth_stencil_alpha_state dsa;
         struct si_state_dsa *state;
@@ -658,10 +659,22 @@ static void *si_create_db_flush_dsa(struct r600_context *rctx)
 	memset(&dsa, 0, sizeof(dsa));
 
 	state = rctx->context.create_depth_stencil_alpha_state(&rctx->context, &dsa);
-	si_pm4_set_reg(&state->pm4, R_028000_DB_RENDER_CONTROL,
-		       S_028000_DEPTH_COPY(1) |
-		       S_028000_STENCIL_COPY(1) |
-		       S_028000_COPY_CENTROID(1));
+	if (copy_depth || copy_stencil) {
+		si_pm4_set_reg(&state->pm4, R_028000_DB_RENDER_CONTROL,
+			       S_028000_DEPTH_COPY(copy_depth) |
+			       S_028000_STENCIL_COPY(copy_stencil) |
+			       S_028000_COPY_CENTROID(1));
+	} else {
+		si_pm4_set_reg(&state->pm4, R_028000_DB_RENDER_CONTROL,
+			       S_028000_DEPTH_COMPRESS_DISABLE(1) |
+			       S_028000_STENCIL_COMPRESS_DISABLE(1));
+		si_pm4_set_reg(&state->pm4, R_02800C_DB_RENDER_OVERRIDE,
+			       S_02800C_FORCE_HIZ_ENABLE(V_02800C_FORCE_DISABLE) |
+			       S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) |
+			       S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE) |
+			       S_02800C_DISABLE_TILE_RATE_TILES(1));
+	}
+
         return state;
 }
 
@@ -1581,16 +1594,6 @@ static void si_cb(struct r600_context *rctx, struct si_pm4_state *pm4,
 	surf = (struct r600_surface *)state->cbufs[cb];
 	rtex = (struct r600_resource_texture*)state->cbufs[cb]->texture;
 
-	if (rtex->is_depth)
-		rctx->have_depth_fb = TRUE;
-
-	if (rtex->is_depth && !rtex->is_flushing_texture) {
-		r600_init_flushed_depth_texture(&rctx->context,
-				state->cbufs[cb]->texture, NULL);
-		rtex = rtex->flushed_depth_texture;
-		assert(rtex);
-	}
-
 	offset = rtex->surface.level[level].offset;
 	if (rtex->surface.level[level].mode < RADEON_SURF_MODE_1D) {
 		offset += rtex->surface.level[level].slice_size *
@@ -1786,7 +1789,6 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
 	util_copy_framebuffer_state(&rctx->framebuffer, state);
 
 	/* build states */
-	rctx->have_depth_fb = 0;
 	rctx->export_16bpc = 0;
 	for (int i = 0; i < state->nr_cbufs; i++) {
 		si_cb(rctx, pm4, state, i);
@@ -2041,11 +2043,12 @@ static struct pipe_sampler_view *si_create_sampler_view(struct pipe_context *ctx
 {
 	struct si_pipe_sampler_view *view = CALLOC_STRUCT(si_pipe_sampler_view);
 	struct r600_resource_texture *tmp = (struct r600_resource_texture*)texture;
-	const struct util_format_description *desc = util_format_description(state->format);
+	const struct util_format_description *desc;
 	unsigned format, num_format;
 	uint32_t pitch = 0;
 	unsigned char state_swizzle[4], swizzle[4];
 	unsigned height, depth, width;
+	enum pipe_format pipe_format = state->format;
 	int first_non_void;
 	uint64_t va;
 
@@ -2064,9 +2067,26 @@ static struct pipe_sampler_view *si_create_sampler_view(struct pipe_context *ctx
 	state_swizzle[1] = state->swizzle_g;
 	state_swizzle[2] = state->swizzle_b;
 	state_swizzle[3] = state->swizzle_a;
+
+	/* Texturing with separate depth and stencil. */
+	if (tmp->is_depth && !tmp->is_flushing_texture) {
+		switch (pipe_format) {
+		case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+			pipe_format = PIPE_FORMAT_Z32_FLOAT;
+			break;
+		case PIPE_FORMAT_X24S8_UINT:
+		case PIPE_FORMAT_S8X24_UINT:
+		case PIPE_FORMAT_X32_S8X24_UINT:
+			pipe_format = PIPE_FORMAT_S8_UINT;
+			break;
+		default:;
+		}
+	}
+
+	desc = util_format_description(pipe_format);
 	util_format_compose_swizzles(desc->swizzle, state_swizzle, swizzle);
 
-	first_non_void = util_format_get_first_non_void_channel(state->format);
+	first_non_void = util_format_get_first_non_void_channel(pipe_format);
 	switch (desc->channel[first_non_void].type) {
 	case UTIL_FORMAT_TYPE_FLOAT:
 		num_format = V_008F14_IMG_NUM_FORMAT_FLOAT;
@@ -2079,21 +2099,11 @@ static struct pipe_sampler_view *si_create_sampler_view(struct pipe_context *ctx
 		num_format = V_008F14_IMG_NUM_FORMAT_UNORM;
 	}
 
-	format = si_translate_texformat(ctx->screen, state->format, desc, first_non_void);
+	format = si_translate_texformat(ctx->screen, pipe_format, desc, first_non_void);
 	if (format == ~0) {
 		format = 0;
 	}
 
-	if (tmp->is_depth && !tmp->is_flushing_texture) {
-		r600_init_flushed_depth_texture(ctx, texture, NULL);
-		tmp = tmp->flushed_depth_texture;
-		if (!tmp) {
-			FREE(view);
-			return NULL;
-		}
-		texture = &tmp->resource.b.b;
-	}
-
 	view->resource = &tmp->resource;
 
 	/* not supported any more */
@@ -2102,7 +2112,7 @@ static struct pipe_sampler_view *si_create_sampler_view(struct pipe_context *ctx
 	width = tmp->surface.level[0].npix_x;
 	height = tmp->surface.level[0].npix_y;
 	depth = tmp->surface.level[0].npix_z;
-	pitch = tmp->surface.level[0].nblk_x * util_format_get_blockwidth(state->format);
+	pitch = tmp->surface.level[0].nblk_x * util_format_get_blockwidth(pipe_format);
 
 	if (texture->target == PIPE_TEXTURE_1D_ARRAY) {
 	        height = 1;
@@ -2207,8 +2217,6 @@ static struct si_pm4_state *si_set_sampler_view(struct r600_context *rctx,
 	struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
 	int i, j;
 
-	rctx->have_depth_texture = FALSE;
-
 	if (!count)
 		goto out;
 
@@ -2220,11 +2228,19 @@ static struct si_pm4_state *si_set_sampler_view(struct r600_context *rctx,
 			(struct pipe_sampler_view **)&samplers->views[i],
 			views[i]);
 
-		if (resource[i]) {
+		if (views[i]) {
 			struct r600_resource_texture *rtex =
-				(struct r600_resource_texture *)views[i]->texture;
-			rctx->have_depth_texture |= rtex->is_depth && !rtex->is_flushing_texture;
+				(struct r600_resource_texture*)views[i]->texture;
+
+			if (rtex->is_depth && !rtex->is_flushing_texture) {
+				samplers->depth_texture_mask |= 1 << i;
+			} else {
+				samplers->depth_texture_mask &= ~(1 << i);
+			}
+
 			si_pm4_add_bo(pm4, resource[i]->resource, RADEON_USAGE_READ);
+		} else {
+			samplers->depth_texture_mask &= ~(1 << i);
 		}
 
 		for (j = 0; j < Elements(resource[i]->state); ++j) {
@@ -2546,7 +2562,10 @@ void si_init_state_functions(struct r600_context *rctx)
 	rctx->context.create_depth_stencil_alpha_state = si_create_dsa_state;
 	rctx->context.bind_depth_stencil_alpha_state = si_bind_dsa_state;
 	rctx->context.delete_depth_stencil_alpha_state = si_delete_dsa_state;
-	rctx->custom_dsa_flush = si_create_db_flush_dsa(rctx);
+	rctx->custom_dsa_flush_depth_stencil = si_create_db_flush_dsa(rctx, true, true);
+	rctx->custom_dsa_flush_depth = si_create_db_flush_dsa(rctx, true, false);
+	rctx->custom_dsa_flush_stencil = si_create_db_flush_dsa(rctx, false, true);
+	rctx->custom_dsa_flush_inplace = si_create_db_flush_dsa(rctx, false, false);
 
 	rctx->context.set_clip_state = si_set_clip_state;
 	rctx->context.set_scissor_state = si_set_scissor_state;
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 6178d26..ef94eae 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -376,8 +376,13 @@ static void si_update_derived_state(struct r600_context *rctx)
 	unsigned ps_dirty = 0;
 
 	if (!rctx->blitter->running) {
-		if (rctx->have_depth_fb || rctx->have_depth_texture)
-			si_flush_depth_textures(rctx);
+		/* Flush depth textures which need to be flushed. */
+		if (rctx->vs_samplers.depth_texture_mask) {
+			si_flush_depth_textures(rctx, &rctx->vs_samplers);
+		}
+		if (rctx->ps_samplers.depth_texture_mask) {
+			si_flush_depth_textures(rctx, &rctx->ps_samplers);
+		}
 	}
 
 	si_shader_select(ctx, rctx->ps_shader, &ps_dirty);
@@ -580,10 +585,12 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 
 	rctx->flags |= R600_CONTEXT_DST_CACHES_DIRTY;
 
-	if (rctx->framebuffer.zsbuf)
-	{
-		struct pipe_resource *tex = rctx->framebuffer.zsbuf->texture;
-		((struct r600_resource_texture *)tex)->dirty_db = TRUE;
+	/* Set the depth buffer as dirty. */
+	if (rctx->framebuffer.zsbuf) {
+		struct pipe_surface *surf = rctx->framebuffer.zsbuf;
+		struct r600_resource_texture *rtex = (struct r600_resource_texture *)surf->texture;
+
+		rtex->dirty_db_mask |= 1 << surf->u.tex.level;
 	}
 
 	pipe_resource_reference(&ib.buffer, NULL);
-- 
1.7.10.4



More information about the mesa-dev mailing list