[Mesa-dev] [PATCH] radeonsi: implement TC-compatible HTILE

Nicolai Hähnle nhaehnle at gmail.com
Thu Oct 13 15:48:23 UTC 2016


On 12.10.2016 15:54, Marek Olšák wrote:
> From: Marek Olšák <marek.olsak at amd.com>
>
> This is done so that decompress blits aren't needed and depth texturing
> needs less memory bandwidth.
>
> Z16 and Z24 are promoted to Z32_FLOAT by the driver, because TC-compatible
> HTILE only supports Z32_FLOAT. This doubles the memory footprint for Z16.
> The format promotion is not visible to state trackers.
>
> This is part of TC-compatible renderbuffer compression, which has 3 parts:
> DCC, HTILE, FMASK. Only TC-compatible FMASK compression is missing now.
>
> I don't see a measurable increase in performance though.
>
> (I tested Talos Principle and DiRT: Showdown. The latter improved by
>  0.5%, which is almost noise; it originally used layered Z16, so at
>  least we know that Z16 promoted to Z32F isn't slower now.)

Reviewed-by: Nicolai Hähnle <nicolai.haehnle at amd.com>

> ---
>  src/gallium/drivers/radeon/r600_pipe_common.h  |  3 ++
>  src/gallium/drivers/radeon/r600_texture.c      | 67 ++++++++++++++++++++++----
>  src/gallium/drivers/radeon/radeon_winsys.h     |  4 ++
>  src/gallium/drivers/radeonsi/si_blit.c         | 11 ++++-
>  src/gallium/drivers/radeonsi/si_descriptors.c  |  7 ++-
>  src/gallium/drivers/radeonsi/si_shader.c       | 18 ++++++-
>  src/gallium/drivers/radeonsi/si_state.c        | 39 +++++++++++++--
>  src/gallium/drivers/radeonsi/si_state_draw.c   |  3 +-
>  src/gallium/winsys/amdgpu/drm/amdgpu_surface.c | 57 ++++++++++++++++++++--
>  9 files changed, 185 insertions(+), 24 deletions(-)
>
> diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
> index 290b228..5cfcad6 100644
> --- a/src/gallium/drivers/radeon/r600_pipe_common.h
> +++ b/src/gallium/drivers/radeon/r600_pipe_common.h
> @@ -238,27 +238,29 @@ struct r600_cmask_info {
>  	unsigned yalign;
>  	unsigned slice_tile_max;
>  	unsigned base_address_reg;
>  };
>
>  struct r600_htile_info {
>  	unsigned pitch;
>  	unsigned height;
>  	unsigned xalign;
>  	unsigned yalign;
> +	unsigned alignment;
>  };
>
>  struct r600_texture {
>  	struct r600_resource		resource;
>
>  	uint64_t			size;
>  	unsigned			num_level0_transfers;
> +	enum pipe_format		db_render_format;
>  	bool				is_depth;
>  	bool				db_compatible;
>  	bool				can_sample_z;
>  	bool				can_sample_s;
>  	unsigned			dirty_level_mask; /* each bit says if that mipmap is compressed */
>  	unsigned			stencil_dirty_level_mask; /* each bit says if that mipmap is compressed */
>  	struct r600_texture		*flushed_depth_texture;
>  	struct radeon_surf		surface;
>
>  	/* Colorbuffer compression and fast clear. */
> @@ -266,20 +268,21 @@ struct r600_texture {
>  	struct r600_cmask_info		cmask;
>  	struct r600_resource		*cmask_buffer;
>  	uint64_t			dcc_offset; /* 0 = disabled */
>  	unsigned			cb_color_info; /* fast clear enable bit */
>  	unsigned			color_clear_value[2];
>  	unsigned			last_msaa_resolve_target_micro_mode;
>
>  	/* Depth buffer compression and fast clear. */
>  	struct r600_htile_info		htile;
>  	struct r600_resource		*htile_buffer;
> +	bool				tc_compatible_htile;
>  	bool				depth_cleared; /* if it was cleared at least once */
>  	float				depth_clear_value;
>  	bool				stencil_cleared; /* if it was cleared at least once */
>  	uint8_t				stencil_clear_value;
>
>  	bool				non_disp_tiling; /* R600-Cayman only */
>
>  	/* Whether the texture is a displayable back buffer and needs DCC
>  	 * decompression, which is expensive. Therefore, it's enabled only
>  	 * if statistics suggest that it will pay off and it's allocated
> diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c
> index 57cdbcf..625d091 100644
> --- a/src/gallium/drivers/radeon/r600_texture.c
> +++ b/src/gallium/drivers/radeon/r600_texture.c
> @@ -185,21 +185,22 @@ static unsigned r600_texture_get_offset(struct r600_texture *rtex, unsigned leve
>  	return rtex->surface.level[level].offset +
>  	       box->z * rtex->surface.level[level].slice_size +
>  	       box->y / util_format_get_blockheight(format) * rtex->surface.level[level].pitch_bytes +
>  	       box->x / util_format_get_blockwidth(format) * util_format_get_blocksize(format);
>  }
>
>  static int r600_init_surface(struct r600_common_screen *rscreen,
>  			     struct radeon_surf *surface,
>  			     const struct pipe_resource *ptex,
>  			     unsigned array_mode,
> -			     bool is_flushed_depth)
> +			     bool is_flushed_depth,
> +			     bool tc_compatible_htile)
>  {
>  	const struct util_format_description *desc =
>  		util_format_description(ptex->format);
>  	bool is_depth, is_stencil;
>
>  	is_depth = util_format_has_depth(desc);
>  	is_stencil = util_format_has_stencil(desc);
>
>  	surface->npix_x = ptex->width0;
>  	surface->npix_y = ptex->height0;
> @@ -249,25 +250,36 @@ static int r600_init_surface(struct r600_common_screen *rscreen,
>  		surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_CUBEMAP, TYPE);
>  		break;
>  	case PIPE_BUFFER:
>  	default:
>  		return -EINVAL;
>  	}
>
>  	if (!is_flushed_depth && is_depth) {
>  		surface->flags |= RADEON_SURF_ZBUFFER;
>
> +		if (tc_compatible_htile &&
> +		    array_mode == RADEON_SURF_MODE_2D) {
> +			/* TC-compatible HTILE only supports Z32_FLOAT.
> +			 * Promote Z16 to Z32. DB->CB copies will convert
> +			 * the format for transfers.
> +			 */
> +			surface->bpe = 4;
> +			surface->flags |= RADEON_SURF_TC_COMPATIBLE_HTILE;
> +		}
> +
>  		if (is_stencil) {
>  			surface->flags |= RADEON_SURF_SBUFFER |
>  					  RADEON_SURF_HAS_SBUFFER_MIPTREE;
>  		}
>  	}
> +
>  	if (rscreen->chip_class >= SI) {
>  		surface->flags |= RADEON_SURF_HAS_TILE_MODE_INDEX;
>  	}
>
>  	if (rscreen->chip_class >= VI &&
>  	    (ptex->flags & R600_RESOURCE_FLAG_DISABLE_DCC ||
>  	     ptex->format == PIPE_FORMAT_R9G9B9E5_FLOAT))
>  		surface->flags |= RADEON_SURF_DISABLE_DCC;
>
>  	if (ptex->bind & PIPE_BIND_SCANOUT) {
> @@ -897,43 +909,57 @@ static unsigned r600_texture_get_htile_size(struct r600_common_screen *rscreen,
>  	slice_elements = (width * height) / (8 * 8);
>  	slice_bytes = slice_elements * 4;
>
>  	pipe_interleave_bytes = rscreen->info.pipe_interleave_bytes;
>  	base_align = num_pipes * pipe_interleave_bytes;
>
>  	rtex->htile.pitch = width;
>  	rtex->htile.height = height;
>  	rtex->htile.xalign = cl_width * 8;
>  	rtex->htile.yalign = cl_height * 8;
> +	rtex->htile.alignment = base_align;
>
>  	return (util_max_layer(&rtex->resource.b.b, 0) + 1) *
>  		align(slice_bytes, base_align);
>  }
>
>  static void r600_texture_allocate_htile(struct r600_common_screen *rscreen,
>  					struct r600_texture *rtex)
>  {
> -	unsigned htile_size = r600_texture_get_htile_size(rscreen, rtex);
> +	uint64_t htile_size, alignment;
> +	uint32_t clear_value;
> +
> +	if (rtex->tc_compatible_htile) {
> +		htile_size = rtex->surface.htile_size;
> +		alignment = rtex->surface.htile_alignment;
> +		clear_value = 0x0000030F;
> +	} else {
> +		htile_size = r600_texture_get_htile_size(rscreen, rtex);
> +		alignment = rtex->htile.alignment;
> +		clear_value = 0;
> +	}
>
>  	if (!htile_size)
>  		return;
>
>  	rtex->htile_buffer = (struct r600_resource*)
> -			     pipe_buffer_create(&rscreen->b, PIPE_BIND_CUSTOM,
> -						PIPE_USAGE_DEFAULT, htile_size);
> +			     r600_aligned_buffer_create(&rscreen->b, PIPE_BIND_CUSTOM,
> +							PIPE_USAGE_DEFAULT,
> +							htile_size, alignment);
>  	if (rtex->htile_buffer == NULL) {
>  		/* this is not a fatal error as we can still keep rendering
>  		 * without htile buffer */
>  		R600_ERR("Failed to create buffer object for htile buffer.\n");
>  	} else {
> -		r600_screen_clear_buffer(rscreen, &rtex->htile_buffer->b.b, 0,
> -					 htile_size, 0, R600_COHERENCY_NONE);
> +		r600_screen_clear_buffer(rscreen, &rtex->htile_buffer->b.b,
> +					 0, htile_size, clear_value,
> +					 R600_COHERENCY_NONE);
>  	}
>  }
>
>  void r600_print_texture_info(struct r600_texture *rtex, FILE *f)
>  {
>  	int i;
>
>  	fprintf(f, "  Info: npix_x=%u, npix_y=%u, npix_z=%u, blk_w=%u, "
>  		"blk_h=%u, blk_d=%u, array_size=%u, last_level=%u, "
>  		"bpe=%u, nsamples=%u, flags=0x%x, %s\n",
> @@ -960,24 +986,25 @@ void r600_print_texture_info(struct r600_texture *rtex, FILE *f)
>
>  	if (rtex->cmask.size)
>  		fprintf(f, "  CMask: offset=%"PRIu64", size=%"PRIu64", alignment=%u, pitch=%u, "
>  			"height=%u, xalign=%u, yalign=%u, slice_tile_max=%u\n",
>  			rtex->cmask.offset, rtex->cmask.size, rtex->cmask.alignment,
>  			rtex->cmask.pitch, rtex->cmask.height, rtex->cmask.xalign,
>  			rtex->cmask.yalign, rtex->cmask.slice_tile_max);
>
>  	if (rtex->htile_buffer)
>  		fprintf(f, "  HTile: size=%u, alignment=%u, pitch=%u, height=%u, "
> -			"xalign=%u, yalign=%u\n",
> +			"xalign=%u, yalign=%u, TC_compatible = %u\n",
>  			rtex->htile_buffer->b.b.width0,
>  			rtex->htile_buffer->buf->alignment, rtex->htile.pitch,
> -			rtex->htile.height, rtex->htile.xalign, rtex->htile.yalign);
> +			rtex->htile.height, rtex->htile.xalign, rtex->htile.yalign,
> +			rtex->tc_compatible_htile);
>
>  	if (rtex->dcc_offset) {
>  		fprintf(f, "  DCC: offset=%"PRIu64", size=%"PRIu64", alignment=%"PRIu64"\n",
>  			rtex->dcc_offset, rtex->surface.dcc_size,
>  			rtex->surface.dcc_alignment);
>  		for (i = 0; i <= rtex->surface.last_level; i++)
>  			fprintf(f, "  DCCLevel[%i]: enabled=%u, offset=%"PRIu64", "
>  				"fast_clear_size=%"PRIu64"\n",
>  				i, rtex->surface.level[i].dcc_enabled,
>  				rtex->surface.level[i].dcc_offset,
> @@ -1047,20 +1074,30 @@ r600_texture_create_object(struct pipe_screen *screen,
>
>  	/* don't include stencil-only formats which we don't support for rendering */
>  	rtex->is_depth = util_format_has_depth(util_format_description(rtex->resource.b.b.format));
>
>  	rtex->surface = *surface;
>  	if (r600_setup_surface(screen, rtex, pitch_in_bytes_override, offset)) {
>  		FREE(rtex);
>  		return NULL;
>  	}
>
> +	rtex->tc_compatible_htile = rtex->surface.htile_size != 0;
> +	assert(!!(rtex->surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE) ==
> +	       rtex->tc_compatible_htile);
> +
> +	/* TC-compatible HTILE only supports Z32_FLOAT. */
> +	if (rtex->tc_compatible_htile)
> +		rtex->db_render_format = PIPE_FORMAT_Z32_FLOAT;
> +	else
> +		rtex->db_render_format = base->format;
> +
>  	/* Tiled depth textures utilize the non-displayable tile order.
>  	 * This must be done after r600_setup_surface.
>  	 * Applies to R600-Cayman. */
>  	rtex->non_disp_tiling = rtex->is_depth && rtex->surface.level[0].mode >= RADEON_SURF_MODE_1D;
>  	/* Applies to GCN. */
>  	rtex->last_msaa_resolve_target_micro_mode = rtex->surface.micro_tile_mode;
>
>  	/* Disable separate DCC at the beginning. DRI2 doesn't reuse buffers
>  	 * between frames, so the only thing that can enable separate DCC
>  	 * with DRI2 is multiple slow clears within a frame.
> @@ -1234,25 +1271,34 @@ static unsigned r600_choose_tiling(struct r600_common_screen *rscreen,
>
>  	/* The allocator will switch to 1D if needed. */
>  	return RADEON_SURF_MODE_2D;
>  }
>
>  struct pipe_resource *r600_texture_create(struct pipe_screen *screen,
>  					  const struct pipe_resource *templ)
>  {
>  	struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
>  	struct radeon_surf surface = {0};
> +	bool is_flushed_depth = templ->flags & R600_RESOURCE_FLAG_FLUSHED_DEPTH;
> +	bool tc_compatible_htile =
> +		rscreen->chip_class >= VI &&
> +		(templ->flags & PIPE_RESOURCE_FLAG_TEXTURING_MORE_LIKELY) &&
> +		!(rscreen->debug_flags & DBG_NO_HYPERZ) &&
> +		!is_flushed_depth &&
> +		templ->nr_samples <= 1 && /* TC-compat HTILE is less efficient with MSAA */
> +		util_format_is_depth_or_stencil(templ->format);
> +
>  	int r;
>
>  	r = r600_init_surface(rscreen, &surface, templ,
>  			      r600_choose_tiling(rscreen, templ),
> -			      templ->flags & R600_RESOURCE_FLAG_FLUSHED_DEPTH);
> +			      is_flushed_depth, tc_compatible_htile);
>  	if (r) {
>  		return NULL;
>  	}
>  	r = rscreen->ws->surface_best(rscreen->ws, &surface);
>  	if (r) {
>  		return NULL;
>  	}
>  	return (struct pipe_resource *)r600_texture_create_object(screen, templ, 0,
>  								  0, NULL, &surface);
>  }
> @@ -1289,21 +1335,22 @@ static struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen
>  	surface.mtilea = metadata.mtilea;
>  	surface.num_banks = metadata.num_banks;
>
>  	if (metadata.macrotile == RADEON_LAYOUT_TILED)
>  		array_mode = RADEON_SURF_MODE_2D;
>  	else if (metadata.microtile == RADEON_LAYOUT_TILED)
>  		array_mode = RADEON_SURF_MODE_1D;
>  	else
>  		array_mode = RADEON_SURF_MODE_LINEAR_ALIGNED;
>
> -	r = r600_init_surface(rscreen, &surface, templ, array_mode, false);
> +	r = r600_init_surface(rscreen, &surface, templ, array_mode,
> +			      false, false);
>  	if (r) {
>  		return NULL;
>  	}
>
>  	if (metadata.scanout)
>  		surface.flags |= RADEON_SURF_SCANOUT;
>
>  	rtex = r600_texture_create_object(screen, templ, stride,
>  					  offset, buf, &surface);
>  	if (!rtex)
> diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h
> index 7146737..8946209 100644
> --- a/src/gallium/drivers/radeon/radeon_winsys.h
> +++ b/src/gallium/drivers/radeon/radeon_winsys.h
> @@ -271,20 +271,21 @@ enum radeon_feature_id {
>  #define     RADEON_SURF_MODE_1D                     2
>  #define     RADEON_SURF_MODE_2D                     3
>  #define RADEON_SURF_SCANOUT                     (1 << 16)
>  #define RADEON_SURF_ZBUFFER                     (1 << 17)
>  #define RADEON_SURF_SBUFFER                     (1 << 18)
>  #define RADEON_SURF_Z_OR_SBUFFER                (RADEON_SURF_ZBUFFER | RADEON_SURF_SBUFFER)
>  #define RADEON_SURF_HAS_SBUFFER_MIPTREE         (1 << 19)
>  #define RADEON_SURF_HAS_TILE_MODE_INDEX         (1 << 20)
>  #define RADEON_SURF_FMASK                       (1 << 21)
>  #define RADEON_SURF_DISABLE_DCC                 (1 << 22)
> +#define RADEON_SURF_TC_COMPATIBLE_HTILE         (1 << 23)
>
>  #define RADEON_SURF_GET(v, field)   (((v) >> RADEON_SURF_ ## field ## _SHIFT) & RADEON_SURF_ ## field ## _MASK)
>  #define RADEON_SURF_SET(v, field)   (((v) & RADEON_SURF_ ## field ## _MASK) << RADEON_SURF_ ## field ## _SHIFT)
>  #define RADEON_SURF_CLR(v, field)   ((v) & ~(RADEON_SURF_ ## field ## _MASK << RADEON_SURF_ ## field ## _SHIFT))
>
>  struct radeon_surf_level {
>      uint64_t                    offset;
>      uint64_t                    slice_size;
>      uint32_t                    npix_x;
>      uint32_t                    npix_y;
> @@ -337,20 +338,23 @@ struct radeon_surf {
>      /* Whether the depth miptree or stencil miptree as used by the DB are
>       * adjusted from their TC compatible form to ensure depth/stencil
>       * compatibility. If either is true, the corresponding plane cannot be
>       * sampled from.
>       */
>      bool                        depth_adjusted;
>      bool                        stencil_adjusted;
>
>      uint64_t                    dcc_size;
>      uint64_t                    dcc_alignment;
> +    /* TC-compatible HTILE only. */
> +    uint64_t                    htile_size;
> +    uint64_t                    htile_alignment;
>  };
>
>  struct radeon_bo_list_item {
>      uint64_t bo_size;
>      uint64_t vm_address;
>      uint64_t priority_usage; /* mask of (1 << RADEON_PRIO_*) */
>  };
>
>  struct radeon_winsys {
>      /**
> diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
> index c143601..db41f56 100644
> --- a/src/gallium/drivers/radeonsi/si_blit.c
> +++ b/src/gallium/drivers/radeonsi/si_blit.c
> @@ -325,20 +325,22 @@ si_flush_depth_texture(struct si_context *sctx,
>  		levels_s = level_mask & tex->stencil_dirty_level_mask;
>
>  		if (levels_s) {
>  			if (r600_can_sample_zs(tex, true))
>  				inplace_planes |= PIPE_MASK_S;
>  			else
>  				copy_planes |= PIPE_MASK_S;
>  		}
>  	}
>
> +	assert(!tex->tc_compatible_htile || levels_z == 0);
> +
>  	/* We may have to allocate the flushed texture here when called from
>  	 * si_decompress_subresource.
>  	 */
>  	if (copy_planes &&
>  	    (tex->flushed_depth_texture ||
>  	     r600_init_flushed_depth_texture(&sctx->b.b, &tex->resource.b.b, NULL))) {
>  		struct r600_texture *dst = tex->flushed_depth_texture;
>  		unsigned fully_copied_levels;
>  		unsigned levels = 0;
>
> @@ -692,35 +694,40 @@ static void si_clear(struct pipe_context *ctx, unsigned buffers,
>  			tex = (struct r600_texture *)fb->cbufs[i]->texture;
>  			if (tex->fmask.size == 0)
>  				tex->dirty_level_mask &= ~(1 << fb->cbufs[i]->u.tex.level);
>  		}
>  	}
>
>  	if (zstex && zstex->htile_buffer &&
>  	    zsbuf->u.tex.level == 0 &&
>  	    zsbuf->u.tex.first_layer == 0 &&
>  	    zsbuf->u.tex.last_layer == util_max_layer(&zstex->resource.b.b, 0)) {
> -		if (buffers & PIPE_CLEAR_DEPTH) {
> +		/* TC-compatible HTILE only supports depth clears to 0 or 1. */
> +		if (buffers & PIPE_CLEAR_DEPTH &&
> +		    (!zstex->tc_compatible_htile ||
> +		     depth == 0 || depth == 1)) {
>  			/* Need to disable EXPCLEAR temporarily if clearing
>  			 * to a new value. */
>  			if (!zstex->depth_cleared || zstex->depth_clear_value != depth) {
>  				sctx->db_depth_disable_expclear = true;
>  			}
>
>  			zstex->depth_clear_value = depth;
>  			sctx->framebuffer.dirty_zsbuf = true;
>  			si_mark_atom_dirty(sctx, &sctx->framebuffer.atom); /* updates DB_DEPTH_CLEAR */
>  			sctx->db_depth_clear = true;
>  			si_mark_atom_dirty(sctx, &sctx->db_render_state);
>  		}
>
> -		if (buffers & PIPE_CLEAR_STENCIL) {
> +		/* TC-compatible HTILE only supports stencil clears to 0. */
> +		if (buffers & PIPE_CLEAR_STENCIL &&
> +		    (!zstex->tc_compatible_htile || stencil == 0)) {
>  			stencil &= 0xff;
>
>  			/* Need to disable EXPCLEAR temporarily if clearing
>  			 * to a new value. */
>  			if (!zstex->stencil_cleared || zstex->stencil_clear_value != stencil) {
>  				sctx->db_stencil_disable_expclear = true;
>  			}
>
>  			zstex->stencil_clear_value = stencil;
>  			sctx->framebuffer.dirty_zsbuf = true;
> diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
> index 350242a..19cae65 100644
> --- a/src/gallium/drivers/radeonsi/si_descriptors.c
> +++ b/src/gallium/drivers/radeonsi/si_descriptors.c
> @@ -392,20 +392,23 @@ void si_set_mutable_tex_desc_fields(struct r600_texture *tex,
>  	state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40);
>  	state[3] |= S_008F1C_TILING_INDEX(si_tile_mode_index(tex, base_level,
>  							     is_stencil));
>  	state[4] |= S_008F20_PITCH(pitch - 1);
>
>  	if (tex->dcc_offset && tex->surface.level[first_level].dcc_enabled) {
>  		state[6] |= S_008F28_COMPRESSION_EN(1);
>  		state[7] = ((!tex->dcc_separate_buffer ? tex->resource.gpu_address : 0) +
>  			    tex->dcc_offset +
>  			    base_level_info->dcc_offset) >> 8;
> +	} else if (tex->tc_compatible_htile) {
> +		state[6] |= S_008F28_COMPRESSION_EN(1);
> +		state[7] = tex->htile_buffer->gpu_address >> 8;
>  	}
>  }
>
>  static void si_set_sampler_view(struct si_context *sctx,
>  				unsigned shader,
>  				unsigned slot, struct pipe_sampler_view *view,
>  				bool disallow_early_out)
>  {
>  	struct si_sampler_views *views = &sctx->samplers[shader].views;
>  	struct si_sampler_view *rview = (struct si_sampler_view*)view;
> @@ -501,22 +504,24 @@ static void si_set_sampler_views(struct pipe_context *ctx,
>  			samplers->compressed_colortex_mask &= ~(1u << slot);
>  			si_set_sampler_view(sctx, shader, slot, NULL, false);
>  			continue;
>  		}
>
>  		si_set_sampler_view(sctx, shader, slot, views[i], false);
>
>  		if (views[i]->texture && views[i]->texture->target != PIPE_BUFFER) {
>  			struct r600_texture *rtex =
>  				(struct r600_texture*)views[i]->texture;
> +			struct si_sampler_view *rview = (struct si_sampler_view *)views[i];
>
> -			if (rtex->db_compatible) {
> +			if (rtex->db_compatible &&
> +			    (!rtex->tc_compatible_htile || rview->is_stencil_sampler)) {
>  				samplers->depth_texture_mask |= 1u << slot;
>  			} else {
>  				samplers->depth_texture_mask &= ~(1u << slot);
>  			}
>  			if (is_compressed_colortex(rtex)) {
>  				samplers->compressed_colortex_mask |= 1u << slot;
>  			} else {
>  				samplers->compressed_colortex_mask &= ~(1u << slot);
>  			}
>
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
> index 49d4121..621d57f 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -4535,26 +4535,40 @@ static void tex_fetch_args(
>  	}
>
>  	/* Pack LOD bias value */
>  	if (opcode == TGSI_OPCODE_TXB)
>  		address[count++] = coords[3];
>  	if (opcode == TGSI_OPCODE_TXB2)
>  		address[count++] = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X);
>
>  	/* Pack depth comparison value */
>  	if (tgsi_is_shadow_target(target) && opcode != TGSI_OPCODE_LODQ) {
> +		LLVMValueRef z;
> +
>  		if (target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
> -			address[count++] = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X);
> +			z = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X);
>  		} else {
>  			assert(ref_pos >= 0);
> -			address[count++] = coords[ref_pos];
> +			z = coords[ref_pos];
>  		}
> +
> +		/* TC-compatible HTILE promotes Z16 and Z24 to Z32_FLOAT,
> +		 * so the depth comparison value isn't clamped for Z16 and
> +		 * Z24 anymore. Do it manually here.
> +		 *
> +		 * It's unnecessary if the original texture format was
> +		 * Z32_FLOAT, but we don't know that here.
> +		 */
> +		if (ctx->screen->b.chip_class == VI)
> +			z = radeon_llvm_saturate(bld_base, z);
> +
> +		address[count++] = z;
>  	}
>
>  	/* Pack user derivatives */
>  	if (opcode == TGSI_OPCODE_TXD) {
>  		int param, num_src_deriv_channels;
>
>  		switch (target) {
>  		case TGSI_TEXTURE_3D:
>  			num_src_deriv_channels = 3;
>  			num_deriv_channels = 3;
> diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
> index ad65fc2..b23749c 100644
> --- a/src/gallium/drivers/radeonsi/si_state.c
> +++ b/src/gallium/drivers/radeonsi/si_state.c
> @@ -679,20 +679,23 @@ static void si_emit_clip_regs(struct si_context *sctx, struct r600_atom *atom)
>  /*
>   * inferred state between framebuffer and rasterizer
>   */
>  static void si_update_poly_offset_state(struct si_context *sctx)
>  {
>  	struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
>
>  	if (!rs || !rs->uses_poly_offset || !sctx->framebuffer.state.zsbuf)
>  		return;
>
> +	/* Use the user format, not db_render_format, so that the polygon
> +	 * offset behaves as expected by applications.
> +	 */
>  	switch (sctx->framebuffer.state.zsbuf->texture->format) {
>  	case PIPE_FORMAT_Z16_UNORM:
>  		si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[0]);
>  		break;
>  	default: /* 24-bit */
>  		si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[1]);
>  		break;
>  	case PIPE_FORMAT_Z32_FLOAT:
>  	case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
>  		si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[2]);
> @@ -2133,32 +2136,32 @@ static void si_init_depth_surface(struct si_context *sctx,
>  				  struct r600_surface *surf)
>  {
>  	struct r600_texture *rtex = (struct r600_texture*)surf->base.texture;
>  	unsigned level = surf->base.u.tex.level;
>  	struct radeon_surf_level *levelinfo = &rtex->surface.level[level];
>  	unsigned format;
>  	uint32_t z_info, s_info, db_depth_info;
>  	uint64_t z_offs, s_offs;
>  	uint32_t db_htile_data_base, db_htile_surface;
>
> -	format = si_translate_dbformat(rtex->resource.b.b.format);
> +	format = si_translate_dbformat(rtex->db_render_format);
>
>  	if (format == V_028040_Z_INVALID) {
>  		R600_ERR("Invalid DB format: %d, disabling DB.\n", rtex->resource.b.b.format);
>  	}
>  	assert(format != V_028040_Z_INVALID);
>
>  	s_offs = z_offs = rtex->resource.gpu_address;
>  	z_offs += rtex->surface.level[level].offset;
>  	s_offs += rtex->surface.stencil_level[level].offset;
>
> -	db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(1);
> +	db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(!rtex->tc_compatible_htile);
>
>  	z_info = S_028040_FORMAT(format);
>  	if (rtex->resource.b.b.nr_samples > 1) {
>  		z_info |= S_028040_NUM_SAMPLES(util_logbase2(rtex->resource.b.b.nr_samples));
>  	}
>
>  	if (rtex->surface.flags & RADEON_SURF_SBUFFER)
>  		s_info = S_028044_FORMAT(V_028044_STENCIL_8);
>  	else
>  		s_info = S_028044_FORMAT(V_028044_STENCIL_INVALID);
> @@ -2201,27 +2204,51 @@ static void si_init_depth_surface(struct si_context *sctx,
>  			 * uses. Problem was reproduced on Verde, Bonaire,
>  			 * Tonga, and Carrizo.
>  			 *
>  			 * Disabling EXPCLEAR works around the problem.
>  			 *
>  			 * Check piglit's arb_texture_multisample-stencil-clear
>  			 * test if you want to try changing this.
>  			 */
>  			if (rtex->resource.b.b.nr_samples <= 1)
>  				s_info |= S_028044_ALLOW_EXPCLEAR(1);
> -		} else
> -			/* Use all of the htile_buffer for depth if there's no stencil. */
> +		} else if (!rtex->tc_compatible_htile) {
> +			/* Use all of the htile_buffer for depth if there's no stencil.
> +			 * This must not be set when TC-compatible HTILE is enabled
> +			 * due to a hw bug.
> +			 */
>  			s_info |= S_028044_TILE_STENCIL_DISABLE(1);
> +		}
>
>  		uint64_t va = rtex->htile_buffer->gpu_address;
>  		db_htile_data_base = va >> 8;
>  		db_htile_surface = S_028ABC_FULL_CACHE(1);
> +
> +		if (rtex->tc_compatible_htile) {
> +			db_htile_surface |= S_028ABC_TC_COMPATIBLE(1);
> +
> +			switch (rtex->resource.b.b.nr_samples) {
> +			case 0:
> +			case 1:
> +				z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(5);
> +				break;
> +			case 2:
> +			case 4:
> +				z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(3);
> +				break;
> +			case 8:
> +				z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(2);
> +				break;
> +			default:
> +				assert(0);
> +			}
> +		}
>  	} else {
>  		db_htile_data_base = 0;
>  		db_htile_surface = 0;
>  	}
>
>  	assert(levelinfo->nblk_x % 8 == 0 && levelinfo->nblk_y % 8 == 0);
>
>  	surf->db_depth_view = S_028008_SLICE_START(surf->base.u.tex.first_layer) |
>  			      S_028008_SLICE_MAX(surf->base.u.tex.last_layer);
>  	surf->db_htile_data_base = db_htile_data_base;
> @@ -2349,20 +2376,21 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
>
>  		if (rtex->dcc_gather_statistics) {
>  			/* Dirty tracking must be enabled for DCC usage analysis. */
>  			sctx->framebuffer.compressed_cb_mask |= 1 << i;
>  			vi_separate_dcc_start_query(ctx, rtex);
>  		}
>  	}
>
>  	if (state->zsbuf) {
>  		surf = (struct r600_surface*)state->zsbuf;
> +		rtex = (struct r600_texture*)surf->base.texture;
>
>  		if (!surf->depth_initialized) {
>  			si_init_depth_surface(sctx, surf);
>  		}
>  		r600_context_add_resource_size(ctx, surf->base.texture);
>  	}
>
>  	si_update_poly_offset_state(sctx);
>  	si_mark_atom_dirty(sctx, &sctx->cb_render_state);
>  	si_mark_atom_dirty(sctx, &sctx->framebuffer.atom);
> @@ -3014,20 +3042,23 @@ si_create_sampler_view_custom(struct pipe_context *ctx,
>  		 */
>  		if (tmp->flushed_depth_texture->resource.b.b.format != tmp->resource.b.b.format)
>  			pipe_format = tmp->flushed_depth_texture->resource.b.b.format;
>
>  		tmp = tmp->flushed_depth_texture;
>  	}
>
>  	surflevel = tmp->surface.level;
>
>  	if (tmp->db_compatible) {
> +		if (!view->is_stencil_sampler)
> +			pipe_format = tmp->db_render_format;
> +
>  		switch (pipe_format) {
>  		case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
>  			pipe_format = PIPE_FORMAT_Z32_FLOAT;
>  			break;
>  		case PIPE_FORMAT_X8Z24_UNORM:
>  		case PIPE_FORMAT_S8_UINT_Z24_UNORM:
>  			/* Z24 is always stored like this for DB
>  			 * compatibility.
>  			 */
>  			pipe_format = PIPE_FORMAT_Z24X8_UNORM;
> diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
> index c14e852..d18137b 100644
> --- a/src/gallium/drivers/radeonsi/si_state_draw.c
> +++ b/src/gallium/drivers/radeonsi/si_state_draw.c
> @@ -1111,21 +1111,22 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
>  	     sctx->b.family == CHIP_FIJI) &&
>  	    r600_get_strmout_en(&sctx->b)) {
>  		sctx->b.flags |= SI_CONTEXT_VGT_STREAMOUT_SYNC;
>  	}
>
>  	/* Set the depth buffer as dirty. */
>  	if (sctx->framebuffer.state.zsbuf) {
>  		struct pipe_surface *surf = sctx->framebuffer.state.zsbuf;
>  		struct r600_texture *rtex = (struct r600_texture *)surf->texture;
>
> -		rtex->dirty_level_mask |= 1 << surf->u.tex.level;
> +		if (!rtex->tc_compatible_htile)
> +			rtex->dirty_level_mask |= 1 << surf->u.tex.level;
>
>  		if (rtex->surface.flags & RADEON_SURF_SBUFFER)
>  			rtex->stencil_dirty_level_mask |= 1 << surf->u.tex.level;
>  	}
>  	if (sctx->framebuffer.compressed_cb_mask) {
>  		struct pipe_surface *surf;
>  		struct r600_texture *rtex;
>  		unsigned mask = sctx->framebuffer.compressed_cb_mask;
>
>  		do {
> diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c b/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c
> index 8bfea45..1bf07a7 100644
> --- a/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c
> +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c
> @@ -130,20 +130,21 @@ ADDR_HANDLE amdgpu_addr_create(struct amdgpu_winsys *ws)
>        regValue.pMacroTileConfig = NULL;
>        regValue.noOfMacroEntries = 0;
>     } else {
>        regValue.pMacroTileConfig = ws->amdinfo.gb_macro_tile_mode;
>        regValue.noOfMacroEntries = ARRAY_SIZE(ws->amdinfo.gb_macro_tile_mode);
>     }
>
>     createFlags.value = 0;
>     createFlags.useTileIndex = 1;
>     createFlags.degradeBaseLevel = 1;
> +   createFlags.useHtileSliceAlign = 1;
>
>     addrCreateInput.chipEngine = CIASICIDGFXENGINE_SOUTHERNISLAND;
>     addrCreateInput.chipFamily = ws->family;
>     addrCreateInput.chipRevision = ws->rev_id;
>     addrCreateInput.createFlags = createFlags;
>     addrCreateInput.callbacks.allocSysMem = allocSysMem;
>     addrCreateInput.callbacks.freeSysMem = freeSysMem;
>     addrCreateInput.callbacks.debugPrint = 0;
>     addrCreateInput.regValue = regValue;
>
> @@ -153,21 +154,23 @@ ADDR_HANDLE amdgpu_addr_create(struct amdgpu_winsys *ws)
>
>     return addrCreateOutput.hLib;
>  }
>
>  static int compute_level(struct amdgpu_winsys *ws,
>                           struct radeon_surf *surf, bool is_stencil,
>                           unsigned level, unsigned type, bool compressed,
>                           ADDR_COMPUTE_SURFACE_INFO_INPUT *AddrSurfInfoIn,
>                           ADDR_COMPUTE_SURFACE_INFO_OUTPUT *AddrSurfInfoOut,
>                           ADDR_COMPUTE_DCCINFO_INPUT *AddrDccIn,
> -                         ADDR_COMPUTE_DCCINFO_OUTPUT *AddrDccOut)
> +                         ADDR_COMPUTE_DCCINFO_OUTPUT *AddrDccOut,
> +                         ADDR_COMPUTE_HTILE_INFO_INPUT *AddrHtileIn,
> +                         ADDR_COMPUTE_HTILE_INFO_OUTPUT *AddrHtileOut)
>  {
>     struct radeon_surf_level *surf_level;
>     ADDR_E_RETURNCODE ret;
>
>     AddrSurfInfoIn->mipLevel = level;
>     AddrSurfInfoIn->width = u_minify(surf->npix_x, level);
>     AddrSurfInfoIn->height = u_minify(surf->npix_y, level);
>
>     if (type == RADEON_SURF_TYPE_3D)
>        AddrSurfInfoIn->numSlices = u_minify(surf->npix_z, level);
> @@ -250,20 +253,46 @@ static int compute_level(struct amdgpu_winsys *ws,
>
>        if (ret == ADDR_OK) {
>           surf_level->dcc_offset = surf->dcc_size;
>           surf_level->dcc_fast_clear_size = AddrDccOut->dccFastClearSize;
>           surf_level->dcc_enabled = true;
>           surf->dcc_size = surf_level->dcc_offset + AddrDccOut->dccRamSize;
>           surf->dcc_alignment = MAX2(surf->dcc_alignment, AddrDccOut->dccRamBaseAlign);
>        }
>     }
>
> +   /* TC-compatible HTILE. */
> +   if (!is_stencil &&
> +       AddrSurfInfoIn->flags.depth &&
> +       AddrSurfInfoIn->flags.tcCompatible &&
> +       surf_level->mode == RADEON_SURF_MODE_2D &&
> +       level == 0) {
> +      AddrHtileIn->flags.tcCompatible = 1;
> +      AddrHtileIn->pitch = AddrSurfInfoOut->pitch;
> +      AddrHtileIn->height = AddrSurfInfoOut->height;
> +      AddrHtileIn->numSlices = AddrSurfInfoOut->depth;
> +      AddrHtileIn->blockWidth = ADDR_HTILE_BLOCKSIZE_8;
> +      AddrHtileIn->blockHeight = ADDR_HTILE_BLOCKSIZE_8;
> +      AddrHtileIn->pTileInfo = AddrSurfInfoOut->pTileInfo;
> +      AddrHtileIn->tileIndex = AddrSurfInfoOut->tileIndex;
> +      AddrHtileIn->macroModeIndex = AddrSurfInfoOut->macroModeIndex;
> +
> +      ret = AddrComputeHtileInfo(ws->addrlib,
> +                                 AddrHtileIn,
> +                                 AddrHtileOut);
> +
> +      if (ret == ADDR_OK) {
> +         surf->htile_size = AddrHtileOut->htileBytes;
> +         surf->htile_alignment = AddrHtileOut->baseAlign;
> +      }
> +   }
> +
>     return 0;
>  }
>
>  #define   G_009910_MICRO_TILE_MODE(x)          (((x) >> 0) & 0x03)
>  #define   G_009910_MICRO_TILE_MODE_NEW(x)      (((x) >> 22) & 0x07)
>
>  static void set_micro_tile_mode(struct radeon_surf *surf,
>                                  struct radeon_info *info)
>  {
>     uint32_t tile_mode = info->si_tile_mode_array[surf->tiling_index[0]];
> @@ -277,32 +306,36 @@ static void set_micro_tile_mode(struct radeon_surf *surf,
>  static int amdgpu_surface_init(struct radeon_winsys *rws,
>                                 struct radeon_surf *surf)
>  {
>     struct amdgpu_winsys *ws = (struct amdgpu_winsys*)rws;
>     unsigned level, mode, type;
>     bool compressed;
>     ADDR_COMPUTE_SURFACE_INFO_INPUT AddrSurfInfoIn = {0};
>     ADDR_COMPUTE_SURFACE_INFO_OUTPUT AddrSurfInfoOut = {0};
>     ADDR_COMPUTE_DCCINFO_INPUT AddrDccIn = {0};
>     ADDR_COMPUTE_DCCINFO_OUTPUT AddrDccOut = {0};
> +   ADDR_COMPUTE_HTILE_INFO_INPUT AddrHtileIn = {0};
> +   ADDR_COMPUTE_HTILE_INFO_OUTPUT AddrHtileOut = {0};
>     ADDR_TILEINFO AddrTileInfoIn = {0};
>     ADDR_TILEINFO AddrTileInfoOut = {0};
>     int r;
>
>     r = amdgpu_surface_sanity(surf);
>     if (r)
>        return r;
>
>     AddrSurfInfoIn.size = sizeof(ADDR_COMPUTE_SURFACE_INFO_INPUT);
>     AddrSurfInfoOut.size = sizeof(ADDR_COMPUTE_SURFACE_INFO_OUTPUT);
>     AddrDccIn.size = sizeof(ADDR_COMPUTE_DCCINFO_INPUT);
>     AddrDccOut.size = sizeof(ADDR_COMPUTE_DCCINFO_OUTPUT);
> +   AddrHtileIn.size = sizeof(ADDR_COMPUTE_HTILE_INFO_INPUT);
> +   AddrHtileOut.size = sizeof(ADDR_COMPUTE_HTILE_INFO_OUTPUT);
>     AddrSurfInfoOut.pTileInfo = &AddrTileInfoOut;
>
>     type = RADEON_SURF_GET(surf->flags, TYPE);
>     mode = RADEON_SURF_GET(surf->flags, MODE);
>     compressed = surf->blk_w == 4 && surf->blk_h == 4;
>
>     /* MSAA and FMASK require 2D tiling. */
>     if (surf->nsamples > 1 ||
>         (surf->flags & RADEON_SURF_FMASK))
>        mode = RADEON_SURF_MODE_2D;
> @@ -354,21 +387,26 @@ static int amdgpu_surface_init(struct radeon_winsys *rws,
>     else if (surf->flags & RADEON_SURF_Z_OR_SBUFFER)
>        AddrSurfInfoIn.tileType = ADDR_DEPTH_SAMPLE_ORDER;
>     else
>        AddrSurfInfoIn.tileType = ADDR_NON_DISPLAYABLE;
>
>     AddrSurfInfoIn.flags.color = !(surf->flags & RADEON_SURF_Z_OR_SBUFFER);
>     AddrSurfInfoIn.flags.depth = (surf->flags & RADEON_SURF_ZBUFFER) != 0;
>     AddrSurfInfoIn.flags.cube = type == RADEON_SURF_TYPE_CUBEMAP;
>     AddrSurfInfoIn.flags.display = (surf->flags & RADEON_SURF_SCANOUT) != 0;
>     AddrSurfInfoIn.flags.pow2Pad = surf->last_level > 0;
> -   AddrSurfInfoIn.flags.degrade4Space = 1;
> +   AddrSurfInfoIn.flags.tcCompatible = (surf->flags & RADEON_SURF_TC_COMPATIBLE_HTILE) != 0;
> +
> +   /* Only degrade the tile mode for space if TC-compatible HTILE hasn't been
> +    * requested, because TC-compatible HTILE requires 2D tiling.
> +    */
> +   AddrSurfInfoIn.flags.degrade4Space = !AddrSurfInfoIn.flags.tcCompatible;
>
>     /* DCC notes:
>      * - If we add MSAA support, keep in mind that CB can't decompress 8bpp
>      *   with samples >= 4.
>      * - Mipmapped array textures have low performance (discovered by a closed
>      *   driver team).
>      */
>     AddrSurfInfoIn.flags.dccCompatible = ws->info.chip_class >= VI &&
>                                          !(surf->flags & RADEON_SURF_Z_OR_SBUFFER) &&
>                                          !(surf->flags & RADEON_SURF_DISABLE_DCC) &&
> @@ -436,25 +474,28 @@ static int amdgpu_surface_init(struct radeon_winsys *rws,
>           if (AddrSurfInfoIn.tileType == ADDR_DISPLAYABLE)
>              AddrSurfInfoIn.tileIndex = 10; /* 2D displayable */
>           else
>              AddrSurfInfoIn.tileIndex = 14; /* 2D non-displayable */
>        }
>     }
>
>     surf->bo_size = 0;
>     surf->dcc_size = 0;
>     surf->dcc_alignment = 1;
> +   surf->htile_size = 0;
> +   surf->htile_alignment = 1;
>
>     /* Calculate texture layout information. */
>     for (level = 0; level <= surf->last_level; level++) {
>        r = compute_level(ws, surf, false, level, type, compressed,
> -                        &AddrSurfInfoIn, &AddrSurfInfoOut, &AddrDccIn, &AddrDccOut);
> +                        &AddrSurfInfoIn, &AddrSurfInfoOut,
> +                        &AddrDccIn, &AddrDccOut, &AddrHtileIn, &AddrHtileOut);
>        if (r)
>           return r;
>
>        if (level == 0) {
>           surf->bo_alignment = AddrSurfInfoOut.baseAlign;
>           surf->pipe_config = AddrSurfInfoOut.pTileInfo->pipeConfig - 1;
>           set_micro_tile_mode(surf, &ws->info);
>
>           /* For 2D modes only. */
>           if (AddrSurfInfoOut.tileMode >= ADDR_TM_2D_TILED_THIN1) {
> @@ -468,26 +509,28 @@ static int amdgpu_surface_init(struct radeon_winsys *rws,
>              surf->macro_tile_index = 0;
>           }
>        }
>     }
>
>     /* Calculate texture layout information for stencil. */
>     if (surf->flags & RADEON_SURF_SBUFFER) {
>        AddrSurfInfoIn.bpp = 8;
>        AddrSurfInfoIn.flags.depth = 0;
>        AddrSurfInfoIn.flags.stencil = 1;
> +      AddrSurfInfoIn.flags.tcCompatible = 0;
>        /* This will be ignored if AddrSurfInfoIn.pTileInfo is NULL. */
>        AddrTileInfoIn.tileSplitBytes = surf->stencil_tile_split;
>
>        for (level = 0; level <= surf->last_level; level++) {
>           r = compute_level(ws, surf, true, level, type, compressed,
> -                           &AddrSurfInfoIn, &AddrSurfInfoOut, &AddrDccIn, &AddrDccOut);
> +                           &AddrSurfInfoIn, &AddrSurfInfoOut, &AddrDccIn, &AddrDccOut,
> +                           NULL, NULL);
>           if (r)
>              return r;
>
>           /* DB uses the depth pitch for both stencil and depth. */
>           if (surf->stencil_level[level].nblk_x != surf->level[level].nblk_x)
>              surf->stencil_adjusted = true;
>
>           if (level == 0) {
>              /* For 2D modes only. */
>              if (AddrSurfInfoOut.tileMode >= ADDR_TM_2D_TILED_THIN1) {
> @@ -501,20 +544,26 @@ static int amdgpu_surface_init(struct radeon_winsys *rws,
>     /* Recalculate the whole DCC miptree size including disabled levels.
>      * This is what addrlib does, but calling addrlib would be a lot more
>      * complicated.
>      */
>     if (surf->dcc_size && surf->last_level > 0) {
>        surf->dcc_size = align64(surf->bo_size >> 8,
>                                 ws->info.pipe_interleave_bytes *
>                                 ws->info.num_tile_pipes);
>     }
>
> +   /* Make sure HTILE covers the whole miptree, because the shader reads
> +    * TC-compatible HTILE even for levels where it's disabled by DB.
> +    */
> +   if (surf->htile_size && surf->last_level)
> +	   surf->htile_size *= 2;
> +
>     return 0;
>  }
>
>  static int amdgpu_surface_best(struct radeon_winsys *rws,
>                                 struct radeon_surf *surf)
>  {
>     return 0;
>  }
>
>  void amdgpu_surface_init_functions(struct amdgpu_winsys *ws)
>


More information about the mesa-dev mailing list