Mesa (master): radeonsi: implement TC-compatible HTILE

Marek Olšák mareko at kemper.freedesktop.org
Thu Oct 13 17:05:51 UTC 2016


Module: Mesa
Branch: master
Commit: d4d9ec55c589156df4edc227a86b4a8c41048d58
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=d4d9ec55c589156df4edc227a86b4a8c41048d58

Author: Marek Olšák <marek.olsak at amd.com>
Date:   Tue Oct 11 23:19:46 2016 +0200

radeonsi: implement TC-compatible HTILE

so that decompress blits aren't needed and depth texturing needs less
memory bandwidth.

Z16 and Z24 are promoted to Z32_FLOAT by the driver, because TC-compatible
HTILE only supports Z32_FLOAT. This doubles memory footprint for Z16.
The format promotion is not visible to state trackers.

This is part of TC-compatible renderbuffer compression, which has 3 parts:
DCC, HTILE, FMASK. Only TC-compatible FMASK compression is missing now.

I don't see a measurable increase in performance though.

(I tested Talos Principle and DiRT: Showdown, the latter is improved by
 0.5%, which is almost noise, and it originally used layered Z16,
 so at least we know that Z16 promoted to Z32F isn't slower now)

Tested-by: Edmondo Tommasina <edmondo.tommasina at gmail.com>
Reviewed-by: Nicolai Hähnle <nicolai.haehnle at amd.com>

---

 src/gallium/drivers/radeon/r600_pipe_common.h  |  3 ++
 src/gallium/drivers/radeon/r600_texture.c      | 67 ++++++++++++++++++++++----
 src/gallium/drivers/radeon/radeon_winsys.h     |  4 ++
 src/gallium/drivers/radeonsi/si_blit.c         | 11 ++++-
 src/gallium/drivers/radeonsi/si_descriptors.c  |  7 ++-
 src/gallium/drivers/radeonsi/si_shader.c       | 18 ++++++-
 src/gallium/drivers/radeonsi/si_state.c        | 39 +++++++++++++--
 src/gallium/drivers/radeonsi/si_state_draw.c   |  3 +-
 src/gallium/winsys/amdgpu/drm/amdgpu_surface.c | 57 ++++++++++++++++++++--
 9 files changed, 185 insertions(+), 24 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index 290b228..5cfcad6 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -245,6 +245,7 @@ struct r600_htile_info {
 	unsigned height;
 	unsigned xalign;
 	unsigned yalign;
+	unsigned alignment;
 };
 
 struct r600_texture {
@@ -252,6 +253,7 @@ struct r600_texture {
 
 	uint64_t			size;
 	unsigned			num_level0_transfers;
+	enum pipe_format		db_render_format;
 	bool				is_depth;
 	bool				db_compatible;
 	bool				can_sample_z;
@@ -273,6 +275,7 @@ struct r600_texture {
 	/* Depth buffer compression and fast clear. */
 	struct r600_htile_info		htile;
 	struct r600_resource		*htile_buffer;
+	bool				tc_compatible_htile;
 	bool				depth_cleared; /* if it was cleared at least once */
 	float				depth_clear_value;
 	bool				stencil_cleared; /* if it was cleared at least once */
diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c
index 57cdbcf..625d091 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -192,7 +192,8 @@ static int r600_init_surface(struct r600_common_screen *rscreen,
 			     struct radeon_surf *surface,
 			     const struct pipe_resource *ptex,
 			     unsigned array_mode,
-			     bool is_flushed_depth)
+			     bool is_flushed_depth,
+			     bool tc_compatible_htile)
 {
 	const struct util_format_description *desc =
 		util_format_description(ptex->format);
@@ -256,11 +257,22 @@ static int r600_init_surface(struct r600_common_screen *rscreen,
 	if (!is_flushed_depth && is_depth) {
 		surface->flags |= RADEON_SURF_ZBUFFER;
 
+		if (tc_compatible_htile &&
+		    array_mode == RADEON_SURF_MODE_2D) {
+			/* TC-compatible HTILE only supports Z32_FLOAT.
+			 * Promote Z16 to Z32. DB->CB copies will convert
+			 * the format for transfers.
+			 */
+			surface->bpe = 4;
+			surface->flags |= RADEON_SURF_TC_COMPATIBLE_HTILE;
+		}
+
 		if (is_stencil) {
 			surface->flags |= RADEON_SURF_SBUFFER |
 					  RADEON_SURF_HAS_SBUFFER_MIPTREE;
 		}
 	}
+
 	if (rscreen->chip_class >= SI) {
 		surface->flags |= RADEON_SURF_HAS_TILE_MODE_INDEX;
 	}
@@ -904,6 +916,7 @@ static unsigned r600_texture_get_htile_size(struct r600_common_screen *rscreen,
 	rtex->htile.height = height;
 	rtex->htile.xalign = cl_width * 8;
 	rtex->htile.yalign = cl_height * 8;
+	rtex->htile.alignment = base_align;
 
 	return (util_max_layer(&rtex->resource.b.b, 0) + 1) *
 		align(slice_bytes, base_align);
@@ -912,21 +925,34 @@ static unsigned r600_texture_get_htile_size(struct r600_common_screen *rscreen,
 static void r600_texture_allocate_htile(struct r600_common_screen *rscreen,
 					struct r600_texture *rtex)
 {
-	unsigned htile_size = r600_texture_get_htile_size(rscreen, rtex);
+	uint64_t htile_size, alignment;
+	uint32_t clear_value;
+
+	if (rtex->tc_compatible_htile) {
+		htile_size = rtex->surface.htile_size;
+		alignment = rtex->surface.htile_alignment;
+		clear_value = 0x0000030F;
+	} else {
+		htile_size = r600_texture_get_htile_size(rscreen, rtex);
+		alignment = rtex->htile.alignment;
+		clear_value = 0;
+	}
 
 	if (!htile_size)
 		return;
 
 	rtex->htile_buffer = (struct r600_resource*)
-			     pipe_buffer_create(&rscreen->b, PIPE_BIND_CUSTOM,
-						PIPE_USAGE_DEFAULT, htile_size);
+			     r600_aligned_buffer_create(&rscreen->b, PIPE_BIND_CUSTOM,
+							PIPE_USAGE_DEFAULT,
+							htile_size, alignment);
 	if (rtex->htile_buffer == NULL) {
 		/* this is not a fatal error as we can still keep rendering
 		 * without htile buffer */
 		R600_ERR("Failed to create buffer object for htile buffer.\n");
 	} else {
-		r600_screen_clear_buffer(rscreen, &rtex->htile_buffer->b.b, 0,
-					 htile_size, 0, R600_COHERENCY_NONE);
+		r600_screen_clear_buffer(rscreen, &rtex->htile_buffer->b.b,
+					 0, htile_size, clear_value,
+					 R600_COHERENCY_NONE);
 	}
 }
 
@@ -967,10 +993,11 @@ void r600_print_texture_info(struct r600_texture *rtex, FILE *f)
 
 	if (rtex->htile_buffer)
 		fprintf(f, "  HTile: size=%u, alignment=%u, pitch=%u, height=%u, "
-			"xalign=%u, yalign=%u\n",
+			"xalign=%u, yalign=%u, TC_compatible = %u\n",
 			rtex->htile_buffer->b.b.width0,
 			rtex->htile_buffer->buf->alignment, rtex->htile.pitch,
-			rtex->htile.height, rtex->htile.xalign, rtex->htile.yalign);
+			rtex->htile.height, rtex->htile.xalign, rtex->htile.yalign,
+			rtex->tc_compatible_htile);
 
 	if (rtex->dcc_offset) {
 		fprintf(f, "  DCC: offset=%"PRIu64", size=%"PRIu64", alignment=%"PRIu64"\n",
@@ -1054,6 +1081,16 @@ r600_texture_create_object(struct pipe_screen *screen,
 		return NULL;
 	}
 
+	rtex->tc_compatible_htile = rtex->surface.htile_size != 0;
+	assert(!!(rtex->surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE) ==
+	       rtex->tc_compatible_htile);
+
+	/* TC-compatible HTILE only supports Z32_FLOAT. */
+	if (rtex->tc_compatible_htile)
+		rtex->db_render_format = PIPE_FORMAT_Z32_FLOAT;
+	else
+		rtex->db_render_format = base->format;
+
 	/* Tiled depth textures utilize the non-displayable tile order.
 	 * This must be done after r600_setup_surface.
 	 * Applies to R600-Cayman. */
@@ -1241,11 +1278,20 @@ struct pipe_resource *r600_texture_create(struct pipe_screen *screen,
 {
 	struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
 	struct radeon_surf surface = {0};
+	bool is_flushed_depth = templ->flags & R600_RESOURCE_FLAG_FLUSHED_DEPTH;
+	bool tc_compatible_htile =
+		rscreen->chip_class >= VI &&
+		(templ->flags & PIPE_RESOURCE_FLAG_TEXTURING_MORE_LIKELY) &&
+		!(rscreen->debug_flags & DBG_NO_HYPERZ) &&
+		!is_flushed_depth &&
+		templ->nr_samples <= 1 && /* TC-compat HTILE is less efficient with MSAA */
+		util_format_is_depth_or_stencil(templ->format);
+
 	int r;
 
 	r = r600_init_surface(rscreen, &surface, templ,
 			      r600_choose_tiling(rscreen, templ),
-			      templ->flags & R600_RESOURCE_FLAG_FLUSHED_DEPTH);
+			      is_flushed_depth, tc_compatible_htile);
 	if (r) {
 		return NULL;
 	}
@@ -1296,7 +1342,8 @@ static struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen
 	else
 		array_mode = RADEON_SURF_MODE_LINEAR_ALIGNED;
 
-	r = r600_init_surface(rscreen, &surface, templ, array_mode, false);
+	r = r600_init_surface(rscreen, &surface, templ, array_mode,
+			      false, false);
 	if (r) {
 		return NULL;
 	}
diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h
index 7146737..8946209 100644
--- a/src/gallium/drivers/radeon/radeon_winsys.h
+++ b/src/gallium/drivers/radeon/radeon_winsys.h
@@ -278,6 +278,7 @@ enum radeon_feature_id {
 #define RADEON_SURF_HAS_TILE_MODE_INDEX         (1 << 20)
 #define RADEON_SURF_FMASK                       (1 << 21)
 #define RADEON_SURF_DISABLE_DCC                 (1 << 22)
+#define RADEON_SURF_TC_COMPATIBLE_HTILE         (1 << 23)
 
 #define RADEON_SURF_GET(v, field)   (((v) >> RADEON_SURF_ ## field ## _SHIFT) & RADEON_SURF_ ## field ## _MASK)
 #define RADEON_SURF_SET(v, field)   (((v) & RADEON_SURF_ ## field ## _MASK) << RADEON_SURF_ ## field ## _SHIFT)
@@ -344,6 +345,9 @@ struct radeon_surf {
 
     uint64_t                    dcc_size;
     uint64_t                    dcc_alignment;
+    /* TC-compatible HTILE only. */
+    uint64_t                    htile_size;
+    uint64_t                    htile_alignment;
 };
 
 struct radeon_bo_list_item {
diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
index c143601..db41f56 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -332,6 +332,8 @@ si_flush_depth_texture(struct si_context *sctx,
 		}
 	}
 
+	assert(!tex->tc_compatible_htile || levels_z == 0);
+
 	/* We may have to allocate the flushed texture here when called from
 	 * si_decompress_subresource.
 	 */
@@ -699,7 +701,10 @@ static void si_clear(struct pipe_context *ctx, unsigned buffers,
 	    zsbuf->u.tex.level == 0 &&
 	    zsbuf->u.tex.first_layer == 0 &&
 	    zsbuf->u.tex.last_layer == util_max_layer(&zstex->resource.b.b, 0)) {
-		if (buffers & PIPE_CLEAR_DEPTH) {
+		/* TC-compatible HTILE only supports depth clears to 0 or 1. */
+		if (buffers & PIPE_CLEAR_DEPTH &&
+		    (!zstex->tc_compatible_htile ||
+		     depth == 0 || depth == 1)) {
 			/* Need to disable EXPCLEAR temporarily if clearing
 			 * to a new value. */
 			if (!zstex->depth_cleared || zstex->depth_clear_value != depth) {
@@ -713,7 +718,9 @@ static void si_clear(struct pipe_context *ctx, unsigned buffers,
 			si_mark_atom_dirty(sctx, &sctx->db_render_state);
 		}
 
-		if (buffers & PIPE_CLEAR_STENCIL) {
+		/* TC-compatible HTILE only supports stencil clears to 0. */
+		if (buffers & PIPE_CLEAR_STENCIL &&
+		    (!zstex->tc_compatible_htile || stencil == 0)) {
 			stencil &= 0xff;
 
 			/* Need to disable EXPCLEAR temporarily if clearing
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index 350242a..19cae65 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -399,6 +399,9 @@ void si_set_mutable_tex_desc_fields(struct r600_texture *tex,
 		state[7] = ((!tex->dcc_separate_buffer ? tex->resource.gpu_address : 0) +
 			    tex->dcc_offset +
 			    base_level_info->dcc_offset) >> 8;
+	} else if (tex->tc_compatible_htile) {
+		state[6] |= S_008F28_COMPRESSION_EN(1);
+		state[7] = tex->htile_buffer->gpu_address >> 8;
 	}
 }
 
@@ -508,8 +511,10 @@ static void si_set_sampler_views(struct pipe_context *ctx,
 		if (views[i]->texture && views[i]->texture->target != PIPE_BUFFER) {
 			struct r600_texture *rtex =
 				(struct r600_texture*)views[i]->texture;
+			struct si_sampler_view *rview = (struct si_sampler_view *)views[i];
 
-			if (rtex->db_compatible) {
+			if (rtex->db_compatible &&
+			    (!rtex->tc_compatible_htile || rview->is_stencil_sampler)) {
 				samplers->depth_texture_mask |= 1u << slot;
 			} else {
 				samplers->depth_texture_mask &= ~(1u << slot);
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index fc50205..b2d7699 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -4607,12 +4607,26 @@ static void tex_fetch_args(
 
 	/* Pack depth comparison value */
 	if (tgsi_is_shadow_target(target) && opcode != TGSI_OPCODE_LODQ) {
+		LLVMValueRef z;
+
 		if (target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
-			address[count++] = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X);
+			z = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X);
 		} else {
 			assert(ref_pos >= 0);
-			address[count++] = coords[ref_pos];
+			z = coords[ref_pos];
 		}
+
+		/* TC-compatible HTILE promotes Z16 and Z24 to Z32_FLOAT,
+		 * so the depth comparison value isn't clamped for Z16 and
+		 * Z24 anymore. Do it manually here.
+		 *
+		 * It's unnecessary if the original texture format was
+		 * Z32_FLOAT, but we don't know that here.
+		 */
+		if (ctx->screen->b.chip_class == VI)
+			z = radeon_llvm_saturate(bld_base, z);
+
+		address[count++] = z;
 	}
 
 	/* Pack user derivatives */
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index ad65fc2..b23749c 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -686,6 +686,9 @@ static void si_update_poly_offset_state(struct si_context *sctx)
 	if (!rs || !rs->uses_poly_offset || !sctx->framebuffer.state.zsbuf)
 		return;
 
+	/* Use the user format, not db_render_format, so that the polygon
+	 * offset behaves as expected by applications.
+	 */
 	switch (sctx->framebuffer.state.zsbuf->texture->format) {
 	case PIPE_FORMAT_Z16_UNORM:
 		si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[0]);
@@ -2140,7 +2143,7 @@ static void si_init_depth_surface(struct si_context *sctx,
 	uint64_t z_offs, s_offs;
 	uint32_t db_htile_data_base, db_htile_surface;
 
-	format = si_translate_dbformat(rtex->resource.b.b.format);
+	format = si_translate_dbformat(rtex->db_render_format);
 
 	if (format == V_028040_Z_INVALID) {
 		R600_ERR("Invalid DB format: %d, disabling DB.\n", rtex->resource.b.b.format);
@@ -2151,7 +2154,7 @@ static void si_init_depth_surface(struct si_context *sctx,
 	z_offs += rtex->surface.level[level].offset;
 	s_offs += rtex->surface.stencil_level[level].offset;
 
-	db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(1);
+	db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(!rtex->tc_compatible_htile);
 
 	z_info = S_028040_FORMAT(format);
 	if (rtex->resource.b.b.nr_samples > 1) {
@@ -2208,13 +2211,37 @@ static void si_init_depth_surface(struct si_context *sctx,
 			 */
 			if (rtex->resource.b.b.nr_samples <= 1)
 				s_info |= S_028044_ALLOW_EXPCLEAR(1);
-		} else
-			/* Use all of the htile_buffer for depth if there's no stencil. */
+		} else if (!rtex->tc_compatible_htile) {
+			/* Use all of the htile_buffer for depth if there's no stencil.
+			 * This must not be set when TC-compatible HTILE is enabled
+			 * due to a hw bug.
+			 */
 			s_info |= S_028044_TILE_STENCIL_DISABLE(1);
+		}
 
 		uint64_t va = rtex->htile_buffer->gpu_address;
 		db_htile_data_base = va >> 8;
 		db_htile_surface = S_028ABC_FULL_CACHE(1);
+
+		if (rtex->tc_compatible_htile) {
+			db_htile_surface |= S_028ABC_TC_COMPATIBLE(1);
+
+			switch (rtex->resource.b.b.nr_samples) {
+			case 0:
+			case 1:
+				z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(5);
+				break;
+			case 2:
+			case 4:
+				z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(3);
+				break;
+			case 8:
+				z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(2);
+				break;
+			default:
+				assert(0);
+			}
+		}
 	} else {
 		db_htile_data_base = 0;
 		db_htile_surface = 0;
@@ -2356,6 +2383,7 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
 
 	if (state->zsbuf) {
 		surf = (struct r600_surface*)state->zsbuf;
+		rtex = (struct r600_texture*)surf->base.texture;
 
 		if (!surf->depth_initialized) {
 			si_init_depth_surface(sctx, surf);
@@ -3021,6 +3049,9 @@ si_create_sampler_view_custom(struct pipe_context *ctx,
 	surflevel = tmp->surface.level;
 
 	if (tmp->db_compatible) {
+		if (!view->is_stencil_sampler)
+			pipe_format = tmp->db_render_format;
+
 		switch (pipe_format) {
 		case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
 			pipe_format = PIPE_FORMAT_Z32_FLOAT;
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index c14e852..d18137b 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -1118,7 +1118,8 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 		struct pipe_surface *surf = sctx->framebuffer.state.zsbuf;
 		struct r600_texture *rtex = (struct r600_texture *)surf->texture;
 
-		rtex->dirty_level_mask |= 1 << surf->u.tex.level;
+		if (!rtex->tc_compatible_htile)
+			rtex->dirty_level_mask |= 1 << surf->u.tex.level;
 
 		if (rtex->surface.flags & RADEON_SURF_SBUFFER)
 			rtex->stencil_dirty_level_mask |= 1 << surf->u.tex.level;
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c b/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c
index 8bfea45..1bf07a7 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_surface.c
@@ -137,6 +137,7 @@ ADDR_HANDLE amdgpu_addr_create(struct amdgpu_winsys *ws)
    createFlags.value = 0;
    createFlags.useTileIndex = 1;
    createFlags.degradeBaseLevel = 1;
+   createFlags.useHtileSliceAlign = 1;
 
    addrCreateInput.chipEngine = CIASICIDGFXENGINE_SOUTHERNISLAND;
    addrCreateInput.chipFamily = ws->family;
@@ -160,7 +161,9 @@ static int compute_level(struct amdgpu_winsys *ws,
                          ADDR_COMPUTE_SURFACE_INFO_INPUT *AddrSurfInfoIn,
                          ADDR_COMPUTE_SURFACE_INFO_OUTPUT *AddrSurfInfoOut,
                          ADDR_COMPUTE_DCCINFO_INPUT *AddrDccIn,
-                         ADDR_COMPUTE_DCCINFO_OUTPUT *AddrDccOut)
+                         ADDR_COMPUTE_DCCINFO_OUTPUT *AddrDccOut,
+                         ADDR_COMPUTE_HTILE_INFO_INPUT *AddrHtileIn,
+                         ADDR_COMPUTE_HTILE_INFO_OUTPUT *AddrHtileOut)
 {
    struct radeon_surf_level *surf_level;
    ADDR_E_RETURNCODE ret;
@@ -257,6 +260,32 @@ static int compute_level(struct amdgpu_winsys *ws,
       }
    }
 
+   /* TC-compatible HTILE. */
+   if (!is_stencil &&
+       AddrSurfInfoIn->flags.depth &&
+       AddrSurfInfoIn->flags.tcCompatible &&
+       surf_level->mode == RADEON_SURF_MODE_2D &&
+       level == 0) {
+      AddrHtileIn->flags.tcCompatible = 1;
+      AddrHtileIn->pitch = AddrSurfInfoOut->pitch;
+      AddrHtileIn->height = AddrSurfInfoOut->height;
+      AddrHtileIn->numSlices = AddrSurfInfoOut->depth;
+      AddrHtileIn->blockWidth = ADDR_HTILE_BLOCKSIZE_8;
+      AddrHtileIn->blockHeight = ADDR_HTILE_BLOCKSIZE_8;
+      AddrHtileIn->pTileInfo = AddrSurfInfoOut->pTileInfo;
+      AddrHtileIn->tileIndex = AddrSurfInfoOut->tileIndex;
+      AddrHtileIn->macroModeIndex = AddrSurfInfoOut->macroModeIndex;
+
+      ret = AddrComputeHtileInfo(ws->addrlib,
+                                 AddrHtileIn,
+                                 AddrHtileOut);
+
+      if (ret == ADDR_OK) {
+         surf->htile_size = AddrHtileOut->htileBytes;
+         surf->htile_alignment = AddrHtileOut->baseAlign;
+      }
+   }
+
    return 0;
 }
 
@@ -284,6 +313,8 @@ static int amdgpu_surface_init(struct radeon_winsys *rws,
    ADDR_COMPUTE_SURFACE_INFO_OUTPUT AddrSurfInfoOut = {0};
    ADDR_COMPUTE_DCCINFO_INPUT AddrDccIn = {0};
    ADDR_COMPUTE_DCCINFO_OUTPUT AddrDccOut = {0};
+   ADDR_COMPUTE_HTILE_INFO_INPUT AddrHtileIn = {0};
+   ADDR_COMPUTE_HTILE_INFO_OUTPUT AddrHtileOut = {0};
    ADDR_TILEINFO AddrTileInfoIn = {0};
    ADDR_TILEINFO AddrTileInfoOut = {0};
    int r;
@@ -296,6 +327,8 @@ static int amdgpu_surface_init(struct radeon_winsys *rws,
    AddrSurfInfoOut.size = sizeof(ADDR_COMPUTE_SURFACE_INFO_OUTPUT);
    AddrDccIn.size = sizeof(ADDR_COMPUTE_DCCINFO_INPUT);
    AddrDccOut.size = sizeof(ADDR_COMPUTE_DCCINFO_OUTPUT);
+   AddrHtileIn.size = sizeof(ADDR_COMPUTE_HTILE_INFO_INPUT);
+   AddrHtileOut.size = sizeof(ADDR_COMPUTE_HTILE_INFO_OUTPUT);
    AddrSurfInfoOut.pTileInfo = &AddrTileInfoOut;
 
    type = RADEON_SURF_GET(surf->flags, TYPE);
@@ -361,7 +394,12 @@ static int amdgpu_surface_init(struct radeon_winsys *rws,
    AddrSurfInfoIn.flags.cube = type == RADEON_SURF_TYPE_CUBEMAP;
    AddrSurfInfoIn.flags.display = (surf->flags & RADEON_SURF_SCANOUT) != 0;
    AddrSurfInfoIn.flags.pow2Pad = surf->last_level > 0;
-   AddrSurfInfoIn.flags.degrade4Space = 1;
+   AddrSurfInfoIn.flags.tcCompatible = (surf->flags & RADEON_SURF_TC_COMPATIBLE_HTILE) != 0;
+
+   /* Only degrade the tile mode for space if TC-compatible HTILE hasn't been
+    * requested, because TC-compatible HTILE requires 2D tiling.
+    */
+   AddrSurfInfoIn.flags.degrade4Space = !AddrSurfInfoIn.flags.tcCompatible;
 
    /* DCC notes:
     * - If we add MSAA support, keep in mind that CB can't decompress 8bpp
@@ -443,11 +481,14 @@ static int amdgpu_surface_init(struct radeon_winsys *rws,
    surf->bo_size = 0;
    surf->dcc_size = 0;
    surf->dcc_alignment = 1;
+   surf->htile_size = 0;
+   surf->htile_alignment = 1;
 
    /* Calculate texture layout information. */
    for (level = 0; level <= surf->last_level; level++) {
       r = compute_level(ws, surf, false, level, type, compressed,
-                        &AddrSurfInfoIn, &AddrSurfInfoOut, &AddrDccIn, &AddrDccOut);
+                        &AddrSurfInfoIn, &AddrSurfInfoOut,
+                        &AddrDccIn, &AddrDccOut, &AddrHtileIn, &AddrHtileOut);
       if (r)
          return r;
 
@@ -475,12 +516,14 @@ static int amdgpu_surface_init(struct radeon_winsys *rws,
       AddrSurfInfoIn.bpp = 8;
       AddrSurfInfoIn.flags.depth = 0;
       AddrSurfInfoIn.flags.stencil = 1;
+      AddrSurfInfoIn.flags.tcCompatible = 0;
       /* This will be ignored if AddrSurfInfoIn.pTileInfo is NULL. */
       AddrTileInfoIn.tileSplitBytes = surf->stencil_tile_split;
 
       for (level = 0; level <= surf->last_level; level++) {
          r = compute_level(ws, surf, true, level, type, compressed,
-                           &AddrSurfInfoIn, &AddrSurfInfoOut, &AddrDccIn, &AddrDccOut);
+                           &AddrSurfInfoIn, &AddrSurfInfoOut, &AddrDccIn, &AddrDccOut,
+                           NULL, NULL);
          if (r)
             return r;
 
@@ -508,6 +551,12 @@ static int amdgpu_surface_init(struct radeon_winsys *rws,
                                ws->info.num_tile_pipes);
    }
 
+   /* Make sure HTILE covers the whole miptree, because the shader reads
+    * TC-compatible HTILE even for levels where it's disabled by DB.
+    */
+   if (surf->htile_size && surf->last_level)
+	   surf->htile_size *= 2;
+
    return 0;
 }
 




More information about the mesa-commit mailing list