[Mesa-dev] [PATCH 6/7] gallium/radeon: don't allocate HTILE in a separate buffer

Samuel Pitoiset samuel.pitoiset at gmail.com
Wed Jun 7 20:20:43 UTC 2017


Yeah, this will help bindless a little bit because the winsys overhead
becomes high with a huge buffer list. Though, there aren't a ton of
HTILE buffers.

I have a small comment on patch 4.

Patches 3-6 are:

Reviewed-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>

On 06/07/2017 09:50 PM, Marek Olšák wrote:
> From: Marek Olšák <marek.olsak at amd.com>
> 
> ---
>   src/gallium/drivers/r600/evergreen_state.c    |  6 +--
>   src/gallium/drivers/r600/r600_blit.c          |  2 +-
>   src/gallium/drivers/r600/r600_state.c         |  6 +--
>   src/gallium/drivers/radeon/r600_pipe_common.h |  2 +-
>   src/gallium/drivers/radeon/r600_texture.c     | 57 ++++++++++++---------------
>   src/gallium/drivers/radeonsi/si_blit.c        |  2 +-
>   src/gallium/drivers/radeonsi/si_descriptors.c |  9 +----
>   src/gallium/drivers/radeonsi/si_state.c       | 16 +++-----
>   8 files changed, 41 insertions(+), 59 deletions(-)
> 
> diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
> index c3b939f..9595351 100644
> --- a/src/gallium/drivers/r600/evergreen_state.c
> +++ b/src/gallium/drivers/r600/evergreen_state.c
> @@ -1386,22 +1386,22 @@ static void evergreen_init_depth_surface(struct r600_context *rctx,
>   	} else {
>   		surf->db_stencil_base = offset;
>   		/* DRM 2.6.18 allows the INVALID format to disable stencil.
>   		 * Older kernels are out of luck. */
>   		surf->db_stencil_info = rctx->screen->b.info.drm_minor >= 18 ?
>   					S_028044_FORMAT(V_028044_STENCIL_INVALID) :
>   					S_028044_FORMAT(V_028044_STENCIL_8);
>   	}
>   
>   	/* use htile only for first level */
> -	if (rtex->htile_buffer && !level) {
> -		uint64_t va = rtex->htile_buffer->gpu_address;
> +	if (rtex->htile_offset && !level) {
> +		uint64_t va = rtex->resource.gpu_address + rtex->htile_offset;
>   		surf->db_htile_data_base = va >> 8;
>   		surf->db_htile_surface = S_028ABC_HTILE_WIDTH(1) |
>   					 S_028ABC_HTILE_HEIGHT(1) |
>   					 S_028ABC_FULL_CACHE(1);
>   		surf->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1);
>   		surf->db_preload_control = 0;
>   	}
>   
>   	surf->depth_initialized = true;
>   }
> @@ -1869,21 +1869,21 @@ static void evergreen_emit_db_state(struct r600_context *rctx, struct r600_atom
>   	struct r600_db_state *a = (struct r600_db_state*)atom;
>   
>   	if (a->rsurf && a->rsurf->db_htile_surface) {
>   		struct r600_texture *rtex = (struct r600_texture *)a->rsurf->base.texture;
>   		unsigned reloc_idx;
>   
>   		radeon_set_context_reg(cs, R_02802C_DB_DEPTH_CLEAR, fui(rtex->depth_clear_value));
>   		radeon_set_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, a->rsurf->db_htile_surface);
>   		radeon_set_context_reg(cs, R_028AC8_DB_PRELOAD_CONTROL, a->rsurf->db_preload_control);
>   		radeon_set_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, a->rsurf->db_htile_data_base);
> -		reloc_idx = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rtex->htile_buffer,
> +		reloc_idx = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, &rtex->resource,
>   						  RADEON_USAGE_READWRITE, RADEON_PRIO_HTILE);
>   		radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
>   		radeon_emit(cs, reloc_idx);
>   	} else {
>   		radeon_set_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, 0);
>   		radeon_set_context_reg(cs, R_028AC8_DB_PRELOAD_CONTROL, 0);
>   	}
>   }
>   
>   static void evergreen_emit_db_misc_state(struct r600_context *rctx, struct r600_atom *atom)
> diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c
> index 80aa9c0..79505d5 100644
> --- a/src/gallium/drivers/r600/r600_blit.c
> +++ b/src/gallium/drivers/r600/r600_blit.c
> @@ -437,21 +437,21 @@ static void r600_clear(struct pipe_context *ctx, unsigned buffers,
>   		struct r600_texture *rtex;
>   		unsigned level = fb->zsbuf->u.tex.level;
>   
>   		rtex = (struct r600_texture*)fb->zsbuf->texture;
>   
>   		/* We can't use hyperz fast clear if each slice of a texture
>   		 * array are clear to different value. To simplify code just
>   		 * disable fast clear for texture array.
>   		 */
>   		/* Only use htile for first level */
> -		if (rtex->htile_buffer && !level &&
> +		if (rtex->htile_offset && !level &&
>                      fb->zsbuf->u.tex.first_layer == 0 &&
>                      fb->zsbuf->u.tex.last_layer == util_max_layer(&rtex->resource.b.b, level)) {
>   			if (rtex->depth_clear_value != depth) {
>   				rtex->depth_clear_value = depth;
>   				r600_mark_atom_dirty(rctx, &rctx->db_state.atom);
>   			}
>   			rctx->db_misc_state.htile_clear = true;
>   			r600_mark_atom_dirty(rctx, &rctx->db_misc_state.atom);
>   		}
>   	}
> diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
> index 2001cfd..dca8fe5 100644
> --- a/src/gallium/drivers/r600/r600_state.c
> +++ b/src/gallium/drivers/r600/r600_state.c
> @@ -1054,22 +1054,22 @@ static void r600_init_depth_surface(struct r600_context *rctx,
>   	assert(format != ~0);
>   
>   	surf->db_depth_info = S_028010_ARRAY_MODE(array_mode) | S_028010_FORMAT(format);
>   	surf->db_depth_base = offset >> 8;
>   	surf->db_depth_view = S_028004_SLICE_START(surf->base.u.tex.first_layer) |
>   			      S_028004_SLICE_MAX(surf->base.u.tex.last_layer);
>   	surf->db_depth_size = S_028000_PITCH_TILE_MAX(pitch) | S_028000_SLICE_TILE_MAX(slice);
>   	surf->db_prefetch_limit = (rtex->surface.u.legacy.level[level].nblk_y / 8) - 1;
>   
>   	/* use htile only for first level */
> -	if (rtex->htile_buffer && !level) {
> -		surf->db_htile_data_base = 0;
> +	if (rtex->htile_offset && !level) {
> +		surf->db_htile_data_base = rtex->htile_offset >> 8;
>   		surf->db_htile_surface = S_028D24_HTILE_WIDTH(1) |
>   					 S_028D24_HTILE_HEIGHT(1) |
>   					 S_028D24_FULL_CACHE(1);
>   		/* preload is not working properly on r6xx/r7xx */
>   		surf->db_depth_info |= S_028010_TILE_SURFACE_ENABLE(1);
>   	}
>   
>   	surf->depth_initialized = true;
>   }
>   
> @@ -1536,21 +1536,21 @@ static void r600_emit_db_state(struct r600_context *rctx, struct r600_atom *atom
>   	struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
>   	struct r600_db_state *a = (struct r600_db_state*)atom;
>   
>   	if (a->rsurf && a->rsurf->db_htile_surface) {
>   		struct r600_texture *rtex = (struct r600_texture *)a->rsurf->base.texture;
>   		unsigned reloc_idx;
>   
>   		radeon_set_context_reg(cs, R_02802C_DB_DEPTH_CLEAR, fui(rtex->depth_clear_value));
>   		radeon_set_context_reg(cs, R_028D24_DB_HTILE_SURFACE, a->rsurf->db_htile_surface);
>   		radeon_set_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, a->rsurf->db_htile_data_base);
> -		reloc_idx = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, rtex->htile_buffer,
> +		reloc_idx = radeon_add_to_buffer_list(&rctx->b, &rctx->b.gfx, &rtex->resource,
>   						  RADEON_USAGE_READWRITE, RADEON_PRIO_HTILE);
>   		radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
>   		radeon_emit(cs, reloc_idx);
>   	} else {
>   		radeon_set_context_reg(cs, R_028D24_DB_HTILE_SURFACE, 0);
>   	}
>   }
>   
>   static void r600_emit_db_misc_state(struct r600_context *rctx, struct r600_atom *atom)
>   {
> diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
> index b17b690..84d38fb 100644
> --- a/src/gallium/drivers/radeon/r600_pipe_common.h
> +++ b/src/gallium/drivers/radeon/r600_pipe_common.h
> @@ -225,21 +225,21 @@ struct r600_texture {
>   	/* Colorbuffer compression and fast clear. */
>   	struct r600_fmask_info		fmask;
>   	struct r600_cmask_info		cmask;
>   	struct r600_resource		*cmask_buffer;
>   	uint64_t			dcc_offset; /* 0 = disabled */
>   	unsigned			cb_color_info; /* fast clear enable bit */
>   	unsigned			color_clear_value[2];
>   	unsigned			last_msaa_resolve_target_micro_mode;
>   
>   	/* Depth buffer compression and fast clear. */
> -	struct r600_resource		*htile_buffer;
> +	uint64_t			htile_offset;
>   	bool				tc_compatible_htile;
>   	bool				depth_cleared; /* if it was cleared at least once */
>   	float				depth_clear_value;
>   	bool				stencil_cleared; /* if it was cleared at least once */
>   	uint8_t				stencil_clear_value;
>   
>   	bool				non_disp_tiling; /* R600-Cayman only */
>   
>   	/* Whether the texture is a displayable back buffer and needs DCC
>   	 * decompression, which is expensive. Therefore, it's enabled only
> diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c
> index d00f05b..32275b1 100644
> --- a/src/gallium/drivers/radeon/r600_texture.c
> +++ b/src/gallium/drivers/radeon/r600_texture.c
> @@ -502,21 +502,21 @@ static void r600_degrade_tile_mode_to_linear(struct r600_common_context *rctx,
>   	rtex->resource.bo_size = new_tex->resource.bo_size;
>   	rtex->resource.bo_alignment = new_tex->resource.bo_alignment;
>   	rtex->resource.domains = new_tex->resource.domains;
>   	rtex->resource.flags = new_tex->resource.flags;
>   	rtex->size = new_tex->size;
>   	rtex->surface = new_tex->surface;
>   	rtex->non_disp_tiling = new_tex->non_disp_tiling;
>   	rtex->cb_color_info = new_tex->cb_color_info;
>   	rtex->cmask = new_tex->cmask; /* needed even without CMASK */
>   
> -	assert(!rtex->htile_buffer);
> +	assert(!rtex->htile_offset);
>   	assert(!rtex->cmask.size);
>   	assert(!rtex->fmask.size);
>   	assert(!rtex->dcc_offset);
>   	assert(!rtex->is_depth);
>   
>   	r600_texture_reference(&new_tex, NULL);
>   
>   	p_atomic_inc(&rctx->screen->dirty_tex_counter);
>   }
>   
> @@ -605,21 +605,20 @@ static boolean r600_texture_get_handle(struct pipe_screen* screen,
>   }
>   
>   static void r600_texture_destroy(struct pipe_screen *screen,
>   				 struct pipe_resource *ptex)
>   {
>   	struct r600_texture *rtex = (struct r600_texture*)ptex;
>   	struct r600_resource *resource = &rtex->resource;
>   
>   	r600_texture_reference(&rtex->flushed_depth_texture, NULL);
>   
> -	r600_resource_reference(&rtex->htile_buffer, NULL);
>   	if (rtex->cmask_buffer != &rtex->resource) {
>   	    r600_resource_reference(&rtex->cmask_buffer, NULL);
>   	}
>   	pb_reference(&resource->buf, NULL);
>   	r600_resource_reference(&rtex->dcc_separate_buffer, NULL);
>   	r600_resource_reference(&rtex->last_dcc_separate_buffer, NULL);
>   	FREE(rtex);
>   }
>   
>   static const struct u_resource_vtbl r600_texture_vtbl;
> @@ -922,47 +921,28 @@ static void r600_texture_get_htile_size(struct r600_common_screen *rscreen,
>   
>   	rtex->surface.htile_alignment = base_align;
>   	rtex->surface.htile_size =
>   		(util_max_layer(&rtex->resource.b.b, 0) + 1) *
>   		align(slice_bytes, base_align);
>   }
>   
>   static void r600_texture_allocate_htile(struct r600_common_screen *rscreen,
>   					struct r600_texture *rtex)
>   {
> -	uint32_t clear_value;
> -
> -	if (rscreen->chip_class >= GFX9 || rtex->tc_compatible_htile) {
> -		clear_value = 0x0000030F;
> -	} else {
> +	if (rscreen->chip_class <= VI && !rtex->tc_compatible_htile)
>   		r600_texture_get_htile_size(rscreen, rtex);
> -		clear_value = 0;
> -	}
>   
>   	if (!rtex->surface.htile_size)
>   		return;
>   
> -	rtex->htile_buffer = (struct r600_resource*)
> -		r600_aligned_buffer_create(&rscreen->b,
> -					   R600_RESOURCE_FLAG_UNMAPPABLE,
> -					   PIPE_USAGE_DEFAULT,
> -					   rtex->surface.htile_size,
> -					   rtex->surface.htile_alignment);
> -	if (rtex->htile_buffer == NULL) {
> -		/* this is not a fatal error as we can still keep rendering
> -		 * without htile buffer */
> -		R600_ERR("Failed to create buffer object for htile buffer.\n");
> -	} else {
> -		r600_screen_clear_buffer(rscreen, &rtex->htile_buffer->b.b,
> -					 0, rtex->surface.htile_size,
> -					 clear_value);
> -	}
> +	rtex->htile_offset = align(rtex->size, rtex->surface.htile_alignment);
> +	rtex->size = rtex->htile_offset + rtex->surface.htile_size;
>   }
>   
>   void r600_print_texture_info(struct r600_common_screen *rscreen,
>   			     struct r600_texture *rtex, FILE *f)
>   {
>   	int i;
>   
>   	/* Common parameters. */
>   	fprintf(f, "  Info: npix_x=%u, npix_y=%u, npix_z=%u, blk_w=%u, "
>   		"blk_h=%u, array_size=%u, last_level=%u, "
> @@ -997,25 +977,26 @@ void r600_print_texture_info(struct r600_common_screen *rscreen,
>   		if (rtex->cmask.size) {
>   			fprintf(f, "  CMask: offset=%"PRIu64", size=%"PRIu64", "
>   				"alignment=%u, rb_aligned=%u, pipe_aligned=%u\n",
>   				rtex->cmask.offset,
>   				rtex->surface.u.gfx9.cmask_size,
>   				rtex->surface.u.gfx9.cmask_alignment,
>   				rtex->surface.u.gfx9.cmask.rb_aligned,
>   				rtex->surface.u.gfx9.cmask.pipe_aligned);
>   		}
>   
> -		if (rtex->htile_buffer) {
> -			fprintf(f, "  HTile: size=%u, alignment=%u, "
> +		if (rtex->htile_offset) {
> +			fprintf(f, "  HTile: offset=%"PRIu64", size=%"PRIu64", alignment=%u, "
>   				"rb_aligned=%u, pipe_aligned=%u\n",
> -				rtex->htile_buffer->b.b.width0,
> -				rtex->htile_buffer->buf->alignment,
> +				rtex->htile_offset,
> +				rtex->surface.htile_size,
> +				rtex->surface.htile_alignment,
>   				rtex->surface.u.gfx9.htile.rb_aligned,
>   				rtex->surface.u.gfx9.htile.pipe_aligned);
>   		}
>   
>   		if (rtex->dcc_offset) {
>   			fprintf(f, "  DCC: offset=%"PRIu64", size=%"PRIu64", "
>   				"alignment=%u, pitch_max=%u, num_dcc_levels=%u\n",
>   				rtex->dcc_offset, rtex->surface.dcc_size,
>   				rtex->surface.dcc_alignment,
>   				rtex->surface.u.gfx9.dcc_pitch_max,
> @@ -1044,24 +1025,25 @@ void r600_print_texture_info(struct r600_common_screen *rscreen,
>   			rtex->fmask.offset, rtex->fmask.size, rtex->fmask.alignment,
>   			rtex->fmask.pitch_in_pixels, rtex->fmask.bank_height,
>   			rtex->fmask.slice_tile_max, rtex->fmask.tile_mode_index);
>   
>   	if (rtex->cmask.size)
>   		fprintf(f, "  CMask: offset=%"PRIu64", size=%"PRIu64", alignment=%u, "
>   			"slice_tile_max=%u\n",
>   			rtex->cmask.offset, rtex->cmask.size, rtex->cmask.alignment,
>   			rtex->cmask.slice_tile_max);
>   
> -	if (rtex->htile_buffer)
> -		fprintf(f, "  HTile: size=%u, alignment=%u, TC_compatible = %u\n",
> -			rtex->htile_buffer->b.b.width0,
> -			rtex->htile_buffer->buf->alignment,
> +	if (rtex->htile_offset)
> +		fprintf(f, "  HTile: offset=%"PRIu64", size=%"PRIu64", "
> +			"alignment=%u, TC_compatible = %u\n",
> +			rtex->htile_offset, rtex->surface.htile_size,
> +			rtex->surface.htile_alignment,
>   			rtex->tc_compatible_htile);
>   
>   	if (rtex->dcc_offset) {
>   		fprintf(f, "  DCC: offset=%"PRIu64", size=%"PRIu64", alignment=%u\n",
>   			rtex->dcc_offset, rtex->surface.dcc_size,
>   			rtex->surface.dcc_alignment);
>   		for (i = 0; i <= rtex->resource.b.b.last_level; i++)
>   			fprintf(f, "  DCCLevel[%i]: enabled=%u, offset=%"PRIu64", "
>   				"fast_clear_size=%"PRIu64"\n",
>   				i, i < rtex->surface.num_dcc_levels,
> @@ -1235,20 +1217,31 @@ r600_texture_create_object(struct pipe_screen *screen,
>   		else if (resource->domains & RADEON_DOMAIN_GTT)
>   			resource->gart_usage = buf->size;
>   	}
>   
>   	if (rtex->cmask.size) {
>   		/* Initialize the cmask to 0xCC (= compressed state). */
>   		r600_screen_clear_buffer(rscreen, &rtex->cmask_buffer->b.b,
>   					 rtex->cmask.offset, rtex->cmask.size,
>   					 0xCCCCCCCC);
>   	}
> +	if (rtex->htile_offset) {
> +		uint32_t clear_value = 0;
> +
> +		if (rscreen->chip_class >= GFX9 || rtex->tc_compatible_htile)
> +			clear_value = 0x0000030F;
> +
> +		r600_screen_clear_buffer(rscreen, &rtex->resource.b.b,
> +					 rtex->htile_offset,
> +					 rtex->surface.htile_size,
> +					 clear_value);
> +	}
>   
>   	/* Initialize DCC only if the texture is not being imported. */
>   	if (!buf && rtex->dcc_offset) {
>   		r600_screen_clear_buffer(rscreen, &rtex->resource.b.b,
>   					 rtex->dcc_offset,
>   					 rtex->surface.dcc_size,
>   					 0xFFFFFFFF);
>   	}
>   
>   	/* Initialize the CMASK base register value. */
> diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
> index e39ba62..74bc2e9 100644
> --- a/src/gallium/drivers/radeonsi/si_blit.c
> +++ b/src/gallium/drivers/radeonsi/si_blit.c
> @@ -719,21 +719,21 @@ static void si_clear(struct pipe_context *ctx, unsigned buffers,
>   
>   			if (!fb->cbufs[i])
>   				continue;
>   
>   			tex = (struct r600_texture *)fb->cbufs[i]->texture;
>   			if (tex->fmask.size == 0)
>   				tex->dirty_level_mask &= ~(1 << fb->cbufs[i]->u.tex.level);
>   		}
>   	}
>   
> -	if (zstex && zstex->htile_buffer &&
> +	if (zstex && zstex->htile_offset &&
>   	    zsbuf->u.tex.level == 0 &&
>   	    zsbuf->u.tex.first_layer == 0 &&
>   	    zsbuf->u.tex.last_layer == util_max_layer(&zstex->resource.b.b, 0)) {
>   		/* TC-compatible HTILE only supports depth clears to 0 or 1. */
>   		if (buffers & PIPE_CLEAR_DEPTH &&
>   		    (!zstex->tc_compatible_htile ||
>   		     depth == 0 || depth == 1)) {
>   			/* Need to disable EXPCLEAR temporarily if clearing
>   			 * to a new value. */
>   			if (!zstex->depth_cleared || zstex->depth_clear_value != depth) {
> diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
> index 3aa2b9d..0e8606f 100644
> --- a/src/gallium/drivers/radeonsi/si_descriptors.c
> +++ b/src/gallium/drivers/radeonsi/si_descriptors.c
> @@ -330,27 +330,20 @@ static void si_sampler_view_add_buffer(struct si_context *sctx,
>   	if (resource->target == PIPE_BUFFER)
>   		return;
>   
>   	/* Now add separate DCC or HTILE. */
>   	rtex = (struct r600_texture*)resource;
>   	if (rtex->dcc_separate_buffer) {
>   		radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
>   						    rtex->dcc_separate_buffer, usage,
>   						    RADEON_PRIO_DCC, check_mem);
>   	}
> -
> -	if (rtex->htile_buffer &&
> -	    rtex->tc_compatible_htile) {
> -		radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
> -						    rtex->htile_buffer, usage,
> -						    RADEON_PRIO_HTILE, check_mem);
> -	}
>   }
>   
>   static void si_sampler_views_begin_new_cs(struct si_context *sctx,
>   					  struct si_sampler_views *views)
>   {
>   	unsigned mask = views->enabled_mask;
>   
>   	/* Add buffers to the CS. */
>   	while (mask) {
>   		int i = u_bit_scan(&mask);
> @@ -417,21 +410,21 @@ void si_set_mutable_tex_desc_fields(struct si_screen *sscreen,
>   		state[6] &= C_008F28_COMPRESSION_EN;
>   		state[7] = 0;
>   
>   		if (vi_dcc_enabled(tex, first_level)) {
>   			meta_va = (!tex->dcc_separate_buffer ? tex->resource.gpu_address : 0) +
>   				  tex->dcc_offset;
>   
>   			if (sscreen->b.chip_class <= VI)
>   				meta_va += base_level_info->dcc_offset;
>   		} else if (tex->tc_compatible_htile) {
> -			meta_va = tex->htile_buffer->gpu_address;
> +			meta_va = tex->resource.gpu_address + tex->htile_offset;
>   		}
>   
>   		if (meta_va) {
>   			state[6] |= S_008F28_COMPRESSION_EN(1);
>   			state[7] = meta_va >> 8;
>   		}
>   	}
>   
>   	if (sscreen->b.chip_class >= GFX9) {
>   		state[3] &= C_008F1C_SW_MODE;
> diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
> index c7bc7b0..53f66ac 100644
> --- a/src/gallium/drivers/radeonsi/si_state.c
> +++ b/src/gallium/drivers/radeonsi/si_state.c
> @@ -2309,21 +2309,21 @@ static void si_init_depth_surface(struct si_context *sctx,
>   			 S_028038_MAXMIP(rtex->resource.b.b.last_level);
>   		s_info = S_02803C_FORMAT(stencil_format) |
>   			 S_02803C_SW_MODE(rtex->surface.u.gfx9.stencil.swizzle_mode);
>   		surf->db_z_info2 = S_028068_EPITCH(rtex->surface.u.gfx9.surf.epitch);
>   		surf->db_stencil_info2 = S_02806C_EPITCH(rtex->surface.u.gfx9.stencil.epitch);
>   		surf->db_depth_view |= S_028008_MIPID(level);
>   		surf->db_depth_size = S_02801C_X_MAX(rtex->resource.b.b.width0 - 1) |
>   				      S_02801C_Y_MAX(rtex->resource.b.b.height0 - 1);
>   
>   		/* Only use HTILE for the first level. */
> -		if (rtex->htile_buffer && !level) {
> +		if (rtex->htile_offset && !level) {
>   			z_info |= S_028038_TILE_SURFACE_ENABLE(1) |
>   				  S_028038_ALLOW_EXPCLEAR(1);
>   
>   			if (rtex->tc_compatible_htile) {
>   				unsigned max_zplanes = 4;
>   
>   				if (rtex->db_render_format == PIPE_FORMAT_Z16_UNORM &&
>   				    rtex->resource.b.b.nr_samples > 1)
>   					max_zplanes = 2;
>   
> @@ -2335,21 +2335,22 @@ static void si_init_depth_surface(struct si_context *sctx,
>   			if (rtex->surface.flags & RADEON_SURF_SBUFFER) {
>   				/* Stencil buffer workaround ported from the SI-CI-VI code.
>   				 * See that for explanation.
>   				 */
>   				s_info |= S_02803C_ALLOW_EXPCLEAR(rtex->resource.b.b.nr_samples <= 1);
>   			} else {
>   				/* Use all HTILE for depth if there's no stencil. */
>   				s_info |= S_02803C_TILE_STENCIL_DISABLE(1);
>   			}
>   
> -			surf->db_htile_data_base = rtex->htile_buffer->gpu_address >> 8;
> +			surf->db_htile_data_base = (rtex->resource.gpu_address +
> +						    rtex->htile_offset) >> 8;
>   			surf->db_htile_surface = S_028ABC_FULL_CACHE(1) |
>   						 S_028ABC_PIPE_ALIGNED(rtex->surface.u.gfx9.htile.pipe_aligned) |
>   						 S_028ABC_RB_ALIGNED(rtex->surface.u.gfx9.htile.rb_aligned);
>   		}
>   	} else {
>   		/* SI-CI-VI */
>   		struct legacy_surf_level *levelinfo = &rtex->surface.u.legacy.level[level];
>   
>   		assert(levelinfo->nblk_x % 8 == 0 && levelinfo->nblk_y % 8 == 0);
>   
> @@ -2387,21 +2388,21 @@ static void si_init_depth_surface(struct si_context *sctx,
>   			tile_mode_index = si_tile_mode_index(rtex, level, true);
>   			s_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
>   		}
>   
>   		surf->db_depth_size = S_028058_PITCH_TILE_MAX((levelinfo->nblk_x / 8) - 1) |
>   				      S_028058_HEIGHT_TILE_MAX((levelinfo->nblk_y / 8) - 1);
>   		surf->db_depth_slice = S_02805C_SLICE_TILE_MAX((levelinfo->nblk_x *
>   								levelinfo->nblk_y) / 64 - 1);
>   
>   		/* Only use HTILE for the first level. */
> -		if (rtex->htile_buffer && !level) {
> +		if (rtex->htile_offset && !level) {
>   			z_info |= S_028040_TILE_SURFACE_ENABLE(1) |
>   				  S_028040_ALLOW_EXPCLEAR(1);
>   
>   			if (rtex->surface.flags & RADEON_SURF_SBUFFER) {
>   				/* Workaround: For a not yet understood reason, the
>   				 * combination of MSAA, fast stencil clear and stencil
>   				 * decompress messes with subsequent stencil buffer
>   				 * uses. Problem was reproduced on Verde, Bonaire,
>   				 * Tonga, and Carrizo.
>   				 *
> @@ -2413,21 +2414,22 @@ static void si_init_depth_surface(struct si_context *sctx,
>   				if (rtex->resource.b.b.nr_samples <= 1)
>   					s_info |= S_028044_ALLOW_EXPCLEAR(1);
>   			} else if (!rtex->tc_compatible_htile) {
>   				/* Use all of the htile_buffer for depth if there's no stencil.
>   				 * This must not be set when TC-compatible HTILE is enabled
>   				 * due to a hw bug.
>   				 */
>   				s_info |= S_028044_TILE_STENCIL_DISABLE(1);
>   			}
>   
> -			surf->db_htile_data_base = rtex->htile_buffer->gpu_address >> 8;
> +			surf->db_htile_data_base = (rtex->resource.gpu_address +
> +						    rtex->htile_offset) >> 8;
>   			surf->db_htile_surface = S_028ABC_FULL_CACHE(1);
>   
>   			if (rtex->tc_compatible_htile) {
>   				surf->db_htile_surface |= S_028ABC_TC_COMPATIBLE(1);
>   
>   				if (rtex->resource.b.b.nr_samples <= 1)
>   					z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(5);
>   				else if (rtex->resource.b.b.nr_samples <= 4)
>   					z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(3);
>   				else
> @@ -2808,26 +2810,20 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
>   	if (state->zsbuf && sctx->framebuffer.dirty_zsbuf) {
>   		struct r600_surface *zb = (struct r600_surface*)state->zsbuf;
>   		struct r600_texture *rtex = (struct r600_texture*)zb->base.texture;
>   
>   		radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
>   				      &rtex->resource, RADEON_USAGE_READWRITE,
>   				      zb->base.texture->nr_samples > 1 ?
>   					      RADEON_PRIO_DEPTH_BUFFER_MSAA :
>   					      RADEON_PRIO_DEPTH_BUFFER);
>   
> -		if (zb->db_htile_data_base) {
> -			radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
> -					      rtex->htile_buffer, RADEON_USAGE_READWRITE,
> -					      RADEON_PRIO_HTILE);
> -		}
> -
>   		if (sctx->b.chip_class >= GFX9) {
>   			radeon_set_context_reg_seq(cs, R_028014_DB_HTILE_DATA_BASE, 3);
>   			radeon_emit(cs, zb->db_htile_data_base);	/* DB_HTILE_DATA_BASE */
>   			radeon_emit(cs, zb->db_htile_data_base >> 32);	/* DB_HTILE_DATA_BASE_HI */
>   			radeon_emit(cs, zb->db_depth_size);		/* DB_DEPTH_SIZE */
>   
>   			radeon_set_context_reg_seq(cs, R_028038_DB_Z_INFO, 10);
>   			radeon_emit(cs, zb->db_z_info |			/* DB_Z_INFO */
>   				    S_028038_ZRANGE_PRECISION(rtex->depth_clear_value != 0));
>   			radeon_emit(cs, zb->db_stencil_info);		/* DB_STENCIL_INFO */
> 


More information about the mesa-dev mailing list