[Mesa-dev] [PATCH 7/9] radeonsi: add EQAA SC, DB, CB register programming

Marek Olšák maraeo at gmail.com
Wed May 2 04:13:23 UTC 2018


From: Marek Olšák <marek.olsak at amd.com>

---
 src/gallium/drivers/radeonsi/si_pipe.h  |  5 +-
 src/gallium/drivers/radeonsi/si_state.c | 74 +++++++++++++++++++++++--
 2 files changed, 71 insertions(+), 8 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 241385baed7..55a135f3870 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -584,20 +584,21 @@ struct si_images {
 
 struct si_framebuffer {
 	struct pipe_framebuffer_state	state;
 	unsigned			colorbuf_enabled_4bit;
 	unsigned			spi_shader_col_format;
 	unsigned			spi_shader_col_format_alpha;
 	unsigned			spi_shader_col_format_blend;
 	unsigned			spi_shader_col_format_blend_alpha;
 	ubyte				nr_samples:5; /* at most 16xAA */
 	ubyte				log_samples:3; /* at most 4 = 16xAA */
+	ubyte				nr_color_samples; /* at most 8xAA */
 	ubyte				compressed_cb_mask;
 	ubyte				uncompressed_cb_mask;
 	ubyte				color_is_int8;
 	ubyte				color_is_int10;
 	ubyte				dirty_cbufs;
 	bool				dirty_zsbuf;
 	bool				any_dst_linear;
 	bool				CB_has_shader_readable_metadata;
 	bool				DB_has_shader_readable_metadata;
 };
@@ -1467,23 +1468,23 @@ si_htile_enabled(struct r600_texture *tex, unsigned level)
 static inline bool
 vi_tc_compat_htile_enabled(struct r600_texture *tex, unsigned level)
 {
 	assert(!tex->tc_compatible_htile || tex->htile_offset);
 	return tex->tc_compatible_htile && level == 0;
 }
 
 static inline unsigned si_get_ps_iter_samples(struct si_context *sctx)
 {
 	if (sctx->ps_uses_fbfetch)
-		return sctx->framebuffer.nr_samples;
+		return sctx->framebuffer.nr_color_samples;
 
-	return sctx->ps_iter_samples;
+	return MIN2(sctx->ps_iter_samples, sctx->framebuffer.nr_color_samples);
 }
 
 static inline unsigned si_get_total_colormask(struct si_context *sctx)
 {
 	if (sctx->queued.named.rasterizer->rasterizer_discard)
 		return 0;
 
 	struct si_shader_selector *ps = sctx->ps_shader.cso;
 	if (!ps)
 		return 0;
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 3f9332081bf..fce796f7543 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -2403,23 +2403,24 @@ static void si_initialize_color_surface(struct si_context *sctx,
 				    format != V_028C70_COLOR_24_8) |
 		S_028C70_NUMBER_TYPE(ntype) |
 		S_028C70_ENDIAN(endian);
 
 	/* Intensity is implemented as Red, so treat it that way. */
 	color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == PIPE_SWIZZLE_1 ||
 						  util_format_is_intensity(surf->base.format));
 
 	if (rtex->buffer.b.b.nr_samples > 1) {
 		unsigned log_samples = util_logbase2(rtex->buffer.b.b.nr_samples);
+		unsigned log_fragments = util_logbase2(rtex->num_color_samples);
 
 		color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
-				S_028C74_NUM_FRAGMENTS(log_samples);
+				S_028C74_NUM_FRAGMENTS(log_fragments);
 
 		if (rtex->surface.fmask_size) {
 			color_info |= S_028C70_COMPRESSION(1);
 			unsigned fmask_bankh = util_logbase2(rtex->surface.u.legacy.fmask.bankh);
 
 			if (sctx->chip_class == SI) {
 				/* due to a hw bug, FMASK_BANK_HEIGHT must be set on SI too */
 				color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
 			}
 		}
@@ -2429,21 +2430,21 @@ static void si_initialize_color_surface(struct si_context *sctx,
 		unsigned max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_256B;
 		unsigned min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_32B;
 
 		/* amdvlk: [min-compressed-block-size] should be set to 32 for dGPU and
 		   64 for APU because all of our APUs to date use DIMMs which have
 		   a request granularity size of 64B while all other chips have a
 		   32B request size */
 		if (!sctx->screen->info.has_dedicated_vram)
 			min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_64B;
 
-		if (rtex->buffer.b.b.nr_samples > 1) {
+		if (rtex->num_color_samples > 1) {
 			if (rtex->surface.bpe == 1)
 				max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
 			else if (rtex->surface.bpe == 2)
 				max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_128B;
 		}
 
 		surf->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
 				       S_028C78_MIN_COMPRESSED_BLOCK_SIZE(min_compressed_block_size) |
 		                       S_028C78_INDEPENDENT_64B_BLOCKS(1);
 	}
@@ -2620,20 +2621,21 @@ static void si_init_depth_surface(struct si_context *sctx,
 				s_info |= S_028044_TILE_STENCIL_DISABLE(1);
 			}
 
 			surf->db_htile_data_base = (rtex->buffer.gpu_address +
 						    rtex->htile_offset) >> 8;
 			surf->db_htile_surface = S_028ABC_FULL_CACHE(1);
 
 			if (rtex->tc_compatible_htile) {
 				surf->db_htile_surface |= S_028ABC_TC_COMPATIBLE(1);
 
+				/* 0 = full compression. N = only compress up to N-1 Z planes. */
 				if (rtex->buffer.b.b.nr_samples <= 1)
 					z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(5);
 				else if (rtex->buffer.b.b.nr_samples <= 4)
 					z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(3);
 				else
 					z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(2);
 			}
 		}
 	}
 
@@ -2798,20 +2800,21 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
 	sctx->framebuffer.spi_shader_col_format = 0;
 	sctx->framebuffer.spi_shader_col_format_alpha = 0;
 	sctx->framebuffer.spi_shader_col_format_blend = 0;
 	sctx->framebuffer.spi_shader_col_format_blend_alpha = 0;
 	sctx->framebuffer.color_is_int8 = 0;
 	sctx->framebuffer.color_is_int10 = 0;
 
 	sctx->framebuffer.compressed_cb_mask = 0;
 	sctx->framebuffer.uncompressed_cb_mask = 0;
 	sctx->framebuffer.nr_samples = util_framebuffer_get_num_samples(state);
+	sctx->framebuffer.nr_color_samples = sctx->framebuffer.nr_samples;
 	sctx->framebuffer.log_samples = util_logbase2(sctx->framebuffer.nr_samples);
 	sctx->framebuffer.any_dst_linear = false;
 	sctx->framebuffer.CB_has_shader_readable_metadata = false;
 	sctx->framebuffer.DB_has_shader_readable_metadata = false;
 
 	for (i = 0; i < state->nr_cbufs; i++) {
 		if (!state->cbufs[i])
 			continue;
 
 		surf = (struct r600_surface*)state->cbufs[i];
@@ -2834,20 +2837,30 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
 		if (surf->color_is_int8)
 			sctx->framebuffer.color_is_int8 |= 1 << i;
 		if (surf->color_is_int10)
 			sctx->framebuffer.color_is_int10 |= 1 << i;
 
 		if (rtex->surface.fmask_size)
 			sctx->framebuffer.compressed_cb_mask |= 1 << i;
 		else
 			sctx->framebuffer.uncompressed_cb_mask |= 1 << i;
 
+		/* Don't update nr_color_samples for non-AA buffers
+		 * (e.g. the destination of an MSAA resolve).
+		 */
+		if (rtex->buffer.b.b.nr_samples >= 2 &&
+		    rtex->num_color_samples < rtex->buffer.b.b.nr_samples) {
+			sctx->framebuffer.nr_color_samples =
+				MIN2(sctx->framebuffer.nr_color_samples,
+				     rtex->num_color_samples);
+		}
+
 		if (rtex->surface.is_linear)
 			sctx->framebuffer.any_dst_linear = true;
 
 		if (vi_dcc_enabled(rtex, surf->base.u.tex.level))
 			sctx->framebuffer.CB_has_shader_readable_metadata = true;
 
 		si_context_add_resource_size(sctx, surf->base.texture);
 
 		p_atomic_inc(&rtex->framebuffers_bound);
 
@@ -3318,44 +3331,93 @@ static void si_emit_msaa_config(struct si_context *sctx)
 		S_028A4C_WALK_ALIGN8_PRIM_FITS_ST(1) |
 		S_028A4C_SUPERTILE_WALK_ORDER_ENABLE(1) |
 		S_028A4C_TILE_WALK_ORDER_ENABLE(1) |
 		S_028A4C_MULTI_SHADER_ENGINE_PRIM_DISCARD_ENABLE(1) |
 		S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) |
 		S_028A4C_FORCE_EOV_REZ_ENABLE(1);
 	unsigned db_eqaa = S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
 			   S_028804_INCOHERENT_EQAA_READS(1) |
 			   S_028804_INTERPOLATE_COMP_Z(1) |
 			   S_028804_STATIC_ANCHOR_ASSOCIATIONS(1);
+	unsigned coverage_samples, color_samples;
 
-	int setup_samples = sctx->framebuffer.nr_samples > 1 ? sctx->framebuffer.nr_samples :
-			    sctx->smoothing_enabled ? SI_NUM_SMOOTH_AA_SAMPLES : 0;
+	/* S: Coverage samples (up to 16x):
+	 * - Scan conversion samples (PA_SC_AA_CONFIG.MSAA_NUM_SAMPLES)
+	 * - CB FMASK samples (CB_COLORi_ATTRIB.NUM_SAMPLES)
+	 *
+	 * Z: Z/S samples (up to 8x, must be <= coverage samples and >= color samples):
+	 * - Value seen by DB (DB_Z_INFO.NUM_SAMPLES)
+	 * - Value seen by CB, must be correct even if Z/S is unbound (DB_EQAA.MAX_ANCHOR_SAMPLES)
+	 * # Missing samples are derived from Z planes if Z is compressed (up to 16x quality), or
+	 * # from the closest defined sample if Z is uncompressed (same quality as the number of
+	 * # Z samples).
+	 *
+	 * F: Color samples (up to 8x, must be <= coverage samples):
+	 * - CB color samples (CB_COLORi_ATTRIB.NUM_FRAGMENTS)
+	 * - PS iter samples (DB_EQAA.PS_ITER_SAMPLES)
+	 *
+	 * Can be anything between coverage and color samples:
+	 * - SampleMaskIn samples (PA_SC_AA_CONFIG.MSAA_EXPOSED_SAMPLES)
+	 * - SampleMaskOut samples (DB_EQAA.MASK_EXPORT_NUM_SAMPLES)
+	 * - Alpha-to-coverage samples (DB_EQAA.ALPHA_TO_MASK_NUM_SAMPLES)
+	 * - Occlusion query samples (DB_COUNT_CONTROL.SAMPLE_RATE)
+	 * # All are currently set the same as coverage samples.
+	 *
+	 * If color samples < coverage samples, FMASK has a higher bpp to store an "unknown"
+	 * flag for undefined color samples. A shader-based resolve must handle unknowns
+	 * or mask them out with AND. Unknowns can also be guessed from neighbors via
+	 * an edge-detect shader-based resolve, which is required to make "color samples = 1"
+	 * useful. The CB resolve always drops unknowns.
+	 *
+	 * Sensible AA configurations:
+	 *   EQAA 16s 8z 8f - might look the same as 16x MSAA if Z is compressed
+	 *   EQAA 16s 8z 4f - might look the same as 16x MSAA if Z is compressed
+	 *   EQAA 16s 4z 4f - might look the same as 16x MSAA if Z is compressed
+	 *   EQAA  8s 8z 8f = 8x MSAA
+	 *   EQAA  8s 8z 4f - might look the same as 8x MSAA
+	 *   EQAA  8s 8z 2f - might look the same as 8x MSAA with low-density geometry
+	 *   EQAA  8s 4z 4f - might look the same as 8x MSAA if Z is compressed
+	 *   EQAA  8s 4z 2f - might look the same as 8x MSAA with low-density geometry if Z is compressed
+	 *   EQAA  4s 4z 4f = 4x MSAA
+	 *   EQAA  4s 4z 2f - might look the same as 4x MSAA with low-density geometry
+	 *   EQAA  2s 2z 2f = 2x MSAA
+	 */
+	if (sctx->framebuffer.nr_samples > 1) {
+		coverage_samples = sctx->framebuffer.nr_samples;
+		color_samples = sctx->framebuffer.nr_color_samples;
+	} else if (sctx->smoothing_enabled) {
+		coverage_samples = color_samples = SI_NUM_SMOOTH_AA_SAMPLES;
+	} else {
+		coverage_samples = color_samples = 1;
+	}
 
 	/* Required by OpenGL line rasterization.
 	 *
 	 * TODO: We should also enable perpendicular endcaps for AA lines,
 	 *       but that requires implementing line stippling in the pixel
 	 *       shader. SC can only do line stippling with axis-aligned
 	 *       endcaps.
 	 */
 	unsigned sc_line_cntl = S_028BDC_DX10_DIAMOND_TEST_ENA(1);
 
-	if (setup_samples > 1) {
+	if (coverage_samples > 1) {
 		/* distance from the pixel center, indexed by log2(nr_samples) */
 		static unsigned max_dist[] = {
 			0, /* unused */
 			4, /* 2x MSAA */
 			6, /* 4x MSAA */
 			7, /* 8x MSAA */
 			8, /* 16x MSAA */
 		};
-		unsigned log_samples = util_logbase2(setup_samples);
+		unsigned log_samples = util_logbase2(coverage_samples);
 		unsigned ps_iter_samples = si_get_ps_iter_samples(sctx);
+		ps_iter_samples = MIN2(ps_iter_samples, color_samples);
 		unsigned log_ps_iter_samples =
 			util_logbase2(util_next_power_of_two(ps_iter_samples));
 
 		radeon_set_context_reg_seq(cs, R_028BDC_PA_SC_LINE_CNTL, 2);
 		radeon_emit(cs, sc_line_cntl |
 			    S_028BDC_EXPAND_LINE_WIDTH(1)); /* R_028BDC_PA_SC_LINE_CNTL */
 		radeon_emit(cs, S_028BE0_MSAA_NUM_SAMPLES(log_samples) |
 			    S_028BE0_MAX_SAMPLE_DIST(max_dist[log_samples]) |
 			    S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples)); /* R_028BE0_PA_SC_AA_CONFIG */
 
-- 
2.17.0



More information about the mesa-dev mailing list