[Mesa-dev] [PATCH 08/20] radeonsi: implement uncompressed MSAA rendering and color resolving

Marek Olšák maraeo at gmail.com
Wed Aug 7 17:20:48 PDT 2013


This is basic MSAA support which should work with most apps.
Some features are missing; those will be implemented in other commits.
---
 src/gallium/drivers/radeonsi/r600_blit.c     | 132 +++++++++++-
 src/gallium/drivers/radeonsi/r600_resource.h |   1 +
 src/gallium/drivers/radeonsi/r600_texture.c  |   7 +-
 src/gallium/drivers/radeonsi/radeonsi_pipe.c |   1 +
 src/gallium/drivers/radeonsi/radeonsi_pipe.h |   2 +
 src/gallium/drivers/radeonsi/si_state.c      | 289 +++++++++++++++++++++++++--
 src/gallium/drivers/radeonsi/si_state.h      |   1 +
 src/gallium/drivers/radeonsi/si_state_draw.c |   3 +-
 src/gallium/drivers/radeonsi/sid.h           |  10 +
 9 files changed, 423 insertions(+), 23 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/r600_blit.c b/src/gallium/drivers/radeonsi/r600_blit.c
index bab108e..3f9a184 100644
--- a/src/gallium/drivers/radeonsi/r600_blit.c
+++ b/src/gallium/drivers/radeonsi/r600_blit.c
@@ -43,6 +43,8 @@ enum r600_blitter_op /* bitmask */
 			     R600_DISABLE_RENDER_COND,
 
 	R600_DECOMPRESS    = R600_SAVE_FRAMEBUFFER | R600_DISABLE_RENDER_COND,
+
+	R600_COLOR_RESOLVE = R600_SAVE_FRAMEBUFFER | R600_DISABLE_RENDER_COND
 };
 
 static void r600_blitter_begin(struct pipe_context *ctx, enum r600_blitter_op op)
@@ -463,22 +465,146 @@ static void r600_resource_copy_region(struct pipe_context *ctx,
 		r600_reset_blittable_to_orig(dst, dst_level, &orig_info[1]);
 }
 
+static boolean is_simple_msaa_resolve(const struct pipe_blit_info *info)
+{
+	/* A "simple" resolve covers whole, same-format surfaces and writes a tiled dst. */
+	const struct pipe_resource *src_res = info->src.resource;
+	const struct pipe_resource *dst_res = info->dst.resource;
+	const struct r600_texture *rdst = (const struct r600_texture*)dst_res;
+	unsigned level = info->dst.level;
+	unsigned w = u_minify(dst_res->width0, level);
+	unsigned h = u_minify(dst_res->height0, level);
+
+	/* Formats must agree everywhere, and nothing may mask the output. */
+	if (dst_res->format != src_res->format || dst_res->format != info->dst.format ||
+	    src_res->format != info->src.format || info->scissor_enable ||
+	    info->mask != PIPE_MASK_RGBA)
+		return FALSE;
+	/* Both boxes must span the full (identically sized) surfaces from the origin. */
+	if (w != src_res->width0 || h != src_res->height0 ||
+	    info->dst.box.x != 0 || info->dst.box.y != 0 ||
+	    info->dst.box.width != w || info->dst.box.height != h ||
+	    info->src.box.x != 0 || info->src.box.y != 0 ||
+	    info->src.box.width != w || info->src.box.height != h)
+		return FALSE;
+	/* Dst must be tiled. If it's not, we have to use a temporary
+	 * resource which is tiled. */
+	return rdst->surface.level[level].mode >= RADEON_SURF_MODE_1D;
+}
+
+/* For MSAA integer resolving to work, map each listed *INT format to the matching *NORM format; any other format is returned unchanged. */
+static enum pipe_format int_to_norm_format(enum pipe_format format)
+{
+	switch (format) {
+#define REPLACE_FORMAT_SIGN(format,sign) \
+	case PIPE_FORMAT_##format##_##sign##INT: \
+		return PIPE_FORMAT_##format##_##sign##NORM
+#define REPLACE_FORMAT(format) \
+		REPLACE_FORMAT_SIGN(format, U); \
+		REPLACE_FORMAT_SIGN(format, S)
+	/* REPLACE_FORMAT emits both the UINT and SINT cases; B10G10R10A2 is mapped for UINT only. */
+	REPLACE_FORMAT_SIGN(B10G10R10A2, U);
+	REPLACE_FORMAT(R8);
+	REPLACE_FORMAT(R8G8);
+	REPLACE_FORMAT(R8G8B8X8);
+	REPLACE_FORMAT(R8G8B8A8);
+	REPLACE_FORMAT(A8);
+	REPLACE_FORMAT(I8);
+	REPLACE_FORMAT(L8);
+	REPLACE_FORMAT(L8A8);
+	REPLACE_FORMAT(R16);
+	REPLACE_FORMAT(R16G16);
+	REPLACE_FORMAT(R16G16B16X16);
+	REPLACE_FORMAT(R16G16B16A16);
+	REPLACE_FORMAT(A16);
+	REPLACE_FORMAT(I16);
+	REPLACE_FORMAT(L16);
+	REPLACE_FORMAT(L16A16);
+
+#undef REPLACE_FORMAT
+#undef REPLACE_FORMAT_SIGN
+	default:
+		return format;
+	}
+}
+
+/* Resolve an MSAA color source into a single-sample destination: directly when
+ * is_simple_msaa_resolve() allows it, otherwise via a tiled temporary plus a blit. */
+static void si_msaa_color_resolve(struct pipe_context *ctx,
+				  const struct pipe_blit_info *info)
+{
+	struct r600_context *rctx = (struct r600_context *)ctx;
+	struct pipe_screen *screen = ctx->screen;
+	struct pipe_resource *tmp, templ;
+	struct pipe_blit_info blit;
+	unsigned sample_mask = ~0;
+
+	assert(info->src.level == 0);
+	assert(info->src.box.depth == 1);
+	assert(info->dst.box.depth == 1);
+
+	if (is_simple_msaa_resolve(info)) {
+		r600_blitter_begin(ctx, R600_COLOR_RESOLVE);
+		util_blitter_custom_resolve_color(rctx->blitter,
+						  info->dst.resource, info->dst.level,
+						  info->dst.box.z,
+						  info->src.resource, info->src.box.z,
+						  sample_mask, rctx->custom_blend_resolve,
+						  int_to_norm_format(info->dst.format));
+		r600_blitter_end(ctx);
+		return;
+	}
+
+	/* resolve into a temporary texture, then blit */
+	templ.target = PIPE_TEXTURE_2D;
+	templ.format = info->src.resource->format;
+	templ.width0 = info->src.resource->width0;
+	templ.height0 = info->src.resource->height0;
+	templ.depth0 = 1;
+	templ.array_size = 1;
+	templ.last_level = 0;
+	templ.nr_samples = 0;
+	templ.usage = PIPE_USAGE_STATIC;
+	templ.bind = PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW;
+	templ.flags = R600_RESOURCE_FLAG_FORCE_TILING; /* dst must not have a linear layout */
+	tmp = screen->resource_create(screen, &templ);
+	if (!tmp)
+		return; /* allocation failed: drop the blit instead of dereferencing NULL below */
+
+	/* resolve */
+	r600_blitter_begin(ctx, R600_COLOR_RESOLVE);
+	util_blitter_custom_resolve_color(rctx->blitter, tmp, 0, 0,
+					  info->src.resource, info->src.box.z,
+					  sample_mask, rctx->custom_blend_resolve,
+					  int_to_norm_format(tmp->format));
+	r600_blitter_end(ctx);
+
+	/* blit */
+	blit = *info;
+	blit.src.resource = tmp;
+	blit.src.box.z = 0;
+	r600_blitter_begin(ctx, R600_BLIT);
+	util_blitter_blit(rctx->blitter, &blit);
+	r600_blitter_end(ctx);
+	pipe_resource_reference(&tmp, NULL);
+}
+
 static void si_blit(struct pipe_context *ctx,
                       const struct pipe_blit_info *info)
 {
 	struct r600_context *rctx = (struct r600_context*)ctx;
 	struct r600_texture *rsrc = (struct r600_texture*)info->src.resource;
 
-	assert(util_blitter_is_blit_supported(rctx->blitter, info));
-
 	if (info->src.resource->nr_samples > 1 &&
 	    info->dst.resource->nr_samples <= 1 &&
 	    !util_format_is_depth_or_stencil(info->src.resource->format) &&
 	    !util_format_is_pure_integer(info->src.resource->format)) {
-		debug_printf("radeonsi: color resolve is unimplemented\n");
+		si_msaa_color_resolve(ctx, info);
 		return;
 	}
 
+	assert(util_blitter_is_blit_supported(rctx->blitter, info));
+
 	if (rsrc->is_depth && !rsrc->is_flushing_texture) {
 		si_blit_decompress_depth_in_place(rctx, rsrc,
 						  info->src.level, info->src.level,
diff --git a/src/gallium/drivers/radeonsi/r600_resource.h b/src/gallium/drivers/radeonsi/r600_resource.h
index 24db2a9..ca8121f 100644
--- a/src/gallium/drivers/radeonsi/r600_resource.h
+++ b/src/gallium/drivers/radeonsi/r600_resource.h
@@ -28,6 +28,7 @@
 /* flag to indicate a resource is to be used as a transfer so should not be tiled */
 #define R600_RESOURCE_FLAG_TRANSFER		PIPE_RESOURCE_FLAG_DRV_PRIV
 #define R600_RESOURCE_FLAG_FLUSHED_DEPTH	(PIPE_RESOURCE_FLAG_DRV_PRIV << 1)
+#define R600_RESOURCE_FLAG_FORCE_TILING		(PIPE_RESOURCE_FLAG_DRV_PRIV << 2)
 
 /* Texture transfer. */
 struct r600_transfer {
diff --git a/src/gallium/drivers/radeonsi/r600_texture.c b/src/gallium/drivers/radeonsi/r600_texture.c
index 62a7518..ed21bd1 100644
--- a/src/gallium/drivers/radeonsi/r600_texture.c
+++ b/src/gallium/drivers/radeonsi/r600_texture.c
@@ -102,8 +102,9 @@ static int r600_init_surface(struct r600_screen *rscreen,
 		}
 	}
 
-	surface->nsamples = 1;
+	surface->nsamples = ptex->nr_samples ? ptex->nr_samples : 1;
 	surface->flags = 0;
+
 	switch (array_mode) {
 	case V_009910_ARRAY_1D_TILED_THIN1:
 		surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_1D, MODE);
@@ -529,7 +530,9 @@ struct pipe_resource *si_texture_create(struct pipe_screen *screen,
 
 	if (!(templ->flags & R600_RESOURCE_FLAG_TRANSFER) &&
 	    !(templ->bind & PIPE_BIND_SCANOUT)) {
-		if (util_format_is_compressed(templ->format)) {
+		if (templ->flags & R600_RESOURCE_FLAG_FORCE_TILING) {
+			array_mode = V_009910_ARRAY_2D_TILED_THIN1;
+		} else if (util_format_is_compressed(templ->format)) {
 			array_mode = V_009910_ARRAY_1D_TILED_THIN1;
 		} else {
 			if (rscreen->chip_class >= CIK)
diff --git a/src/gallium/drivers/radeonsi/radeonsi_pipe.c b/src/gallium/drivers/radeonsi/radeonsi_pipe.c
index 3ebed98..18aebd2 100644
--- a/src/gallium/drivers/radeonsi/radeonsi_pipe.c
+++ b/src/gallium/drivers/radeonsi/radeonsi_pipe.c
@@ -187,6 +187,7 @@ static void r600_destroy_context(struct pipe_context *context)
 	rctx->context.delete_depth_stencil_alpha_state(&rctx->context, rctx->custom_dsa_flush_depth);
 	rctx->context.delete_depth_stencil_alpha_state(&rctx->context, rctx->custom_dsa_flush_stencil);
 	rctx->context.delete_depth_stencil_alpha_state(&rctx->context, rctx->custom_dsa_flush_inplace);
+	rctx->context.delete_blend_state(&rctx->context, rctx->custom_blend_resolve);
 	util_unreference_framebuffer_state(&rctx->framebuffer);
 
 	util_blitter_destroy(rctx->blitter);
diff --git a/src/gallium/drivers/radeonsi/radeonsi_pipe.h b/src/gallium/drivers/radeonsi/radeonsi_pipe.h
index 6fbe653..fc02e38 100644
--- a/src/gallium/drivers/radeonsi/radeonsi_pipe.h
+++ b/src/gallium/drivers/radeonsi/radeonsi_pipe.h
@@ -140,10 +140,12 @@ struct r600_context {
 	void				*custom_dsa_flush_depth;
 	void				*custom_dsa_flush_stencil;
 	void				*custom_dsa_flush_inplace;
+	void				*custom_blend_resolve;
 	struct r600_screen		*screen;
 	struct radeon_winsys		*ws;
 	struct si_vertex_element	*vertex_elements;
 	struct pipe_framebuffer_state	framebuffer;
+	unsigned			fb_log_samples;
 	unsigned			pa_sc_line_stipple;
 	unsigned			pa_su_sc_mode_cntl;
 	/* for saving when using blitter */
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 1dd51a8..4036d07 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -243,8 +243,9 @@ static uint32_t si_translate_blend_factor(int blend_fact)
 	return 0;
 }
 
-static void *si_create_blend_state(struct pipe_context *ctx,
-				   const struct pipe_blend_state *state)
+static void *si_create_blend_state_mode(struct pipe_context *ctx,
+					const struct pipe_blend_state *state,
+					unsigned mode)
 {
 	struct si_state_blend *blend = CALLOC_STRUCT(si_state_blend);
 	struct si_pm4_state *pm4 = &blend->pm4;
@@ -254,7 +255,7 @@ static void *si_create_blend_state(struct pipe_context *ctx,
 	if (blend == NULL)
 		return NULL;
 
-	color_control = S_028808_MODE(V_028808_CB_NORMAL);
+	color_control = S_028808_MODE(mode);
 	if (state->logicop_enable) {
 		color_control |= S_028808_ROP3(state->logicop_func | (state->logicop_func << 4));
 	} else {
@@ -262,8 +263,12 @@ static void *si_create_blend_state(struct pipe_context *ctx,
 	}
 	si_pm4_set_reg(pm4, R_028808_CB_COLOR_CONTROL, color_control);
 
-	si_pm4_set_reg(pm4, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, ~0);
-	si_pm4_set_reg(pm4, R_028C3C_PA_SC_AA_MASK_X0Y1_X1Y1, ~0);
+	si_pm4_set_reg(pm4, R_028B70_DB_ALPHA_TO_MASK,
+		       S_028B70_ALPHA_TO_MASK_ENABLE(state->alpha_to_coverage) |
+		       S_028B70_ALPHA_TO_MASK_OFFSET0(2) |
+		       S_028B70_ALPHA_TO_MASK_OFFSET1(2) |
+		       S_028B70_ALPHA_TO_MASK_OFFSET2(2) |
+		       S_028B70_ALPHA_TO_MASK_OFFSET3(2));
 
 	blend->cb_target_mask = 0;
 	for (int i = 0; i < 8; i++) {
@@ -304,6 +309,12 @@ static void *si_create_blend_state(struct pipe_context *ctx,
 	return blend;
 }
 
+static void *si_create_blend_state(struct pipe_context *ctx,
+				   const struct pipe_blend_state *state)
+{
+	return si_create_blend_state_mode(ctx, state, V_028808_CB_NORMAL); /* regular blend states use the normal CB mode */
+}
+
 static void si_bind_blend_state(struct pipe_context *ctx, void *state)
 {
 	struct r600_context *rctx = (struct r600_context *)ctx;
@@ -579,11 +590,12 @@ static void *si_create_rs_state(struct pipe_context *ctx,
 	tmp = (unsigned)state->line_width * 8;
 	si_pm4_set_reg(pm4, R_028A08_PA_SU_LINE_CNTL, S_028A08_WIDTH(tmp));
 	si_pm4_set_reg(pm4, R_028A48_PA_SC_MODE_CNTL_0,
-			S_028A48_LINE_STIPPLE_ENABLE(state->line_stipple_enable));
+		       S_028A48_LINE_STIPPLE_ENABLE(state->line_stipple_enable) |
+		       S_028A48_MSAA_ENABLE(state->multisample));
 
-	si_pm4_set_reg(pm4, R_028BDC_PA_SC_LINE_CNTL, 0x00000400);
 	si_pm4_set_reg(pm4, R_028BE4_PA_SU_VTX_CNTL,
-			S_028BE4_PIX_CENTER(state->half_pixel_center));
+		       S_028BE4_PIX_CENTER(state->half_pixel_center) |
+		       S_028BE4_QUANT_MODE(V_028BE4_X_16_8_FIXED_POINT_1_256TH));
 	si_pm4_set_reg(pm4, R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, 0x3F800000);
 	si_pm4_set_reg(pm4, R_028BEC_PA_CL_GB_VERT_DISC_ADJ, 0x3F800000);
 	si_pm4_set_reg(pm4, R_028BF0_PA_CL_GB_HORZ_CLIP_ADJ, 0x3F800000);
@@ -749,7 +761,6 @@ static void *si_create_dsa_state(struct pipe_context *ctx,
 	si_pm4_set_reg(pm4, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0);
 	si_pm4_set_reg(pm4, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0);
 	si_pm4_set_reg(pm4, R_028AC8_DB_PRELOAD_CONTROL, 0x0);
-	si_pm4_set_reg(pm4, R_028B70_DB_ALPHA_TO_MASK, 0x0000AA00);
 	dsa->db_render_override = db_render_override;
 
 	return dsa;
@@ -1660,6 +1671,7 @@ boolean si_is_format_supported(struct pipe_screen *screen,
                                unsigned sample_count,
                                unsigned usage)
 {
+	struct r600_screen *rscreen = (struct r600_screen *)screen;
 	unsigned retval = 0;
 
 	if (target >= PIPE_MAX_TEXTURE_TYPES) {
@@ -1670,9 +1682,19 @@ boolean si_is_format_supported(struct pipe_screen *screen,
 	if (!util_format_is_supported(format, usage))
 		return FALSE;
 
-	/* Multisample */
-	if (sample_count > 1)
-		return FALSE;
+	if (sample_count > 1) {
+		if (rscreen->chip_class >= CIK)
+			return FALSE;
+
+		switch (sample_count) {
+		case 2:
+		case 4:
+		case 8:
+			break;
+		default:
+			return FALSE;
+		}
+	}
 
 	if ((usage & PIPE_BIND_SAMPLER_VIEW) &&
 	    si_is_sampler_format_supported(screen, format)) {
@@ -1823,6 +1845,12 @@ static void si_cb(struct r600_context *rctx, struct si_pm4_state *pm4,
 	color_attrib = S_028C74_TILE_MODE_INDEX(tile_mode_index) |
 		S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == UTIL_FORMAT_SWIZZLE_1);
 
+	if (rtex->resource.b.b.nr_samples > 1) {
+		unsigned log_samples = util_logbase2(rtex->resource.b.b.nr_samples);
+		color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
+				S_028C74_NUM_FRAGMENTS(log_samples);
+	}
+
 	offset += r600_resource_va(rctx->context.screen, state->cbufs[cb]->texture);
 	offset >>= 8;
 
@@ -1905,6 +1933,10 @@ static void si_db(struct r600_context *rctx, struct si_pm4_state *pm4,
 	db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(1);
 
 	z_info = S_028040_FORMAT(format);
+	if (rtex->resource.b.b.nr_samples > 1) {
+		z_info |= S_028040_NUM_SAMPLES(util_logbase2(rtex->resource.b.b.nr_samples));
+	}
+
 	if (rtex->surface.flags & RADEON_SURF_SBUFFER)
 		s_info = S_028044_FORMAT(V_028044_STENCIL_8);
 	else
@@ -1969,13 +2001,205 @@ static void si_db(struct r600_context *rctx, struct si_pm4_state *pm4,
 	si_pm4_set_reg(pm4, R_02805C_DB_DEPTH_SLICE, S_02805C_SLICE_TILE_MAX(slice));
 }
 
+#define FILL_SREG(s0x, s0y, s1x, s1y, s2x, s2y, s3x, s3y) /* 8 signed nibbles -> uint32_t; unsigned math keeps "<< 28" defined */ \
+	((((uint32_t)(s0x) & 0xf) <<  0) | (((uint32_t)(s0y) & 0xf) <<  4) |	\
+	 (((uint32_t)(s1x) & 0xf) <<  8) | (((uint32_t)(s1y) & 0xf) << 12) |	\
+	 (((uint32_t)(s2x) & 0xf) << 16) | (((uint32_t)(s2y) & 0xf) << 20) |	\
+	 (((uint32_t)(s3x) & 0xf) << 24) | (((uint32_t)(s3y) & 0xf) << 28))
+
+/* 2xMSAA
+ * There are two locations (-4, 4), (4, -4). */
+static const uint32_t sample_locs_2x[] = {
+	FILL_SREG(-4, 4, 4, -4, -4, 4, 4, -4),
+	FILL_SREG(-4, 4, 4, -4, -4, 4, 4, -4),
+	FILL_SREG(-4, 4, 4, -4, -4, 4, 4, -4),
+	FILL_SREG(-4, 4, 4, -4, -4, 4, 4, -4),
+};
+static const unsigned max_dist_2x = 4;
+/* 4xMSAA
+ * There are 4 locations: (-2, -2), (2, 2), (-6, 6), (6, -6). */
+static const uint32_t sample_locs_4x[] = {
+	FILL_SREG(-2, -2, 2, 2, -6, 6, 6, -6),
+	FILL_SREG(-2, -2, 2, 2, -6, 6, 6, -6),
+	FILL_SREG(-2, -2, 2, 2, -6, 6, 6, -6),
+	FILL_SREG(-2, -2, 2, 2, -6, 6, 6, -6),
+};
+static const unsigned max_dist_4x = 6;
+/* Cayman/SI 8xMSAA: entries 0-3 hold samples 0-3, entries 4-7 hold samples 4-7 (4 identical copies each) */
+static const uint32_t cm_sample_locs_8x[] = {
+	FILL_SREG(-2, -5, 3, -4, -1, 5, -6, -2),
+	FILL_SREG(-2, -5, 3, -4, -1, 5, -6, -2),
+	FILL_SREG(-2, -5, 3, -4, -1, 5, -6, -2),
+	FILL_SREG(-2, -5, 3, -4, -1, 5, -6, -2),
+	FILL_SREG( 6,  0, 0,  0, -5, 3,  4,  4),
+	FILL_SREG( 6,  0, 0,  0, -5, 3,  4,  4),
+	FILL_SREG( 6,  0, 0,  0, -5, 3,  4,  4),
+	FILL_SREG( 6,  0, 0,  0, -5, 3,  4,  4),
+};
+static const unsigned cm_max_dist_8x = 8;
+/* Cayman/SI 16xMSAA: four groups of 4 identical entries, each group holding 4 consecutive samples */
+static const uint32_t cm_sample_locs_16x[] = {
+	FILL_SREG(-7, -3, 7, 3, 1, -5, -5, 5),
+	FILL_SREG(-7, -3, 7, 3, 1, -5, -5, 5),
+	FILL_SREG(-7, -3, 7, 3, 1, -5, -5, 5),
+	FILL_SREG(-7, -3, 7, 3, 1, -5, -5, 5),
+	FILL_SREG(-3, -7, 3, 7, 5, -1, -1, 1),
+	FILL_SREG(-3, -7, 3, 7, 5, -1, -1, 1),
+	FILL_SREG(-3, -7, 3, 7, 5, -1, -1, 1),
+	FILL_SREG(-3, -7, 3, 7, 5, -1, -1, 1),
+	FILL_SREG(-8, -6, 4, 2, 2, -8, -2, 6),
+	FILL_SREG(-8, -6, 4, 2, 2, -8, -2, 6),
+	FILL_SREG(-8, -6, 4, 2, 2, -8, -2, 6),
+	FILL_SREG(-8, -6, 4, 2, 2, -8, -2, 6),
+	FILL_SREG(-4, -2, 0, 4, 6, -4, -6, 0),
+	FILL_SREG(-4, -2, 0, 4, 6, -4, -6, 0),
+	FILL_SREG(-4, -2, 0, 4, 6, -4, -6, 0),
+	FILL_SREG(-4, -2, 0, 4, 6, -4, -6, 0),
+};
+static const unsigned cm_max_dist_16x = 8;
+
+/* Decode one 4-bit two's-complement coordinate from a packed sample-location
+ * word and map it from [-8, 7] to [0, 1) as (v + 8) / 16. */
+static float si_sample_loc_coord(uint32_t locs, unsigned shift)
+{
+	int v = (locs >> shift) & 0xf;
+	/* Sign-extend by hand: the signedness of a plain "int" bit-field is
+	 * implementation-defined, so it cannot be relied on to do this. */
+	if (v & 0x8)
+		v -= 16;
+	return (float)(v + 8) / 16.0f;
+}
+
+/* Write the (x, y) position of sample "sample_index" within a pixel to out_value[0..1].
+ * Sample counts without a table here (including 1) report the pixel center. */
+static void si_get_sample_position(struct pipe_context *ctx,
+				   unsigned sample_count,
+				   unsigned sample_index,
+				   float *out_value)
+{
+	uint32_t locs;
+	unsigned shift;
+
+	switch (sample_count) {
+	case 1:
+	default:
+		out_value[0] = out_value[1] = 0.5;
+		return;
+	case 2:
+		locs = sample_locs_2x[0];
+		break;
+	case 4:
+		locs = sample_locs_4x[0];
+		break;
+	case 8:
+		/* 4 samples per word; each word is repeated 4 times in the table. */
+		locs = cm_sample_locs_8x[(sample_index / 4) * 4];
+		break;
+	case 16:
+		locs = cm_sample_locs_16x[(sample_index / 4) * 4];
+		break;
+	}
+	/* Each sample occupies one byte: x in the low nibble, y in the high nibble. */
+	shift = 8 * (sample_index % 4);
+	out_value[0] = si_sample_loc_coord(locs, shift);
+	out_value[1] = si_sample_loc_coord(locs, shift + 4);
+}
+
+static void si_set_msaa_state(struct r600_context *rctx, struct si_pm4_state *pm4, int nr_samples)
+{
+	unsigned max_dist = 0;
+	/* Queue the sample-location registers for nr_samples; counts other than 2/4/8/16 are treated as non-MSAA. */
+	switch (nr_samples) {
+	default:
+		nr_samples = 0;
+		break;
+	case 2:
+		si_pm4_set_reg(pm4, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, sample_locs_2x[0]);
+		si_pm4_set_reg(pm4, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, sample_locs_2x[1]);
+		si_pm4_set_reg(pm4, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, sample_locs_2x[2]);
+		si_pm4_set_reg(pm4, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, sample_locs_2x[3]);
+		max_dist = max_dist_2x;
+		break;
+	case 4:
+		si_pm4_set_reg(pm4, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, sample_locs_4x[0]);
+		si_pm4_set_reg(pm4, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, sample_locs_4x[1]);
+		si_pm4_set_reg(pm4, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, sample_locs_4x[2]);
+		si_pm4_set_reg(pm4, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, sample_locs_4x[3]);
+		max_dist = max_dist_4x;
+		break;
+	case 8:
+		si_pm4_set_reg(pm4, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, cm_sample_locs_8x[0]);
+		si_pm4_set_reg(pm4, R_028BFC_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1, cm_sample_locs_8x[4]);
+		si_pm4_set_reg(pm4, R_028C00_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_2, 0);
+		si_pm4_set_reg(pm4, R_028C04_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_3, 0);
+		si_pm4_set_reg(pm4, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, cm_sample_locs_8x[1]);
+		si_pm4_set_reg(pm4, R_028C0C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1, cm_sample_locs_8x[5]);
+		si_pm4_set_reg(pm4, R_028C10_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_2, 0);
+		si_pm4_set_reg(pm4, R_028C14_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_3, 0);
+		si_pm4_set_reg(pm4, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, cm_sample_locs_8x[2]);
+		si_pm4_set_reg(pm4, R_028C1C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1, cm_sample_locs_8x[6]);
+		si_pm4_set_reg(pm4, R_028C20_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_2, 0);
+		si_pm4_set_reg(pm4, R_028C24_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_3, 0);
+		si_pm4_set_reg(pm4, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, cm_sample_locs_8x[3]);
+		si_pm4_set_reg(pm4, R_028C2C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1, cm_sample_locs_8x[7]);
+		max_dist = cm_max_dist_8x; /* NOTE(review): X1Y1_2/_3 are not zeroed above, unlike the other three pixels - confirm intended */
+		break;
+	case 16:
+		si_pm4_set_reg(pm4, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, cm_sample_locs_16x[0]);
+		si_pm4_set_reg(pm4, R_028BFC_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1, cm_sample_locs_16x[4]);
+		si_pm4_set_reg(pm4, R_028C00_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_2, cm_sample_locs_16x[8]);
+		si_pm4_set_reg(pm4, R_028C04_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_3, cm_sample_locs_16x[12]);
+		si_pm4_set_reg(pm4, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, cm_sample_locs_16x[1]);
+		si_pm4_set_reg(pm4, R_028C0C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1, cm_sample_locs_16x[5]);
+		si_pm4_set_reg(pm4, R_028C10_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_2, cm_sample_locs_16x[9]);
+		si_pm4_set_reg(pm4, R_028C14_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_3, cm_sample_locs_16x[13]);
+		si_pm4_set_reg(pm4, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, cm_sample_locs_16x[2]);
+		si_pm4_set_reg(pm4, R_028C1C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1, cm_sample_locs_16x[6]);
+		si_pm4_set_reg(pm4, R_028C20_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_2, cm_sample_locs_16x[10]);
+		si_pm4_set_reg(pm4, R_028C24_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_3, cm_sample_locs_16x[14]);
+		si_pm4_set_reg(pm4, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, cm_sample_locs_16x[3]);
+		si_pm4_set_reg(pm4, R_028C2C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1, cm_sample_locs_16x[7]);
+		si_pm4_set_reg(pm4, R_028C30_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_2, cm_sample_locs_16x[11]);
+		si_pm4_set_reg(pm4, R_028C34_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_3, cm_sample_locs_16x[15]);
+		max_dist = cm_max_dist_16x;
+		break;
+	}
+	/* The count fields below take log2(nr_samples); the non-MSAA branch writes them as zero. */
+	if (nr_samples > 1) {
+		unsigned log_samples = util_logbase2(nr_samples);
+
+		si_pm4_set_reg(pm4, R_028BDC_PA_SC_LINE_CNTL,
+			       S_028BDC_LAST_PIXEL(1) |
+			       S_028BDC_EXPAND_LINE_WIDTH(1));
+		si_pm4_set_reg(pm4, R_028BE0_PA_SC_AA_CONFIG,
+			       S_028BE0_MSAA_NUM_SAMPLES(log_samples) |
+			       S_028BE0_MAX_SAMPLE_DIST(max_dist) |
+			       S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples));
+
+		si_pm4_set_reg(pm4, R_028804_DB_EQAA,
+			       S_028804_MAX_ANCHOR_SAMPLES(log_samples) |
+			       S_028804_PS_ITER_SAMPLES(log_samples) |
+			       S_028804_MASK_EXPORT_NUM_SAMPLES(log_samples) |
+			       S_028804_ALPHA_TO_MASK_NUM_SAMPLES(log_samples) |
+			       S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
+			       S_028804_STATIC_ANCHOR_ASSOCIATIONS(1));
+	} else {
+		si_pm4_set_reg(pm4, R_028BDC_PA_SC_LINE_CNTL, S_028BDC_LAST_PIXEL(1));
+		si_pm4_set_reg(pm4, R_028BE0_PA_SC_AA_CONFIG, 0);
+
+		si_pm4_set_reg(pm4, R_028804_DB_EQAA,
+			       S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
+			       S_028804_STATIC_ANCHOR_ASSOCIATIONS(1));
+	}
+}
+
 static void si_set_framebuffer_state(struct pipe_context *ctx,
 				     const struct pipe_framebuffer_state *state)
 {
 	struct r600_context *rctx = (struct r600_context *)ctx;
 	struct si_pm4_state *pm4 = si_pm4_alloc_state(rctx);
 	uint32_t tl, br;
-	int tl_x, tl_y, br_x, br_y;
+	int tl_x, tl_y, br_x, br_y, nr_samples;
 
 	if (pm4 == NULL)
 		return;
@@ -2013,7 +2237,16 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
 	si_pm4_set_reg(pm4, R_028208_PA_SC_WINDOW_SCISSOR_BR, br);
 	si_pm4_set_reg(pm4, R_028200_PA_SC_WINDOW_OFFSET, 0x00000000);
 	si_pm4_set_reg(pm4, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA);
-	si_pm4_set_reg(pm4, R_028BE0_PA_SC_AA_CONFIG, 0x00000000);
+
+	if (state->nr_cbufs)
+		nr_samples = state->cbufs[0]->texture->nr_samples;
+	else if (state->zsbuf)
+		nr_samples = state->zsbuf->texture->nr_samples;
+	else
+		nr_samples = 0;
+
+	si_set_msaa_state(rctx, pm4, nr_samples);
+	rctx->fb_log_samples = util_logbase2(nr_samples);
 
 	si_pm4_set_state(rctx, framebuffer, pm4);
 	si_update_fb_rs_state(rctx);
@@ -2657,8 +2890,19 @@ static void si_bind_ps_sampler_states(struct pipe_context *ctx, unsigned count,
 	si_pm4_set_state(rctx, ps_sampler, pm4);
 }
 
-static void si_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask)
+static void si_set_sample_mask(struct pipe_context *ctx, unsigned sample_mask)
 {
+	struct r600_context *rctx = (struct r600_context *)ctx;
+	struct si_pm4_state *pm4 = si_pm4_alloc_state(rctx);
+	/* Low 16 bits kept in a 32-bit unsigned so "mask << 16" cannot overflow int; both halves of each AA_MASK register get it. */
+	uint32_t mask = sample_mask & 0xffff;
+
+	if (pm4 == NULL)
+		return;
+
+	si_pm4_set_reg(pm4, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, mask | (mask << 16));
+	si_pm4_set_reg(pm4, R_028C3C_PA_SC_AA_MASK_X0Y1_X1Y1, mask | (mask << 16));
+	si_pm4_set_state(rctx, sample_mask, pm4);
 }
 
 static void si_delete_sampler_state(struct pipe_context *ctx, void *state)
@@ -2828,6 +3072,16 @@ static void si_texture_barrier(struct pipe_context *ctx)
 	si_pm4_set_state(rctx, texture_barrier, pm4);
 }
 
+static void *si_create_resolve_blend(struct r600_context *rctx)
+{
+	/* Zeroed template with all four channels of RT0 writable, created in the CB resolve mode. */
+	struct pipe_blend_state resolve_templ;
+	memset(&resolve_templ, 0, sizeof(resolve_templ));
+	resolve_templ.rt[0].colormask = 0xf;
+	resolve_templ.independent_blend_enable = true;
+	return si_create_blend_state_mode(&rctx->context, &resolve_templ, V_028808_CB_RESOLVE);
+}
+
 void si_init_state_functions(struct r600_context *rctx)
 {
 	rctx->context.create_blend_state = si_create_blend_state;
@@ -2842,10 +3096,12 @@ void si_init_state_functions(struct r600_context *rctx)
 	rctx->context.create_depth_stencil_alpha_state = si_create_dsa_state;
 	rctx->context.bind_depth_stencil_alpha_state = si_bind_dsa_state;
 	rctx->context.delete_depth_stencil_alpha_state = si_delete_dsa_state;
+
 	rctx->custom_dsa_flush_depth_stencil = si_create_db_flush_dsa(rctx, true, true);
 	rctx->custom_dsa_flush_depth = si_create_db_flush_dsa(rctx, true, false);
 	rctx->custom_dsa_flush_stencil = si_create_db_flush_dsa(rctx, false, true);
 	rctx->custom_dsa_flush_inplace = si_create_db_flush_dsa(rctx, false, false);
+	rctx->custom_blend_resolve = si_create_resolve_blend(rctx);
 
 	rctx->context.set_clip_state = si_set_clip_state;
 	rctx->context.set_scissor_states = si_set_scissor_states;
@@ -2853,6 +3109,7 @@ void si_init_state_functions(struct r600_context *rctx)
 	rctx->context.set_stencil_ref = si_set_pipe_stencil_ref;
 
 	rctx->context.set_framebuffer_state = si_set_framebuffer_state;
+	rctx->context.get_sample_position = si_get_sample_position;
 
 	rctx->context.create_vs_state = si_create_vs_state;
 	rctx->context.create_fs_state = si_create_fs_state;
@@ -2933,8 +3190,6 @@ void si_init_config(struct r600_context *rctx)
 	si_pm4_set_reg(pm4, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 0x76543210);
 	si_pm4_set_reg(pm4, R_028BD8_PA_SC_CENTROID_PRIORITY_1, 0xfedcba98);
 
-	si_pm4_set_reg(pm4, R_028804_DB_EQAA, 0x110000);
-
 	si_pm4_set_reg(pm4, R_02882C_PA_SU_PRIM_FILTER_CNTL, 0);
 
 	if (rctx->chip_class >= CIK) {
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 7ce084e..ebe8c3f 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -78,6 +78,7 @@ union si_state {
 		struct si_state_blend		*blend;
 		struct si_pm4_state		*blend_color;
 		struct si_pm4_state		*clip;
+		struct si_pm4_state		*sample_mask;
 		struct si_pm4_state		*scissor;
 		struct si_state_viewport	*viewport;
 		struct si_pm4_state		*framebuffer;
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 3363d46..e200ab1 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -605,7 +605,8 @@ static void si_state_draw(struct r600_context *rctx,
 		struct si_state_dsa *dsa = rctx->queued.named.dsa;
 
 		si_pm4_set_reg(pm4, R_028004_DB_COUNT_CONTROL,
-			       S_028004_PERFECT_ZPASS_COUNTS(1));
+			       S_028004_PERFECT_ZPASS_COUNTS(1) |
+			       S_028004_SAMPLE_RATE(rctx->fb_log_samples));
 		si_pm4_set_reg(pm4, R_02800C_DB_RENDER_OVERRIDE,
 			       dsa->db_render_override |
 			       S_02800C_NOOP_CULL_DISABLE(1));
diff --git a/src/gallium/drivers/radeonsi/sid.h b/src/gallium/drivers/radeonsi/sid.h
index 208d3a8..b91c4f2 100644
--- a/src/gallium/drivers/radeonsi/sid.h
+++ b/src/gallium/drivers/radeonsi/sid.h
@@ -6546,6 +6546,16 @@
 #define   G_028800_DISABLE_COLOR_WRITES_ON_DEPTH_PASS(x)              (((x) >> 31) & 0x1)
 #define   C_028800_DISABLE_COLOR_WRITES_ON_DEPTH_PASS                 0x7FFFFFFF
 #define R_028804_DB_EQAA                                                0x028804
+#define   S_028804_MAX_ANCHOR_SAMPLES(x)		(((x) & 0x7) << 0)
+#define   S_028804_PS_ITER_SAMPLES(x)			(((x) & 0x7) << 4)
+#define   S_028804_MASK_EXPORT_NUM_SAMPLES(x)		(((x) & 0x7) << 8)
+#define   S_028804_ALPHA_TO_MASK_NUM_SAMPLES(x)		(((x) & 0x7) << 12)
+#define   S_028804_HIGH_QUALITY_INTERSECTIONS(x)	(((x) & 0x1) << 16)
+#define   S_028804_INCOHERENT_EQAA_READS(x)		(((x) & 0x1) << 17)
+#define   S_028804_INTERPOLATE_COMP_Z(x)		(((x) & 0x1) << 18)
+#define   S_028804_INTERPOLATE_SRC_Z(x)			(((x) & 0x1) << 19)
+#define   S_028804_STATIC_ANCHOR_ASSOCIATIONS(x)	(((x) & 0x1) << 20)
+#define   S_028804_ALPHA_TO_MASK_EQAA_DISABLE(x)	(((x) & 0x1) << 21)
 #define R_028808_CB_COLOR_CONTROL                                       0x028808
 #define   S_028808_DEGAMMA_ENABLE(x)                                  (((x) & 0x1) << 3)
 #define   G_028808_DEGAMMA_ENABLE(x)                                  (((x) >> 3) & 0x1)
-- 
1.8.1.2



More information about the mesa-dev mailing list