[Mesa-dev] [PATCH] radeonsi: implement RB+ for Stoney (v2)

Marek Olšák maraeo at gmail.com
Wed Dec 9 14:35:42 PST 2015


From: Marek Olšák <marek.olsak at amd.com>

v2: fix dual source blending
---
 src/gallium/drivers/radeon/r600_pipe_common.c |   1 +
 src/gallium/drivers/radeon/r600_pipe_common.h |   3 +
 src/gallium/drivers/radeon/r600_texture.c     |   6 +
 src/gallium/drivers/radeonsi/si_state.c       | 159 +++++++++++++++++++++++++-
 src/gallium/drivers/radeonsi/sid.h            |   3 +
 5 files changed, 170 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c
index 8899ba4..ba541ac 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -375,6 +375,7 @@ static const struct debug_named_value common_debug_options[] = {
 	{ "check_vm", DBG_CHECK_VM, "Check VM faults and dump debug info." },
 	{ "nodcc", DBG_NO_DCC, "Disable DCC." },
 	{ "nodccclear", DBG_NO_DCC_CLEAR, "Disable DCC fast clear." },
+	{ "norbplus", DBG_NO_RB_PLUS, "Disable RB+ on Stoney." },
 
 	DEBUG_NAMED_VALUE_END /* must be last */
 };
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index 8c6c0c3..dd23ed5 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -86,6 +86,7 @@
 #define DBG_CHECK_VM		(1llu << 42)
 #define DBG_NO_DCC		(1llu << 43)
 #define DBG_NO_DCC_CLEAR	(1llu << 44)
+#define DBG_NO_RB_PLUS		(1llu << 45)
 
 #define R600_MAP_BUFFER_ALIGNMENT 64
 
@@ -250,6 +251,8 @@ struct r600_surface {
 	unsigned cb_color_fmask_slice;	/* EG and later */
 	unsigned cb_color_cmask;	/* CB_COLORn_TILE (r600 only) */
 	unsigned cb_color_mask;		/* R600 only */
+	unsigned sx_ps_downconvert;	/* Stoney only */
+	unsigned sx_blend_opt_epsilon;	/* Stoney only */
 	struct r600_resource *cb_buffer_fmask; /* Used for FMASK relocations. R600 only */
 	struct r600_resource *cb_buffer_cmask; /* Used for CMASK relocations. R600 only */
 
diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c
index 774722f..8c145e5 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -1393,6 +1393,7 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
 		return;
 
 	for (i = 0; i < fb->nr_cbufs; i++) {
+		struct r600_surface *surf;
 		struct r600_texture *tex;
 		unsigned clear_bit = PIPE_CLEAR_COLOR0 << i;
 
@@ -1403,6 +1404,7 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
 		if (!(*buffers & clear_bit))
 			continue;
 
+		surf = (struct r600_surface *)fb->cbufs[i];
 		tex = (struct r600_texture *)fb->cbufs[i]->texture;
 
 		/* 128-bit formats are unusupported */
@@ -1449,6 +1451,10 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
 			if (clear_words_needed)
 				tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level;
 		} else {
+			/* RB+ doesn't work with CMASK fast clear. */
+			if (surf->sx_ps_downconvert)
+				continue;
+
 			/* ensure CMASK is enabled */
 			r600_texture_alloc_cmask_separate(rctx->screen, tex);
 			if (tex->cmask.size == 0) {
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 2ebfa1c..dcf4a7b 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -347,10 +347,54 @@ static uint32_t si_translate_blend_factor(int blend_fact)
 	return 0;
 }
 
+static uint32_t si_translate_blend_opt_function(int blend_func)
+{
+	switch (blend_func) {
+	case PIPE_BLEND_ADD:
+		return V_028760_OPT_COMB_ADD;
+	case PIPE_BLEND_SUBTRACT:
+		return V_028760_OPT_COMB_SUBTRACT;
+	case PIPE_BLEND_REVERSE_SUBTRACT:
+		return V_028760_OPT_COMB_REVSUBTRACT;
+	case PIPE_BLEND_MIN:
+		return V_028760_OPT_COMB_MIN;
+	case PIPE_BLEND_MAX:
+		return V_028760_OPT_COMB_MAX;
+	default:
+		return V_028760_OPT_COMB_BLEND_DISABLED;
+	}
+}
+
+static uint32_t si_translate_blend_opt_factor(int blend_fact, bool is_alpha)
+{
+	switch (blend_fact) {
+	case PIPE_BLENDFACTOR_ZERO:
+		return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_ALL;
+	case PIPE_BLENDFACTOR_ONE:
+		return V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE;
+	case PIPE_BLENDFACTOR_SRC_COLOR:
+		return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0
+				: V_028760_BLEND_OPT_PRESERVE_C1_IGNORE_C0;
+	case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+		return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1
+				: V_028760_BLEND_OPT_PRESERVE_C0_IGNORE_C1;
+	case PIPE_BLENDFACTOR_SRC_ALPHA:
+		return V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0;
+	case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+		return V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1;
+	case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+		return is_alpha ? V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE
+				: V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0;
+	default:
+		return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE;
+	}
+}
+
 static void *si_create_blend_state_mode(struct pipe_context *ctx,
 					const struct pipe_blend_state *state,
 					unsigned mode)
 {
+	struct si_context *sctx = (struct si_context*)ctx;
 	struct si_state_blend *blend = CALLOC_STRUCT(si_state_blend);
 	struct si_pm4_state *pm4 = &blend->pm4;
 
@@ -416,8 +460,47 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx,
 	} else {
 		color_control |= S_028808_MODE(V_028808_CB_DISABLE);
 	}
-	si_pm4_set_reg(pm4, R_028808_CB_COLOR_CONTROL, color_control);
 
+	if (sctx->b.family == CHIP_STONEY) {
+		uint32_t sx_blend_opt_control = 0;
+
+		for (int i = 0; i < 8; i++) {
+			const int j = state->independent_blend_enable ? i : 0;
+
+			/* TODO: We can also set this if the surface doesn't contain RGB. */
+			if (!state->rt[j].blend_enable ||
+			    !(state->rt[j].colormask & (PIPE_MASK_R | PIPE_MASK_G | PIPE_MASK_B)))
+				sx_blend_opt_control |= S_02875C_MRT0_COLOR_OPT_DISABLE(1) << (4 * i);
+
+			/* TODO: We can also set this if the surface doesn't contain alpha. */
+			if (!state->rt[j].blend_enable ||
+			    !(state->rt[j].colormask & PIPE_MASK_A))
+				sx_blend_opt_control |= S_02875C_MRT0_ALPHA_OPT_DISABLE(1) << (4 * i);
+
+			if (!state->rt[j].blend_enable) {
+				si_pm4_set_reg(pm4, R_028760_SX_MRT0_BLEND_OPT + i * 4,
+					       S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED) |
+					       S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED));
+				continue;
+			}
+
+			si_pm4_set_reg(pm4, R_028760_SX_MRT0_BLEND_OPT + i * 4,
+				S_028760_COLOR_SRC_OPT(si_translate_blend_opt_factor(state->rt[j].rgb_src_factor, false)) |
+				S_028760_COLOR_DST_OPT(si_translate_blend_opt_factor(state->rt[j].rgb_dst_factor, false)) |
+				S_028760_COLOR_COMB_FCN(si_translate_blend_opt_function(state->rt[j].rgb_func)) |
+				S_028760_ALPHA_SRC_OPT(si_translate_blend_opt_factor(state->rt[j].alpha_src_factor, true)) |
+				S_028760_ALPHA_DST_OPT(si_translate_blend_opt_factor(state->rt[j].alpha_dst_factor, true)) |
+				S_028760_ALPHA_COMB_FCN(si_translate_blend_opt_function(state->rt[j].alpha_func)));
+		}
+
+		si_pm4_set_reg(pm4, R_02875C_SX_BLEND_OPT_CONTROL, sx_blend_opt_control);
+
+		/* RB+ doesn't work with dual source blending */
+		if (blend->dual_src_blend)
+			color_control |= S_028808_DISABLE_DUAL_QUAD(1);
+	}
+
+	si_pm4_set_reg(pm4, R_028808_CB_COLOR_CONTROL, color_control);
 	return blend;
 }
 
@@ -1057,6 +1140,10 @@ static void si_emit_db_render_state(struct si_context *sctx, struct r600_atom *s
 	if (sctx->framebuffer.nr_samples <= 1 || (rs && !rs->multisample_enable))
 		db_shader_control &= C_02880C_MASK_EXPORT_ENABLE;
 
+	if (sctx->b.family == CHIP_STONEY &&
+	    sctx->screen->b.debug_flags & DBG_NO_RB_PLUS)
+		db_shader_control |= S_02880C_DUAL_QUAD_DISABLE(1);
+
 	radeon_set_context_reg(cs, R_02880C_DB_SHADER_CONTROL,
 			       db_shader_control);
 }
@@ -1993,6 +2080,61 @@ static void si_initialize_color_surface(struct si_context *sctx,
 		surf->export_16bpc = true;
 	}
 
+	if (sctx->b.family == CHIP_STONEY &&
+	    !(sctx->screen->b.debug_flags & DBG_NO_RB_PLUS)) {
+		switch (desc->channel[0].size) {
+		case 32:
+			if (desc->nr_channels == 1) {
+				if (swap == V_0280A0_SWAP_STD)
+					surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_32_R;
+				else if (swap == V_0280A0_SWAP_ALT_REV)
+					surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_32_A;
+			}
+			break;
+		case 16:
+			/* For 1-channel formats, use the superset thereof. */
+			if (desc->nr_channels <= 2) {
+				if (swap == V_0280A0_SWAP_STD ||
+				    swap == V_0280A0_SWAP_STD_REV)
+					surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_16_16_GR;
+				else
+					surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_16_16_AR;
+			}
+			break;
+		case 11:
+			if (desc->nr_channels == 3) {
+				surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_10_11_11;
+				surf->sx_blend_opt_epsilon = V_028758_11BIT_FORMAT;
+			}
+			break;
+		case 10:
+			if (desc->nr_channels == 4) {
+				surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_2_10_10_10;
+				surf->sx_blend_opt_epsilon = V_028758_10BIT_FORMAT;
+			}
+			break;
+		case 8:
+			/* For 1 and 2-channel formats, use the superset thereof. */
+			surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_8_8_8_8;
+			surf->sx_blend_opt_epsilon = V_028758_8BIT_FORMAT;
+			break;
+		case 5:
+			if (desc->nr_channels == 3) {
+				surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_5_6_5;
+				surf->sx_blend_opt_epsilon = V_028758_6BIT_FORMAT;
+			} else if (desc->nr_channels == 4) {
+				surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_1_5_5_5;
+				surf->sx_blend_opt_epsilon = V_028758_5BIT_FORMAT;
+			}
+			break;
+		case 4:
+			/* For 1 and 2-channel formats, use the superset thereof. */
+			surf->sx_ps_downconvert = V_028754_SX_RT_EXPORT_4_4_4_4;
+			surf->sx_blend_opt_epsilon = V_028758_4BIT_FORMAT;
+			break;
+		}
+	}
+
 	surf->color_initialized = true;
 }
 
@@ -2260,6 +2402,8 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
 	unsigned i, nr_cbufs = state->nr_cbufs;
 	struct r600_texture *tex = NULL;
 	struct r600_surface *cb = NULL;
+	uint32_t sx_ps_downconvert = 0;
+	uint32_t sx_blend_opt_epsilon = 0;
 
 	/* Colorbuffers. */
 	for (i = 0; i < nr_cbufs; i++) {
@@ -2310,18 +2454,29 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
 
 		if (sctx->b.chip_class >= VI)
 			radeon_emit(cs, cb->cb_dcc_base);	/* R_028C94_CB_COLOR0_DCC_BASE */
+
+		sx_ps_downconvert |= cb->sx_ps_downconvert << (4 * i);
+		sx_blend_opt_epsilon |= cb->sx_blend_opt_epsilon << (4 * i);
 	}
 	/* set CB_COLOR1_INFO for possible dual-src blending */
 	if (i == 1 && state->cbufs[0] &&
 	    sctx->framebuffer.dirty_cbufs & (1 << 0)) {
 		radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + 1 * 0x3C,
 				       cb->cb_color_info | tex->cb_color_info);
+		sx_ps_downconvert |= cb->sx_ps_downconvert << (4 * i);
+		sx_blend_opt_epsilon |= cb->sx_blend_opt_epsilon << (4 * i);
 		i++;
 	}
 	for (; i < 8 ; i++)
 		if (sctx->framebuffer.dirty_cbufs & (1 << i))
 			radeon_set_context_reg(cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, 0);
 
+	if (sctx->b.family == CHIP_STONEY) {
+		radeon_set_context_reg_seq(cs, R_028754_SX_PS_DOWNCONVERT, 2);
+		radeon_emit(cs, sx_ps_downconvert);	/* R_028754_SX_PS_DOWNCONVERT */
+		radeon_emit(cs, sx_blend_opt_epsilon);	/* R_028758_SX_BLEND_OPT_EPSILON */
+	}
+
 	/* ZS buffer. */
 	if (state->zsbuf && sctx->framebuffer.dirty_zsbuf) {
 		struct r600_surface *zb = (struct r600_surface*)state->zsbuf;
@@ -3486,7 +3641,7 @@ static void si_init_config(struct si_context *sctx)
 	}
 
 	if (sctx->b.family == CHIP_STONEY)
-		si_pm4_set_reg(pm4, R_028754_SX_PS_DOWNCONVERT, 0);
+		si_pm4_set_reg(pm4, R_028C40_PA_SC_SHADER_CONTROL, 0);
 
 	si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, border_color_va >> 8);
 	if (sctx->b.chip_class >= CIK)
diff --git a/src/gallium/drivers/radeonsi/sid.h b/src/gallium/drivers/radeonsi/sid.h
index 7866d58..9e1e158 100644
--- a/src/gallium/drivers/radeonsi/sid.h
+++ b/src/gallium/drivers/radeonsi/sid.h
@@ -6771,6 +6771,9 @@
 #define   G_028804_ENABLE_POSTZ_OVERRASTERIZATION(x)                  (((x) >> 27) & 0x1)
 #define   C_028804_ENABLE_POSTZ_OVERRASTERIZATION                     0xF7FFFFFF
 #define R_028808_CB_COLOR_CONTROL                                       0x028808
+#define   S_028808_DISABLE_DUAL_QUAD(x)                               (((x) & 0x1) << 0)
+#define   G_028808_DISABLE_DUAL_QUAD(x)                               (((x) >> 0) & 0x1)
+#define   C_028808_DISABLE_DUAL_QUAD                                  0xFFFFFFFE
 #define   S_028808_DEGAMMA_ENABLE(x)                                  (((x) & 0x1) << 3)
 #define   G_028808_DEGAMMA_ENABLE(x)                                  (((x) >> 3) & 0x1)
 #define   C_028808_DEGAMMA_ENABLE                                     0xFFFFFFF7
-- 
2.1.4



More information about the mesa-dev mailing list