[Mesa-dev] [PATCH 080/140] radeonsi/gfx9: disable RB+ on Vega10

Marek Olšák maraeo at gmail.com
Mon Mar 20 22:43:30 UTC 2017


From: Marek Olšák <marek.olsak at amd.com>

---
 src/gallium/drivers/radeon/r600_pipe_common.c   |  4 +++-
 src/gallium/drivers/radeon/r600_pipe_common.h   |  2 ++
 src/gallium/drivers/radeon/r600_texture.c       | 13 ++++++------
 src/gallium/drivers/radeonsi/si_pipe.c          | 13 ++++++++++++
 src/gallium/drivers/radeonsi/si_state.c         | 27 ++++++++++++-------------
 src/gallium/drivers/radeonsi/si_state_shaders.c |  2 +-
 6 files changed, 39 insertions(+), 22 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c
index 42dc38b..c33b457 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -717,21 +717,21 @@ static const struct debug_named_value common_debug_options[] = {
 	{ "noinvalrange", DBG_NO_DISCARD_RANGE, "Disable handling of INVALIDATE_RANGE map flags" },
 	{ "no2d", DBG_NO_2D_TILING, "Disable 2D tiling" },
 	{ "notiling", DBG_NO_TILING, "Disable tiling" },
 	{ "switch_on_eop", DBG_SWITCH_ON_EOP, "Program WD/IA to switch on end-of-packet." },
 	{ "forcedma", DBG_FORCE_DMA, "Use asynchronous DMA for all operations when possible." },
 	{ "precompile", DBG_PRECOMPILE, "Compile one shader variant at shader creation." },
 	{ "nowc", DBG_NO_WC, "Disable GTT write combining" },
 	{ "check_vm", DBG_CHECK_VM, "Check VM faults and dump debug info." },
 	{ "nodcc", DBG_NO_DCC, "Disable DCC." },
 	{ "nodccclear", DBG_NO_DCC_CLEAR, "Disable DCC fast clear." },
-	{ "norbplus", DBG_NO_RB_PLUS, "Disable RB+ on Stoney." },
+	{ "norbplus", DBG_NO_RB_PLUS, "Disable RB+." },
 	{ "sisched", DBG_SI_SCHED, "Enable LLVM SI Machine Instruction Scheduler." },
 	{ "mono", DBG_MONOLITHIC_SHADERS, "Use old-style monolithic shaders compiled on demand" },
 	{ "noce", DBG_NO_CE, "Disable the constant engine"},
 	{ "unsafemath", DBG_UNSAFE_MATH, "Enable unsafe math shader optimizations" },
 	{ "nodccfb", DBG_NO_DCC_FB, "Disable separate DCC on the main framebuffer" },
 
 	DEBUG_NAMED_VALUE_END /* must be last */
 };
 
 static const char* r600_get_vendor(struct pipe_screen* pscreen)
@@ -1310,20 +1310,22 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen,
 		rscreen->b.is_video_format_supported = vl_video_buffer_is_format_supported;
 	}
 
 	r600_init_screen_texture_functions(rscreen);
 	r600_init_screen_query_functions(rscreen);
 
 	rscreen->ws = ws;
 	rscreen->family = rscreen->info.family;
 	rscreen->chip_class = rscreen->info.chip_class;
 	rscreen->debug_flags = debug_get_flags_option("R600_DEBUG", common_debug_options, 0);
+	rscreen->has_rbplus = false;
+	rscreen->rbplus_allowed = false;
 
 	r600_disk_cache_create(rscreen);
 
 	slab_create_parent(&rscreen->pool_transfers, sizeof(struct r600_transfer), 64);
 
 	rscreen->force_aniso = MIN2(16, debug_get_num_option("R600_TEX_ANISO", -1));
 	if (rscreen->force_aniso >= 0) {
 		printf("radeon: Forcing anisotropy filter to %ix\n",
 		       /* round down to a power of two */
 		       1 << util_logbase2(rscreen->force_aniso));
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index 3516884..883d5ed 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -363,20 +363,22 @@ union r600_mmio_counters {
 
 struct r600_common_screen {
 	struct pipe_screen		b;
 	struct radeon_winsys		*ws;
 	enum radeon_family		family;
 	enum chip_class			chip_class;
 	struct radeon_info		info;
 	uint64_t			debug_flags;
 	bool				has_cp_dma;
 	bool				has_streamout;
+	bool				has_rbplus;     /* if RB+ registers exist */
+	bool				rbplus_allowed; /* if RB+ is allowed */
 
 	struct disk_cache		*disk_shader_cache;
 
 	struct slab_parent_pool		pool_transfers;
 
 	/* Texture filter settings. */
 	int				force_aniso; /* -1 = disabled */
 
 	/* Auxiliary context. Mainly used to initialize resources.
 	 * It must be locked prior to using and flushed before unlocking. */
diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c
index ec7a325..2953379 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -2468,26 +2468,27 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
 			continue;
 		}
 
 		/* Fast clear is the most appropriate place to enable DCC for
 		 * displayable surfaces.
 		 */
 		if (rctx->chip_class >= VI &&
 		    !(rctx->screen->debug_flags & DBG_NO_DCC_FB)) {
 			vi_separate_dcc_try_enable(rctx, tex);
 
-			/* Stoney can't do a CMASK-based clear, so all clears are
-			 * considered to be hypothetically slow clears, which
-			 * is weighed when determining to enable separate DCC.
+			/* RB+ isn't supported with a CMASK-based clear, so all
+			 * clears are considered to be hypothetically slow
+			 * clears, which is weighed when determining whether to
+			 * enable separate DCC.
 			 */
 			if (tex->dcc_gather_statistics &&
-			    rctx->family == CHIP_STONEY)
+			    rctx->screen->rbplus_allowed)
 				tex->num_slow_clears++;
 		}
 
 		/* Try to clear DCC first, otherwise try CMASK. */
 		if (tex->dcc_offset && tex->surface.num_dcc_levels) {
 			uint32_t reset_value;
 			bool clear_words_needed;
 
 			if (rctx->screen->debug_flags & DBG_NO_DCC_CLEAR)
 				continue;
@@ -2501,22 +2502,22 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
 
 			if (clear_words_needed)
 				tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level;
 			tex->separate_dcc_dirty = true;
 		} else {
 			/* 128-bit formats are unusupported */
 			if (tex->surface.bpe > 8) {
 				continue;
 			}
 
-			/* Stoney/RB+ doesn't work with CMASK fast clear. */
-			if (rctx->family == CHIP_STONEY)
+			/* RB+ doesn't work with CMASK fast clear. */
+			if (rctx->screen->rbplus_allowed)
 				continue;
 
 			/* ensure CMASK is enabled */
 			r600_texture_alloc_cmask_separate(rctx->screen, tex);
 			if (tex->cmask.size == 0) {
 				continue;
 			}
 
 			/* Do the fast clear. */
 			rctx->clear_buffer(&rctx->b, &tex->cmask_buffer->b.b,
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 7f0b445..8904b9d 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -832,20 +832,33 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws)
 		 sscreen->b.info.me_fw_version >= 173) ||
 		(sscreen->b.chip_class == SI &&
 		 sscreen->b.info.pfp_fw_version >= 121 &&
 		 sscreen->b.info.me_fw_version >= 87);
 
 	sscreen->has_ds_bpermute = HAVE_LLVM >= 0x0309 &&
 				   sscreen->b.chip_class >= VI;
 
 	sscreen->b.has_cp_dma = true;
 	sscreen->b.has_streamout = true;
+
+	/* Some chips have the RB+ registers but don't support RB+ itself.
+	 * On those chips, RB+ must always be disabled.
+	 */
+	if (sscreen->b.family == CHIP_STONEY ||
+	    sscreen->b.chip_class >= GFX9) {
+		sscreen->b.has_rbplus = true;
+
+		sscreen->b.rbplus_allowed =
+			!(sscreen->b.debug_flags & DBG_NO_RB_PLUS) &&
+			sscreen->b.family == CHIP_STONEY;
+	}
+
 	(void) mtx_init(&sscreen->shader_parts_mutex, mtx_plain);
 	sscreen->use_monolithic_shaders =
 		(sscreen->b.debug_flags & DBG_MONOLITHIC_SHADERS) != 0;
 
 	sscreen->b.barrier_flags.cp_to_L2 = SI_CONTEXT_INV_SMEM_L1 |
 					    SI_CONTEXT_INV_VMEM_L1 |
 					    SI_CONTEXT_INV_GLOBAL_L2;
 	sscreen->b.barrier_flags.compute_to_L2 = SI_CONTEXT_CS_PARTIAL_FLUSH;
 
 	if (debug_get_bool_option("RADEON_DUMP_SHADERS", false))
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index fa69b34..5f0eab1 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -110,22 +110,22 @@ static void si_emit_cb_render_state(struct si_context *sctx, struct r600_atom *a
 	 *
 	 * Reproducible with Unigine Heaven 4.0 and drirc missing.
 	 */
 	if (blend && blend->dual_src_blend &&
 	    sctx->ps_shader.cso &&
 	    (sctx->ps_shader.cso->info.colors_written & 0x3) != 0x3)
 		cb_target_mask = 0;
 
 	radeon_set_context_reg(cs, R_028238_CB_TARGET_MASK, cb_target_mask);
 
-	/* STONEY-specific register settings. */
-	if (sctx->b.family == CHIP_STONEY) {
+	/* RB+ register settings. */
+	if (sctx->screen->b.rbplus_allowed) {
 		unsigned spi_shader_col_format =
 			sctx->ps_shader.cso ?
 			sctx->ps_shader.current->key.part.ps.epilog.spi_shader_col_format : 0;
 		unsigned sx_ps_downconvert = 0;
 		unsigned sx_blend_opt_epsilon = 0;
 		unsigned sx_blend_opt_control = 0;
 
 		for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) {
 			struct r600_surface *surf =
 				(struct r600_surface*)sctx->framebuffer.state.cbufs[i];
@@ -235,30 +235,29 @@ static void si_emit_cb_render_state(struct si_context *sctx, struct r600_atom *a
 
 			case V_028C70_COLOR_2_10_10_10:
 				if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
 					sx_ps_downconvert |= V_028754_SX_RT_EXPORT_2_10_10_10 << (i * 4);
 					sx_blend_opt_epsilon |= V_028758_10BIT_FORMAT << (i * 4);
 				}
 				break;
 			}
 		}
 
-		if (sctx->screen->b.debug_flags & DBG_NO_RB_PLUS) {
-			sx_ps_downconvert = 0;
-			sx_blend_opt_epsilon = 0;
-			sx_blend_opt_control = 0;
-		}
-
 		radeon_set_context_reg_seq(cs, R_028754_SX_PS_DOWNCONVERT, 3);
 		radeon_emit(cs, sx_ps_downconvert);	/* R_028754_SX_PS_DOWNCONVERT */
 		radeon_emit(cs, sx_blend_opt_epsilon);	/* R_028758_SX_BLEND_OPT_EPSILON */
 		radeon_emit(cs, sx_blend_opt_control);	/* R_02875C_SX_BLEND_OPT_CONTROL */
+	} else if (sctx->screen->b.has_rbplus) {
+		radeon_set_context_reg_seq(cs, R_028754_SX_PS_DOWNCONVERT, 3);
+		radeon_emit(cs, 0);	/* R_028754_SX_PS_DOWNCONVERT */
+		radeon_emit(cs, 0);	/* R_028758_SX_BLEND_OPT_EPSILON */
+		radeon_emit(cs, 0);	/* R_02875C_SX_BLEND_OPT_CONTROL */
 	}
 }
 
 /*
  * Blender functions
  */
 
 static uint32_t si_translate_blend_function(int blend_func)
 {
 	switch (blend_func) {
@@ -476,21 +475,21 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx,
 		}
 
 		/* cb_render_state will disable unused ones */
 		blend->cb_target_mask |= (unsigned)state->rt[j].colormask << (4 * i);
 
 		if (!state->rt[j].colormask || !state->rt[j].blend_enable) {
 			si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl);
 			continue;
 		}
 
-		/* Blending optimizations for Stoney.
+		/* Blending optimizations for RB+.
 		 * These transformations don't change the behavior.
 		 *
 		 * First, get rid of DST in the blend factors:
 		 *    func(src * DST, dst * 0) ---> func(src * 0, dst * SRC)
 		 */
 		si_blend_remove_dst(&eqRGB, &srcRGB, &dstRGB,
 				    PIPE_BLENDFACTOR_DST_COLOR,
 				    PIPE_BLENDFACTOR_SRC_COLOR);
 		si_blend_remove_dst(&eqA, &srcA, &dstA,
 				    PIPE_BLENDFACTOR_DST_COLOR,
@@ -551,21 +550,21 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx,
 		    dstRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA)
 			blend->need_src_alpha_4bit |= 0xfu << (i * 4);
 	}
 
 	if (blend->cb_target_mask) {
 		color_control |= S_028808_MODE(mode);
 	} else {
 		color_control |= S_028808_MODE(V_028808_CB_DISABLE);
 	}
 
-	if (sctx->b.family == CHIP_STONEY) {
+	if (sctx->screen->b.has_rbplus) {
 		/* Disable RB+ blend optimizations for dual source blending.
 		 * Vulkan does this.
 		 */
 		if (blend->dual_src_blend) {
 			for (int i = 0; i < 8; i++) {
 				sx_mrt_blend_opt[i] =
 					S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_NONE) |
 					S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_NONE);
 			}
 		}
@@ -1190,22 +1189,22 @@ static void si_emit_db_render_state(struct si_context *sctx, struct r600_atom *s
 	/* Bug workaround for smoothing (overrasterization) on SI. */
 	if (sctx->b.chip_class == SI && sctx->smoothing_enabled) {
 		db_shader_control &= C_02880C_Z_ORDER;
 		db_shader_control |= S_02880C_Z_ORDER(V_02880C_LATE_Z);
 	}
 
 	/* Disable the gl_SampleMask fragment shader output if MSAA is disabled. */
 	if (!rs || !rs->multisample_enable)
 		db_shader_control &= C_02880C_MASK_EXPORT_ENABLE;
 
-	if (sctx->b.family == CHIP_STONEY &&
-	    sctx->screen->b.debug_flags & DBG_NO_RB_PLUS)
+	if (sctx->screen->b.has_rbplus &&
+	    !sctx->screen->b.rbplus_allowed)
 		db_shader_control |= S_02880C_DUAL_QUAD_DISABLE(1);
 
 	radeon_set_context_reg(cs, R_02880C_DB_SHADER_CONTROL,
 			       db_shader_control);
 }
 
 /*
  * format translation
  */
 static uint32_t si_translate_colorformat(enum pipe_format format)
@@ -1961,21 +1960,21 @@ static void si_choose_spi_color_formats(struct r600_surface *surf,
 					unsigned ntype, bool is_depth)
 {
 	/* Alpha is needed for alpha-to-coverage.
 	 * Blending may be with or without alpha.
 	 */
 	unsigned normal = 0; /* most optimal, may not support blending or export alpha */
 	unsigned alpha = 0; /* exports alpha, but may not support blending */
 	unsigned blend = 0; /* supports blending, but may not export alpha */
 	unsigned blend_alpha = 0; /* least optimal, supports blending and exports alpha */
 
-	/* Choose the SPI color formats. These are required values for Stoney/RB+.
+	/* Choose the SPI color formats. These are required values for RB+.
 	 * Other chips have multiple choices, though they are not necessarily better.
 	 */
 	switch (format) {
 	case V_028C70_COLOR_5_6_5:
 	case V_028C70_COLOR_1_5_5_5:
 	case V_028C70_COLOR_5_5_5_1:
 	case V_028C70_COLOR_4_4_4_4:
 	case V_028C70_COLOR_10_11_11:
 	case V_028C70_COLOR_11_11_10:
 	case V_028C70_COLOR_8:
@@ -4204,21 +4203,21 @@ static void si_init_config(struct si_context *sctx)
 		if (sctx->b.family == CHIP_FIJI ||
 		    sctx->b.family >= CHIP_POLARIS10)
 			vgt_tess_distribution |= S_028B50_TRAP_SPLIT(3);
 
 		si_pm4_set_reg(pm4, R_028B50_VGT_TESS_DISTRIBUTION, vgt_tess_distribution);
 	} else {
 		si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14);
 		si_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 16);
 	}
 
-	if (sctx->b.family == CHIP_STONEY)
+	if (sctx->screen->b.has_rbplus)
 		si_pm4_set_reg(pm4, R_028C40_PA_SC_SHADER_CONTROL, 0);
 
 	si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, border_color_va >> 8);
 	if (sctx->b.chip_class >= CIK)
 		si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, border_color_va >> 40);
 	si_pm4_add_bo(pm4, sctx->border_color_buffer, RADEON_USAGE_READ,
 		      RADEON_PRIO_BORDER_COLORS);
 
 	if (sctx->b.chip_class >= GFX9) {
 		si_pm4_set_reg(pm4, R_028060_DB_DFSM_CONTROL, 0);
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index f778cd7..0696582 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -2650,21 +2650,21 @@ bool si_update_shaders(struct si_context *sctx)
 			S_02880C_KILL_ENABLE(si_get_alpha_test_func(sctx) != PIPE_FUNC_ALWAYS);
 
 		if (si_pm4_state_changed(sctx, ps) || si_pm4_state_changed(sctx, vs) ||
 		    sctx->sprite_coord_enable != rs->sprite_coord_enable ||
 		    sctx->flatshade != rs->flatshade) {
 			sctx->sprite_coord_enable = rs->sprite_coord_enable;
 			sctx->flatshade = rs->flatshade;
 			si_mark_atom_dirty(sctx, &sctx->spi_map);
 		}
 
-		if (sctx->b.family == CHIP_STONEY && si_pm4_state_changed(sctx, ps))
+		if (sctx->screen->b.rbplus_allowed && si_pm4_state_changed(sctx, ps))
 			si_mark_atom_dirty(sctx, &sctx->cb_render_state);
 
 		if (sctx->ps_db_shader_control != db_shader_control) {
 			sctx->ps_db_shader_control = db_shader_control;
 			si_mark_atom_dirty(sctx, &sctx->db_render_state);
 		}
 
 		if (sctx->smoothing_enabled != sctx->ps_shader.current->key.part.ps.epilog.poly_line_smoothing) {
 			sctx->smoothing_enabled = sctx->ps_shader.current->key.part.ps.epilog.poly_line_smoothing;
 			si_mark_atom_dirty(sctx, &sctx->msaa_config);
-- 
2.7.4



More information about the mesa-dev mailing list