[Mesa-dev] [PATCH 12/15] radeonsi: set PA_SU_PRIM_FILTER_CNTL optimally

Marek Olšák maraeo at gmail.com
Tue Oct 2 22:35:44 UTC 2018


From: Marek Olšák <marek.olsak at amd.com>

---
 src/gallium/drivers/radeonsi/si_gfx_cs.c |  1 +
 src/gallium/drivers/radeonsi/si_state.c  | 15 +++++++++++----
 src/gallium/drivers/radeonsi/si_state.h  |  1 +
 3 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c
index 5a6f7bb35cb..c458b68d846 100644
--- a/src/gallium/drivers/radeonsi/si_gfx_cs.c
+++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c
@@ -332,20 +332,21 @@ void si_begin_new_gfx_cs(struct si_context *ctx)
 		ctx->tracked_regs.reg_value[SI_TRACKED_DB_SHADER_CONTROL] = 0x00000000;
 		ctx->tracked_regs.reg_value[SI_TRACKED_CB_TARGET_MASK] = 0xffffffff;
 		ctx->tracked_regs.reg_value[SI_TRACKED_CB_DCC_CONTROL] = 0x00000000;
 		ctx->tracked_regs.reg_value[SI_TRACKED_SX_PS_DOWNCONVERT] = 0x00000000;
 		ctx->tracked_regs.reg_value[SI_TRACKED_SX_BLEND_OPT_EPSILON] = 0x00000000;
 		ctx->tracked_regs.reg_value[SI_TRACKED_SX_BLEND_OPT_CONTROL] = 0x00000000;
 		ctx->tracked_regs.reg_value[SI_TRACKED_PA_SC_LINE_CNTL]	= 0x00001000;
 		ctx->tracked_regs.reg_value[SI_TRACKED_PA_SC_AA_CONFIG]	= 0x00000000;
 		ctx->tracked_regs.reg_value[SI_TRACKED_DB_EQAA]	= 0x00000000;
 		ctx->tracked_regs.reg_value[SI_TRACKED_PA_SC_MODE_CNTL_1] = 0x00000000;
+		ctx->tracked_regs.reg_value[SI_TRACKED_PA_SU_PRIM_FILTER_CNTL] = 0;
 		ctx->tracked_regs.reg_value[SI_TRACKED_PA_SU_SMALL_PRIM_FILTER_CNTL] = 0x00000000;
 		ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_VS_OUT_CNTL] = 0x00000000;
 		ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_CLIP_CNTL]	= 0x00090000;
 		ctx->tracked_regs.reg_value[SI_TRACKED_PA_SC_BINNER_CNTL_0] = 0x00000003;
 		ctx->tracked_regs.reg_value[SI_TRACKED_DB_DFSM_CONTROL]	= 0x00000000;
 		ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_GB_VERT_CLIP_ADJ]	= 0x3f800000;
 		ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_GB_VERT_DISC_ADJ]	= 0x3f800000;
 		ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_GB_HORZ_CLIP_ADJ]	= 0x3f800000;
 		ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_GB_HORZ_DISC_ADJ]	= 0x3f800000;
 		ctx->tracked_regs.reg_value[SI_TRACKED_PA_SU_HARDWARE_SCREEN_OFFSET] = 0;
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 8940f78cb54..0cebd974d80 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -3236,20 +3236,21 @@ static void si_emit_framebuffer_state(struct si_context *sctx)
 		radeon_emit(cs, EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0));
 	}
 
 	sctx->framebuffer.dirty_cbufs = 0;
 	sctx->framebuffer.dirty_zsbuf = false;
 }
 
 static void si_emit_msaa_sample_locs(struct si_context *sctx)
 {
 	struct radeon_cmdbuf *cs = sctx->gfx_cs;
+	struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
 	unsigned nr_samples = sctx->framebuffer.nr_samples;
 	bool has_msaa_sample_loc_bug = sctx->screen->has_msaa_sample_loc_bug;
 
 	/* Smoothing (only possible with nr_samples == 1) uses the same
 	 * sample locations as the MSAA it simulates.
 	 */
 	if (nr_samples <= 1 && sctx->smoothing_enabled)
 		nr_samples = SI_NUM_SMOOTH_AA_SAMPLES;
 
 	/* On Polaris, the small primitive filter uses the sample locations
@@ -3257,40 +3258,49 @@ static void si_emit_msaa_sample_locs(struct si_context *sctx)
 	 */
 	if (has_msaa_sample_loc_bug)
 		nr_samples = MAX2(nr_samples, 1);
 
 	if (nr_samples != sctx->sample_locs_num_samples) {
 		sctx->sample_locs_num_samples = nr_samples;
 		si_emit_sample_locations(cs, nr_samples);
 	}
 
 	if (sctx->family >= CHIP_POLARIS10) {
-		struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
 		unsigned small_prim_filter_cntl =
 			S_028830_SMALL_PRIM_FILTER_ENABLE(1) |
 			/* line bug */
 			S_028830_LINE_FILTER_DISABLE(sctx->family <= CHIP_POLARIS12);
 
 		/* The alternative of setting sample locations to 0 would
 		 * require a DB flush to avoid Z errors, see
 		 * https://bugs.freedesktop.org/show_bug.cgi?id=96908
 		 */
 		if (has_msaa_sample_loc_bug &&
 		    sctx->framebuffer.nr_samples > 1 &&
 		    !rs->multisample_enable)
 			small_prim_filter_cntl &= C_028830_SMALL_PRIM_FILTER_ENABLE;
 
 		radeon_opt_set_context_reg(sctx,
 					   R_028830_PA_SU_SMALL_PRIM_FILTER_CNTL,
 					   SI_TRACKED_PA_SU_SMALL_PRIM_FILTER_CNTL,
 					   small_prim_filter_cntl);
 	}
+
+	/* The exclusion bits can be set to improve rasterization efficiency
+	 * if no sample lies on the pixel boundary (-8 sample offset).
+	 */
+	bool exclusion = sctx->chip_class >= CIK &&
+			 (!rs->multisample_enable || nr_samples != 16);
+	radeon_opt_set_context_reg(sctx, R_02882C_PA_SU_PRIM_FILTER_CNTL,
+				   SI_TRACKED_PA_SU_PRIM_FILTER_CNTL,
+				   S_02882C_XMAX_RIGHT_EXCLUSION(exclusion) |
+				   S_02882C_YMAX_BOTTOM_EXCLUSION(exclusion));
 }
 
 static bool si_out_of_order_rasterization(struct si_context *sctx)
 {
 	struct si_state_blend *blend = sctx->queued.named.blend;
 	struct si_state_dsa *dsa = sctx->queued.named.dsa;
 
 	if (!sctx->screen->has_out_of_order_rast)
 		return false;
 
@@ -4882,23 +4892,20 @@ static void si_init_config(struct si_context *sctx)
 		si_pm4_set_reg(pm4, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0);
 	}
 
 	si_pm4_set_reg(pm4, R_028AA0_VGT_INSTANCE_STEP_RATE_0, 1);
 	if (!has_clear_state)
 		si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0x0);
 	if (sctx->chip_class < CIK)
 		si_pm4_set_reg(pm4, R_008A14_PA_CL_ENHANCE, S_008A14_NUM_CLIP_SEQ(3) |
 			       S_008A14_CLIP_VTX_REORDER_ENA(1));
 
-	if (!has_clear_state)
-		si_pm4_set_reg(pm4, R_02882C_PA_SU_PRIM_FILTER_CNTL, 0);
-
 	/* CLEAR_STATE doesn't clear these correctly on certain generations.
 	 * I don't know why. Deduced by trial and error.
 	 */
 	if (sctx->chip_class <= CIK) {
 		si_pm4_set_reg(pm4, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0);
 		si_pm4_set_reg(pm4, R_028204_PA_SC_WINDOW_SCISSOR_TL, S_028204_WINDOW_OFFSET_DISABLE(1));
 		si_pm4_set_reg(pm4, R_028240_PA_SC_GENERIC_SCISSOR_TL, S_028240_WINDOW_OFFSET_DISABLE(1));
 		si_pm4_set_reg(pm4, R_028244_PA_SC_GENERIC_SCISSOR_BR,
 			       S_028244_BR_X(16384) | S_028244_BR_Y(16384));
 		si_pm4_set_reg(pm4, R_028030_PA_SC_SCREEN_SCISSOR_TL, 0);
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index f22a1637a88..1c347cf7e00 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -262,20 +262,21 @@ enum si_tracked_reg {
 	SI_TRACKED_SX_PS_DOWNCONVERT, /* 3 consecutive registers */
 	SI_TRACKED_SX_BLEND_OPT_EPSILON,
 	SI_TRACKED_SX_BLEND_OPT_CONTROL,
 
 	SI_TRACKED_PA_SC_LINE_CNTL, /* 2 consecutive registers */
 	SI_TRACKED_PA_SC_AA_CONFIG,
 
 	SI_TRACKED_DB_EQAA,
 	SI_TRACKED_PA_SC_MODE_CNTL_1,
 
+	SI_TRACKED_PA_SU_PRIM_FILTER_CNTL,
 	SI_TRACKED_PA_SU_SMALL_PRIM_FILTER_CNTL,
 
 	SI_TRACKED_PA_CL_VS_OUT_CNTL,
 	SI_TRACKED_PA_CL_CLIP_CNTL,
 
 	SI_TRACKED_PA_SC_BINNER_CNTL_0,
 	SI_TRACKED_DB_DFSM_CONTROL,
 
 	SI_TRACKED_PA_CL_GB_VERT_CLIP_ADJ, /* 4 consecutive registers */
 	SI_TRACKED_PA_CL_GB_VERT_DISC_ADJ,
-- 
2.17.1



More information about the mesa-dev mailing list