[Mesa-dev] [PATCH 12/15] radeonsi: set PA_SU_PRIM_FILTER_CNTL optimally
Marek Olšák
maraeo at gmail.com
Tue Oct 2 22:35:44 UTC 2018
From: Marek Olšák <marek.olsak at amd.com>
---
src/gallium/drivers/radeonsi/si_gfx_cs.c | 1 +
src/gallium/drivers/radeonsi/si_state.c | 15 +++++++++++----
src/gallium/drivers/radeonsi/si_state.h | 1 +
3 files changed, 13 insertions(+), 4 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_gfx_cs.c b/src/gallium/drivers/radeonsi/si_gfx_cs.c
index 5a6f7bb35cb..c458b68d846 100644
--- a/src/gallium/drivers/radeonsi/si_gfx_cs.c
+++ b/src/gallium/drivers/radeonsi/si_gfx_cs.c
@@ -332,20 +332,21 @@ void si_begin_new_gfx_cs(struct si_context *ctx)
ctx->tracked_regs.reg_value[SI_TRACKED_DB_SHADER_CONTROL] = 0x00000000;
ctx->tracked_regs.reg_value[SI_TRACKED_CB_TARGET_MASK] = 0xffffffff;
ctx->tracked_regs.reg_value[SI_TRACKED_CB_DCC_CONTROL] = 0x00000000;
ctx->tracked_regs.reg_value[SI_TRACKED_SX_PS_DOWNCONVERT] = 0x00000000;
ctx->tracked_regs.reg_value[SI_TRACKED_SX_BLEND_OPT_EPSILON] = 0x00000000;
ctx->tracked_regs.reg_value[SI_TRACKED_SX_BLEND_OPT_CONTROL] = 0x00000000;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_SC_LINE_CNTL] = 0x00001000;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_SC_AA_CONFIG] = 0x00000000;
ctx->tracked_regs.reg_value[SI_TRACKED_DB_EQAA] = 0x00000000;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_SC_MODE_CNTL_1] = 0x00000000;
+ ctx->tracked_regs.reg_value[SI_TRACKED_PA_SU_PRIM_FILTER_CNTL] = 0;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_SU_SMALL_PRIM_FILTER_CNTL] = 0x00000000;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_VS_OUT_CNTL] = 0x00000000;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_CLIP_CNTL] = 0x00090000;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_SC_BINNER_CNTL_0] = 0x00000003;
ctx->tracked_regs.reg_value[SI_TRACKED_DB_DFSM_CONTROL] = 0x00000000;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_GB_VERT_CLIP_ADJ] = 0x3f800000;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_GB_VERT_DISC_ADJ] = 0x3f800000;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_GB_HORZ_CLIP_ADJ] = 0x3f800000;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_CL_GB_HORZ_DISC_ADJ] = 0x3f800000;
ctx->tracked_regs.reg_value[SI_TRACKED_PA_SU_HARDWARE_SCREEN_OFFSET] = 0;
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 8940f78cb54..0cebd974d80 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -3236,20 +3236,21 @@ static void si_emit_framebuffer_state(struct si_context *sctx)
radeon_emit(cs, EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0));
}
sctx->framebuffer.dirty_cbufs = 0;
sctx->framebuffer.dirty_zsbuf = false;
}
static void si_emit_msaa_sample_locs(struct si_context *sctx)
{
struct radeon_cmdbuf *cs = sctx->gfx_cs;
+ struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
unsigned nr_samples = sctx->framebuffer.nr_samples;
bool has_msaa_sample_loc_bug = sctx->screen->has_msaa_sample_loc_bug;
/* Smoothing (only possible with nr_samples == 1) uses the same
* sample locations as the MSAA it simulates.
*/
if (nr_samples <= 1 && sctx->smoothing_enabled)
nr_samples = SI_NUM_SMOOTH_AA_SAMPLES;
/* On Polaris, the small primitive filter uses the sample locations
@@ -3257,40 +3258,49 @@ static void si_emit_msaa_sample_locs(struct si_context *sctx)
*/
if (has_msaa_sample_loc_bug)
nr_samples = MAX2(nr_samples, 1);
if (nr_samples != sctx->sample_locs_num_samples) {
sctx->sample_locs_num_samples = nr_samples;
si_emit_sample_locations(cs, nr_samples);
}
if (sctx->family >= CHIP_POLARIS10) {
- struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
unsigned small_prim_filter_cntl =
S_028830_SMALL_PRIM_FILTER_ENABLE(1) |
/* line bug */
S_028830_LINE_FILTER_DISABLE(sctx->family <= CHIP_POLARIS12);
/* The alternative of setting sample locations to 0 would
* require a DB flush to avoid Z errors, see
* https://bugs.freedesktop.org/show_bug.cgi?id=96908
*/
if (has_msaa_sample_loc_bug &&
sctx->framebuffer.nr_samples > 1 &&
!rs->multisample_enable)
small_prim_filter_cntl &= C_028830_SMALL_PRIM_FILTER_ENABLE;
radeon_opt_set_context_reg(sctx,
R_028830_PA_SU_SMALL_PRIM_FILTER_CNTL,
SI_TRACKED_PA_SU_SMALL_PRIM_FILTER_CNTL,
small_prim_filter_cntl);
}
+
+ /* The exclusion bits can be set to improve rasterization efficiency
+ * if no sample lies on the pixel boundary (-8 sample offset).
+ */
+ bool exclusion = sctx->chip_class >= CIK &&
+ (!rs->multisample_enable || nr_samples != 16);
+ radeon_opt_set_context_reg(sctx, R_02882C_PA_SU_PRIM_FILTER_CNTL,
+ SI_TRACKED_PA_SU_PRIM_FILTER_CNTL,
+ S_02882C_XMAX_RIGHT_EXCLUSION(exclusion) |
+ S_02882C_YMAX_BOTTOM_EXCLUSION(exclusion));
}
static bool si_out_of_order_rasterization(struct si_context *sctx)
{
struct si_state_blend *blend = sctx->queued.named.blend;
struct si_state_dsa *dsa = sctx->queued.named.dsa;
if (!sctx->screen->has_out_of_order_rast)
return false;
@@ -4882,23 +4892,20 @@ static void si_init_config(struct si_context *sctx)
si_pm4_set_reg(pm4, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0);
}
si_pm4_set_reg(pm4, R_028AA0_VGT_INSTANCE_STEP_RATE_0, 1);
if (!has_clear_state)
si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0x0);
if (sctx->chip_class < CIK)
si_pm4_set_reg(pm4, R_008A14_PA_CL_ENHANCE, S_008A14_NUM_CLIP_SEQ(3) |
S_008A14_CLIP_VTX_REORDER_ENA(1));
- if (!has_clear_state)
- si_pm4_set_reg(pm4, R_02882C_PA_SU_PRIM_FILTER_CNTL, 0);
-
/* CLEAR_STATE doesn't clear these correctly on certain generations.
* I don't know why. Deduced by trial and error.
*/
if (sctx->chip_class <= CIK) {
si_pm4_set_reg(pm4, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0);
si_pm4_set_reg(pm4, R_028204_PA_SC_WINDOW_SCISSOR_TL, S_028204_WINDOW_OFFSET_DISABLE(1));
si_pm4_set_reg(pm4, R_028240_PA_SC_GENERIC_SCISSOR_TL, S_028240_WINDOW_OFFSET_DISABLE(1));
si_pm4_set_reg(pm4, R_028244_PA_SC_GENERIC_SCISSOR_BR,
S_028244_BR_X(16384) | S_028244_BR_Y(16384));
si_pm4_set_reg(pm4, R_028030_PA_SC_SCREEN_SCISSOR_TL, 0);
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index f22a1637a88..1c347cf7e00 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -262,20 +262,21 @@ enum si_tracked_reg {
SI_TRACKED_SX_PS_DOWNCONVERT, /* 3 consecutive registers */
SI_TRACKED_SX_BLEND_OPT_EPSILON,
SI_TRACKED_SX_BLEND_OPT_CONTROL,
SI_TRACKED_PA_SC_LINE_CNTL, /* 2 consecutive registers */
SI_TRACKED_PA_SC_AA_CONFIG,
SI_TRACKED_DB_EQAA,
SI_TRACKED_PA_SC_MODE_CNTL_1,
+ SI_TRACKED_PA_SU_PRIM_FILTER_CNTL,
SI_TRACKED_PA_SU_SMALL_PRIM_FILTER_CNTL,
SI_TRACKED_PA_CL_VS_OUT_CNTL,
SI_TRACKED_PA_CL_CLIP_CNTL,
SI_TRACKED_PA_SC_BINNER_CNTL_0,
SI_TRACKED_DB_DFSM_CONTROL,
SI_TRACKED_PA_CL_GB_VERT_CLIP_ADJ, /* 4 consecutive registers */
SI_TRACKED_PA_CL_GB_VERT_DISC_ADJ,
--
2.17.1
More information about the mesa-dev
mailing list