[Mesa-dev] [PATCH 3/5] radeonsi: enable out-of-order rasterization when possible on VI and GFX9 dGPUs
Nicolai Hähnle
nhaehnle at gmail.com
Sat Sep 9 10:43:56 UTC 2017
From: Nicolai Hähnle <nicolai.haehnle at amd.com>
This does not take commutative blending into account yet.
R600_DEBUG=nooutoforder disables it.
---
src/gallium/drivers/radeon/r600_pipe_common.c | 1 +
src/gallium/drivers/radeon/r600_pipe_common.h | 2 +-
src/gallium/drivers/radeonsi/si_pipe.c | 3 +
src/gallium/drivers/radeonsi/si_pipe.h | 1 +
src/gallium/drivers/radeonsi/si_state.c | 157 +++++++++++++++++++++++-
src/gallium/drivers/radeonsi/si_state.h | 28 ++++-
src/gallium/drivers/radeonsi/si_state_shaders.c | 7 ++
7 files changed, 193 insertions(+), 6 deletions(-)
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c
index 1302e112c03..64851c615b0 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -818,20 +818,21 @@ static const struct debug_named_value common_debug_options[] = {
{ "check_vm", DBG_CHECK_VM, "Check VM faults and dump debug info." },
{ "nodcc", DBG_NO_DCC, "Disable DCC." },
{ "nodccclear", DBG_NO_DCC_CLEAR, "Disable DCC fast clear." },
{ "norbplus", DBG_NO_RB_PLUS, "Disable RB+." },
{ "sisched", DBG_SI_SCHED, "Enable LLVM SI Machine Instruction Scheduler." },
{ "mono", DBG_MONOLITHIC_SHADERS, "Use old-style monolithic shaders compiled on demand" },
{ "unsafemath", DBG_UNSAFE_MATH, "Enable unsafe math shader optimizations" },
{ "nodccfb", DBG_NO_DCC_FB, "Disable separate DCC on the main framebuffer" },
{ "nodpbb", DBG_NO_DPBB, "Disable DPBB." },
{ "nodfsm", DBG_NO_DFSM, "Disable DFSM." },
+ { "nooutoforder", DBG_NO_OUT_OF_ORDER, "Disable out-of-order rasterization" },
DEBUG_NAMED_VALUE_END /* must be last */
};
static const char* r600_get_vendor(struct pipe_screen* pscreen)
{
return "X.Org";
}
static const char* r600_get_device_vendor(struct pipe_screen* pscreen)
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index ed93d99669f..6074f4d440e 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -103,21 +103,21 @@ struct u_log_context;
#define DBG_FORCE_DMA (1ull << 38)
#define DBG_PRECOMPILE (1ull << 39)
#define DBG_INFO (1ull << 40)
#define DBG_NO_WC (1ull << 41)
#define DBG_CHECK_VM (1ull << 42)
#define DBG_NO_DCC (1ull << 43)
#define DBG_NO_DCC_CLEAR (1ull << 44)
#define DBG_NO_RB_PLUS (1ull << 45)
#define DBG_SI_SCHED (1ull << 46)
#define DBG_MONOLITHIC_SHADERS (1ull << 47)
-/* gap */
+#define DBG_NO_OUT_OF_ORDER (1ull << 48)
#define DBG_UNSAFE_MATH (1ull << 49)
#define DBG_NO_DCC_FB (1ull << 50)
#define DBG_TEST_VMFAULT_CP (1ull << 51)
#define DBG_TEST_VMFAULT_SDMA (1ull << 52)
#define DBG_TEST_VMFAULT_SHADER (1ull << 53)
#define DBG_NO_DPBB (1ull << 54)
#define DBG_NO_DFSM (1ull << 55)
#define R600_MAP_BUFFER_ALIGNMENT 64
#define R600_MAX_VIEWPORTS 16
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index ca2e055a90e..9f3651f2526 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -1038,20 +1038,23 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws,
sscreen->b.info.pfp_fw_version >= 121 &&
sscreen->b.info.me_fw_version >= 87) ||
(sscreen->b.chip_class == CIK &&
sscreen->b.info.pfp_fw_version >= 211 &&
sscreen->b.info.me_fw_version >= 173) ||
(sscreen->b.chip_class == SI &&
sscreen->b.info.pfp_fw_version >= 79 &&
sscreen->b.info.me_fw_version >= 142);
sscreen->has_ds_bpermute = sscreen->b.chip_class >= VI;
+ sscreen->has_out_of_order_rast = sscreen->b.chip_class >= VI &&
+ sscreen->b.info.max_se >= 2 &&
+ !(sscreen->b.debug_flags & DBG_NO_OUT_OF_ORDER);
sscreen->has_msaa_sample_loc_bug = (sscreen->b.family >= CHIP_POLARIS10 &&
sscreen->b.family <= CHIP_POLARIS12) ||
sscreen->b.family == CHIP_VEGA10 ||
sscreen->b.family == CHIP_RAVEN;
sscreen->dpbb_allowed = sscreen->b.chip_class >= GFX9 &&
!(sscreen->b.debug_flags & DBG_NO_DPBB);
sscreen->dfsm_allowed = sscreen->dpbb_allowed &&
!(sscreen->b.debug_flags & DBG_NO_DFSM);
/* While it would be nice not to have this flag, we are constrained
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 8db7028c9a1..b8073ce9c09 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -88,20 +88,21 @@ struct hash_table;
struct u_suballocator;
struct si_screen {
struct r600_common_screen b;
unsigned gs_table_depth;
unsigned tess_offchip_block_dw_size;
bool has_clear_state;
bool has_distributed_tess;
bool has_draw_indirect_multi;
bool has_ds_bpermute;
+ bool has_out_of_order_rast;
bool has_msaa_sample_loc_bug;
bool dpbb_allowed;
bool dfsm_allowed;
bool llvm_has_working_vgpr_indexing;
/* Whether shaders are monolithic (1-part) or separate (3-part). */
bool use_monolithic_shaders;
bool record_llvm_ir;
mtx_t shader_parts_mutex;
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 6978c6ca9a2..06f86aaf92a 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -416,20 +416,21 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx,
struct si_pm4_state *pm4 = &blend->pm4;
uint32_t sx_mrt_blend_opt[8] = {0};
uint32_t color_control = 0;
if (!blend)
return NULL;
blend->alpha_to_coverage = state->alpha_to_coverage;
blend->alpha_to_one = state->alpha_to_one;
blend->dual_src_blend = util_blend_state_is_dual(state, 0);
+ blend->logicop_enable = state->logicop_enable;
if (state->logicop_enable) {
color_control |= S_028808_ROP3(state->logicop_func | (state->logicop_func << 4));
} else {
color_control |= S_028808_ROP3(0xcc);
}
si_pm4_set_reg(pm4, R_028B70_DB_ALPHA_TO_MASK,
S_028B70_ALPHA_TO_MASK_ENABLE(state->alpha_to_coverage) |
S_028B70_ALPHA_TO_MASK_OFFSET0(2) |
@@ -623,20 +624,27 @@ static void si_bind_blend_state(struct pipe_context *ctx, void *state)
old_blend->blend_enable_4bit != blend->blend_enable_4bit ||
old_blend->need_src_alpha_4bit != blend->need_src_alpha_4bit)
sctx->do_update_shaders = true;
if (sctx->screen->dpbb_allowed &&
(!old_blend ||
old_blend->alpha_to_coverage != blend->alpha_to_coverage ||
old_blend->blend_enable_4bit != blend->blend_enable_4bit ||
old_blend->cb_target_enabled_4bit != blend->cb_target_enabled_4bit))
si_mark_atom_dirty(sctx, &sctx->dpbb_state);
+
+ if (sctx->screen->has_out_of_order_rast &&
+ (!old_blend ||
+ (old_blend->blend_enable_4bit != blend->blend_enable_4bit ||
+ old_blend->cb_target_enabled_4bit != blend->cb_target_enabled_4bit ||
+ old_blend->logicop_enable != blend->logicop_enable)))
+ si_mark_atom_dirty(sctx, &sctx->msaa_config);
}
static void si_delete_blend_state(struct pipe_context *ctx, void *state)
{
struct si_context *sctx = (struct si_context *)ctx;
si_pm4_delete_state(sctx, blend, (struct si_state_blend *)state);
}
static void si_set_blend_color(struct pipe_context *ctx,
const struct pipe_blend_color *state)
@@ -1052,20 +1060,44 @@ static uint32_t si_translate_stencil_op(int s_op)
}
static bool si_dsa_writes_stencil(const struct pipe_stencil_state *s)
{
return s->enabled && s->writemask &&
(s->fail_op != PIPE_STENCIL_OP_KEEP ||
s->zfail_op != PIPE_STENCIL_OP_KEEP ||
s->zpass_op != PIPE_STENCIL_OP_KEEP);
}
+static bool si_order_invariant_stencil_op(enum pipe_stencil_op op) /* true for every stencil op other than INCR, DECR and REPLACE */
+{
+ /* REPLACE is normally order invariant, except when the stencil
+ * reference value is written by the fragment shader. Tracking this
+ * interaction does not seem worth the effort, so be conservative. */
+ return op != PIPE_STENCIL_OP_INCR && /* presumably rejected because the non-wrapping variants clamp -- TODO confirm */
+ op != PIPE_STENCIL_OP_DECR &&
+ op != PIPE_STENCIL_OP_REPLACE; /* rejected for the shader-written reference value case described above */
+}
+
+/* Compute whether, assuming Z writes are disabled, this stencil state is order
+ * invariant in the sense that the set of passing fragments as well as the
+ * final stencil buffer result does not depend on the order of fragments. */
+static bool si_order_invariant_stencil_state(const struct pipe_stencil_state *state)
+{
+ return !state->enabled || !state->writemask || /* trivially invariant: state disabled or write mask is zero */
+ /* The following assumes that Z writes are disabled. NOTE(review): each op is tested on its own; whether two different accepted ops (e.g. ZERO and INCR_WRAP) commute with each other is not checked -- confirm this is sufficient. */
+ (state->func == PIPE_FUNC_ALWAYS && /* func ALWAYS: only zpass_op and zfail_op are examined */
+ si_order_invariant_stencil_op(state->zpass_op) &&
+ si_order_invariant_stencil_op(state->zfail_op)) ||
+ (state->func == PIPE_FUNC_NEVER && /* func NEVER: only fail_op is examined */
+ si_order_invariant_stencil_op(state->fail_op));
+}
+
static void *si_create_dsa_state(struct pipe_context *ctx,
const struct pipe_depth_stencil_alpha_state *state)
{
struct si_state_dsa *dsa = CALLOC_STRUCT(si_state_dsa);
struct si_pm4_state *pm4 = &dsa->pm4;
unsigned db_depth_control;
uint32_t db_stencil_control = 0;
if (!dsa) {
return NULL;
@@ -1118,20 +1150,58 @@ static void *si_create_dsa_state(struct pipe_context *ctx,
dsa->depth_enabled = state->depth.enabled;
dsa->depth_write_enabled = state->depth.enabled &&
state->depth.writemask;
dsa->stencil_enabled = state->stencil[0].enabled;
dsa->stencil_write_enabled = state->stencil[0].enabled &&
(si_dsa_writes_stencil(&state->stencil[0]) ||
si_dsa_writes_stencil(&state->stencil[1]));
dsa->db_can_write = dsa->depth_write_enabled ||
dsa->stencil_write_enabled;
+
+ bool zfunc_is_ordered =
+ state->depth.func == PIPE_FUNC_NEVER ||
+ state->depth.func == PIPE_FUNC_LESS ||
+ state->depth.func == PIPE_FUNC_LEQUAL ||
+ state->depth.func == PIPE_FUNC_GREATER ||
+ state->depth.func == PIPE_FUNC_GEQUAL;
+
+ bool nozwrite_and_order_invariant_stencil =
+ !dsa->db_can_write ||
+ (!dsa->depth_write_enabled &&
+ si_order_invariant_stencil_state(&state->stencil[0]) &&
+ si_order_invariant_stencil_state(&state->stencil[1]));
+
+ dsa->order_invariance[1].zs =
+ nozwrite_and_order_invariant_stencil ||
+ (!dsa->stencil_write_enabled && zfunc_is_ordered);
+ dsa->order_invariance[0].zs = !dsa->depth_write_enabled || zfunc_is_ordered;
+
+ dsa->order_invariance[1].pass_set =
+ nozwrite_and_order_invariant_stencil ||
+ (!dsa->stencil_write_enabled &&
+ (state->depth.func == PIPE_FUNC_ALWAYS ||
+ state->depth.func == PIPE_FUNC_NEVER));
+ dsa->order_invariance[0].pass_set =
+ !dsa->depth_write_enabled ||
+ (state->depth.func == PIPE_FUNC_ALWAYS ||
+ state->depth.func == PIPE_FUNC_NEVER);
+
+ const bool assume_no_z_fights = false;
+
+ dsa->order_invariance[1].pass_last =
+ assume_no_z_fights && !dsa->stencil_write_enabled &&
+ dsa->depth_write_enabled && zfunc_is_ordered;
+ dsa->order_invariance[0].pass_last =
+ assume_no_z_fights &&
+ dsa->depth_write_enabled && zfunc_is_ordered;
+
return dsa;
}
static void si_bind_dsa_state(struct pipe_context *ctx, void *state)
{
struct si_context *sctx = (struct si_context *)ctx;
struct si_state_dsa *old_dsa = sctx->queued.named.dsa;
struct si_state_dsa *dsa = state;
if (!state)
@@ -1147,20 +1217,26 @@ static void si_bind_dsa_state(struct pipe_context *ctx, void *state)
if (!old_dsa || old_dsa->alpha_func != dsa->alpha_func)
sctx->do_update_shaders = true;
if (sctx->screen->dpbb_allowed &&
(!old_dsa ||
(old_dsa->depth_enabled != dsa->depth_enabled ||
old_dsa->stencil_enabled != dsa->stencil_enabled ||
old_dsa->db_can_write != dsa->db_can_write)))
si_mark_atom_dirty(sctx, &sctx->dpbb_state);
+
+ if (sctx->screen->has_out_of_order_rast &&
+ (!old_dsa ||
+ memcmp(old_dsa->order_invariance, dsa->order_invariance,
+ sizeof(old_dsa->order_invariance))))
+ si_mark_atom_dirty(sctx, &sctx->msaa_config);
}
static void si_delete_dsa_state(struct pipe_context *ctx, void *state)
{
struct si_context *sctx = (struct si_context *)ctx;
si_pm4_delete_state(sctx, dsa, (struct si_state_dsa *)state);
}
static void *si_create_db_flush_dsa(struct si_context *sctx)
{
@@ -1191,20 +1267,25 @@ static void si_set_active_query_state(struct pipe_context *ctx, boolean enable)
}
}
static void si_set_occlusion_query_state(struct pipe_context *ctx,
bool old_enable,
bool old_perfect_enable)
{
struct si_context *sctx = (struct si_context*)ctx;
si_mark_atom_dirty(sctx, &sctx->db_render_state);
+
+ bool perfect_enable = sctx->b.num_perfect_occlusion_queries != 0;
+
+ if (perfect_enable != old_perfect_enable)
+ si_mark_atom_dirty(sctx, &sctx->msaa_config);
}
static void si_save_qbo_state(struct pipe_context *ctx, struct r600_qbo_state *st)
{
struct si_context *sctx = (struct si_context*)ctx;
st->saved_compute = sctx->cs_shader_state.program;
si_get_pipe_constant_buffer(sctx, PIPE_SHADER_COMPUTE, 0, &st->saved_const0);
si_get_shader_buffers(sctx, PIPE_SHADER_COMPUTE, 0, 3, st->saved_ssbo);
@@ -2534,20 +2615,25 @@ static void si_dec_framebuffer_counters(const struct pipe_framebuffer_state *sta
static void si_set_framebuffer_state(struct pipe_context *ctx,
const struct pipe_framebuffer_state *state)
{
struct si_context *sctx = (struct si_context *)ctx;
struct pipe_constant_buffer constbuf = {0};
struct r600_surface *surf = NULL;
struct r600_texture *rtex;
bool old_any_dst_linear = sctx->framebuffer.any_dst_linear;
unsigned old_nr_samples = sctx->framebuffer.nr_samples;
+ unsigned old_colorbuf_enabled_4bit = sctx->framebuffer.colorbuf_enabled_4bit;
+ bool old_has_zsbuf = !!sctx->framebuffer.state.zsbuf;
+ bool old_has_stencil =
+ old_has_zsbuf &&
+ ((struct r600_texture*)sctx->framebuffer.state.zsbuf->texture)->surface.has_stencil;
bool unbound = false;
int i;
si_update_fb_dirtiness_after_rendering(sctx);
for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) {
if (!sctx->framebuffer.state.cbufs[i])
continue;
rtex = (struct r600_texture*)sctx->framebuffer.state.cbufs[i]->texture;
@@ -2691,44 +2777,52 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
p_atomic_inc(&rtex->framebuffers_bound);
if (rtex->dcc_gather_statistics) {
/* Dirty tracking must be enabled for DCC usage analysis. */
sctx->framebuffer.compressed_cb_mask |= 1 << i;
vi_separate_dcc_start_query(ctx, rtex);
}
}
+ struct r600_texture *zstex = NULL;
+
if (state->zsbuf) {
surf = (struct r600_surface*)state->zsbuf;
- rtex = (struct r600_texture*)surf->base.texture;
+ zstex = (struct r600_texture*)surf->base.texture;
if (!surf->depth_initialized) {
si_init_depth_surface(sctx, surf);
}
- if (vi_tc_compat_htile_enabled(rtex, surf->base.u.tex.level))
+ if (vi_tc_compat_htile_enabled(zstex, surf->base.u.tex.level))
sctx->framebuffer.DB_has_shader_readable_metadata = true;
r600_context_add_resource_size(ctx, surf->base.texture);
}
si_update_poly_offset_state(sctx);
si_mark_atom_dirty(sctx, &sctx->cb_render_state);
si_mark_atom_dirty(sctx, &sctx->framebuffer.atom);
if (sctx->screen->dpbb_allowed)
si_mark_atom_dirty(sctx, &sctx->dpbb_state);
if (sctx->framebuffer.any_dst_linear != old_any_dst_linear)
si_mark_atom_dirty(sctx, &sctx->msaa_config);
+ if (sctx->screen->has_out_of_order_rast &&
+ (sctx->framebuffer.colorbuf_enabled_4bit != old_colorbuf_enabled_4bit ||
+ !!sctx->framebuffer.state.zsbuf != old_has_zsbuf ||
+ (zstex && zstex->surface.has_stencil != old_has_stencil)))
+ si_mark_atom_dirty(sctx, &sctx->msaa_config);
+
if (sctx->framebuffer.nr_samples != old_nr_samples) {
si_mark_atom_dirty(sctx, &sctx->msaa_config);
si_mark_atom_dirty(sctx, &sctx->db_render_state);
/* Set sample locations as fragment shader constants. */
switch (sctx->framebuffer.nr_samples) {
case 1:
constbuf.user_buffer = sctx->b.sample_locations_1x;
break;
case 2:
@@ -3051,30 +3145,89 @@ static void si_emit_msaa_sample_locs(struct si_context *sctx,
if (has_msaa_sample_loc_bug &&
sctx->framebuffer.nr_samples > 1 &&
rs && !rs->multisample_enable)
small_prim_filter_cntl &= C_028830_SMALL_PRIM_FILTER_ENABLE;
radeon_set_context_reg(cs, R_028830_PA_SU_SMALL_PRIM_FILTER_CNTL,
small_prim_filter_cntl);
}
}
+static bool si_out_of_order_rasterization(struct si_context *sctx) /* returns whether out-of-order rasterization may be enabled for the currently queued blend/DSA state, framebuffer, PS and queries */
+{
+ struct si_state_blend *blend = sctx->queued.named.blend;
+ struct si_state_dsa *dsa = sctx->queued.named.dsa; /* NOTE(review): dereferenced below without a NULL check, unlike blend -- confirm a DSA state is always bound whenever a zsbuf is */
+
+ if (!sctx->screen->has_out_of_order_rast) /* screen-level capability/debug switch */
+ return false;
+
+ unsigned colormask = sctx->framebuffer.colorbuf_enabled_4bit; /* start from the framebuffer's enabled-colorbuffer mask */
+
+ if (blend) {
+ colormask &= blend->cb_target_enabled_4bit; /* keep only what the blend state also enables */
+ } else {
+ colormask = 0; /* no blend state queued: treated as no color output */
+ }
+
+ /* Conservative: No logic op. */
+ if (colormask && blend->logicop_enable) /* colormask != 0 implies blend != NULL, see the else branch above */
+ return false;
+
+ struct si_dsa_order_invariance dsa_order_invariant = { /* values used when no Z/S buffer is bound */
+ .zs = true, .pass_set = true, .pass_last = false
+ };
+
+ if (sctx->framebuffer.state.zsbuf) {
+ struct r600_texture *zstex =
+ (struct r600_texture*)sctx->framebuffer.state.zsbuf->texture;
+ bool has_stencil = zstex->surface.has_stencil;
+ dsa_order_invariant = dsa->order_invariance[has_stencil]; /* index 0 = without stencil buffer, 1 = Z and S buffers both present */
+ if (!dsa_order_invariant.zs) /* final Z/S buffer contents must not depend on fragment order */
+ return false;
+
+ /* The set of PS invocations is always order invariant,
+ * except when early Z/S tests are requested. */
+ if (sctx->ps_shader.cso &&
+ sctx->ps_shader.cso->info.writes_memory &&
+ sctx->ps_shader.cso->info.properties[TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL] &&
+ !dsa_order_invariant.pass_set)
+ return false;
+
+ if (sctx->b.num_perfect_occlusion_queries != 0 && /* active perfect occlusion queries also require an order-invariant pass set */
+ !dsa_order_invariant.pass_set)
+ return false;
+ }
+
+ if (!colormask) /* no color output: the Z/S checks above are the only constraints */
+ return true;
+
+ bool blend_enabled = (colormask & blend->blend_enable_4bit) != 0; /* blending enabled on any color buffer still in colormask */
+
+ if (blend_enabled)
+ return false; /* TODO */
+
+ return dsa_order_invariant.pass_last; /* color output without blending: allowed only if the last passing fragment is order invariant */
+}
+
static void si_emit_msaa_config(struct si_context *sctx, struct r600_atom *atom)
{
struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
unsigned num_tile_pipes = sctx->screen->b.info.num_tile_pipes;
/* 33% faster rendering to linear color buffers */
bool dst_is_linear = sctx->framebuffer.any_dst_linear;
+ bool out_of_order_rast = si_out_of_order_rasterization(sctx);
unsigned sc_mode_cntl_1 =
S_028A4C_WALK_SIZE(dst_is_linear) |
S_028A4C_WALK_FENCE_ENABLE(!dst_is_linear) |
S_028A4C_WALK_FENCE_SIZE(num_tile_pipes == 2 ? 2 : 3) |
+ S_028A4C_OUT_OF_ORDER_PRIMITIVE_ENABLE(out_of_order_rast) |
+ S_028A4C_OUT_OF_ORDER_WATER_MARK(0x7) |
/* always 1: */
S_028A4C_WALK_ALIGN8_PRIM_FITS_ST(1) |
S_028A4C_SUPERTILE_WALK_ORDER_ENABLE(1) |
S_028A4C_TILE_WALK_ORDER_ENABLE(1) |
S_028A4C_MULTI_SHADER_ENGINE_PRIM_DISCARD_ENABLE(1) |
S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) |
S_028A4C_FORCE_EOV_REZ_ENABLE(1);
cayman_emit_msaa_config(cs, sctx->framebuffer.nr_samples,
sctx->ps_iter_samples,
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 17d210a3af2..56e597a5813 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -42,29 +42,30 @@
#define SI_NUM_IMAGES 16
#define SI_NUM_SHADER_BUFFERS 16
struct si_screen;
struct si_shader;
struct si_shader_selector;
struct si_state_blend {
struct si_pm4_state pm4;
uint32_t cb_target_mask;
- bool alpha_to_coverage;
- bool alpha_to_one;
- bool dual_src_blend;
/* Set 0xf or 0x0 (4 bits) per render target if the following is
* true. ANDed with spi_shader_col_format.
*/
unsigned cb_target_enabled_4bit;
unsigned blend_enable_4bit;
unsigned need_src_alpha_4bit;
+ bool alpha_to_coverage:1;
+ bool alpha_to_one:1;
+ bool dual_src_blend:1;
+ bool logicop_enable:1;
};
struct si_state_rasterizer {
struct si_pm4_state pm4;
/* poly offset states for 16-bit, 24-bit, and 32-bit zbuffers */
struct si_pm4_state *pm4_poly_offset;
unsigned pa_sc_line_stipple;
unsigned pa_cl_clip_cntl;
unsigned sprite_coord_enable:8;
unsigned clip_plane_enable:8;
@@ -82,29 +83,50 @@ struct si_state_rasterizer {
unsigned rasterizer_discard:1;
unsigned scissor_enable:1;
unsigned clip_halfz:1;
};
struct si_dsa_stencil_ref_part {
uint8_t valuemask[2];
uint8_t writemask[2];
};
+struct si_dsa_order_invariance { /* order-invariance properties of a DSA state; filled in by si_create_dsa_state */
+ /** Whether the final result in Z/S buffers is guaranteed to be
+ * invariant under changes to the order in which fragments arrive. */
+ bool zs:1;
+
+ /** Whether the set of fragments that pass the combined Z/S test is
+ * guaranteed to be invariant under changes to the order in which
+ * fragments arrive. */
+ bool pass_set:1;
+
+ /** Whether the last fragment that passes the combined Z/S test at each
+ * sample is guaranteed to be invariant under changes to the order in
+ * which fragments arrive. */
+ bool pass_last:1;
+};
+
struct si_state_dsa {
struct si_pm4_state pm4;
struct si_dsa_stencil_ref_part stencil_ref;
+
+ /* 0 = without stencil buffer, 1 = when both Z and S buffers are present */
+ struct si_dsa_order_invariance order_invariance[2];
+
ubyte alpha_func:3;
bool depth_enabled:1;
bool depth_write_enabled:1;
bool stencil_enabled:1;
bool stencil_write_enabled:1;
bool db_can_write:1;
+
};
struct si_stencil_ref {
struct r600_atom atom;
struct pipe_stencil_ref state;
struct si_dsa_stencil_ref_part dsa_part;
};
struct si_vertex_elements
{
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 9f76551cfbb..bed24798d8d 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -2402,20 +2402,27 @@ static void si_bind_ps_shader(struct pipe_context *ctx, void *state)
sctx->ps_shader.current = sel ? sel->first_variant : NULL;
si_update_common_shader_state(sctx);
if (sel) {
if (sctx->ia_multi_vgt_param_key.u.uses_tess)
si_update_tess_uses_prim_id(sctx);
if (!old_sel ||
old_sel->info.colors_written != sel->info.colors_written)
si_mark_atom_dirty(sctx, &sctx->cb_render_state);
+
+ if (sctx->screen->has_out_of_order_rast &&
+ (!old_sel ||
+ old_sel->info.writes_memory != sel->info.writes_memory ||
+ old_sel->info.properties[TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL] !=
+ sel->info.properties[TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL]))
+ si_mark_atom_dirty(sctx, &sctx->msaa_config);
}
si_set_active_descriptors_for_shader(sctx, sel);
}
static void si_delete_shader(struct si_context *sctx, struct si_shader *shader)
{
if (shader->is_optimized) {
util_queue_drop_job(&sctx->screen->shader_compiler_queue_low_priority,
&shader->optimized_ready);
util_queue_fence_destroy(&shader->optimized_ready);
--
2.11.0
More information about the mesa-dev
mailing list