[Mesa-dev] [PATCH 2/2] radeonsi: adjust and clean up Z_ORDER and EXEC_ON_x settings
Marek Olšák
maraeo at gmail.com
Wed Oct 12 21:19:44 UTC 2016
From: Marek Olšák <marek.olsak at amd.com>
The table was copied from the Vulkan driver. The comment lines are as long
as the table for cosmetic reasons.
---
src/gallium/drivers/radeonsi/si_shader.h | 1 -
src/gallium/drivers/radeonsi/si_state_shaders.c | 53 +++++++++++++++----------
2 files changed, 32 insertions(+), 22 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index f2618ac..b07210c 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -432,21 +432,20 @@ struct si_shader {
struct si_shader_part *prolog;
struct si_shader_part *epilog;
struct si_shader *gs_copy_shader;
struct si_pm4_state *pm4;
struct r600_resource *bo;
struct r600_resource *scratch_bo;
union si_shader_key key;
bool is_binary_shared;
- unsigned z_order;
/* The following data is all that's needed for binary shaders. */
struct radeon_shader_binary binary;
struct si_shader_config config;
struct si_shader_info info;
/* Shader key + LLVM IR + disassembly + statistics.
* Generated for debug contexts only.
*/
char *shader_log;
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index be5c659..d339b84 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -800,34 +800,20 @@ static void si_shader_ps(struct si_shader *shader)
si_pm4_set_reg(pm4, R_00B028_SPI_SHADER_PGM_RSRC1_PS,
S_00B028_VGPRS((shader->config.num_vgprs - 1) / 4) |
S_00B028_SGPRS((shader->config.num_sgprs - 1) / 8) |
S_00B028_DX10_CLAMP(1) |
S_00B028_FLOAT_MODE(shader->config.float_mode));
si_pm4_set_reg(pm4, R_00B02C_SPI_SHADER_PGM_RSRC2_PS,
S_00B02C_EXTRA_LDS_SIZE(shader->config.lds_size) |
S_00B02C_USER_SGPR(SI_PS_NUM_USER_SGPR) |
S_00B32C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));
-
- /* DON'T USE EARLY_Z_THEN_RE_Z !!!
- *
- * It decreases performance by 15% in DiRT: Showdown on Ultra settings.
- * And it has pretty complex shaders.
- *
- * Shaders with side effects that must execute independently of the
- * depth test require LATE_Z.
- */
- if (info->writes_memory &&
- !info->properties[TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL])
- shader->z_order = V_02880C_LATE_Z;
- else
- shader->z_order = V_02880C_EARLY_Z_THEN_LATE_Z;
}
static void si_shader_init_pm4_state(struct si_screen *sscreen,
struct si_shader *shader)
{
switch (shader->selector->type) {
case PIPE_SHADER_VERTEX:
if (shader->key.vs.as_ls)
si_shader_ls(shader);
else if (shader->key.vs.as_es)
@@ -1364,26 +1350,52 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
case TGSI_FS_DEPTH_LAYOUT_GREATER:
sel->db_shader_control |=
S_02880C_CONSERVATIVE_Z_EXPORT(V_02880C_EXPORT_GREATER_THAN_Z);
break;
case TGSI_FS_DEPTH_LAYOUT_LESS:
sel->db_shader_control |=
S_02880C_CONSERVATIVE_Z_EXPORT(V_02880C_EXPORT_LESS_THAN_Z);
break;
}
- if (sel->info.properties[TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL])
- sel->db_shader_control |= S_02880C_DEPTH_BEFORE_SHADER(1);
+ /* Z_ORDER, EXEC_ON_HIER_FAIL and EXEC_ON_NOOP should be set as follows:
+ *
+ * | early Z/S | writes_mem | allow_ReZ? | Z_ORDER | EXEC_ON_HIER_FAIL | EXEC_ON_NOOP
+ * --|-----------|------------|------------|--------------------|-------------------|-------------
+ * 1a| false | false | true | EarlyZ_Then_ReZ | 0 | 0
+ * 1b| false | false | false | EarlyZ_Then_LateZ | 0 | 0
+ * 2 | false | true | n/a | LateZ | 1 | 0
+ * 3 | true | false | n/a | EarlyZ_Then_LateZ | 0 | 0
+ * 4 | true | true | n/a | EarlyZ_Then_LateZ | 0 | 1
+ *
+ * In cases 3 and 4, HW will force Z_ORDER to EarlyZ regardless of what's set in the register.
+ * In case 2, NOOP_CULL is a don't-care field. In cases 2, 3 and 4, ReZ doesn't make sense.
+ *
+ * Don't use ReZ without profiling !!!
+ *
+ * ReZ decreases performance by 15% in DiRT: Showdown on Ultra settings, which has pretty complex
+ * shaders.
+ */
+ if (sel->info.properties[TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL]) {
+ /* Cases 3, 4. */
+ sel->db_shader_control |= S_02880C_DEPTH_BEFORE_SHADER(1) |
+ S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z) |
+ S_02880C_EXEC_ON_NOOP(sel->info.writes_memory);
+ } else if (sel->info.writes_memory) {
+ /* Case 2. */
+ sel->db_shader_control |= S_02880C_Z_ORDER(V_02880C_LATE_Z) |
+ S_02880C_EXEC_ON_HIER_FAIL(1);
+ } else {
+ /* Case 1. */
+ sel->db_shader_control |= S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z);
+ }
- if (sel->info.writes_memory)
- sel->db_shader_control |= S_02880C_EXEC_ON_HIER_FAIL(1) |
- S_02880C_EXEC_ON_NOOP(1);
pipe_mutex_init(sel->mutex);
util_queue_fence_init(&sel->ready);
if ((sctx->b.debug.debug_message && !sctx->b.debug.async) ||
sctx->is_debug ||
r600_can_dump_shader(&sscreen->b, sel->info.processor) ||
!util_queue_is_initialized(&sscreen->shader_compiler_queue))
si_init_shader_selector_async(sel, -1);
else
util_queue_add_job(&sscreen->shader_compiler_queue, sel,
@@ -2206,22 +2218,21 @@ bool si_update_shaders(struct si_context *sctx)
if (sctx->ps_shader.cso) {
unsigned db_shader_control;
r = si_shader_select(ctx, &sctx->ps_shader);
if (r)
return false;
si_pm4_bind_state(sctx, ps, sctx->ps_shader.current->pm4);
db_shader_control =
sctx->ps_shader.cso->db_shader_control |
- S_02880C_KILL_ENABLE(si_get_alpha_test_func(sctx) != PIPE_FUNC_ALWAYS) |
- S_02880C_Z_ORDER(sctx->ps_shader.current->z_order);
+ S_02880C_KILL_ENABLE(si_get_alpha_test_func(sctx) != PIPE_FUNC_ALWAYS);
if (si_pm4_state_changed(sctx, ps) || si_pm4_state_changed(sctx, vs) ||
sctx->sprite_coord_enable != rs->sprite_coord_enable ||
sctx->flatshade != rs->flatshade) {
sctx->sprite_coord_enable = rs->sprite_coord_enable;
sctx->flatshade = rs->flatshade;
si_mark_atom_dirty(sctx, &sctx->spi_map);
}
if (sctx->b.family == CHIP_STONEY && si_pm4_state_changed(sctx, ps))
--
2.7.4
More information about the mesa-dev
mailing list