[Mesa-dev] [PATCH 2/2] radeonsi: adjust and clean up Z_ORDER and EXEC_ON_x settings

Nicolai Hähnle nhaehnle at gmail.com
Thu Oct 13 16:04:52 UTC 2016


The series is

Reviewed-by: Nicolai Hähnle <nicolai.haehnle at amd.com>

On 12.10.2016 23:19, Marek Olšák wrote:
> From: Marek Olšák <marek.olsak at amd.com>
>
> The table was copied from the Vulkan driver. The comment lines are as long
> as the table for cosmetic reasons.
> ---
>  src/gallium/drivers/radeonsi/si_shader.h        |  1 -
>  src/gallium/drivers/radeonsi/si_state_shaders.c | 53 +++++++++++++++----------
>  2 files changed, 32 insertions(+), 22 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
> index f2618ac..b07210c 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.h
> +++ b/src/gallium/drivers/radeonsi/si_shader.h
> @@ -432,21 +432,20 @@ struct si_shader {
>
>  	struct si_shader_part		*prolog;
>  	struct si_shader_part		*epilog;
>
>  	struct si_shader		*gs_copy_shader;
>  	struct si_pm4_state		*pm4;
>  	struct r600_resource		*bo;
>  	struct r600_resource		*scratch_bo;
>  	union si_shader_key		key;
>  	bool				is_binary_shared;
> -	unsigned			z_order;
>
>  	/* The following data is all that's needed for binary shaders. */
>  	struct radeon_shader_binary	binary;
>  	struct si_shader_config		config;
>  	struct si_shader_info		info;
>
>  	/* Shader key + LLVM IR + disassembly + statistics.
>  	 * Generated for debug contexts only.
>  	 */
>  	char				*shader_log;
> diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
> index be5c659..d339b84 100644
> --- a/src/gallium/drivers/radeonsi/si_state_shaders.c
> +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
> @@ -800,34 +800,20 @@ static void si_shader_ps(struct si_shader *shader)
>
>  	si_pm4_set_reg(pm4, R_00B028_SPI_SHADER_PGM_RSRC1_PS,
>  		       S_00B028_VGPRS((shader->config.num_vgprs - 1) / 4) |
>  		       S_00B028_SGPRS((shader->config.num_sgprs - 1) / 8) |
>  		       S_00B028_DX10_CLAMP(1) |
>  		       S_00B028_FLOAT_MODE(shader->config.float_mode));
>  	si_pm4_set_reg(pm4, R_00B02C_SPI_SHADER_PGM_RSRC2_PS,
>  		       S_00B02C_EXTRA_LDS_SIZE(shader->config.lds_size) |
>  		       S_00B02C_USER_SGPR(SI_PS_NUM_USER_SGPR) |
>  		       S_00B32C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));
> -
> -	/* DON'T USE EARLY_Z_THEN_RE_Z !!!
> -	 *
> -	 * It decreases performance by 15% in DiRT: Showdown on Ultra settings.
> -	 * And it has pretty complex shaders.
> -	 *
> -	 * Shaders with side effects that must execute independently of the
> -	 * depth test require LATE_Z.
> -	 */
> -	if (info->writes_memory &&
> -	    !info->properties[TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL])
> -		shader->z_order = V_02880C_LATE_Z;
> -	else
> -		shader->z_order = V_02880C_EARLY_Z_THEN_LATE_Z;
>  }
>
>  static void si_shader_init_pm4_state(struct si_screen *sscreen,
>                                       struct si_shader *shader)
>  {
>  	switch (shader->selector->type) {
>  	case PIPE_SHADER_VERTEX:
>  		if (shader->key.vs.as_ls)
>  			si_shader_ls(shader);
>  		else if (shader->key.vs.as_es)
> @@ -1364,26 +1350,52 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
>  	case TGSI_FS_DEPTH_LAYOUT_GREATER:
>  		sel->db_shader_control |=
>  			S_02880C_CONSERVATIVE_Z_EXPORT(V_02880C_EXPORT_GREATER_THAN_Z);
>  		break;
>  	case TGSI_FS_DEPTH_LAYOUT_LESS:
>  		sel->db_shader_control |=
>  			S_02880C_CONSERVATIVE_Z_EXPORT(V_02880C_EXPORT_LESS_THAN_Z);
>  		break;
>  	}
>
> -	if (sel->info.properties[TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL])
> -		sel->db_shader_control |= S_02880C_DEPTH_BEFORE_SHADER(1);
> +	/* Z_ORDER, EXEC_ON_HIER_FAIL and EXEC_ON_NOOP should be set as follows:
> +	 *
> +	 *   | early Z/S | writes_mem | allow_ReZ? |      Z_ORDER       | EXEC_ON_HIER_FAIL | EXEC_ON_NOOP
> +	 * --|-----------|------------|------------|--------------------|-------------------|-------------
> +	 * 1a|   false   |   false    |   true     | EarlyZ_Then_ReZ    |         0         |     0
> +	 * 1b|   false   |   false    |   false    | EarlyZ_Then_LateZ  |         0         |     0
> +	 * 2 |   false   |   true     |   n/a      |       LateZ        |         1         |     0
> +	 * 3 |   true    |   false    |   n/a      | EarlyZ_Then_LateZ  |         0         |     0
> +	 * 4 |   true    |   true     |   n/a      | EarlyZ_Then_LateZ  |         0         |     1
> +	 *
> +	 * In cases 3 and 4, HW will force Z_ORDER to EarlyZ regardless of what's set in the register.
> +	 * In case 2, NOOP_CULL is a don't-care field. In cases 2, 3 and 4, ReZ doesn't make sense.
> +	 *
> +	 * Don't use ReZ without profiling !!!
> +	 *
> +	 * ReZ decreases performance by 15% in DiRT: Showdown on Ultra settings, which has pretty complex
> +	 * shaders.
> +	 */
> +	if (sel->info.properties[TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL]) {
> +		/* Cases 3, 4. */
> +		sel->db_shader_control |= S_02880C_DEPTH_BEFORE_SHADER(1) |
> +					  S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z) |
> +					  S_02880C_EXEC_ON_NOOP(sel->info.writes_memory);
> +	} else if (sel->info.writes_memory) {
> +		/* Case 2. */
> +		sel->db_shader_control |= S_02880C_Z_ORDER(V_02880C_LATE_Z) |
> +					  S_02880C_EXEC_ON_HIER_FAIL(1);
> +	} else {
> +		/* Case 1b (ReZ is not used; see table row 1b). */
> +		sel->db_shader_control |= S_02880C_Z_ORDER(V_02880C_EARLY_Z_THEN_LATE_Z);
> +	}
>
> -	if (sel->info.writes_memory)
> -		sel->db_shader_control |= S_02880C_EXEC_ON_HIER_FAIL(1) |
> -					  S_02880C_EXEC_ON_NOOP(1);
>  	pipe_mutex_init(sel->mutex);
>  	util_queue_fence_init(&sel->ready);
>
>  	if ((sctx->b.debug.debug_message && !sctx->b.debug.async) ||
>  	    sctx->is_debug ||
>  	    r600_can_dump_shader(&sscreen->b, sel->info.processor) ||
>  	    !util_queue_is_initialized(&sscreen->shader_compiler_queue))
>  		si_init_shader_selector_async(sel, -1);
>  	else
>  		util_queue_add_job(&sscreen->shader_compiler_queue, sel,
> @@ -2206,22 +2218,21 @@ bool si_update_shaders(struct si_context *sctx)
>  	if (sctx->ps_shader.cso) {
>  		unsigned db_shader_control;
>
>  		r = si_shader_select(ctx, &sctx->ps_shader);
>  		if (r)
>  			return false;
>  		si_pm4_bind_state(sctx, ps, sctx->ps_shader.current->pm4);
>
>  		db_shader_control =
>  			sctx->ps_shader.cso->db_shader_control |
> -			S_02880C_KILL_ENABLE(si_get_alpha_test_func(sctx) != PIPE_FUNC_ALWAYS) |
> -			S_02880C_Z_ORDER(sctx->ps_shader.current->z_order);
> +			S_02880C_KILL_ENABLE(si_get_alpha_test_func(sctx) != PIPE_FUNC_ALWAYS);
>
>  		if (si_pm4_state_changed(sctx, ps) || si_pm4_state_changed(sctx, vs) ||
>  		    sctx->sprite_coord_enable != rs->sprite_coord_enable ||
>  		    sctx->flatshade != rs->flatshade) {
>  			sctx->sprite_coord_enable = rs->sprite_coord_enable;
>  			sctx->flatshade = rs->flatshade;
>  			si_mark_atom_dirty(sctx, &sctx->spi_map);
>  		}
>
>  		if (sctx->b.family == CHIP_STONEY && si_pm4_state_changed(sctx, ps))
>


More information about the mesa-dev mailing list