[Mesa-dev] [PATCH 14/14] radeonsi: Allow TES distribution between shader engines.

Nicolai Hähnle nhaehnle at gmail.com
Tue May 10 17:11:17 UTC 2016



On 10.05.2016 05:53, Bas Nieuwenhuizen wrote:
> Setting 028B6C_DISTRIBUTION_MODE to a non-zero value and
> either setting 028B6C_NUM_DS_WAVES_PER_SIMD to a non-zero
> value or storing a zero control word hangs my card.
>
> The R_028B50_VGT_TESS_DISTRIBUTION value is copied from
> amdgpu-pro. Smaller values in the ACCUM fields seem to
> decrease the performance advantage from this patch; higher
> values don't seem to matter.
>
> Signed-off-by: Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl>
> ---
>   src/gallium/drivers/radeonsi/si_state.c         |  5 ++++
>   src/gallium/drivers/radeonsi/si_state_draw.c    |  8 ++++++
>   src/gallium/drivers/radeonsi/si_state_shaders.c | 36 ++++++++++++++-----------
>   3 files changed, 34 insertions(+), 15 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
> index c4af77e..eb48a9e 100644
> --- a/src/gallium/drivers/radeonsi/si_state.c
> +++ b/src/gallium/drivers/radeonsi/si_state.c
> @@ -3787,6 +3787,11 @@ static void si_init_config(struct si_context *sctx)
>   			       S_028424_OVERWRITE_COMBINER_WATERMARK(4));
>   		si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 30);
>   		si_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 32);
> +		si_pm4_set_reg(pm4, R_028B50_VGT_TESS_DISTRIBUTION,
> +		               S_028B50_ACCUM_ISOLINE(32) |
> +		               S_028B50_ACCUM_TRI(11) |
> +		               S_028B50_ACCUM_QUAD(11) |
> +		               S_028B50_DONUT_SPLIT(16));
>   	}
>
>   	if (sctx->b.family == CHIP_STONEY)
> diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
> index 3150489..7ad9422 100644
> --- a/src/gallium/drivers/radeonsi/si_state_draw.c
> +++ b/src/gallium/drivers/radeonsi/si_state_draw.c
> @@ -271,6 +271,14 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
>   		     sctx->b.family == CHIP_BONAIRE) &&
>   		    sctx->gs_shader.cso)
>   			partial_vs_wave = true;
> +
> +		/* Needed for 028B6C_DISTRIBUTION_MODE != 0 */
> +		if (sctx->b.chip_class >= VI) {
> +			if (sctx->gs_shader.cso)
> +				partial_es_wave = true;
> +			else
> +				partial_vs_wave = true;
> +		}
>   	}
>
>   	/* This is a hardware requirement. */
> diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
> index 43f4a84..d7ed31d 100644
> --- a/src/gallium/drivers/radeonsi/si_state_shaders.c
> +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
> @@ -249,7 +249,8 @@ void si_destroy_shader_cache(struct si_screen *sscreen)
>
>   /* SHADER STATES */
>
> -static void si_set_tesseval_regs(struct si_shader *shader,
> +static void si_set_tesseval_regs(struct si_screen *sscreen,
> +				 struct si_shader *shader,
>   				 struct si_pm4_state *pm4)
>   {
>   	struct tgsi_shader_info *info = &shader->selector->info;
> @@ -257,7 +258,7 @@ static void si_set_tesseval_regs(struct si_shader *shader,
>   	unsigned tes_spacing = info->properties[TGSI_PROPERTY_TES_SPACING];
>   	bool tes_vertex_order_cw = info->properties[TGSI_PROPERTY_TES_VERTEX_ORDER_CW];
>   	bool tes_point_mode = info->properties[TGSI_PROPERTY_TES_POINT_MODE];
> -	unsigned type, partitioning, topology;
> +	unsigned type, partitioning, topology, distribution_mode;
>
>   	switch (tes_prim_mode) {
>   	case PIPE_PRIM_LINES:
> @@ -299,10 +300,13 @@ static void si_set_tesseval_regs(struct si_shader *shader,
>   	else
>   		topology = V_028B6C_OUTPUT_TRIANGLE_CW;
>
> +	distribution_mode = sscreen->b.chip_class >= VI ? 2 : 0;

The named values for distribution_mode are:

0 - NO_DIST
1 - PATCHES
2 - DONUTS

It makes sense to add those as named constants to sid.h.

Apart from that, patches 8-12 and this one are

Reviewed-by: Nicolai Hähnle <nicolai.haehnle at amd.com>

> +
>   	si_pm4_set_reg(pm4, R_028B6C_VGT_TF_PARAM,
>   		       S_028B6C_TYPE(type) |
>   		       S_028B6C_PARTITIONING(partitioning) |
> -		       S_028B6C_TOPOLOGY(topology));
> +		       S_028B6C_TOPOLOGY(topology) |
> +		       S_028B6C_DISTRIBUTION_MODE(distribution_mode));
>   }
>
>   static void si_shader_ls(struct si_shader *shader)
> @@ -359,7 +363,7 @@ static void si_shader_hs(struct si_shader *shader)
>   		       S_00B42C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));
>   }
>
> -static void si_shader_es(struct si_shader *shader)
> +static void si_shader_es(struct si_screen *sscreen, struct si_shader *shader)
>   {
>   	struct si_pm4_state *pm4;
>   	unsigned num_user_sgprs;
> @@ -402,7 +406,7 @@ static void si_shader_es(struct si_shader *shader)
>   		       S_00B32C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));
>
>   	if (shader->selector->type == PIPE_SHADER_TESS_EVAL)
> -		si_set_tesseval_regs(shader, pm4);
> +		si_set_tesseval_regs(sscreen, shader, pm4);
>   }
>
>   /**
> @@ -489,7 +493,8 @@ static void si_shader_gs(struct si_shader *shader)
>    * If \p gs is non-NULL, it points to the geometry shader for which this shader
>    * is the copy shader.
>    */
> -static void si_shader_vs(struct si_shader *shader, struct si_shader *gs)
> +static void si_shader_vs(struct si_screen *sscreen, struct si_shader *shader,
> +                         struct si_shader *gs)
>   {
>   	struct si_pm4_state *pm4;
>   	unsigned num_user_sgprs;
> @@ -583,7 +588,7 @@ static void si_shader_vs(struct si_shader *shader, struct si_shader *gs)
>   			       S_028818_VPORT_Z_SCALE_ENA(1) | S_028818_VPORT_Z_OFFSET_ENA(1));
>
>   	if (shader->selector->type == PIPE_SHADER_TESS_EVAL)
> -		si_set_tesseval_regs(shader, pm4);
> +		si_set_tesseval_regs(sscreen, shader, pm4);
>   }
>
>   static unsigned si_get_ps_num_interp(struct si_shader *ps)
> @@ -764,7 +769,8 @@ static void si_shader_ps(struct si_shader *shader)
>   		shader->z_order = V_02880C_EARLY_Z_THEN_LATE_Z;
>   }
>
> -static void si_shader_init_pm4_state(struct si_shader *shader)
> +static void si_shader_init_pm4_state(struct si_screen *sscreen,
> +                                     struct si_shader *shader)
>   {
>
>   	if (shader->pm4)
> @@ -775,22 +781,22 @@ static void si_shader_init_pm4_state(struct si_shader *shader)
>   		if (shader->key.vs.as_ls)
>   			si_shader_ls(shader);
>   		else if (shader->key.vs.as_es)
> -			si_shader_es(shader);
> +			si_shader_es(sscreen, shader);
>   		else
> -			si_shader_vs(shader, NULL);
> +			si_shader_vs(sscreen, shader, NULL);
>   		break;
>   	case PIPE_SHADER_TESS_CTRL:
>   		si_shader_hs(shader);
>   		break;
>   	case PIPE_SHADER_TESS_EVAL:
>   		if (shader->key.tes.as_es)
> -			si_shader_es(shader);
> +			si_shader_es(sscreen, shader);
>   		else
> -			si_shader_vs(shader, NULL);
> +			si_shader_vs(sscreen, shader, NULL);
>   		break;
>   	case PIPE_SHADER_GEOMETRY:
>   		si_shader_gs(shader);
> -		si_shader_vs(shader->gs_copy_shader, shader);
> +		si_shader_vs(sscreen, shader->gs_copy_shader, shader);
>   		break;
>   	case PIPE_SHADER_FRAGMENT:
>   		si_shader_ps(shader);
> @@ -984,7 +990,7 @@ static int si_shader_select_with_key(struct pipe_context *ctx,
>   		pipe_mutex_unlock(sel->mutex);
>   		return r;
>   	}
> -	si_shader_init_pm4_state(shader);
> +	si_shader_init_pm4_state(sctx->screen, shader);
>
>   	if (!sel->last_variant) {
>   		sel->first_variant = shader;
> @@ -1656,7 +1662,7 @@ static int si_update_scratch_buffer(struct si_context *sctx,
>   		return r;
>
>   	/* Update the shader state to use the new shader bo. */
> -	si_shader_init_pm4_state(shader);
> +	si_shader_init_pm4_state(sctx->screen, shader);
>
>   	r600_resource_reference(&shader->scratch_bo, sctx->scratch_buffer);
>
>


More information about the mesa-dev mailing list