[Mesa-dev] [PATCH 04/13] radeonsi: enable LLVM optimizations that assume no NaNs for non-compute shaders

Tom Stellard tom at stellard.net
Wed Jan 7 07:56:47 PST 2015


On Wed, Jan 07, 2015 at 12:58:12PM +0100, Marek Olšák wrote:
> How about the attached patch?
> 

Looks good, thanks.

Reviewed-by: Tom Stellard <thomas.stellard at amd.com>

> Marek
> 
> On Wed, Jan 7, 2015 at 1:23 AM, Tom Stellard <tom at stellard.net> wrote:
> > On Wed, Jan 07, 2015 at 01:13:37AM +0100, Marek Olšák wrote:
> >> Neither. It's because we use DX10_CLAMP, which converts NaNs to 0.
> >>
> >
> > Ok, could we add a dx10_clamp bit to si_shader and make this attribute
> > conditional on that bit?  I'm concerned someone may remove DX10_CLAMP
> > and forget to also remove this attribute.
> >
> > -Tom
> >
> >> Marek
> >>
> >> On Wed, Jan 7, 2015 at 12:51 AM, Tom Stellard <tom at stellard.net> wrote:
> >> > On Mon, Jan 05, 2015 at 12:18:43AM +0100, Marek Olšák wrote:
> >> >> From: Marek Olšák <marek.olsak at amd.com>
> >> >>
> >> >> ---
> >> >>  src/gallium/drivers/radeon/radeon_llvm_emit.c | 1 +
> >> >>  1 file changed, 1 insertion(+)
> >> >>
> >> >> diff --git a/src/gallium/drivers/radeon/radeon_llvm_emit.c b/src/gallium/drivers/radeon/radeon_llvm_emit.c
> >> >> index dc871d7..e3be72c 100644
> >> >> --- a/src/gallium/drivers/radeon/radeon_llvm_emit.c
> >> >> +++ b/src/gallium/drivers/radeon/radeon_llvm_emit.c
> >> >> @@ -83,6 +83,7 @@ void radeon_llvm_shader_type(LLVMValueRef F, unsigned type)
> >> >>
> >> >>       if (type != TGSI_PROCESSOR_COMPUTE) {
> >> >>               LLVMAddTargetDependentFunctionAttr(F, "unsafe-fp-math", "true");
> >> >> +             LLVMAddTargetDependentFunctionAttr(F, "enable-no-nans-fp-math", "true");
> >> >
> >> > Is this required by the OpenGL spec or is it just to fix broken/old
> >> > games?
> >> >
> >> > -Tom
> >> >
> >> >>       }
> >> >>  }
> >> >>
> >> >> --
> >> >> 2.1.0
> >> >>
> >> >> _______________________________________________
> >> >> mesa-dev mailing list
> >> >> mesa-dev at lists.freedesktop.org
> >> >> http://lists.freedesktop.org/mailman/listinfo/mesa-dev

> From d960b773f3bc99928b4aab5c4344aea671595849 Mon Sep 17 00:00:00 2001
> From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= <marek.olsak at amd.com>
> Date: Sun, 4 Jan 2015 17:08:57 +0100
> Subject: [PATCH] radeonsi: enable LLVM optimizations that assume no NaNs for
>  non-compute shaders
> 
> v2: complete rewrite
> ---
>  src/gallium/drivers/radeonsi/si_shader.c        | 7 +++++++
>  src/gallium/drivers/radeonsi/si_shader.h        | 1 +
>  src/gallium/drivers/radeonsi/si_state_shaders.c | 8 ++++----
>  3 files changed, 12 insertions(+), 4 deletions(-)
> 
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
> index 5d61a54..cf28860 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -2369,6 +2369,10 @@ static void create_function(struct si_shader_context *si_shader_ctx)
>  	radeon_llvm_create_func(&si_shader_ctx->radeon_bld, params, num_params);
>  	radeon_llvm_shader_type(si_shader_ctx->radeon_bld.main_fn, si_shader_ctx->type);
>  
> +	if (shader->dx10_clamp_mode)
> +		LLVMAddTargetDependentFunctionAttr(si_shader_ctx->radeon_bld.main_fn,
> +						   "enable-no-nans-fp-math", "true");
> +
>  	for (i = 0; i <= last_sgpr; ++i) {
>  		LLVMValueRef P = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, i);
>  
> @@ -2723,6 +2727,9 @@ int si_shader_create(struct si_screen *sscreen, struct si_shader *shader)
>  	radeon_llvm_context_init(&si_shader_ctx.radeon_bld);
>  	bld_base = &si_shader_ctx.radeon_bld.soa.bld_base;
>  
> +	if (sel->type != PIPE_SHADER_COMPUTE)
> +		shader->dx10_clamp_mode = true;
> +
>  	if (sel->info.uses_kill)
>  		shader->db_shader_control |= S_02880C_KILL_ENABLE(1);
>  
> diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
> index 21692f0..08e344a 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.h
> +++ b/src/gallium/drivers/radeonsi/si_shader.h
> @@ -160,6 +160,7 @@ struct si_shader {
>  	bool			uses_instanceid;
>  	unsigned		nr_pos_exports;
>  	bool			is_gs_copy_shader;
> +	bool			dx10_clamp_mode; /* convert NaNs to 0 */
>  };
>  
>  static inline struct tgsi_shader_info *si_get_vs_info(struct si_context *sctx)
> diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
> index e51d50e..817a990 100644
> --- a/src/gallium/drivers/radeonsi/si_state_shaders.c
> +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
> @@ -65,7 +65,7 @@ static void si_shader_es(struct si_shader *shader)
>  		       S_00B328_VGPRS((shader->num_vgprs - 1) / 4) |
>  		       S_00B328_SGPRS((num_sgprs - 1) / 8) |
>  		       S_00B328_VGPR_COMP_CNT(vgpr_comp_cnt) |
> -		       S_00B328_DX10_CLAMP(1));
> +		       S_00B328_DX10_CLAMP(shader->dx10_clamp_mode));
>  	si_pm4_set_reg(pm4, R_00B32C_SPI_SHADER_PGM_RSRC2_ES,
>  		       S_00B32C_USER_SGPR(num_user_sgprs));
>  }
> @@ -134,7 +134,7 @@ static void si_shader_gs(struct si_shader *shader)
>  	si_pm4_set_reg(pm4, R_00B228_SPI_SHADER_PGM_RSRC1_GS,
>  		       S_00B228_VGPRS((shader->num_vgprs - 1) / 4) |
>  		       S_00B228_SGPRS((num_sgprs - 1) / 8) |
> -		       S_00B228_DX10_CLAMP(1));
> +		       S_00B228_DX10_CLAMP(shader->dx10_clamp_mode));
>  	si_pm4_set_reg(pm4, R_00B22C_SPI_SHADER_PGM_RSRC2_GS,
>  		       S_00B22C_USER_SGPR(num_user_sgprs));
>  }
> @@ -209,7 +209,7 @@ static void si_shader_vs(struct si_shader *shader)
>  		       S_00B128_VGPRS((shader->num_vgprs - 1) / 4) |
>  		       S_00B128_SGPRS((num_sgprs - 1) / 8) |
>  		       S_00B128_VGPR_COMP_CNT(vgpr_comp_cnt) |
> -		       S_00B128_DX10_CLAMP(1));
> +		       S_00B128_DX10_CLAMP(shader->dx10_clamp_mode));
>  	si_pm4_set_reg(pm4, R_00B12C_SPI_SHADER_PGM_RSRC2_VS,
>  		       S_00B12C_USER_SGPR(num_user_sgprs) |
>  		       S_00B12C_SO_BASE0_EN(!!shader->selector->so.stride[0]) |
> @@ -304,7 +304,7 @@ static void si_shader_ps(struct si_shader *shader)
>  	si_pm4_set_reg(pm4, R_00B028_SPI_SHADER_PGM_RSRC1_PS,
>  		       S_00B028_VGPRS((shader->num_vgprs - 1) / 4) |
>  		       S_00B028_SGPRS((num_sgprs - 1) / 8) |
> -		       S_00B028_DX10_CLAMP(1));
> +		       S_00B028_DX10_CLAMP(shader->dx10_clamp_mode));
>  	si_pm4_set_reg(pm4, R_00B02C_SPI_SHADER_PGM_RSRC2_PS,
>  		       S_00B02C_EXTRA_LDS_SIZE(shader->lds_size) |
>  		       S_00B02C_USER_SGPR(num_user_sgprs));
> -- 
> 2.1.0
> 



More information about the mesa-dev mailing list