[Mesa-dev] [PATCH] radeonsi: enable denorms for 64-bit and 16-bit floats

Roland Scheidegger sroland at vmware.com
Mon Feb 8 12:11:12 UTC 2016


This looks good to me, albeit I know nothing about the hw.
So VI could do (just with some restrictions) even full-speed fp32 denorms,
whereas SI/CI can't? Interesting, I suppose that would be intended for
compute. Intel x86 can't even do that (actually, I think Skylake can),
though certainly other CPUs have been able to do that for ages.

(Albeit there's still nothing in the GLSL spec that says this is
required for fp16 pack...)

Roland

Am 06.02.2016 um 13:15 schrieb Marek Olšák:
> From: Marek Olšák <marek.olsak at amd.com>
> 
> This fixes FP16 conversion instructions for VI, which has 16-bit floats,
> but not SI & CI, which can't disable denorms for those instructions.
> ---
>  src/gallium/drivers/radeonsi/si_shader.c        | 14 ++++++++++++++
>  src/gallium/drivers/radeonsi/si_state_shaders.c | 18 ++++++++++++------
>  src/gallium/drivers/radeonsi/sid.h              |  3 +++
>  3 files changed, 29 insertions(+), 6 deletions(-)
> 
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
> index a4680ce..3f1db70 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -4155,6 +4155,20 @@ int si_compile_llvm(struct si_screen *sscreen,
>  
>  	si_shader_binary_read_config(binary, conf, 0);
>  
> +	/* Enable 64-bit and 16-bit denormals, because there is no performance
> +	 * cost.
> +	 *
> +	 * If denormals are enabled, all floating-point output modifiers are
> +	 * ignored.
> +	 *
> +	 * Don't enable denormals for 32-bit floats, because:
> +	 * - Floating-point output modifiers would be ignored by the hw.
> +	 * - Some opcodes don't support denormals, such as v_mad_f32. We would
> +	 *   have to stop using those.
> +	 * - SI & CI would be very slow.
> +	 */
> +	conf->float_mode |= V_00B028_FP_64_DENORMS;
> +
>  	FREE(binary->config);
>  	FREE(binary->global_symbol_offsets);
>  	binary->config = NULL;
> diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
> index ce795c0..77a4e47 100644
> --- a/src/gallium/drivers/radeonsi/si_state_shaders.c
> +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
> @@ -124,7 +124,8 @@ static void si_shader_ls(struct si_shader *shader)
>  	shader->config.rsrc1 = S_00B528_VGPRS((shader->config.num_vgprs - 1) / 4) |
>  			   S_00B528_SGPRS((num_sgprs - 1) / 8) |
>  		           S_00B528_VGPR_COMP_CNT(vgpr_comp_cnt) |
> -			   S_00B528_DX10_CLAMP(1);
> +			   S_00B528_DX10_CLAMP(1) |
> +			   S_00B528_FLOAT_MODE(shader->config.float_mode);
>  	shader->config.rsrc2 = S_00B52C_USER_SGPR(num_user_sgprs) |
>  			   S_00B52C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0);
>  }
> @@ -157,7 +158,8 @@ static void si_shader_hs(struct si_shader *shader)
>  	si_pm4_set_reg(pm4, R_00B428_SPI_SHADER_PGM_RSRC1_HS,
>  		       S_00B428_VGPRS((shader->config.num_vgprs - 1) / 4) |
>  		       S_00B428_SGPRS((num_sgprs - 1) / 8) |
> -		       S_00B428_DX10_CLAMP(1));
> +		       S_00B428_DX10_CLAMP(1) |
> +		       S_00B428_FLOAT_MODE(shader->config.float_mode));
>  	si_pm4_set_reg(pm4, R_00B42C_SPI_SHADER_PGM_RSRC2_HS,
>  		       S_00B42C_USER_SGPR(num_user_sgprs) |
>  		       S_00B42C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));
> @@ -203,7 +205,8 @@ static void si_shader_es(struct si_shader *shader)
>  		       S_00B328_VGPRS((shader->config.num_vgprs - 1) / 4) |
>  		       S_00B328_SGPRS((num_sgprs - 1) / 8) |
>  		       S_00B328_VGPR_COMP_CNT(vgpr_comp_cnt) |
> -		       S_00B328_DX10_CLAMP(1));
> +		       S_00B328_DX10_CLAMP(1) |
> +		       S_00B328_FLOAT_MODE(shader->config.float_mode));
>  	si_pm4_set_reg(pm4, R_00B32C_SPI_SHADER_PGM_RSRC2_ES,
>  		       S_00B32C_USER_SGPR(num_user_sgprs) |
>  		       S_00B32C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));
> @@ -292,7 +295,8 @@ static void si_shader_gs(struct si_shader *shader)
>  	si_pm4_set_reg(pm4, R_00B228_SPI_SHADER_PGM_RSRC1_GS,
>  		       S_00B228_VGPRS((shader->config.num_vgprs - 1) / 4) |
>  		       S_00B228_SGPRS((num_sgprs - 1) / 8) |
> -		       S_00B228_DX10_CLAMP(1));
> +		       S_00B228_DX10_CLAMP(1) |
> +		       S_00B228_FLOAT_MODE(shader->config.float_mode));
>  	si_pm4_set_reg(pm4, R_00B22C_SPI_SHADER_PGM_RSRC2_GS,
>  		       S_00B22C_USER_SGPR(num_user_sgprs) |
>  		       S_00B22C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0));
> @@ -381,7 +385,8 @@ static void si_shader_vs(struct si_shader *shader, struct si_shader *gs)
>  		       S_00B128_VGPRS((shader->config.num_vgprs - 1) / 4) |
>  		       S_00B128_SGPRS((num_sgprs - 1) / 8) |
>  		       S_00B128_VGPR_COMP_CNT(vgpr_comp_cnt) |
> -		       S_00B128_DX10_CLAMP(1));
> +		       S_00B128_DX10_CLAMP(1) |
> +		       S_00B128_FLOAT_MODE(shader->config.float_mode));
>  	si_pm4_set_reg(pm4, R_00B12C_SPI_SHADER_PGM_RSRC2_VS,
>  		       S_00B12C_USER_SGPR(num_user_sgprs) |
>  		       S_00B12C_SO_BASE0_EN(!!shader->selector->so.stride[0]) |
> @@ -567,7 +572,8 @@ static void si_shader_ps(struct si_shader *shader)
>  	si_pm4_set_reg(pm4, R_00B028_SPI_SHADER_PGM_RSRC1_PS,
>  		       S_00B028_VGPRS((shader->config.num_vgprs - 1) / 4) |
>  		       S_00B028_SGPRS((num_sgprs - 1) / 8) |
> -		       S_00B028_DX10_CLAMP(1));
> +		       S_00B028_DX10_CLAMP(1) |
> +		       S_00B028_FLOAT_MODE(shader->config.float_mode));
>  	si_pm4_set_reg(pm4, R_00B02C_SPI_SHADER_PGM_RSRC2_PS,
>  		       S_00B02C_EXTRA_LDS_SIZE(shader->config.lds_size) |
>  		       S_00B02C_USER_SGPR(num_user_sgprs) |
> diff --git a/src/gallium/drivers/radeonsi/sid.h b/src/gallium/drivers/radeonsi/sid.h
> index 9e1e158..8920847 100644
> --- a/src/gallium/drivers/radeonsi/sid.h
> +++ b/src/gallium/drivers/radeonsi/sid.h
> @@ -2845,6 +2845,9 @@
>  #define   S_00B028_FLOAT_MODE(x)                                      (((x) & 0xFF) << 12)
>  #define   G_00B028_FLOAT_MODE(x)                                      (((x) >> 12) & 0xFF)
>  #define   C_00B028_FLOAT_MODE                                         0xFFF00FFF
> +#define     V_00B028_FP_32_DENORMS					0x30
> +#define     V_00B028_FP_64_DENORMS					0xc0
> +#define     V_00B028_FP_ALL_DENORMS					0xf0
>  #define   S_00B028_PRIV(x)                                            (((x) & 0x1) << 20)
>  #define   G_00B028_PRIV(x)                                            (((x) >> 20) & 0x1)
>  #define   C_00B028_PRIV                                               0xFFEFFFFF
> 



More information about the mesa-dev mailing list