Mesa (master): r600: use DX10_CLAMP bit in shader setup

Roland Scheidegger sroland at kemper.freedesktop.org
Wed Nov 15 02:19:12 UTC 2017


Module: Mesa
Branch: master
Commit: 3835009796166968750ff46cf209f6d4208cda86
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=3835009796166968750ff46cf209f6d4208cda86

Author: Roland Scheidegger <sroland at vmware.com>
Date:   Thu Nov  9 19:41:29 2017 +0100

r600: use DX10_CLAMP bit in shader setup

The docs are not very concise in what this really does, however both
Alex Deucher and Nicolai Hähnle suggested this only really affects instructions
using the CLAMP output modifier, and I've confirmed that with the newly
changed piglit isinf_and_isnan test.
So, with this bit set, if an instruction has the CLAMP modifier bit (which
clamps to [0,1]) set, then NaNs will be converted to zero, otherwise the result
will be NaN.
D3D10 would require this, glsl doesn't have modifiers (with mesa
clamp(x,0,1) would get converted to such a modifier) coupled with a
whatever-floats-your-boat specified NaN behavior, but the clamp behavior
should probably always be used (this also matches what a decomposition into
min(1.0, max(x, 0.0)) would do, if min/max also adhere to the ieee spec of
picking the non-nan result).
Some apps may in fact rely on this, as this prevents misrenderings in
This War of Mine since using ieee muls
(ce7a045feeef8cad155f1c9aa07f166e146e3d00), without having to use clamped
rcp opcode, which would also fix this bug there.
radeonsi also seems to set this bit nowadays if I see that righ (albeit the
llvm amdgpu code comment now says "Make clamp modifier on NaN input returns 0"
instead of "Do not clamp NAN to 0" since it was changed, which also looks
a bit misleading).

v2: set it in all shader stages.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103544

Reviewed-by: Dave Airlie <airlied at redhat.com>

---

 src/gallium/drivers/r600/evergreen_state.c | 6 ++++++
 src/gallium/drivers/r600/r600_state.c      | 9 +++++++++
 2 files changed, 15 insertions(+)

diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index b02d7eeca6..7c2dfa092d 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -3244,6 +3244,7 @@ void evergreen_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader
 	r600_store_value(cb, /* R_028844_SQ_PGM_RESOURCES_PS */
 			 S_028844_NUM_GPRS(rshader->bc.ngpr) |
 			 S_028844_PRIME_CACHE_ON_DRAW(1) |
+			 S_028844_DX10_CLAMP(1) |
 			 S_028844_STACK_SIZE(rshader->bc.nstack));
 	/* After that, the NOP relocation packet must be emitted (shader->bo, RADEON_USAGE_READ). */
 
@@ -3264,6 +3265,7 @@ void evergreen_update_es_state(struct pipe_context *ctx, struct r600_pipe_shader
 
 	r600_store_context_reg(cb, R_028890_SQ_PGM_RESOURCES_ES,
 			       S_028890_NUM_GPRS(rshader->bc.ngpr) |
+			       S_028890_DX10_CLAMP(1) |
 			       S_028890_STACK_SIZE(rshader->bc.nstack));
 	r600_store_context_reg(cb, R_02888C_SQ_PGM_START_ES,
 			       shader->bo->gpu_address >> 8);
@@ -3326,6 +3328,7 @@ void evergreen_update_gs_state(struct pipe_context *ctx, struct r600_pipe_shader
 
 	r600_store_context_reg(cb, R_028878_SQ_PGM_RESOURCES_GS,
 			       S_028878_NUM_GPRS(rshader->bc.ngpr) |
+			       S_028878_DX10_CLAMP(1) |
 			       S_028878_STACK_SIZE(rshader->bc.nstack));
 	r600_store_context_reg(cb, R_028874_SQ_PGM_START_GS,
 			       shader->bo->gpu_address >> 8);
@@ -3366,6 +3369,7 @@ void evergreen_update_vs_state(struct pipe_context *ctx, struct r600_pipe_shader
 			       S_0286C4_VS_EXPORT_COUNT(nparams - 1));
 	r600_store_context_reg(cb, R_028860_SQ_PGM_RESOURCES_VS,
 			       S_028860_NUM_GPRS(rshader->bc.ngpr) |
+			       S_028860_DX10_CLAMP(1) |
 			       S_028860_STACK_SIZE(rshader->bc.nstack));
 	if (rshader->vs_position_window_space) {
 		r600_store_context_reg(cb, R_028818_PA_CL_VTE_CNTL,
@@ -3400,6 +3404,7 @@ void evergreen_update_hs_state(struct pipe_context *ctx, struct r600_pipe_shader
 	r600_init_command_buffer(cb, 32);
 	r600_store_context_reg(cb, R_0288BC_SQ_PGM_RESOURCES_HS,
 			       S_0288BC_NUM_GPRS(rshader->bc.ngpr) |
+			       S_0288BC_DX10_CLAMP(1) |
 			       S_0288BC_STACK_SIZE(rshader->bc.nstack));
 	r600_store_context_reg(cb, R_0288B8_SQ_PGM_START_HS,
 			       shader->bo->gpu_address >> 8);
@@ -3413,6 +3418,7 @@ void evergreen_update_ls_state(struct pipe_context *ctx, struct r600_pipe_shader
 	r600_init_command_buffer(cb, 32);
 	r600_store_context_reg(cb, R_0288D4_SQ_PGM_RESOURCES_LS,
 			       S_0288D4_NUM_GPRS(rshader->bc.ngpr) |
+			       S_0288D4_DX10_CLAMP(1) |
 			       S_0288D4_STACK_SIZE(rshader->bc.nstack));
 	r600_store_context_reg(cb, R_0288D0_SQ_PGM_START_LS,
 			       shader->bo->gpu_address >> 8);
diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
index 0c33153746..0e266aef42 100644
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -2549,6 +2549,12 @@ void r600_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader *sha
 	r600_store_context_reg_seq(cb, R_028850_SQ_PGM_RESOURCES_PS, 2);
 	r600_store_value(cb, /* R_028850_SQ_PGM_RESOURCES_PS*/
 			 S_028850_NUM_GPRS(rshader->bc.ngpr) |
+	/*
+	 * docs are misleading about the dx10_clamp bit. This only affects
+	 * instructions using CLAMP dst modifier, in which case they will
+	 * return 0 with this set for a NaN (otherwise NaN).
+	 */
+			 S_028850_DX10_CLAMP(1) |
 			 S_028850_STACK_SIZE(rshader->bc.nstack) |
 			 S_028850_UNCACHED_FIRST_INST(ufi));
 	r600_store_value(cb, exports_ps); /* R_028854_SQ_PGM_EXPORTS_PS */
@@ -2598,6 +2604,7 @@ void r600_update_vs_state(struct pipe_context *ctx, struct r600_pipe_shader *sha
 			       S_0286C4_VS_EXPORT_COUNT(nparams - 1));
 	r600_store_context_reg(cb, R_028868_SQ_PGM_RESOURCES_VS,
 			       S_028868_NUM_GPRS(rshader->bc.ngpr) |
+			       S_028868_DX10_CLAMP(1) |
 			       S_028868_STACK_SIZE(rshader->bc.nstack));
 	if (rshader->vs_position_window_space) {
 		r600_store_context_reg(cb, R_028818_PA_CL_VTE_CNTL,
@@ -2682,6 +2689,7 @@ void r600_update_gs_state(struct pipe_context *ctx, struct r600_pipe_shader *sha
 
 	r600_store_context_reg(cb, R_02887C_SQ_PGM_RESOURCES_GS,
 			       S_02887C_NUM_GPRS(rshader->bc.ngpr) |
+			       S_02887C_DX10_CLAMP(1) |
 			       S_02887C_STACK_SIZE(rshader->bc.nstack));
 	r600_store_context_reg(cb, R_02886C_SQ_PGM_START_GS, 0);
 	/* After that, the NOP relocation packet must be emitted (shader->bo, RADEON_USAGE_READ). */
@@ -2696,6 +2704,7 @@ void r600_update_es_state(struct pipe_context *ctx, struct r600_pipe_shader *sha
 
 	r600_store_context_reg(cb, R_028890_SQ_PGM_RESOURCES_ES,
 			       S_028890_NUM_GPRS(rshader->bc.ngpr) |
+			       S_028890_DX10_CLAMP(1) |
 			       S_028890_STACK_SIZE(rshader->bc.nstack));
 	r600_store_context_reg(cb, R_028880_SQ_PGM_START_ES, 0);
 	/* After that, the NOP relocation packet must be emitted (shader->bo, RADEON_USAGE_READ). */




More information about the mesa-commit mailing list