[Mesa-dev] [Mesa-stable] [PATCH 2/5] r600: use DX10_CLAMP bit in shader setup

Andres Gomez agomez at igalia.com
Tue Nov 28 14:11:06 UTC 2017


On Tue, 2017-11-28 at 16:05 +0200, Andres Gomez wrote:
> Nicolai, this looks like a good candidate to nominate for inclusion in
  ^^^^^^^
I meant Roland 😕

> all the stable queues.
> 
> What do you think?
> 
> On Thu, 2017-11-09 at 20:00 +0100, sroland at vmware.com wrote:
> > From: Roland Scheidegger <sroland at vmware.com>
> > 
> > The docs are not very concise in what this really does, however both
> > Alex Deucher and Nicolai Hähnle suggested this only really affects instructions
> > using the CLAMP output modifier, and I've confirmed that with the newly
> > changed piglit isinf_and_isnan test.
> > So, with this bit set, if an instruction has the CLAMP modifier bit (which
> > clamps to [0,1]) set, then NaNs will be converted to zero, otherwise the result
> > will be NaN.
> > D3D10 would require this, glsl doesn't have modifiers (with mesa
> > clamp(x,0,1) would get converted to such a modifier) coupled with a
> > whatever-floats-your-boat specified NaN behavior, but the clamp behavior
> > should probably always be used (this also matches what a decomposition into
> > min(1.0, max(x, 0.0)) would do, if min/max also adhere to the ieee spec of
> > picking the non-nan result).
> > Some apps may in fact rely on this, as this prevents misrenderings in
> > This War of Mine since using ieee muls
> > (ce7a045feeef8cad155f1c9aa07f166e146e3d00), without having to use clamped
> > rcp opcode, which would also fix this bug there.
> > radeonsi also seems to set this bit nowadays if I see that righ (albeit the
> > llvm amdgpu code comment now says "Make clamp modifier on NaN input returns 0"
> > instead of "Do not clamp NAN to 0" since it was changed, which also looks
> > a bit misleading).
> > 
> > v2: set it in all shader stages.
> > 
> > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103544
> > ---
> >  src/gallium/drivers/r600/evergreen_state.c | 6 ++++++
> >  src/gallium/drivers/r600/r600_state.c      | 9 +++++++++
> >  2 files changed, 15 insertions(+)
> > 
> > diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
> > index 96eb35a981..ef323bf4f6 100644
> > --- a/src/gallium/drivers/r600/evergreen_state.c
> > +++ b/src/gallium/drivers/r600/evergreen_state.c
> > @@ -3235,6 +3235,7 @@ void evergreen_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader
> >  	r600_store_value(cb, /* R_028844_SQ_PGM_RESOURCES_PS */
> >  			 S_028844_NUM_GPRS(rshader->bc.ngpr) |
> >  			 S_028844_PRIME_CACHE_ON_DRAW(1) |
> > +			 S_028844_DX10_CLAMP(1) |
> >  			 S_028844_STACK_SIZE(rshader->bc.nstack));
> >  	/* After that, the NOP relocation packet must be emitted (shader->bo, RADEON_USAGE_READ). */
> >  
> > @@ -3255,6 +3256,7 @@ void evergreen_update_es_state(struct pipe_context *ctx, struct r600_pipe_shader
> >  
> >  	r600_store_context_reg(cb, R_028890_SQ_PGM_RESOURCES_ES,
> >  			       S_028890_NUM_GPRS(rshader->bc.ngpr) |
> > +			       S_028890_DX10_CLAMP(1) |
> >  			       S_028890_STACK_SIZE(rshader->bc.nstack));
> >  	r600_store_context_reg(cb, R_02888C_SQ_PGM_START_ES,
> >  			       shader->bo->gpu_address >> 8);
> > @@ -3317,6 +3319,7 @@ void evergreen_update_gs_state(struct pipe_context *ctx, struct r600_pipe_shader
> >  
> >  	r600_store_context_reg(cb, R_028878_SQ_PGM_RESOURCES_GS,
> >  			       S_028878_NUM_GPRS(rshader->bc.ngpr) |
> > +			       S_028878_DX10_CLAMP(1) |
> >  			       S_028878_STACK_SIZE(rshader->bc.nstack));
> >  	r600_store_context_reg(cb, R_028874_SQ_PGM_START_GS,
> >  			       shader->bo->gpu_address >> 8);
> > @@ -3357,6 +3360,7 @@ void evergreen_update_vs_state(struct pipe_context *ctx, struct r600_pipe_shader
> >  			       S_0286C4_VS_EXPORT_COUNT(nparams - 1));
> >  	r600_store_context_reg(cb, R_028860_SQ_PGM_RESOURCES_VS,
> >  			       S_028860_NUM_GPRS(rshader->bc.ngpr) |
> > +			       S_028860_DX10_CLAMP(1) |
> >  			       S_028860_STACK_SIZE(rshader->bc.nstack));
> >  	if (rshader->vs_position_window_space) {
> >  		r600_store_context_reg(cb, R_028818_PA_CL_VTE_CNTL,
> > @@ -3391,6 +3395,7 @@ void evergreen_update_hs_state(struct pipe_context *ctx, struct r600_pipe_shader
> >  	r600_init_command_buffer(cb, 32);
> >  	r600_store_context_reg(cb, R_0288BC_SQ_PGM_RESOURCES_HS,
> >  			       S_0288BC_NUM_GPRS(rshader->bc.ngpr) |
> > +			       S_0288BC_DX10_CLAMP(1) |
> >  			       S_0288BC_STACK_SIZE(rshader->bc.nstack));
> >  	r600_store_context_reg(cb, R_0288B8_SQ_PGM_START_HS,
> >  			       shader->bo->gpu_address >> 8);
> > @@ -3404,6 +3409,7 @@ void evergreen_update_ls_state(struct pipe_context *ctx, struct r600_pipe_shader
> >  	r600_init_command_buffer(cb, 32);
> >  	r600_store_context_reg(cb, R_0288D4_SQ_PGM_RESOURCES_LS,
> >  			       S_0288D4_NUM_GPRS(rshader->bc.ngpr) |
> > +			       S_0288D4_DX10_CLAMP(1) |
> >  			       S_0288D4_STACK_SIZE(rshader->bc.nstack));
> >  	r600_store_context_reg(cb, R_0288D0_SQ_PGM_START_LS,
> >  			       shader->bo->gpu_address >> 8);
> > diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
> > index c21e8dabb1..db3d6db70b 100644
> > --- a/src/gallium/drivers/r600/r600_state.c
> > +++ b/src/gallium/drivers/r600/r600_state.c
> > @@ -2548,6 +2548,12 @@ void r600_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader *sha
> >  	r600_store_context_reg_seq(cb, R_028850_SQ_PGM_RESOURCES_PS, 2);
> >  	r600_store_value(cb, /* R_028850_SQ_PGM_RESOURCES_PS*/
> >  			 S_028850_NUM_GPRS(rshader->bc.ngpr) |
> > +	/*
> > +	 * docs are misleading about the dx10_clamp bit. This only affects
> > +	 * instructions using CLAMP dst modifier, in which case they will
> > +	 * return 0 with this set for a NaN (otherwise NaN).
> > +	 */
> > +			 S_028850_DX10_CLAMP(1) |
> >  			 S_028850_STACK_SIZE(rshader->bc.nstack) |
> >  			 S_028850_UNCACHED_FIRST_INST(ufi));
> >  	r600_store_value(cb, exports_ps); /* R_028854_SQ_PGM_EXPORTS_PS */
> > @@ -2597,6 +2603,7 @@ void r600_update_vs_state(struct pipe_context *ctx, struct r600_pipe_shader *sha
> >  			       S_0286C4_VS_EXPORT_COUNT(nparams - 1));
> >  	r600_store_context_reg(cb, R_028868_SQ_PGM_RESOURCES_VS,
> >  			       S_028868_NUM_GPRS(rshader->bc.ngpr) |
> > +			       S_028868_DX10_CLAMP(1) |
> >  			       S_028868_STACK_SIZE(rshader->bc.nstack));
> >  	if (rshader->vs_position_window_space) {
> >  		r600_store_context_reg(cb, R_028818_PA_CL_VTE_CNTL,
> > @@ -2681,6 +2688,7 @@ void r600_update_gs_state(struct pipe_context *ctx, struct r600_pipe_shader *sha
> >  
> >  	r600_store_context_reg(cb, R_02887C_SQ_PGM_RESOURCES_GS,
> >  			       S_02887C_NUM_GPRS(rshader->bc.ngpr) |
> > +			       S_02887C_DX10_CLAMP(1) |
> >  			       S_02887C_STACK_SIZE(rshader->bc.nstack));
> >  	r600_store_context_reg(cb, R_02886C_SQ_PGM_START_GS, 0);
> >  	/* After that, the NOP relocation packet must be emitted (shader->bo, RADEON_USAGE_READ). */
> > @@ -2695,6 +2703,7 @@ void r600_update_es_state(struct pipe_context *ctx, struct r600_pipe_shader *sha
> >  
> >  	r600_store_context_reg(cb, R_028890_SQ_PGM_RESOURCES_ES,
> >  			       S_028890_NUM_GPRS(rshader->bc.ngpr) |
> > +			       S_028890_DX10_CLAMP(1) |
> >  			       S_028890_STACK_SIZE(rshader->bc.nstack));
> >  	r600_store_context_reg(cb, R_028880_SQ_PGM_START_ES, 0);
> >  	/* After that, the NOP relocation packet must be emitted (shader->bo, RADEON_USAGE_READ). */
-- 
Br,

Andres


More information about the mesa-dev mailing list