[Mesa-dev] [Mesa-stable] [PATCH 2/5] r600: use DX10_CLAMP bit in shader setup

Roland Scheidegger sroland at vmware.com
Tue Nov 28 17:16:31 UTC 2017


Am 28.11.2017 um 15:11 schrieb Andres Gomez:
> On Tue, 2017-11-28 at 16:05 +0200, Andres Gomez wrote:
>> Nicolai, this looks like a good candidate to nominate for inclusion in
>   ^^^^^^^
> I meant Roland 😕

:-)

Yes I think that makes sense. Albeit
71e630753ebbee82e8f8709da5488296b2c070c8 (doing the same thing for
compute shaders) should have been part of that commit, but it's probably
not relevant for stable (because without shader images noone is going to
use cs anyway).
Apart from this commit (3835009796166968750ff46cf209f6d4208cda86) the
preceding one in the series (aab0bfc648bf1be50b81a25224970015f1dc78b8)
probably would make sense too for stable, the rest maybe less so.

Roland

> 
>> all the stable queues.
>>
>> What do you think?
>>
>> On Thu, 2017-11-09 at 20:00 +0100, sroland at vmware.com wrote:
>>> From: Roland Scheidegger <sroland at vmware.com>
>>>
>>> The docs are not very concise in what this really does, however both
>>> Alex Deucher and Nicolai Hähnle suggested this only really affects instructions
>>> using the CLAMP output modifier, and I've confirmed that with the newly
>>> changed piglit isinf_and_isnan test.
>>> So, with this bit set, if an instruction has the CLAMP modifier bit (which
>>> clamps to [0,1]) set, then NaNs will be converted to zero, otherwise the result
>>> will be NaN.
>>> D3D10 would require this, glsl doesn't have modifiers (with mesa
>>> clamp(x,0,1) would get converted to such a modifier) coupled with a
>>> whatever-floats-your-boat specified NaN behavior, but the clamp behavior
>>> should probably always be used (this also matches what a decomposition into
>>> min(1.0, max(x, 0.0)) would do, if min/max also adhere to the ieee spec of
>>> picking the non-nan result).
>>> Some apps may in fact rely on this, as this prevents misrenderings in
>>> This War of Mine since using ieee muls
>>> (ce7a045feeef8cad155f1c9aa07f166e146e3d00), without having to use clamped
>>> rcp opcode, which would also fix this bug there.
>>> radeonsi also seems to set this bit nowadays if I see that righ (albeit the
>>> llvm amdgpu code comment now says "Make clamp modifier on NaN input returns 0"
>>> instead of "Do not clamp NAN to 0" since it was changed, which also looks
>>> a bit misleading).
>>>
>>> v2: set it in all shader stages.
>>>
>>> Bugzilla: https://urldefense.proofpoint.com/v2/url?u=https-3A__bugs.freedesktop.org_show-5Fbug.cgi-3Fid-3D103544&d=DwIDaQ&c=uilaK90D4TOVoH58JNXRgQ&r=_QIjpv-UJ77xEQY8fIYoQtr5qv8wKrPJc7v7_-CYAb0&m=sR7Iqk-YhermS8MMpwox2HA2s2fPOlaYP13vfbskXc0&s=_3czmhaN9GxlXnOXjTFOEqvzl8l7ludRas5PwjLwVlI&e=
>>> ---
>>>  src/gallium/drivers/r600/evergreen_state.c | 6 ++++++
>>>  src/gallium/drivers/r600/r600_state.c      | 9 +++++++++
>>>  2 files changed, 15 insertions(+)
>>>
>>> diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
>>> index 96eb35a981..ef323bf4f6 100644
>>> --- a/src/gallium/drivers/r600/evergreen_state.c
>>> +++ b/src/gallium/drivers/r600/evergreen_state.c
>>> @@ -3235,6 +3235,7 @@ void evergreen_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader
>>>  	r600_store_value(cb, /* R_028844_SQ_PGM_RESOURCES_PS */
>>>  			 S_028844_NUM_GPRS(rshader->bc.ngpr) |
>>>  			 S_028844_PRIME_CACHE_ON_DRAW(1) |
>>> +			 S_028844_DX10_CLAMP(1) |
>>>  			 S_028844_STACK_SIZE(rshader->bc.nstack));
>>>  	/* After that, the NOP relocation packet must be emitted (shader->bo, RADEON_USAGE_READ). */
>>>  
>>> @@ -3255,6 +3256,7 @@ void evergreen_update_es_state(struct pipe_context *ctx, struct r600_pipe_shader
>>>  
>>>  	r600_store_context_reg(cb, R_028890_SQ_PGM_RESOURCES_ES,
>>>  			       S_028890_NUM_GPRS(rshader->bc.ngpr) |
>>> +			       S_028890_DX10_CLAMP(1) |
>>>  			       S_028890_STACK_SIZE(rshader->bc.nstack));
>>>  	r600_store_context_reg(cb, R_02888C_SQ_PGM_START_ES,
>>>  			       shader->bo->gpu_address >> 8);
>>> @@ -3317,6 +3319,7 @@ void evergreen_update_gs_state(struct pipe_context *ctx, struct r600_pipe_shader
>>>  
>>>  	r600_store_context_reg(cb, R_028878_SQ_PGM_RESOURCES_GS,
>>>  			       S_028878_NUM_GPRS(rshader->bc.ngpr) |
>>> +			       S_028878_DX10_CLAMP(1) |
>>>  			       S_028878_STACK_SIZE(rshader->bc.nstack));
>>>  	r600_store_context_reg(cb, R_028874_SQ_PGM_START_GS,
>>>  			       shader->bo->gpu_address >> 8);
>>> @@ -3357,6 +3360,7 @@ void evergreen_update_vs_state(struct pipe_context *ctx, struct r600_pipe_shader
>>>  			       S_0286C4_VS_EXPORT_COUNT(nparams - 1));
>>>  	r600_store_context_reg(cb, R_028860_SQ_PGM_RESOURCES_VS,
>>>  			       S_028860_NUM_GPRS(rshader->bc.ngpr) |
>>> +			       S_028860_DX10_CLAMP(1) |
>>>  			       S_028860_STACK_SIZE(rshader->bc.nstack));
>>>  	if (rshader->vs_position_window_space) {
>>>  		r600_store_context_reg(cb, R_028818_PA_CL_VTE_CNTL,
>>> @@ -3391,6 +3395,7 @@ void evergreen_update_hs_state(struct pipe_context *ctx, struct r600_pipe_shader
>>>  	r600_init_command_buffer(cb, 32);
>>>  	r600_store_context_reg(cb, R_0288BC_SQ_PGM_RESOURCES_HS,
>>>  			       S_0288BC_NUM_GPRS(rshader->bc.ngpr) |
>>> +			       S_0288BC_DX10_CLAMP(1) |
>>>  			       S_0288BC_STACK_SIZE(rshader->bc.nstack));
>>>  	r600_store_context_reg(cb, R_0288B8_SQ_PGM_START_HS,
>>>  			       shader->bo->gpu_address >> 8);
>>> @@ -3404,6 +3409,7 @@ void evergreen_update_ls_state(struct pipe_context *ctx, struct r600_pipe_shader
>>>  	r600_init_command_buffer(cb, 32);
>>>  	r600_store_context_reg(cb, R_0288D4_SQ_PGM_RESOURCES_LS,
>>>  			       S_0288D4_NUM_GPRS(rshader->bc.ngpr) |
>>> +			       S_0288D4_DX10_CLAMP(1) |
>>>  			       S_0288D4_STACK_SIZE(rshader->bc.nstack));
>>>  	r600_store_context_reg(cb, R_0288D0_SQ_PGM_START_LS,
>>>  			       shader->bo->gpu_address >> 8);
>>> diff --git a/src/gallium/drivers/r600/r600_state.c b/src/gallium/drivers/r600/r600_state.c
>>> index c21e8dabb1..db3d6db70b 100644
>>> --- a/src/gallium/drivers/r600/r600_state.c
>>> +++ b/src/gallium/drivers/r600/r600_state.c
>>> @@ -2548,6 +2548,12 @@ void r600_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader *sha
>>>  	r600_store_context_reg_seq(cb, R_028850_SQ_PGM_RESOURCES_PS, 2);
>>>  	r600_store_value(cb, /* R_028850_SQ_PGM_RESOURCES_PS*/
>>>  			 S_028850_NUM_GPRS(rshader->bc.ngpr) |
>>> +	/*
>>> +	 * docs are misleading about the dx10_clamp bit. This only affects
>>> +	 * instructions using CLAMP dst modifier, in which case they will
>>> +	 * return 0 with this set for a NaN (otherwise NaN).
>>> +	 */
>>> +			 S_028850_DX10_CLAMP(1) |
>>>  			 S_028850_STACK_SIZE(rshader->bc.nstack) |
>>>  			 S_028850_UNCACHED_FIRST_INST(ufi));
>>>  	r600_store_value(cb, exports_ps); /* R_028854_SQ_PGM_EXPORTS_PS */
>>> @@ -2597,6 +2603,7 @@ void r600_update_vs_state(struct pipe_context *ctx, struct r600_pipe_shader *sha
>>>  			       S_0286C4_VS_EXPORT_COUNT(nparams - 1));
>>>  	r600_store_context_reg(cb, R_028868_SQ_PGM_RESOURCES_VS,
>>>  			       S_028868_NUM_GPRS(rshader->bc.ngpr) |
>>> +			       S_028868_DX10_CLAMP(1) |
>>>  			       S_028868_STACK_SIZE(rshader->bc.nstack));
>>>  	if (rshader->vs_position_window_space) {
>>>  		r600_store_context_reg(cb, R_028818_PA_CL_VTE_CNTL,
>>> @@ -2681,6 +2688,7 @@ void r600_update_gs_state(struct pipe_context *ctx, struct r600_pipe_shader *sha
>>>  
>>>  	r600_store_context_reg(cb, R_02887C_SQ_PGM_RESOURCES_GS,
>>>  			       S_02887C_NUM_GPRS(rshader->bc.ngpr) |
>>> +			       S_02887C_DX10_CLAMP(1) |
>>>  			       S_02887C_STACK_SIZE(rshader->bc.nstack));
>>>  	r600_store_context_reg(cb, R_02886C_SQ_PGM_START_GS, 0);
>>>  	/* After that, the NOP relocation packet must be emitted (shader->bo, RADEON_USAGE_READ). */
>>> @@ -2695,6 +2703,7 @@ void r600_update_es_state(struct pipe_context *ctx, struct r600_pipe_shader *sha
>>>  
>>>  	r600_store_context_reg(cb, R_028890_SQ_PGM_RESOURCES_ES,
>>>  			       S_028890_NUM_GPRS(rshader->bc.ngpr) |
>>> +			       S_028890_DX10_CLAMP(1) |
>>>  			       S_028890_STACK_SIZE(rshader->bc.nstack));
>>>  	r600_store_context_reg(cb, R_028880_SQ_PGM_START_ES, 0);
>>>  	/* After that, the NOP relocation packet must be emitted (shader->bo, RADEON_USAGE_READ). */



More information about the mesa-dev mailing list