[Mesa-dev] [PATCH] gallium: fixup definitions of the rsq and sqrt

Roland Scheidegger sroland at vmware.com
Thu Jul 11 16:03:46 PDT 2013


Am 11.07.2013 21:44, schrieb Zack Rusin:
> GLSL spec says that rsq is undefined for src<=0, but the D3D10
> spec says it needs to be a NaN, so let's stop taking an absolute
> value of the source which completely breaks that behavior. For
> the gl program we can simply insert an extra abs instruction
> which produces the desired behavior there.
> 
> Signed-off-by: Zack Rusin <zackr at vmware.com>
> ---
>  src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c |    2 --
>  src/gallium/auxiliary/tgsi/tgsi_exec.c             |   16 ++++++++--------
>  src/gallium/docs/source/tgsi.rst                   |    6 +++---
>  src/mesa/state_tracker/st_mesa_to_tgsi.c           |   10 ++++++++--
>  4 files changed, 19 insertions(+), 15 deletions(-)
> 
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
> index 1feaa19..8c26918 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
> @@ -633,8 +633,6 @@ rsq_emit(
>     struct lp_build_tgsi_context * bld_base,
>     struct lp_build_emit_data * emit_data)
>  {
> -   emit_data->args[0] = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_ABS,
> -                                               emit_data->args[0]);
>     if (bld_base->rsq_action.emit) {
>        bld_base->rsq_action.emit(&bld_base->rsq_action, bld_base, emit_data);
>     } else {
> diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
> index 4482c6b..9133bcb 100644
> --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
> +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
> @@ -339,20 +339,20 @@ micro_rsq(union tgsi_exec_channel *dst,
>     assert(src->f[2] != 0.0f);
>     assert(src->f[3] != 0.0f);
>  #endif
> -   dst->f[0] = 1.0f / sqrtf(fabsf(src->f[0]));
> -   dst->f[1] = 1.0f / sqrtf(fabsf(src->f[1]));
> -   dst->f[2] = 1.0f / sqrtf(fabsf(src->f[2]));
> -   dst->f[3] = 1.0f / sqrtf(fabsf(src->f[3]));
> +   dst->f[0] = 1.0f / sqrtf(src->f[0]);
> +   dst->f[1] = 1.0f / sqrtf(src->f[1]);
> +   dst->f[2] = 1.0f / sqrtf(src->f[2]);
> +   dst->f[3] = 1.0f / sqrtf(src->f[3]);
>  }
>  
>  static void
>  micro_sqrt(union tgsi_exec_channel *dst,
>             const union tgsi_exec_channel *src)
>  {
> -   dst->f[0] = sqrtf(fabsf(src->f[0]));
> -   dst->f[1] = sqrtf(fabsf(src->f[1]));
> -   dst->f[2] = sqrtf(fabsf(src->f[2]));
> -   dst->f[3] = sqrtf(fabsf(src->f[3]));
> +   dst->f[0] = sqrtf(src->f[0]);
> +   dst->f[1] = sqrtf(src->f[1]);
> +   dst->f[2] = sqrtf(src->f[2]);
> +   dst->f[3] = sqrtf(src->f[3]);
>  }
>  
>  static void
> diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst
> index 3f48b51..ab395a4 100644
> --- a/src/gallium/docs/source/tgsi.rst
> +++ b/src/gallium/docs/source/tgsi.rst
> @@ -94,16 +94,16 @@ This instruction replicates its result.
>  
>  .. opcode:: RSQ - Reciprocal Square Root
>  
> -This instruction replicates its result.
> +This instruction replicates its result. The results are undefined for src <= 0.
>  
>  .. math::
>  
> -  dst = \frac{1}{\sqrt{|src.x|}}
> +  dst = \frac{1}{\sqrt{src.x}}
>  
>  
>  .. opcode:: SQRT - Square Root
>  
> -This instruction replicates its result.
> +This instruction replicates its result. The results are undefined for src < 0.
>  
>  .. math::
>  
> diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c
> index dd9f4fc..168585a 100644
> --- a/src/mesa/state_tracker/st_mesa_to_tgsi.c
> +++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c
> @@ -614,8 +614,6 @@ translate_opcode( unsigned op )
>        return TGSI_OPCODE_RCP;
>     case OPCODE_RET:
>        return TGSI_OPCODE_RET;
> -   case OPCODE_RSQ:
> -      return TGSI_OPCODE_RSQ;
>     case OPCODE_SCS:
>        return TGSI_OPCODE_SCS;
>     case OPCODE_SEQ:
> @@ -755,6 +753,14 @@ compile_instruction(
>        emit_ddy( t, dst[0], &inst->SrcReg[0] );
>        break;
>  
> +   case OPCODE_RSQ: {
> +      struct ureg_dst temp = ureg_DECL_temporary( ureg );
> +      ureg_ABS( ureg, temp, src[0] );
> +      ureg_RSQ( ureg, dst[0], ureg_src(temp) );
> +      ureg_release_temporary( ureg, temp );
> +   }
> +      break;
> +
>     default:
>        ureg_insn( ureg, 
>                   translate_opcode( inst->Opcode ), 
> 

There's also a comment at recip_sqrt_emit_cpu() saying that it isn't the
same as TGSI_OPCODE_RSQ, which is now totally untrue. Either way, though:

Reviewed-by: Roland Scheidegger <sroland at vmware.com>


More information about the mesa-dev mailing list