[Mesa-dev] [PATCH] gallium: fixup definitions of the rsq and sqrt

Marek Olšák maraeo at gmail.com
Thu Jul 11 17:55:12 PDT 2013


The ARB_vertex_program specification defines RSQ as:

      tmp = fabs(ScalarLoad(op0));
      result.x = ApproxRSQRT(tmp);
      result.y = ApproxRSQRT(tmp);
      result.z = ApproxRSQRT(tmp);
      result.w = ApproxRSQRT(tmp);

The mesa_to_tgsi code looks correct, but it's suboptimal: it emits an
extra ABS instruction and allocates a temporary. Source operands already
have an absolute-value modifier, so let's use it. The optimal code is:

   case OPCODE_RSQ:
      ureg_RSQ(ureg, dst[0], ureg_abs(src[0]));
      break;

I guess the ABS opcode should be removed to prevent confusion.

Marek


On Thu, Jul 11, 2013 at 9:44 PM, Zack Rusin <zackr at vmware.com> wrote:
> GLSL spec says that rsq is undefined for src<=0, but the D3D10
> spec says it needs to be a NaN, so let's stop taking an absolute
> value of the source which completely breaks that behavior. For
> the GL program we can simply insert an extra abs instruction
> which produces the desired behavior there.
>
> Signed-off-by: Zack Rusin <zackr at vmware.com>
> ---
>  src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c |    2 --
>  src/gallium/auxiliary/tgsi/tgsi_exec.c             |   16 ++++++++--------
>  src/gallium/docs/source/tgsi.rst                   |    6 +++---
>  src/mesa/state_tracker/st_mesa_to_tgsi.c           |   10 ++++++++--
>  4 files changed, 19 insertions(+), 15 deletions(-)
>
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
> index 1feaa19..8c26918 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
> @@ -633,8 +633,6 @@ rsq_emit(
>     struct lp_build_tgsi_context * bld_base,
>     struct lp_build_emit_data * emit_data)
>  {
> -   emit_data->args[0] = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_ABS,
> -                                               emit_data->args[0]);
>     if (bld_base->rsq_action.emit) {
>        bld_base->rsq_action.emit(&bld_base->rsq_action, bld_base, emit_data);
>     } else {
> diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
> index 4482c6b..9133bcb 100644
> --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
> +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
> @@ -339,20 +339,20 @@ micro_rsq(union tgsi_exec_channel *dst,
>     assert(src->f[2] != 0.0f);
>     assert(src->f[3] != 0.0f);
>  #endif
> -   dst->f[0] = 1.0f / sqrtf(fabsf(src->f[0]));
> -   dst->f[1] = 1.0f / sqrtf(fabsf(src->f[1]));
> -   dst->f[2] = 1.0f / sqrtf(fabsf(src->f[2]));
> -   dst->f[3] = 1.0f / sqrtf(fabsf(src->f[3]));
> +   dst->f[0] = 1.0f / sqrtf(src->f[0]);
> +   dst->f[1] = 1.0f / sqrtf(src->f[1]);
> +   dst->f[2] = 1.0f / sqrtf(src->f[2]);
> +   dst->f[3] = 1.0f / sqrtf(src->f[3]);
>  }
>
>  static void
>  micro_sqrt(union tgsi_exec_channel *dst,
>             const union tgsi_exec_channel *src)
>  {
> -   dst->f[0] = sqrtf(fabsf(src->f[0]));
> -   dst->f[1] = sqrtf(fabsf(src->f[1]));
> -   dst->f[2] = sqrtf(fabsf(src->f[2]));
> -   dst->f[3] = sqrtf(fabsf(src->f[3]));
> +   dst->f[0] = sqrtf(src->f[0]);
> +   dst->f[1] = sqrtf(src->f[1]);
> +   dst->f[2] = sqrtf(src->f[2]);
> +   dst->f[3] = sqrtf(src->f[3]);
>  }
>
>  static void
> diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst
> index 3f48b51..ab395a4 100644
> --- a/src/gallium/docs/source/tgsi.rst
> +++ b/src/gallium/docs/source/tgsi.rst
> @@ -94,16 +94,16 @@ This instruction replicates its result.
>
>  .. opcode:: RSQ - Reciprocal Square Root
>
> -This instruction replicates its result.
> +This instruction replicates its result. The results are undefined for src <= 0.
>
>  .. math::
>
> -  dst = \frac{1}{\sqrt{|src.x|}}
> +  dst = \frac{1}{\sqrt{src.x}}
>
>
>  .. opcode:: SQRT - Square Root
>
> -This instruction replicates its result.
> +This instruction replicates its result. The results are undefined for src < 0.
>
>  .. math::
>
> diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c
> index dd9f4fc..168585a 100644
> --- a/src/mesa/state_tracker/st_mesa_to_tgsi.c
> +++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c
> @@ -614,8 +614,6 @@ translate_opcode( unsigned op )
>        return TGSI_OPCODE_RCP;
>     case OPCODE_RET:
>        return TGSI_OPCODE_RET;
> -   case OPCODE_RSQ:
> -      return TGSI_OPCODE_RSQ;
>     case OPCODE_SCS:
>        return TGSI_OPCODE_SCS;
>     case OPCODE_SEQ:
> @@ -755,6 +753,14 @@ compile_instruction(
>        emit_ddy( t, dst[0], &inst->SrcReg[0] );
>        break;
>
> +   case OPCODE_RSQ: {
> +      struct ureg_dst temp = ureg_DECL_temporary( ureg );
> +      ureg_ABS( ureg, temp, src[0] );
> +      ureg_RSQ( ureg, dst[0], ureg_src(temp) );
> +      ureg_release_temporary( ureg, temp );
> +   }
> +      break;
> +
>     default:
>        ureg_insn( ureg,
>                   translate_opcode( inst->Opcode ),
> --
> 1.7.10.4
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list