[Mesa-dev] [PATCH] gallium: fixup definitions of the rsq and sqrt
Marek Olšák
maraeo at gmail.com
Thu Jul 11 17:55:12 PDT 2013
The ARB_vertex_program specification defines RSQ as:
tmp = fabs(ScalarLoad(op0));
result.x = ApproxRSQRT(tmp);
result.y = ApproxRSQRT(tmp);
result.z = ApproxRSQRT(tmp);
result.w = ApproxRSQRT(tmp);
The mesa_to_tgsi code looks good, but it's very suboptimal. Source
operands have an absolute modifier, so let's use it. The optimal code
is:
case OPCODE_RSQ:
ureg_RSQ(ureg, dst[0], ureg_abs(src[0]));
break;
I guess the ABS opcode should be removed to prevent confusion.
Marek
On Thu, Jul 11, 2013 at 9:44 PM, Zack Rusin <zackr at vmware.com> wrote:
> GLSL spec says that rsq is undefined for src<=0, but the D3D10
> spec says it needs to be a NaN, so let's stop taking an absolute
> value of the source which completely breaks that behavior. For
> the gl program we can simply insert an extra abs instruction
> which produces the desired behavior there.
>
> Signed-off-by: Zack Rusin <zackr at vmware.com>
> ---
> src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c | 2 --
> src/gallium/auxiliary/tgsi/tgsi_exec.c | 16 ++++++++--------
> src/gallium/docs/source/tgsi.rst | 6 +++---
> src/mesa/state_tracker/st_mesa_to_tgsi.c | 10 ++++++++--
> 4 files changed, 19 insertions(+), 15 deletions(-)
>
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
> index 1feaa19..8c26918 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
> @@ -633,8 +633,6 @@ rsq_emit(
> struct lp_build_tgsi_context * bld_base,
> struct lp_build_emit_data * emit_data)
> {
> - emit_data->args[0] = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_ABS,
> - emit_data->args[0]);
> if (bld_base->rsq_action.emit) {
> bld_base->rsq_action.emit(&bld_base->rsq_action, bld_base, emit_data);
> } else {
> diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
> index 4482c6b..9133bcb 100644
> --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
> +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
> @@ -339,20 +339,20 @@ micro_rsq(union tgsi_exec_channel *dst,
> assert(src->f[2] != 0.0f);
> assert(src->f[3] != 0.0f);
> #endif
> - dst->f[0] = 1.0f / sqrtf(fabsf(src->f[0]));
> - dst->f[1] = 1.0f / sqrtf(fabsf(src->f[1]));
> - dst->f[2] = 1.0f / sqrtf(fabsf(src->f[2]));
> - dst->f[3] = 1.0f / sqrtf(fabsf(src->f[3]));
> + dst->f[0] = 1.0f / sqrtf(src->f[0]);
> + dst->f[1] = 1.0f / sqrtf(src->f[1]);
> + dst->f[2] = 1.0f / sqrtf(src->f[2]);
> + dst->f[3] = 1.0f / sqrtf(src->f[3]);
> }
>
> static void
> micro_sqrt(union tgsi_exec_channel *dst,
> const union tgsi_exec_channel *src)
> {
> - dst->f[0] = sqrtf(fabsf(src->f[0]));
> - dst->f[1] = sqrtf(fabsf(src->f[1]));
> - dst->f[2] = sqrtf(fabsf(src->f[2]));
> - dst->f[3] = sqrtf(fabsf(src->f[3]));
> + dst->f[0] = sqrtf(src->f[0]);
> + dst->f[1] = sqrtf(src->f[1]);
> + dst->f[2] = sqrtf(src->f[2]);
> + dst->f[3] = sqrtf(src->f[3]);
> }
>
> static void
> diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst
> index 3f48b51..ab395a4 100644
> --- a/src/gallium/docs/source/tgsi.rst
> +++ b/src/gallium/docs/source/tgsi.rst
> @@ -94,16 +94,16 @@ This instruction replicates its result.
>
> .. opcode:: RSQ - Reciprocal Square Root
>
> -This instruction replicates its result.
> +This instruction replicates its result. The results are undefined for src <= 0.
>
> .. math::
>
> - dst = \frac{1}{\sqrt{|src.x|}}
> + dst = \frac{1}{\sqrt{src.x}}
>
>
> .. opcode:: SQRT - Square Root
>
> -This instruction replicates its result.
> +This instruction replicates its result. The results are undefined for src < 0.
>
> .. math::
>
> diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c
> index dd9f4fc..168585a 100644
> --- a/src/mesa/state_tracker/st_mesa_to_tgsi.c
> +++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c
> @@ -614,8 +614,6 @@ translate_opcode( unsigned op )
> return TGSI_OPCODE_RCP;
> case OPCODE_RET:
> return TGSI_OPCODE_RET;
> - case OPCODE_RSQ:
> - return TGSI_OPCODE_RSQ;
> case OPCODE_SCS:
> return TGSI_OPCODE_SCS;
> case OPCODE_SEQ:
> @@ -755,6 +753,14 @@ compile_instruction(
> emit_ddy( t, dst[0], &inst->SrcReg[0] );
> break;
>
> + case OPCODE_RSQ: {
> + struct ureg_dst temp = ureg_DECL_temporary( ureg );
> + ureg_ABS( ureg, temp, src[0] );
> + ureg_RSQ( ureg, dst[0], ureg_src(temp) );
> + ureg_release_temporary( ureg, temp );
> + }
> + break;
> +
> default:
> ureg_insn( ureg,
> translate_opcode( inst->Opcode ),
> --
> 1.7.10.4
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev
mailing list