[Mesa-dev] [PATCH] gallium: fixup definitions of the rsq and sqrt

Zack Rusin zackr at vmware.com
Thu Jul 11 12:44:38 PDT 2013


GLSL spec says that rsq is undefined for src<=0, but the D3D10
spec says it needs to be a NaN, so lets stop taking an absolute
value of the source which completely breaks that behavior. For
the gl program we can simply insert an extra abs instrunction
which produces the desired behavior there.

Signed-off-by: Zack Rusin <zackr at vmware.com>
---
 src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c |    2 --
 src/gallium/auxiliary/tgsi/tgsi_exec.c             |   16 ++++++++--------
 src/gallium/docs/source/tgsi.rst                   |    6 +++---
 src/mesa/state_tracker/st_mesa_to_tgsi.c           |   10 ++++++++--
 4 files changed, 19 insertions(+), 15 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
index 1feaa19..8c26918 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
@@ -633,8 +633,6 @@ rsq_emit(
    struct lp_build_tgsi_context * bld_base,
    struct lp_build_emit_data * emit_data)
 {
-   emit_data->args[0] = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_ABS,
-                                               emit_data->args[0]);
    if (bld_base->rsq_action.emit) {
       bld_base->rsq_action.emit(&bld_base->rsq_action, bld_base, emit_data);
    } else {
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 4482c6b..9133bcb 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -339,20 +339,20 @@ micro_rsq(union tgsi_exec_channel *dst,
    assert(src->f[2] != 0.0f);
    assert(src->f[3] != 0.0f);
 #endif
-   dst->f[0] = 1.0f / sqrtf(fabsf(src->f[0]));
-   dst->f[1] = 1.0f / sqrtf(fabsf(src->f[1]));
-   dst->f[2] = 1.0f / sqrtf(fabsf(src->f[2]));
-   dst->f[3] = 1.0f / sqrtf(fabsf(src->f[3]));
+   dst->f[0] = 1.0f / sqrtf(src->f[0]);
+   dst->f[1] = 1.0f / sqrtf(src->f[1]);
+   dst->f[2] = 1.0f / sqrtf(src->f[2]);
+   dst->f[3] = 1.0f / sqrtf(src->f[3]);
 }
 
 static void
 micro_sqrt(union tgsi_exec_channel *dst,
            const union tgsi_exec_channel *src)
 {
-   dst->f[0] = sqrtf(fabsf(src->f[0]));
-   dst->f[1] = sqrtf(fabsf(src->f[1]));
-   dst->f[2] = sqrtf(fabsf(src->f[2]));
-   dst->f[3] = sqrtf(fabsf(src->f[3]));
+   dst->f[0] = sqrtf(src->f[0]);
+   dst->f[1] = sqrtf(src->f[1]);
+   dst->f[2] = sqrtf(src->f[2]);
+   dst->f[3] = sqrtf(src->f[3]);
 }
 
 static void
diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst
index 3f48b51..ab395a4 100644
--- a/src/gallium/docs/source/tgsi.rst
+++ b/src/gallium/docs/source/tgsi.rst
@@ -94,16 +94,16 @@ This instruction replicates its result.
 
 .. opcode:: RSQ - Reciprocal Square Root
 
-This instruction replicates its result.
+This instruction replicates its result. The results are undefined for src <= 0.
 
 .. math::
 
-  dst = \frac{1}{\sqrt{|src.x|}}
+  dst = \frac{1}{\sqrt{src.x}}
 
 
 .. opcode:: SQRT - Square Root
 
-This instruction replicates its result.
+This instruction replicates its result. The results are undefined for src < 0.
 
 .. math::
 
diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c
index dd9f4fc..168585a 100644
--- a/src/mesa/state_tracker/st_mesa_to_tgsi.c
+++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c
@@ -614,8 +614,6 @@ translate_opcode( unsigned op )
       return TGSI_OPCODE_RCP;
    case OPCODE_RET:
       return TGSI_OPCODE_RET;
-   case OPCODE_RSQ:
-      return TGSI_OPCODE_RSQ;
    case OPCODE_SCS:
       return TGSI_OPCODE_SCS;
    case OPCODE_SEQ:
@@ -755,6 +753,14 @@ compile_instruction(
       emit_ddy( t, dst[0], &inst->SrcReg[0] );
       break;
 
+   case OPCODE_RSQ: {
+      struct ureg_dst temp = ureg_DECL_temporary( ureg );
+      ureg_ABS( ureg, temp, src[0] );
+      ureg_RSQ( ureg, dst[0], ureg_src(temp) );
+      ureg_release_temporary( ureg, temp );
+   }
+      break;
+
    default:
       ureg_insn( ureg, 
                  translate_opcode( inst->Opcode ), 
-- 
1.7.10.4


More information about the mesa-dev mailing list