[Mesa-dev] [PATCH 3/4] Remove _mesa_inv_sqrtf in favor of 1/SQRTF

Fri Jul 20 11:24:02 PDT 2012

Except for a couple of explicit uses, _mesa_inv_sqrtf has been disabled
since its addition in 2003 (see f9b1e524).  Remove it and convert the
remaining callers to INV_SQRTF, which expands to 1.0F / SQRTF(X).
---
 src/mesa/main/imports.c    |  106 --------------------------------------------
 src/mesa/main/imports.h    |    9 +---
 src/mesa/tnl/t_rasterpos.c |    2 +-
 src/mesa/tnl/t_vb_texgen.c |    4 +-
 4 files changed, 4 insertions(+), 117 deletions(-)

diff --git a/src/mesa/main/imports.c b/src/mesa/main/imports.c
index fc30a6e..e7e877b 100644
--- a/src/mesa/main/imports.c
+++ b/src/mesa/main/imports.c
@@ -244,112 +244,6 @@ _mesa_memset16( unsigned short *dst, unsigned short val, size_t n )
 /*@{*/
 
 
-/**
- inv_sqrt - A single precision 1/sqrt routine for IEEE format floats.
- written by Josh Vanderhoof, based on newsgroup posts by James Van Buskirk
- and Vesa Karvonen.
-*/
-float
-_mesa_inv_sqrtf(float n)
-{
-#if defined(USE_IEEE) && !defined(DEBUG)
-        float r0, x0, y0;
-        float r1, x1, y1;
-        float r2, x2, y2;
-#if 0 /* not used, see below -BP */
-        float r3, x3, y3;
-#endif
-        fi_type u;
-        unsigned int magic;
-
-        /*
-         Exponent part of the magic number -
-
-         We want to:
-         1. subtract the bias from the exponent,
-         2. negate it
-         3. divide by two (rounding towards -inf)
-         4. add the bias back
-
-         Which is the same as subtracting the exponent from 381 and dividing
-         by 2.
-
-         floor(-(x - 127) / 2) + 127 = floor((381 - x) / 2)
-        */
-
-        magic = 381 << 23;
-
-        /*
-         Significand part of magic number -
-
-         With the current magic number, "(magic - u.i) >> 1" will give you:
-
-         for 1 <= u.f <= 2: 1.25 - u.f / 4
-         for 2 <= u.f <= 4: 1.00 - u.f / 8
-
-         This isn't a bad approximation of 1/sqrt.  The maximum difference from
-         1/sqrt will be around .06.  After three Newton-Raphson iterations, the
-         maximum difference is less than 4.5e-8.  (Which is actually close
-         enough to make the following bias academic...)
-
-         To get a better approximation you can add a bias to the magic
-         number.  For example, if you subtract 1/2 of the maximum difference in
-         the first approximation (.03), you will get the following function:
-
-         for 1 <= u.f <= 2:    1.22 - u.f / 4
-         for 2 <= u.f <= 3.76: 0.97 - u.f / 8
-         for 3.76 <= u.f <= 4: 0.72 - u.f / 16
-         (The 3.76 to 4 range is where the result is < .5.)
-
-         This is the closest possible initial approximation, but with a maximum
-         error of 8e-11 after three NR iterations, it is still not perfect.  If
-         you subtract 0.0332281 instead of .03, the maximum error will be
-         2.5e-11 after three NR iterations, which should be about as close as
-         is possible.
-
-         for 1 <= u.f <= 2:    1.2167719 - u.f / 4
-         for 2 <= u.f <= 3.73: 0.9667719 - u.f / 8
-         for 3.73 <= u.f <= 4: 0.7167719 - u.f / 16
-
-        */
-
-        magic -= (int)(0.0332281 * (1 << 25));
-
-        u.f = n;
-        u.i = (magic - u.i) >> 1;
-
-        /*
-         Instead of Newton-Raphson, we use Goldschmidt's algorithm, which
-         allows more parallelism.  From what I understand, the parallelism
-         comes at the cost of less precision, because it lets error
-         accumulate across iterations.
-        */
-        x0 = 1.0f;
-        y0 = 0.5f * n;
-        r0 = u.f;
-
-        x1 = x0 * r0;
-        y1 = y0 * r0 * r0;
-        r1 = 1.5f - y1;
-
-        x2 = x1 * r1;
-        y2 = y1 * r1 * r1;
-        r2 = 1.5f - y2;
-
-#if 1
-        return x2 * r2;  /* we can stop here, and be conformant -BP */
-#else
-        x3 = x2 * r2;
-        y3 = y2 * r2 * r2;
-        r3 = 1.5f - y3;
-
-        return x3 * r3;
-#endif
-#else
-        return (float) (1.0 / sqrt(n));
-#endif
-}
-
 #ifndef __GNUC__
 /**
  * Find the first bit set in a word.
diff --git a/src/mesa/main/imports.h b/src/mesa/main/imports.h
index e825f21..2544400 100644
--- a/src/mesa/main/imports.h
+++ b/src/mesa/main/imports.h
@@ -105,11 +105,7 @@ typedef union { GLfloat f; GLint i; } fi_type;
 /***
  *** INV_SQRTF: single-precision inverse square root
  ***/
-#if 0
-#define INV_SQRTF(X) _mesa_inv_sqrt(X)
-#else
-#define INV_SQRTF(X) (1.0F / SQRTF(X))  /* this is faster on a P4 */
-#endif
+#define INV_SQRTF(X) (1.0F / SQRTF(X))
 
 
 /**
@@ -565,9 +561,6 @@ _mesa_realloc( void *oldBuffer, size_t oldSize, size_t newSize );
 extern void
 _mesa_memset16( unsigned short *dst, unsigned short val, size_t n );
 
-extern float
-_mesa_inv_sqrtf(float x);
-
 
 #ifndef FFS_DEFINED
 #define FFS_DEFINED 1
diff --git a/src/mesa/tnl/t_rasterpos.c b/src/mesa/tnl/t_rasterpos.c
index 50b5fcb..a28ad0d 100644
--- a/src/mesa/tnl/t_rasterpos.c
+++ b/src/mesa/tnl/t_rasterpos.c
@@ -271,7 +271,7 @@ compute_texgen(struct gl_context *ctx, const GLfloat vObj[4], const GLfloat vEye
    rz = u[2] - normal[2] * two_nu;
    m = rx * rx + ry * ry + (rz + 1.0F) * (rz + 1.0F);
    if (m > 0.0F)
-      mInv = 0.5F * _mesa_inv_sqrtf(m);
+      mInv = 0.5F * INV_SQRTF(m);
    else
       mInv = 0.0F;
 
diff --git a/src/mesa/tnl/t_vb_texgen.c b/src/mesa/tnl/t_vb_texgen.c
index 61430c3..d4c7885 100644
--- a/src/mesa/tnl/t_vb_texgen.c
+++ b/src/mesa/tnl/t_vb_texgen.c
@@ -117,7 +117,7 @@ static void build_m3( GLfloat f[][3], GLfloat m[],
       fz = f[i][2] = u[2] - norm[2] * two_nu;
       m[i] = fx * fx + fy * fy + (fz + 1.0F) * (fz + 1.0F);
       if (m[i] != 0.0F) {
-	 m[i] = 0.5F * _mesa_inv_sqrtf(m[i]);
+	 m[i] = 0.5F * INV_SQRTF(m[i]);
       }
    }
 }
@@ -146,7 +146,7 @@ static void build_m2( GLfloat f[][3], GLfloat m[],
       fz = f[i][2] = u[2] - norm[2] * two_nu;
       m[i] = fx * fx + fy * fy + (fz + 1.0F) * (fz + 1.0F);
       if (m[i] != 0.0F) {
-	 m[i] = 0.5F * _mesa_inv_sqrtf(m[i]);
+	 m[i] = 0.5F * INV_SQRTF(m[i]);
       }
    }
 }
-- 
1.7.8.6