[Mesa-dev] [PATCH 12/12] mesa: remove INV_SQRTF() macro

Tue Feb 24 09:37:32 PST 2015

The comment in INV_SQRTF sort of makes it sound like optimizing it might
be worthwhile.
x86 sse has a very fast rsqrt function, though you'd need to be able to
guarantee you can live with crappy precision (10 bits or so and IIRC in
particular the biggest issue with it was that rsqrt(1.0) != 1.0... - I
guess wouldn't have been a problem for Quake...).
div, sqrt is quite a problematic instruction sequence in fact; on modern
CPUs these will use the unpipelined division unit with really high
latency for both instructions, so a single rsqrt ought to be like 20
times faster.
However, since apparently no one tried to implement an optimized
INV_SQRTF version, getting rid of it seems the right thing to do.

Reviewed-by: Roland Scheidegger <sroland at vmware.com>

Am 24.02.2015 um 17:57 schrieb Brian Paul:
> ---
>  src/mesa/main/imports.h         | 9 ---------
>  src/mesa/main/light.c           | 4 ++--
>  src/mesa/main/macros.h          | 2 +-
>  src/mesa/math/m_debug_norm.c    | 5 +++--
>  src/mesa/math/m_norm_tmp.h      | 6 +++---
>  src/mesa/math/m_xform.c         | 1 +
>  src/mesa/program/prog_execute.c | 2 +-
>  src/mesa/tnl/t_rasterpos.c      | 2 +-
>  src/mesa/tnl/t_vb_points.c      | 2 +-
>  src/mesa/tnl/t_vb_texgen.c      | 4 ++--
>  10 files changed, 15 insertions(+), 22 deletions(-)
> 
> diff --git a/src/mesa/main/imports.h b/src/mesa/main/imports.h
> index da373b0..df6a3fe 100644
> --- a/src/mesa/main/imports.h
> +++ b/src/mesa/main/imports.h
> @@ -119,15 +119,6 @@ static inline int isblank(int ch) { return ch == ' ' || ch == '\t'; }
>  #endif
>  
>  
> -/** single-precision inverse square root */
> -static inline float
> -INV_SQRTF(float x)
> -{
> -   /* XXX we could try Quake's fast inverse square root function here */
> -   return 1.0F / sqrtf(x);
> -}
> -
> -
>  /***
>   *** LOG2: Log base 2 of float
>   ***/
> diff --git a/src/mesa/main/light.c b/src/mesa/main/light.c
> index 9db0bff..c4d3a53 100644
> --- a/src/mesa/main/light.c
> +++ b/src/mesa/main/light.c
> @@ -1026,9 +1026,9 @@ update_modelview_scale( struct gl_context *ctx )
>        GLfloat f = m[2] * m[2] + m[6] * m[6] + m[10] * m[10];
>        if (f < 1e-12) f = 1.0;
>        if (ctx->_NeedEyeCoords)
> -	 ctx->_ModelViewInvScale = (GLfloat) INV_SQRTF(f);
> +	 ctx->_ModelViewInvScale = 1.0f / sqrtf(f);
>        else
> -	 ctx->_ModelViewInvScale = (GLfloat) sqrtf(f);
> +	 ctx->_ModelViewInvScale = sqrtf(f);
>     }
>  }
>  
> diff --git a/src/mesa/main/macros.h b/src/mesa/main/macros.h
> index 11e3b2a..470d396 100644
> --- a/src/mesa/main/macros.h
> +++ b/src/mesa/main/macros.h
> @@ -775,7 +775,7 @@ NORMALIZE_3FV(GLfloat v[3])
>  {
>     GLfloat len = (GLfloat) LEN_SQUARED_3FV(v);
>     if (len) {
> -      len = INV_SQRTF(len);
> +      len = 1.0f / sqrtf(len);
>        v[0] *= len;
>        v[1] *= len;
>        v[2] *= len;
> diff --git a/src/mesa/math/m_debug_norm.c b/src/mesa/math/m_debug_norm.c
> index 00e72be..197b43c 100644
> --- a/src/mesa/math/m_debug_norm.c
> +++ b/src/mesa/math/m_debug_norm.c
> @@ -26,6 +26,7 @@
>   *    Gareth Hughes
>   */
>  
> +#include "c99_math.h"
>  #include "main/glheader.h"
>  #include "main/context.h"
>  #include "main/macros.h"
> @@ -165,7 +166,7 @@ static void ref_norm_transform_normalize( const GLmatrix *mat,
>  	    /* Hmmm, don't know how we could test the precalculated
>  	     * length case...
>  	     */
> -            scale = INV_SQRTF( len );
> +            scale = 1.0f / sqrtf(len);
>  	    SCALE_SCALAR_3V( out[i], scale, t );
>           } else {
>              out[i][0] = out[i][1] = out[i][2] = 0;
> @@ -241,7 +242,7 @@ static int test_norm_function( normal_func func, int mtype, long *cycles )
>        ASSIGN_3V( d2[i], 0.0, 0.0, 0.0 );
>        for ( j = 0 ; j < 3 ; j++ )
>           s[i][j] = rnd();
> -      length[i] = INV_SQRTF( LEN_SQUARED_3FV( s[i] ) );
> +      length[i] = 1.0f / sqrtf( LEN_SQUARED_3FV( s[i] ) );
>     }
>  
>     source->data = (GLfloat(*)[4]) s;
> diff --git a/src/mesa/math/m_norm_tmp.h b/src/mesa/math/m_norm_tmp.h
> index 339c03f..c8fab0e 100644
> --- a/src/mesa/math/m_norm_tmp.h
> +++ b/src/mesa/math/m_norm_tmp.h
> @@ -68,7 +68,7 @@ TAG(transform_normalize_normals)( const GLmatrix *mat,
>  	 {
>  	    GLdouble len = tx*tx + ty*ty + tz*tz;
>  	    if (len > 1e-20) {
> -	       GLfloat scale = INV_SQRTF(len);
> +	       GLfloat scale = 1.0f / sqrtf(len);
>  	       out[i][0] = tx * scale;
>  	       out[i][1] = ty * scale;
>  	       out[i][2] = tz * scale;
> @@ -135,7 +135,7 @@ TAG(transform_normalize_normals_no_rot)( const GLmatrix *mat,
>  	 {
>  	    GLdouble len = tx*tx + ty*ty + tz*tz;
>  	    if (len > 1e-20) {
> -	       GLfloat scale = INV_SQRTF(len);
> +	       GLfloat scale = 1.0f / sqrtf(len);
>  	       out[i][0] = tx * scale;
>  	       out[i][1] = ty * scale;
>  	       out[i][2] = tz * scale;
> @@ -322,7 +322,7 @@ TAG(normalize_normals)( const GLmatrix *mat,
>  	 const GLfloat x = from[0], y = from[1], z = from[2];
>  	 GLdouble len = x * x + y * y + z * z;
>  	 if (len > 1e-50) {
> -	    len = INV_SQRTF(len);
> +	    len = 1.0f / sqrtf(len);
>  	    out[i][0] = (GLfloat)(x * len);
>  	    out[i][1] = (GLfloat)(y * len);
>  	    out[i][2] = (GLfloat)(z * len);
> diff --git a/src/mesa/math/m_xform.c b/src/mesa/math/m_xform.c
> index 14d1c64..718ad49 100644
> --- a/src/mesa/math/m_xform.c
> +++ b/src/mesa/math/m_xform.c
> @@ -33,6 +33,7 @@
>   * 3. Transformation of a point p by a matrix M is: p' = M * p
>   */
>  
> +#include "c99_math.h"
>  #include "main/glheader.h"
>  #include "main/macros.h"
>  
> diff --git a/src/mesa/program/prog_execute.c b/src/mesa/program/prog_execute.c
> index de3a53b..ac81332 100644
> --- a/src/mesa/program/prog_execute.c
> +++ b/src/mesa/program/prog_execute.c
> @@ -1085,7 +1085,7 @@ _mesa_execute_program(struct gl_context * ctx,
>              GLfloat a[4], result[4];
>              fetch_vector1(&inst->SrcReg[0], machine, a);
>              a[0] = fabsf(a[0]);
> -            result[0] = result[1] = result[2] = result[3] = INV_SQRTF(a[0]);
> +            result[0] = result[1] = result[2] = result[3] = 1.0f / sqrtf(a[0]);
>              store_vector4(inst, machine, result);
>              if (DEBUG_PROG) {
>                 printf("RSQ %g = 1/sqrt(|%g|)\n", result[0], a[0]);
> diff --git a/src/mesa/tnl/t_rasterpos.c b/src/mesa/tnl/t_rasterpos.c
> index 5945f64..9ecf947 100644
> --- a/src/mesa/tnl/t_rasterpos.c
> +++ b/src/mesa/tnl/t_rasterpos.c
> @@ -272,7 +272,7 @@ compute_texgen(struct gl_context *ctx, const GLfloat vObj[4], const GLfloat vEye
>     rz = u[2] - normal[2] * two_nu;
>     m = rx * rx + ry * ry + (rz + 1.0F) * (rz + 1.0F);
>     if (m > 0.0F)
> -      mInv = 0.5F * INV_SQRTF(m);
> +      mInv = 0.5F * (1.0f / sqrtf(m));
>     else
>        mInv = 0.0F;
>  
> diff --git a/src/mesa/tnl/t_vb_points.c b/src/mesa/tnl/t_vb_points.c
> index 273db76..0f8578d 100644
> --- a/src/mesa/tnl/t_vb_points.c
> +++ b/src/mesa/tnl/t_vb_points.c
> @@ -65,7 +65,7 @@ run_point_stage(struct gl_context *ctx, struct tnl_pipeline_stage *stage)
>        for (i = 0; i < VB->Count; i++) {
>           const GLfloat dist = fabsf(*eyeCoord);
>           const GLfloat q = p0 + dist * (p1 + dist * p2);
> -         const GLfloat atten = (q != 0.0F) ? INV_SQRTF(q) : 1.0F;
> +         const GLfloat atten = (q != 0.0F) ? (1.0f / sqrtf(q)) : 1.0F;
>           size[i][0] = pointSize * atten; /* clamping done in rasterization */
>           eyeCoord += eyeCoordStride;
>        }
> diff --git a/src/mesa/tnl/t_vb_texgen.c b/src/mesa/tnl/t_vb_texgen.c
> index 8f527e3..9a61ef2 100644
> --- a/src/mesa/tnl/t_vb_texgen.c
> +++ b/src/mesa/tnl/t_vb_texgen.c
> @@ -116,7 +116,7 @@ static void build_m3( GLfloat f[][3], GLfloat m[],
>        fz = f[i][2] = u[2] - norm[2] * two_nu;
>        m[i] = fx * fx + fy * fy + (fz + 1.0F) * (fz + 1.0F);
>        if (m[i] != 0.0F) {
> -	 m[i] = 0.5F * INV_SQRTF(m[i]);
> +	 m[i] = 0.5F * (1.0f / sqrtf(m[i]));
>        }
>     }
>  }
> @@ -145,7 +145,7 @@ static void build_m2( GLfloat f[][3], GLfloat m[],
>        fz = f[i][2] = u[2] - norm[2] * two_nu;
>        m[i] = fx * fx + fy * fy + (fz + 1.0F) * (fz + 1.0F);
>        if (m[i] != 0.0F) {
> -	 m[i] = 0.5F * INV_SQRTF(m[i]);
> +	 m[i] = 0.5F * (1.0f / sqrtf(m[i]));
>        }
>     }
>  }
>