[Mesa-dev] [PATCH 04/13] i965: Use float calculations when double is unnecessary.

Iago Toral itoral at igalia.com
Tue Jul 14 02:26:00 PDT 2015


Reviewed-by: Iago Toral Quiroga <itoral at igalia.com>

On Mon, 2015-07-13 at 16:22 -0700, Matt Turner wrote:
> Floating-point literals without an f/F suffix are of type double, and
> implicit conversion rules specify that the float in (float op double) be
> converted to a double before the operation is performed. I believe float
> execution was intended (in nearly all cases) or is sufficient (in the
> case of gen7_urb.c).
> 
> Removes a lot of float <-> double conversion instructions and replaces
> many double instructions with float instructions, which are cheaper.
> 
>    text     data      bss      dec      hex  filename
> 4928659   195160    26192  5150011   4e953b  i965_dri.so before
> 4928315   195152    26192  5149659   4e93db  i965_dri.so after
> ---
>  src/mesa/drivers/dri/i965/brw_blorp_blit.cpp       | 22 +++++++++++-----------
>  src/mesa/drivers/dri/i965/brw_fs.cpp               |  4 ++--
>  src/mesa/drivers/dri/i965/brw_meta_fast_clear.c    |  4 ++--
>  src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c  |  4 ++--
>  src/mesa/drivers/dri/i965/brw_misc_state.c         |  4 ++--
>  src/mesa/drivers/dri/i965/brw_sampler_state.c      |  4 ++--
>  src/mesa/drivers/dri/i965/brw_sf_state.c           |  9 +++++----
>  src/mesa/drivers/dri/i965/brw_state_cache.c        |  2 +-
>  src/mesa/drivers/dri/i965/brw_util.h               |  4 ++--
>  src/mesa/drivers/dri/i965/gen6_multisample_state.c |  4 ++--
>  src/mesa/drivers/dri/i965/gen6_sf_state.c          |  2 +-
>  src/mesa/drivers/dri/i965/gen7_sf_state.c          |  2 +-
>  src/mesa/drivers/dri/i965/gen7_urb.c               |  2 +-
>  src/mesa/drivers/dri/i965/gen8_sf_state.c          |  2 +-
>  14 files changed, 35 insertions(+), 34 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
> index 1561b59..205c905 100644
> --- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
> @@ -1285,8 +1285,8 @@ brw_blorp_blit_program::translate_dst_to_src()
>        /* Round the float coordinates down to nearest integer */
>        emit_rndd(Xp_f, X_f);
>        emit_rndd(Yp_f, Y_f);
> -      emit_mul(X_f, Xp_f, brw_imm_f(1 / key->x_scale));
> -      emit_mul(Y_f, Yp_f, brw_imm_f(1 / key->y_scale));
> +      emit_mul(X_f, Xp_f, brw_imm_f(1.0f / key->x_scale));
> +      emit_mul(Y_f, Yp_f, brw_imm_f(1.0f / key->y_scale));
>        SWAP_XY_AND_XPYP();
>     } else if (!key->bilinear_filter) {
>        /* Round the float coordinates down to nearest integer by moving to
> @@ -1442,7 +1442,7 @@ brw_blorp_blit_program::manual_blend_average(unsigned num_samples)
>        for (int j = 0; j < 4; ++j) {
>           emit_mul(offset(texture_data[0], 2*j),
>                   offset(vec8(texture_data[0]), 2*j),
> -                 brw_imm_f(1.0/num_samples));
> +                 brw_imm_f(1.0f / num_samples));
>        }
>     }
>  
> @@ -1475,9 +1475,9 @@ brw_blorp_blit_program::manual_blend_bilinear(unsigned num_samples)
>  
>        /* Compute pixel coordinates */
>        emit_add(vec16(x_sample_coords), Xp_f,
> -              brw_imm_f((float)(i & 0x1) * (1.0 / key->x_scale)));
> +              brw_imm_f((float)(i & 0x1) * (1.0f / key->x_scale)));
>        emit_add(vec16(y_sample_coords), Yp_f,
> -              brw_imm_f((float)((i >> 1) & 0x1) * (1.0 / key->y_scale)));
> +              brw_imm_f((float)((i >> 1) & 0x1) * (1.0f / key->y_scale)));
>        emit_mov(vec16(X), x_sample_coords);
>        emit_mov(vec16(Y), y_sample_coords);
>  
> @@ -1789,7 +1789,7 @@ brw_blorp_coord_transform_params::setup(GLfloat src0, GLfloat src1,
>         * so 0.5 provides the necessary correction.
>         */
>        multiplier = scale;
> -      offset = src0 + (-dst0 + 0.5) * scale;
> +      offset = src0 + (-dst0 + 0.5f) * scale;
>     } else {
>        /* When mirroring X we need:
>         *   src_x - src_x0 = dst_x1 - dst_x - 0.5
> @@ -1797,7 +1797,7 @@ brw_blorp_coord_transform_params::setup(GLfloat src0, GLfloat src1,
>         *   src_x = src_x0 + (dst_x1 -dst_x - 0.5) * scale
>         */
>        multiplier = -scale;
> -      offset = src0 + (dst1 - 0.5) * scale;
> +      offset = src0 + (dst1 - 0.5f) * scale;
>     }
>  }
>  
> @@ -1952,8 +1952,8 @@ brw_blorp_blit_params::brw_blorp_blit_params(struct brw_context *brw,
>     /* Scaling factors used for bilinear filtering in multisample scaled
>      * blits.
>      */
> -   wm_prog_key.x_scale = 2.0;
> -   wm_prog_key.y_scale = src_mt->num_samples / 2.0;
> +   wm_prog_key.x_scale = 2.0f;
> +   wm_prog_key.y_scale = src_mt->num_samples / 2.0f;
>  
>     if (filter == GL_LINEAR && src.num_samples <= 1 && dst.num_samples <= 1)
>        wm_prog_key.bilinear_filter = true;
> @@ -2000,9 +2000,9 @@ brw_blorp_blit_params::brw_blorp_blit_params(struct brw_context *brw,
>     x1 = wm_push_consts.dst_x1 = roundf(dst_x1);
>     y1 = wm_push_consts.dst_y1 = roundf(dst_y1);
>     wm_push_consts.rect_grid_x1 = (minify(src_mt->logical_width0, src_level) *
> -                                  wm_prog_key.x_scale - 1.0);
> +                                  wm_prog_key.x_scale - 1.0f);
>     wm_push_consts.rect_grid_y1 = (minify(src_mt->logical_height0, src_level) *
> -                                  wm_prog_key.y_scale - 1.0);
> +                                  wm_prog_key.y_scale - 1.0f);
>  
>     wm_push_consts.x_transform.setup(src_x0, src_x1, dst_x0, dst_x1, mirror_x);
>     wm_push_consts.y_transform.setup(src_y0, src_y1, dst_y0, dst_y1, mirror_y);
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
> index 189da1d..02f1b3b 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
> @@ -857,11 +857,11 @@ fs_visitor::emit_fragcoord_interpolation(bool pixel_center_integer,
>        bld.MOV(wpos, this->pixel_y);
>     } else {
>        fs_reg pixel_y = this->pixel_y;
> -      float offset = (pixel_center_integer ? 0.0 : 0.5);
> +      float offset = (pixel_center_integer ? 0.0f : 0.5f);
>  
>        if (flip) {
>  	 pixel_y.negate = true;
> -	 offset += key->drawable_height - 1.0;
> +	 offset += key->drawable_height - 1.0f;
>        }
>  
>        bld.ADD(wpos, pixel_y, fs_reg(offset));
> diff --git a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
> index 5b8191c..8d71964 100644
> --- a/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
> +++ b/src/mesa/drivers/dri/i965/brw_meta_fast_clear.c
> @@ -348,7 +348,7 @@ is_color_fast_clear_compatible(struct brw_context *brw,
>     }
>  
>     for (int i = 0; i < 4; i++) {
> -      if (color->f[i] != 0.0 && color->f[i] != 1.0 &&
> +      if (color->f[i] != 0.0f && color->f[i] != 1.0f &&
>            _mesa_format_has_color_component(format, i)) {
>           return false;
>        }
> @@ -366,7 +366,7 @@ compute_fast_clear_color_bits(const union gl_color_union *color)
>     uint32_t bits = 0;
>     for (int i = 0; i < 4; i++) {
>        /* Testing for non-0 works for integer and float colors */
> -      if (color->f[i] != 0.0)
> +      if (color->f[i] != 0.0f)
>           bits |= 1 << (GEN7_SURFACE_CLEAR_COLOR_SHIFT + (3 - i));
>     }
>     return bits;
> diff --git a/src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c b/src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c
> index d4abfe6..aa6df16 100644
> --- a/src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c
> +++ b/src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c
> @@ -239,10 +239,10 @@ setup_coord_coeff(GLuint prog, GLuint multiplier, GLuint offset,
>  
>     if (mirror) {
>        _mesa_Uniform1f(multiplier, -scale);
> -      _mesa_Uniform1f(offset, src_0 + (dst_1 - 0.5) * scale);
> +      _mesa_Uniform1f(offset, src_0 + (dst_1 - 0.5f) * scale);
>     } else {
>        _mesa_Uniform1f(multiplier, scale);
> -      _mesa_Uniform1f(offset, src_0 + (-dst_0 + 0.5) * scale);
> +      _mesa_Uniform1f(offset, src_0 + (-dst_0 + 0.5f) * scale);
>     }
>  }
>  
> diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c
> index 1bbb16c..16b0ed2 100644
> --- a/src/mesa/drivers/dri/i965/brw_misc_state.c
> +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
> @@ -834,13 +834,13 @@ static void upload_line_stipple(struct brw_context *brw)
>  
>     if (brw->gen >= 7) {
>        /* in U1.16 */
> -      tmp = 1.0 / (GLfloat) ctx->Line.StippleFactor;
> +      tmp = 1.0f / ctx->Line.StippleFactor;
>        tmpi = tmp * (1<<16);
>        OUT_BATCH(tmpi << 15 | ctx->Line.StippleFactor);
>     }
>     else {
>        /* in U1.13 */
> -      tmp = 1.0 / (GLfloat) ctx->Line.StippleFactor;
> +      tmp = 1.0f / ctx->Line.StippleFactor;
>        tmpi = tmp * (1<<13);
>        OUT_BATCH(tmpi << 16 | ctx->Line.StippleFactor);
>     }
> diff --git a/src/mesa/drivers/dri/i965/brw_sampler_state.c b/src/mesa/drivers/dri/i965/brw_sampler_state.c
> index 22ccbfe..2021bb3 100644
> --- a/src/mesa/drivers/dri/i965/brw_sampler_state.c
> +++ b/src/mesa/drivers/dri/i965/brw_sampler_state.c
> @@ -425,11 +425,11 @@ brw_update_sampler_state(struct brw_context *brw,
>  
>     /* Enable anisotropic filtering if desired. */
>     unsigned max_anisotropy = BRW_ANISORATIO_2;
> -   if (sampler->MaxAnisotropy > 1.0) {
> +   if (sampler->MaxAnisotropy > 1.0f) {
>        min_filter = BRW_MAPFILTER_ANISOTROPIC;
>        mag_filter = BRW_MAPFILTER_ANISOTROPIC;
>  
> -      if (sampler->MaxAnisotropy > 2.0) {
> +      if (sampler->MaxAnisotropy > 2.0f) {
>  	 max_anisotropy =
>              MIN2((sampler->MaxAnisotropy - 2) / 2, BRW_ANISORATIO_16);
>        }
> diff --git a/src/mesa/drivers/dri/i965/brw_sf_state.c b/src/mesa/drivers/dri/i965/brw_sf_state.c
> index 3be6e4a..b126f82 100644
> --- a/src/mesa/drivers/dri/i965/brw_sf_state.c
> +++ b/src/mesa/drivers/dri/i965/brw_sf_state.c
> @@ -220,7 +220,7 @@ static void upload_sf_unit( struct brw_context *brw )
>  
>     /* _NEW_LINE */
>     sf->sf6.line_width =
> -      CLAMP(ctx->Line.Width, 1.0, ctx->Const.MaxLineWidth) * (1<<1);
> +      CLAMP(ctx->Line.Width, 1.0f, ctx->Const.MaxLineWidth) * (1<<1);
>  
>     sf->sf6.line_endcap_aa_region_width = 1;
>     if (ctx->Line.SmoothFlag)
> @@ -259,9 +259,10 @@ static void upload_sf_unit( struct brw_context *brw )
>  
>     /* _NEW_POINT */
>     sf->sf7.sprite_point = ctx->Point.PointSprite;
> -   sf->sf7.point_size = CLAMP(rint(CLAMP(ctx->Point.Size,
> -					 ctx->Point.MinSize,
> -					 ctx->Point.MaxSize)), 1, 255) * (1<<3);
> +   sf->sf7.point_size = CLAMP(rintf(CLAMP(ctx->Point.Size,
> +                                          ctx->Point.MinSize,
> +                                          ctx->Point.MaxSize)), 1.0f, 255.0f) *
> +                        (1<<3);
>     /* _NEW_PROGRAM | _NEW_POINT */
>     sf->sf7.use_point_size_state = !(ctx->VertexProgram.PointSizeEnabled ||
>  				    ctx->Point._Attenuated);
> diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c
> index 157b33d..693441c 100644
> --- a/src/mesa/drivers/dri/i965/brw_state_cache.c
> +++ b/src/mesa/drivers/dri/i965/brw_state_cache.c
> @@ -323,7 +323,7 @@ brw_upload_cache(struct brw_cache *cache,
>  
>     item->key = tmp;
>  
> -   if (cache->n_items > cache->size * 1.5)
> +   if (cache->n_items > cache->size * 1.5f)
>        rehash(cache);
>  
>     hash %= cache->size;
> diff --git a/src/mesa/drivers/dri/i965/brw_util.h b/src/mesa/drivers/dri/i965/brw_util.h
> index 04e4e94..68f4318 100644
> --- a/src/mesa/drivers/dri/i965/brw_util.h
> +++ b/src/mesa/drivers/dri/i965/brw_util.h
> @@ -53,14 +53,14 @@ brw_get_line_width(struct brw_context *brw)
>     float line_width =
>        CLAMP(!brw->ctx.Multisample._Enabled && !brw->ctx.Line.SmoothFlag
>              ? roundf(brw->ctx.Line.Width) : brw->ctx.Line.Width,
> -            0.0, brw->ctx.Const.MaxLineWidth);
> +            0.0f, brw->ctx.Const.MaxLineWidth);
>     uint32_t line_width_u3_7 = U_FIXED(line_width, 7);
>  
>     /* Line width of 0 is not allowed when MSAA enabled */
>     if (brw->ctx.Multisample._Enabled) {
>        if (line_width_u3_7 == 0)
>           line_width_u3_7 = 1;
> -   } else if (brw->ctx.Line.SmoothFlag && line_width < 1.5) {
> +   } else if (brw->ctx.Line.SmoothFlag && line_width < 1.5f) {
>        /* For 1 pixel line thickness or less, the general
>         * anti-aliasing algorithm gives up, and a garbage line is
>         * generated.  Setting a Line Width of 0.0 specifies the
> diff --git a/src/mesa/drivers/dri/i965/gen6_multisample_state.c b/src/mesa/drivers/dri/i965/gen6_multisample_state.c
> index 36734f5..cf1421e 100644
> --- a/src/mesa/drivers/dri/i965/gen6_multisample_state.c
> +++ b/src/mesa/drivers/dri/i965/gen6_multisample_state.c
> @@ -148,7 +148,7 @@ unsigned
>  gen6_determine_sample_mask(struct brw_context *brw)
>  {
>     struct gl_context *ctx = &brw->ctx;
> -   float coverage = 1.0;
> +   float coverage = 1.0f;
>     float coverage_invert = false;
>     unsigned sample_mask = ~0u;
>  
> @@ -166,7 +166,7 @@ gen6_determine_sample_mask(struct brw_context *brw)
>     }
>  
>     if (num_samples > 1) {
> -      int coverage_int = (int) (num_samples * coverage + 0.5);
> +      int coverage_int = (int) (num_samples * coverage + 0.5f);
>        uint32_t coverage_bits = (1 << coverage_int) - 1;
>        if (coverage_invert)
>           coverage_bits ^= (1 << num_samples) - 1;
> diff --git a/src/mesa/drivers/dri/i965/gen6_sf_state.c b/src/mesa/drivers/dri/i965/gen6_sf_state.c
> index b00517e..4068f28 100644
> --- a/src/mesa/drivers/dri/i965/gen6_sf_state.c
> +++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c
> @@ -383,7 +383,7 @@ upload_sf_state(struct brw_context *brw)
>     point_size = CLAMP(ctx->Point.Size, ctx->Point.MinSize, ctx->Point.MaxSize);
>  
>     /* Clamp to the hardware limits and convert to fixed point */
> -   dw4 |= U_FIXED(CLAMP(point_size, 0.125, 255.875), 3);
> +   dw4 |= U_FIXED(CLAMP(point_size, 0.125f, 255.875f), 3);
>  
>     /*
>      * Window coordinates in an FBO are inverted, which means point
> diff --git a/src/mesa/drivers/dri/i965/gen7_sf_state.c b/src/mesa/drivers/dri/i965/gen7_sf_state.c
> index 4fa46a8..698b3d4 100644
> --- a/src/mesa/drivers/dri/i965/gen7_sf_state.c
> +++ b/src/mesa/drivers/dri/i965/gen7_sf_state.c
> @@ -220,7 +220,7 @@ upload_sf_state(struct brw_context *brw)
>     point_size = CLAMP(ctx->Point.Size, ctx->Point.MinSize, ctx->Point.MaxSize);
>  
>     /* Clamp to the hardware limits and convert to fixed point */
> -   dw3 |= U_FIXED(CLAMP(point_size, 0.125, 255.875), 3);
> +   dw3 |= U_FIXED(CLAMP(point_size, 0.125f, 255.875f), 3);
>  
>     /* _NEW_LIGHT */
>     if (ctx->Light.ProvokingVertex != GL_FIRST_VERTEX_CONVENTION) {
> diff --git a/src/mesa/drivers/dri/i965/gen7_urb.c b/src/mesa/drivers/dri/i965/gen7_urb.c
> index d371c19..6916217 100644
> --- a/src/mesa/drivers/dri/i965/gen7_urb.c
> +++ b/src/mesa/drivers/dri/i965/gen7_urb.c
> @@ -228,7 +228,7 @@ gen7_upload_urb(struct brw_context *brw)
>        remaining_space = total_wants;
>     if (remaining_space > 0) {
>        unsigned vs_additional = (unsigned)
> -         round(vs_wants * (((double) remaining_space) / total_wants));
> +         roundf(vs_wants * (((float) remaining_space) / total_wants));
>        vs_chunks += vs_additional;
>        remaining_space -= vs_additional;
>        gs_chunks += remaining_space;
> diff --git a/src/mesa/drivers/dri/i965/gen8_sf_state.c b/src/mesa/drivers/dri/i965/gen8_sf_state.c
> index c2b585d..6b655ee 100644
> --- a/src/mesa/drivers/dri/i965/gen8_sf_state.c
> +++ b/src/mesa/drivers/dri/i965/gen8_sf_state.c
> @@ -169,7 +169,7 @@ upload_sf(struct brw_context *brw)
>     point_size = CLAMP(ctx->Point.Size, ctx->Point.MinSize, ctx->Point.MaxSize);
>  
>     /* Clamp to the hardware limits and convert to fixed point */
> -   dw3 |= U_FIXED(CLAMP(point_size, 0.125, 255.875), 3);
> +   dw3 |= U_FIXED(CLAMP(point_size, 0.125f, 255.875f), 3);
>  
>     /* _NEW_PROGRAM | _NEW_POINT */
>     if (!(ctx->VertexProgram.PointSizeEnabled || ctx->Point._Attenuated))




More information about the mesa-dev mailing list