[Intel-gfx] [PATCH 3/3] drm/i915: Streamline the arithmetic

Wed Apr 29 19:11:04 UTC 2020

Quoting Ville Syrjala (2020-04-29 19:54:57)
> From: Ville Syrjälä <ville.syrjala at linux.intel.com>
> 
> All these ROUNDING_FACTORs and whatnot are making this thing hard to
> read. Get rid of them. And let's massage some of the fractions to
> give us less questionable intermediate results and perhaps fewer
> divisions.
> 
> Also looks like a good helping of 64bit math stuff is needed to
> avoid some of the overflows present in the current code. There
> might still be a few overflows, namely when calculating
> link_clks_available/samples_room (would require a huge hblank
> though), and potentially when calculating hblank_rise (not sure
> how large link_clks_active can get).
> 
> It looks like we're still not calculating exactly what the spec says
> since we truncate tu_data and tu_line early. But I'm too lazy to
> figure out if we could avoid that.
> 
> Cc: Anshuman Gupta <anshuman.gupta at intel.com>
> Cc: Uma Shankar <uma.shankar at intel.com>
> Signed-off-by: Ville Syrjälä <ville.syrjala at linux.intel.com>
> ---
>  drivers/gpu/drm/i915/display/intel_audio.c | 54 ++++++++--------------
>  1 file changed, 19 insertions(+), 35 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/display/intel_audio.c b/drivers/gpu/drm/i915/display/intel_audio.c
> index 00f7a3cf9a04..05cab508c626 100644
> --- a/drivers/gpu/drm/i915/display/intel_audio.c
> +++ b/drivers/gpu/drm/i915/display/intel_audio.c
> @@ -517,16 +517,16 @@ static void hsw_audio_codec_disable(struct intel_encoder *encoder,
>  /* Add a factor to take care of rounding and truncations */
>  #define ROUNDING_FACTOR 10000
>  
> -static unsigned int get_hblank_early_enable_config(struct intel_encoder *encoder,
> -                                                  const struct intel_crtc_state *crtc_state)
> +static unsigned int calc_hblank_early_prog(struct intel_encoder *encoder,
> +                                          const struct intel_crtc_state *crtc_state)
>  {
>         struct drm_i915_private *i915 = to_i915(encoder->base.dev);
>         unsigned int link_clks_available, link_clks_required;
>         unsigned int tu_data, tu_line, link_clks_active;
> -       unsigned int hblank_rise, hblank_early_prog;
>         unsigned int h_active, h_total, hblank_delta, pixel_clk;
>         unsigned int fec_coeff, cdclk, vdsc_bpp;
>         unsigned int link_clk, lanes;
> +       unsigned int hblank_rise;
>  
>         h_active = crtc_state->hw.adjusted_mode.crtc_hdisplay;
>         h_total = crtc_state->hw.adjusted_mode.crtc_htotal;
> @@ -542,44 +542,33 @@ static unsigned int get_hblank_early_enable_config(struct intel_encoder *encoder
>                     "lanes = %u vdsc_bpp = %u cdclk = %u\n",
>                     h_active, link_clk, lanes, vdsc_bpp, cdclk);
>  
> -       if (WARN_ON(!link_clk || !lanes || !vdsc_bpp || !cdclk))
> +       if (WARN_ON(!link_clk || !pixel_clk || !lanes || !vdsc_bpp || !cdclk))
>                 return 0;
>  
> -       link_clks_available = ((((h_total - h_active) *
> -                              ((link_clk * ROUNDING_FACTOR) /
> -                               pixel_clk)) / ROUNDING_FACTOR) - 28);
> -       link_clks_required = DIV_ROUND_UP(192000, (1000 * pixel_clk / h_total)) * ((48 /
> -                                         lanes) + 2);
> +       link_clks_available = (h_total - h_active) * link_clk / pixel_clk - 28;
> +       link_clks_required = DIV_ROUND_UP(192000 * h_total, 1000 * pixel_clk) * (48 / lanes + 2);

That's a relief.

>  
>         if (link_clks_available > link_clks_required)
>                 hblank_delta = 32;
>         else
> -               hblank_delta = DIV_ROUND_UP(((((5 * ROUNDING_FACTOR) /
> -                                           link_clk) + ((5 *
> -                                           ROUNDING_FACTOR) /
> -                                           cdclk)) * pixel_clk),
> -                                           ROUNDING_FACTOR);
> +               hblank_delta = DIV64_U64_ROUND_UP(mul_u32_u32(5 * link_clk + 5 * cdclk, pixel_clk),
> +                                                 mul_u32_u32(link_clk, cdclk));

5 * mul_u32_u32(link_clk, cdclk, pixel_clk)

>  
> -       tu_data = (pixel_clk * vdsc_bpp * 8) / ((link_clk *
> -                  lanes * fec_coeff) / 1000000);
> -       tu_line = (((h_active * link_clk * fec_coeff) /
> -                  1000000) / (64 * pixel_clk));
> +       tu_data = div64_u64(mul_u32_u32(pixel_clk * vdsc_bpp * 8, 1000000),
> +                           mul_u32_u32(link_clk * lanes, fec_coeff));

That 1000000 is fec_scale.

> +       tu_line = div64_u64(h_active * mul_u32_u32(link_clk, fec_coeff),
> +                           mul_u32_u32(64 * pixel_clk, 1000000));
>         link_clks_active  = (tu_line - 1) * 64 + tu_data;

Transformations look correct.

> -       hblank_rise = ((link_clks_active + 6 * DIV_ROUND_UP(link_clks_active,
> -                       250) + 4) * ((pixel_clk * ROUNDING_FACTOR) /
> -                       link_clk)) / ROUNDING_FACTOR;
> +       hblank_rise = (link_clks_active + 6 * DIV_ROUND_UP(link_clks_active, 250) + 4) * pixel_clk / link_clk;
>  
> -       hblank_early_prog = h_active - hblank_rise + hblank_delta;
> -
> -       return hblank_early_prog;
> +       return h_active - hblank_rise + hblank_delta;

ROUNDING_FACTOR doesn't seem to do any rounding.

>  }
>  
> -static unsigned int get_sample_room_req_config(const struct intel_crtc_state *crtc_state)
> +static unsigned int calc_samples_room(const struct intel_crtc_state *crtc_state)
>  {
>         unsigned int h_active, h_total, pixel_clk;
>         unsigned int link_clk, lanes;
> -       unsigned int samples_room;
>  
>         h_active = crtc_state->hw.adjusted_mode.hdisplay;
>         h_total = crtc_state->hw.adjusted_mode.htotal;
> @@ -587,12 +576,8 @@ static unsigned int get_sample_room_req_config(const struct intel_crtc_state *cr
>         link_clk = crtc_state->port_clock;
>         lanes = crtc_state->lane_count;
>  
> -       samples_room = ((((h_total - h_active) * ((link_clk *
> -                       ROUNDING_FACTOR) / pixel_clk)) /
> -                       ROUNDING_FACTOR) - 12) / ((48 /
> -                       lanes) + 2);
> -
> -       return samples_room;
> +       return ((h_total - h_active) * link_clk - 12 * pixel_clk) /
> +               (pixel_clk * (48 / lanes + 2));

Expansion of fractions looks fine.

The maths looks to be the same, so
Reviewed-by: Chris Wilson <chris at chris-wilson.co.uk>

Don't ask me about the meaning of it though.

One question might be if there are known inputs/outputs we can build
into a unit test.
-Chris