[Intel-gfx] [PATCH 3/3] drm/i915: Streamline the arithmetic
Chris Wilson
chris at chris-wilson.co.uk
Wed Apr 29 19:11:04 UTC 2020
Quoting Ville Syrjala (2020-04-29 19:54:57)
> From: Ville Syrjälä <ville.syrjala at linux.intel.com>
>
> All these ROUNDING_FACTORs and whatnot are making this thing hard to
> read. Get rid of them. And let's massage some of the fractions to
> give us less questionable intermediate results and perhaps less
> divisions.
>
> Also looks like a good helping of 64bit math stuff is needed to
> avoid some of the overflows present in the current code. There
> might still be a few overflows, namely when calculating
> link_clks_available/samples_room (would require a huge hblank
> though), and potentially when calculating hblank_rise (not sure
> how large link_clks_active can get).
>
> It looks like we're still not calculating exactly what the spec says
> since we truncate tu_data and tu_line early. But I'm too lazy to
> figure out if we could avoid that.
>
> Cc: Anshuman Gupta <anshuman.gupta at intel.com>
> Cc: Uma Shankar <uma.shankar at intel.com>
> Signed-off-by: Ville Syrjälä <ville.syrjala at linux.intel.com>
> ---
> drivers/gpu/drm/i915/display/intel_audio.c | 54 ++++++++--------------
> 1 file changed, 19 insertions(+), 35 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/display/intel_audio.c b/drivers/gpu/drm/i915/display/intel_audio.c
> index 00f7a3cf9a04..05cab508c626 100644
> --- a/drivers/gpu/drm/i915/display/intel_audio.c
> +++ b/drivers/gpu/drm/i915/display/intel_audio.c
> @@ -517,16 +517,16 @@ static void hsw_audio_codec_disable(struct intel_encoder *encoder,
> /* Add a factor to take care of rounding and truncations */
> #define ROUNDING_FACTOR 10000
>
> -static unsigned int get_hblank_early_enable_config(struct intel_encoder *encoder,
> - const struct intel_crtc_state *crtc_state)
> +static unsigned int calc_hblank_early_prog(struct intel_encoder *encoder,
> + const struct intel_crtc_state *crtc_state)
> {
> struct drm_i915_private *i915 = to_i915(encoder->base.dev);
> unsigned int link_clks_available, link_clks_required;
> unsigned int tu_data, tu_line, link_clks_active;
> - unsigned int hblank_rise, hblank_early_prog;
> unsigned int h_active, h_total, hblank_delta, pixel_clk;
> unsigned int fec_coeff, cdclk, vdsc_bpp;
> unsigned int link_clk, lanes;
> + unsigned int hblank_rise;
>
> h_active = crtc_state->hw.adjusted_mode.crtc_hdisplay;
> h_total = crtc_state->hw.adjusted_mode.crtc_htotal;
> @@ -542,44 +542,33 @@ static unsigned int get_hblank_early_enable_config(struct intel_encoder *encoder
> "lanes = %u vdsc_bpp = %u cdclk = %u\n",
> h_active, link_clk, lanes, vdsc_bpp, cdclk);
>
> - if (WARN_ON(!link_clk || !lanes || !vdsc_bpp || !cdclk))
> + if (WARN_ON(!link_clk || !pixel_clk || !lanes || !vdsc_bpp || !cdclk))
> return 0;
>
> - link_clks_available = ((((h_total - h_active) *
> - ((link_clk * ROUNDING_FACTOR) /
> - pixel_clk)) / ROUNDING_FACTOR) - 28);
> - link_clks_required = DIV_ROUND_UP(192000, (1000 * pixel_clk / h_total)) * ((48 /
> - lanes) + 2);
> + link_clks_available = (h_total - h_active) * link_clk / pixel_clk - 28;
> + link_clks_required = DIV_ROUND_UP(192000 * h_total, 1000 * pixel_clk) * (48 / lanes + 2);
That's a relief.
>
> if (link_clks_available > link_clks_required)
> hblank_delta = 32;
> else
> - hblank_delta = DIV_ROUND_UP(((((5 * ROUNDING_FACTOR) /
> - link_clk) + ((5 *
> - ROUNDING_FACTOR) /
> - cdclk)) * pixel_clk),
> - ROUNDING_FACTOR);
> + hblank_delta = DIV64_U64_ROUND_UP(mul_u32_u32(5 * link_clk + 5 * cdclk, pixel_clk),
> + mul_u32_u32(link_clk, cdclk));
5 * mul_u32_u32(link_clk + cdclk, pixel_clk)
>
> - tu_data = (pixel_clk * vdsc_bpp * 8) / ((link_clk *
> - lanes * fec_coeff) / 1000000);
> - tu_line = (((h_active * link_clk * fec_coeff) /
> - 1000000) / (64 * pixel_clk));
> + tu_data = div64_u64(mul_u32_u32(pixel_clk * vdsc_bpp * 8, 1000000),
> + mul_u32_u32(link_clk * lanes, fec_coeff));
That 1000000 is fec_scale.
> + tu_line = div64_u64(h_active * mul_u32_u32(link_clk, fec_coeff),
> + mul_u32_u32(64 * pixel_clk, 1000000));
> link_clks_active = (tu_line - 1) * 64 + tu_data;
Transformations look correct.
> - hblank_rise = ((link_clks_active + 6 * DIV_ROUND_UP(link_clks_active,
> - 250) + 4) * ((pixel_clk * ROUNDING_FACTOR) /
> - link_clk)) / ROUNDING_FACTOR;
> + hblank_rise = (link_clks_active + 6 * DIV_ROUND_UP(link_clks_active, 250) + 4) * pixel_clk / link_clk;
>
> - hblank_early_prog = h_active - hblank_rise + hblank_delta;
> -
> - return hblank_early_prog;
> + return h_active - hblank_rise + hblank_delta;
ROUNDING_FACTOR doesn't seem to do any rounding.
> }
>
> -static unsigned int get_sample_room_req_config(const struct intel_crtc_state *crtc_state)
> +static unsigned int calc_samples_room(const struct intel_crtc_state *crtc_state)
> {
> unsigned int h_active, h_total, pixel_clk;
> unsigned int link_clk, lanes;
> - unsigned int samples_room;
>
> h_active = crtc_state->hw.adjusted_mode.hdisplay;
> h_total = crtc_state->hw.adjusted_mode.htotal;
> @@ -587,12 +576,8 @@ static unsigned int get_sample_room_req_config(const struct intel_crtc_state *cr
> link_clk = crtc_state->port_clock;
> lanes = crtc_state->lane_count;
>
> - samples_room = ((((h_total - h_active) * ((link_clk *
> - ROUNDING_FACTOR) / pixel_clk)) /
> - ROUNDING_FACTOR) - 12) / ((48 /
> - lanes) + 2);
> -
> - return samples_room;
> + return ((h_total - h_active) * link_clk - 12 * pixel_clk) /
> + (pixel_clk * (48 / lanes + 2));
Expansion of fractions looks fine.
The maths looks to be the same, so
Reviewed-by: Chris Wilson <chris at chris-wilson.co.uk>
Don't ask me about the meaning of it though.
One question might be if there are known inputs/outputs we can build
into a unit test.
-Chris
More information about the Intel-gfx
mailing list