[Intel-gfx] [PATCH 3/7] drm/i915: Extend vlv/chv residency resolution
Chris Wilson
chris at chris-wilson.co.uk
Tue Mar 14 14:16:32 UTC 2017
On Tue, Mar 14, 2017 at 03:17:25PM +0200, Mika Kuoppala wrote:
> The high counter value bit can be used to get 8 bits more
> of range out of the same residency counter registers.
Please do note that internally it is a 40-bit register with a 32-bit
window (and add a similar comment in the code).
> Lets toggle this bit on and off on vlv/chv while reading the
> counters to push the wrap from 13 seconds to 54 minutes.
>
> Reported-by: Len Brown <len.brown at intel.com>
> Cc: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Ville Syrjälä <ville.syrjala at linux.intel.com>
> Signed-off-by: Mika Kuoppala <mika.kuoppala at intel.com>
> ---
> drivers/gpu/drm/i915/intel_pm.c | 47 +++++++++++++++++++++++++++++++++++++----
> 1 file changed, 43 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index da742a9..7e7a8d9 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -8350,6 +8350,44 @@ void intel_pm_setup(struct drm_i915_private *dev_priv)
> atomic_set(&dev_priv->pm.wakeref_count, 0);
> }
>
> +static u64 vlv_residency_raw(struct drm_i915_private *dev_priv,
> + const i915_reg_t reg)
> +{
> + u32 lower, upper, tmp, saved_ctl;
> +
> + /* The register accessed do not need forcewake. We borrow
> + * uncore lock to prevent concurrent access to range reg.
> + */
> + spin_lock_irq(&dev_priv->uncore.lock);
> + saved_ctl = I915_READ_FW(VLV_COUNTER_CONTROL);
> +
> + if (!(saved_ctl & VLV_COUNT_RANGE_HIGH))
> + I915_WRITE_FW(VLV_COUNTER_CONTROL,
> + _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH));
> +
> + tmp = I915_READ_FW(reg);
Do upper = READ;
> + do {
> + upper = tmp;
tmp = upper;
> +
> + I915_WRITE_FW(VLV_COUNTER_CONTROL,
> + _MASKED_BIT_DISABLE(VLV_COUNT_RANGE_HIGH));
> + lower = I915_READ_FW(reg);
> +
> + I915_WRITE_FW(VLV_COUNTER_CONTROL,
> + _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH));
> +
Kill this newline, so both look equivalent (i.e. so that the write is
always coupled with the read).
> + tmp = I915_READ_FW(reg);
upper = READ
Then the lower/upper are clearly read together in the same loop, with
the wraparound condition checked.
> + } while (upper != tmp);
> +
> + if (!(saved_ctl & VLV_COUNT_RANGE_HIGH))
> + I915_WRITE_FW(VLV_COUNTER_CONTROL,
> + _MASKED_BIT_DISABLE(VLV_COUNT_RANGE_HIGH));
> +
> + spin_unlock_irq(&dev_priv->uncore.lock);
> +
> + return lower | (u64)upper << 8;
> +}
> +
> u64 intel_rc6_residency_us(struct drm_i915_private *dev_priv,
> const i915_reg_t reg)
> {
> @@ -8367,15 +8405,16 @@ u64 intel_rc6_residency_us(struct drm_i915_private *dev_priv,
> units = 1000;
> div = dev_priv->czclk_freq;
>
> - if (I915_READ(VLV_COUNTER_CONTROL) & VLV_COUNT_RANGE_HIGH)
> - units <<= 8;
> + raw_time = vlv_residency_raw(dev_priv, reg);
> + goto out;
> } else if (IS_GEN9_LP(dev_priv)) {
> units = 1000;
> div = 1200; /* 833.33ns */
> }
>
> - raw_time = I915_READ(reg) * units;
> - ret = DIV_ROUND_UP_ULL(raw_time, div);
> + raw_time = I915_READ(reg);
> +out:
Do we need the goto? Just move this I915_READ into the branches?
> + ret = DIV_ROUND_UP_ULL(raw_time * units, div);
--
Chris Wilson, Intel Open Source Technology Centre
More information about the Intel-gfx
mailing list