[Intel-gfx] [PATCH 3/7] drm/i915: Improved w/a for rps on Baytrail
Deepak S
deepak.s at linux.intel.com
Wed Mar 18 00:56:02 PDT 2015
On Friday 06 March 2015 08:36 PM, Chris Wilson wrote:
> Rewrite commit 31685c258e0b0ad6aa486c5ec001382cf8a64212
> Author: Deepak S <deepak.s at linux.intel.com>
> Date: Thu Jul 3 17:33:01 2014 -0400
>
> drm/i915/vlv: WA for Turbo and RC6 to work together.
>
> Other than code clarity, the major improvement is to disable the extra
> interrupts generated when idle. However, the reclocking remains rather
> slow under the new manual regime, in particular it fails to downclock as
> quickly as desired. The second major improvement is that for certain
> workloads, like games, we need to combine render+media activity counters
> as the work of displaying the frame is split across the engines and both
> need to be taken into account when deciding the global GPU frequency as
> memory cycles are shared.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Deepak S <deepak.s at linux.intel.com>
> Cc: Ville Syrjälä <ville.syrjala at linux.intel.com>
> Cc: Rodrigo Vivi <rodrigo.vivi at intel.com>
> Cc: Daniel Vetter <daniel.vetter at ffwll.ch>
>
> Conflicts:
> drivers/gpu/drm/i915/intel_pm.c
> ---
> drivers/gpu/drm/i915/i915_irq.c | 155 +++++++++++++----------------------
> drivers/gpu/drm/i915/i915_reg.h | 4 +-
> drivers/gpu/drm/i915/intel_display.c | 2 +
> drivers/gpu/drm/i915/intel_drv.h | 2 +
> drivers/gpu/drm/i915/intel_pm.c | 22 ++++-
> 5 files changed, 81 insertions(+), 104 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 1296ce37e435..4b7b86298b37 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -997,129 +997,84 @@ static void notify_ring(struct drm_device *dev,
> wake_up_all(&ring->irq_queue);
> }
>
> -static u32 vlv_c0_residency(struct drm_i915_private *dev_priv,
> - struct intel_rps_ei *rps_ei)
> +static void vlv_c0_read(struct drm_i915_private *dev_priv,
> + struct intel_rps_ei *ei)
> {
> - u32 cz_ts, cz_freq_khz;
> - u32 render_count, media_count;
> - u32 elapsed_render, elapsed_media, elapsed_time;
> - u32 residency = 0;
> -
> - cz_ts = vlv_punit_read(dev_priv, PUNIT_REG_CZ_TIMESTAMP);
> - cz_freq_khz = DIV_ROUND_CLOSEST(dev_priv->mem_freq * 1000, 4);
> -
> - render_count = I915_READ(VLV_RENDER_C0_COUNT_REG);
> - media_count = I915_READ(VLV_MEDIA_C0_COUNT_REG);
> -
> - if (rps_ei->cz_clock == 0) {
> - rps_ei->cz_clock = cz_ts;
> - rps_ei->render_c0 = render_count;
> - rps_ei->media_c0 = media_count;
> -
> - return dev_priv->rps.cur_freq;
> - }
> -
> - elapsed_time = cz_ts - rps_ei->cz_clock;
> - rps_ei->cz_clock = cz_ts;
> + ei->cz_clock = vlv_punit_read(dev_priv, PUNIT_REG_CZ_TIMESTAMP);
> + ei->render_c0 = I915_READ(VLV_RENDER_C0_COUNT);
> + ei->media_c0 = I915_READ(VLV_MEDIA_C0_COUNT);
> +}
>
> - elapsed_render = render_count - rps_ei->render_c0;
> - rps_ei->render_c0 = render_count;
> +static bool vlv_c0_above(struct drm_i915_private *dev_priv,
> + const struct intel_rps_ei *old,
> + const struct intel_rps_ei *now,
> + int threshold)
> +{
> + u64 time, c0;
>
> - elapsed_media = media_count - rps_ei->media_c0;
> - rps_ei->media_c0 = media_count;
> + if (old->cz_clock == 0)
> + return false;
>
> - /* Convert all the counters into common unit of milli sec */
> - elapsed_time /= VLV_CZ_CLOCK_TO_MILLI_SEC;
> - elapsed_render /= cz_freq_khz;
> - elapsed_media /= cz_freq_khz;
> + time = now->cz_clock - old->cz_clock;
> + time *= threshold * dev_priv->mem_freq;
>
> - /*
> - * Calculate overall C0 residency percentage
> - * only if elapsed time is non zero
> + /* Workload can be split between render + media, e.g. SwapBuffers
> + * being blitted in X after being rendered in mesa. To account for
> + * this we need to combine both engines into our activity counter.
> */
> - if (elapsed_time) {
> - residency =
> - ((max(elapsed_render, elapsed_media) * 100)
> - / elapsed_time);
> - }
> + c0 = now->render_c0 - old->render_c0;
> + c0 += now->media_c0 - old->media_c0;
> + c0 *= 100 * VLV_CZ_CLOCK_TO_MILLI_SEC * 4 / 1000;
>
> - return residency;
> + return c0 >= time;
> }
>
> -/**
> - * vlv_calc_delay_from_C0_counters - Increase/Decrease freq based on GPU
> - * busy-ness calculated from C0 counters of render & media power wells
> - * @dev_priv: DRM device private
> - *
> - */
> -static int vlv_calc_delay_from_C0_counters(struct drm_i915_private *dev_priv)
> +void gen6_rps_reset_ei(struct drm_i915_private *dev_priv)
> {
> - u32 residency_C0_up = 0, residency_C0_down = 0;
> - int new_delay, adj;
> -
> - dev_priv->rps.ei_interrupt_count++;
> -
> - WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
> + vlv_c0_read(dev_priv, &dev_priv->rps.down_ei);
> + dev_priv->rps.up_ei = dev_priv->rps.down_ei;
> + dev_priv->rps.ei_interrupt_count = 0;
> +}
>
> +static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir)
> +{
> + struct intel_rps_ei now;
> + u32 events = 0;
>
> - if (dev_priv->rps.up_ei.cz_clock == 0) {
> - vlv_c0_residency(dev_priv, &dev_priv->rps.up_ei);
> - vlv_c0_residency(dev_priv, &dev_priv->rps.down_ei);
> - return dev_priv->rps.cur_freq;
> - }
> + if ((pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) == 0)
> + return 0;
>
> + vlv_c0_read(dev_priv, &now);
> + if (now.cz_clock == 0)
> + return 0;
>
> /*
> * To down throttle, C0 residency should be less than down threshold
> * for continous EI intervals. So calculate down EI counters
> * once in VLV_INT_COUNT_FOR_DOWN_EI
> */
> - if (dev_priv->rps.ei_interrupt_count == VLV_INT_COUNT_FOR_DOWN_EI) {
> -
> + if (++dev_priv->rps.ei_interrupt_count >= VLV_INT_COUNT_FOR_DOWN_EI) {
> + pm_iir |= GEN6_PM_RP_DOWN_EI_EXPIRED;
> dev_priv->rps.ei_interrupt_count = 0;
> -
> - residency_C0_down = vlv_c0_residency(dev_priv,
> - &dev_priv->rps.down_ei);
> - } else {
> - residency_C0_up = vlv_c0_residency(dev_priv,
> - &dev_priv->rps.up_ei);
> }
>
> - new_delay = dev_priv->rps.cur_freq;
> -
> - adj = dev_priv->rps.last_adj;
> - /* C0 residency is greater than UP threshold. Increase Frequency */
> - if (residency_C0_up >= VLV_RP_UP_EI_THRESHOLD) {
> - if (adj > 0)
> - adj *= 2;
> - else
> - adj = 1;
> -
> - if (dev_priv->rps.cur_freq < dev_priv->rps.max_freq_softlimit)
> - new_delay = dev_priv->rps.cur_freq + adj;
> -
> - /*
> - * For better performance, jump directly
> - * to RPe if we're below it.
> - */
> - if (new_delay < dev_priv->rps.efficient_freq)
> - new_delay = dev_priv->rps.efficient_freq;
> + if (pm_iir & GEN6_PM_RP_DOWN_EI_EXPIRED) {
> + if (!vlv_c0_above(dev_priv,
> + &dev_priv->rps.down_ei, &now,
> + VLV_RP_DOWN_EI_THRESHOLD))
> + events |= GEN6_PM_RP_DOWN_THRESHOLD;
> + dev_priv->rps.down_ei = now;
> + }
>
> - } else if (!dev_priv->rps.ei_interrupt_count &&
> - (residency_C0_down < VLV_RP_DOWN_EI_THRESHOLD)) {
> - if (adj < 0)
> - adj *= 2;
> - else
> - adj = -1;
> - /*
> - * This means, C0 residency is less than down threshold over
> - * a period of VLV_INT_COUNT_FOR_DOWN_EI. So, reduce the freq
> - */
> - if (dev_priv->rps.cur_freq > dev_priv->rps.min_freq_softlimit)
> - new_delay = dev_priv->rps.cur_freq + adj;
> + if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) {
> + if (vlv_c0_above(dev_priv,
> + &dev_priv->rps.up_ei, &now,
> + VLV_RP_UP_EI_THRESHOLD))
> + events |= GEN6_PM_RP_UP_THRESHOLD;
> + dev_priv->rps.up_ei = now;
> }
>
> - return new_delay;
> + return events;
> }
>
> static void gen6_pm_rps_work(struct work_struct *work)
> @@ -1149,6 +1104,8 @@ static void gen6_pm_rps_work(struct work_struct *work)
>
> mutex_lock(&dev_priv->rps.hw_lock);
>
> + pm_iir |= vlv_wa_c0_ei(dev_priv, pm_iir);
> +
> adj = dev_priv->rps.last_adj;
> new_delay = dev_priv->rps.cur_freq;
> if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) {
> @@ -1170,8 +1127,6 @@ static void gen6_pm_rps_work(struct work_struct *work)
> else
> new_delay = dev_priv->rps.min_freq_softlimit;
> adj = 0;
> - } else if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) {
> - new_delay = vlv_calc_delay_from_C0_counters(dev_priv);
> } else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) {
> if (adj < 0)
> adj *= 2;
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index 56b97c41bf17..324922d8f8a1 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -6190,8 +6190,8 @@ enum skl_disp_power_wells {
>
> #define GEN6_GT_GFX_RC6p 0x13810C
> #define GEN6_GT_GFX_RC6pp 0x138110
> -#define VLV_RENDER_C0_COUNT_REG 0x138118
> -#define VLV_MEDIA_C0_COUNT_REG 0x13811C
> +#define VLV_RENDER_C0_COUNT 0x138118
> +#define VLV_MEDIA_C0_COUNT 0x13811C
>
> #define GEN6_PCODE_MAILBOX 0x138124
> #define GEN6_PCODE_READY (1<<31)
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index 77113d95fb71..2412002d510b 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -9167,6 +9167,8 @@ void intel_mark_busy(struct drm_device *dev)
>
> intel_runtime_pm_get(dev_priv);
> i915_update_gfx_val(dev_priv);
> + if (INTEL_INFO(dev)->gen >= 6)
> + gen6_rps_busy(dev_priv);
> dev_priv->mm.busy = true;
> }
>
> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> index ff79dca2ff8e..1e564da2fd38 100644
> --- a/drivers/gpu/drm/i915/intel_drv.h
> +++ b/drivers/gpu/drm/i915/intel_drv.h
> @@ -1238,6 +1238,8 @@ void intel_disable_gt_powersave(struct drm_device *dev);
> void intel_suspend_gt_powersave(struct drm_device *dev);
> void intel_reset_gt_powersave(struct drm_device *dev);
> void gen6_update_ring_freq(struct drm_device *dev);
> +void gen6_rps_busy(struct drm_i915_private *dev_priv);
> +void gen6_rps_reset_ei(struct drm_i915_private *dev_priv);
> void gen6_rps_idle(struct drm_i915_private *dev_priv);
> void gen6_rps_boost(struct drm_i915_private *dev_priv);
> void ilk_wm_get_hw_state(struct drm_device *dev);
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index e53724516852..b37d634bea99 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -3898,6 +3898,18 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
> I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
> }
>
> +void gen6_rps_busy(struct drm_i915_private *dev_priv)
> +{
> + mutex_lock(&dev_priv->rps.hw_lock);
> + if (dev_priv->rps.enabled) {
> + if (dev_priv->pm_rps_events & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED))
> + gen6_rps_reset_ei(dev_priv);
> + I915_WRITE(GEN6_PMINTRMSK,
> + gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq));
> + }
> + mutex_unlock(&dev_priv->rps.hw_lock);
> +}
> +
> void gen6_rps_idle(struct drm_i915_private *dev_priv)
> {
> struct drm_device *dev = dev_priv->dev;
> @@ -3909,15 +3921,21 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv)
> else
> gen6_set_rps(dev_priv->dev, dev_priv->rps.idle_freq);
> dev_priv->rps.last_adj = 0;
> + I915_WRITE(GEN6_PMINTRMSK, 0xffffffff);
> }
> mutex_unlock(&dev_priv->rps.hw_lock);
> }
>
> void gen6_rps_boost(struct drm_i915_private *dev_priv)
> {
> + u32 val;
> +
> mutex_lock(&dev_priv->rps.hw_lock);
> - if (dev_priv->rps.enabled) {
> - intel_set_rps(dev_priv->dev, dev_priv->rps.max_freq_softlimit);
> + val = dev_priv->rps.max_freq_softlimit;
> + if (dev_priv->rps.enabled &&
> + dev_priv->mm.busy &&
> + dev_priv->rps.cur_freq < val) {
> + intel_set_rps(dev_priv->dev, val);
> dev_priv->rps.last_adj = 0;
> }
> mutex_unlock(&dev_priv->rps.hw_lock);
EI Calculation and changes looks fine to me
Reviewed-by: Deepak S<deepak.s at linux.intel.com>
More information about the Intel-gfx
mailing list