[Intel-gfx] [PATCH 34/36] drm/i915, intel_ips: Enable GPU wait-boosting with IPS
Chris Wilson
chris at chris-wilson.co.uk
Wed Mar 14 09:37:46 UTC 2018
Refactor the reclocking logic used by RPS on Ironlake to reuse the
infrastructure developed for RPS on Sandybridge+, so that Ironlake also
gains the waitboosting support for stalled clients and missed frames.
Reported-by: dimon at gmx.net
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90137
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
drivers/gpu/drm/i915/i915_debugfs.c | 5 +-
drivers/gpu/drm/i915/i915_drv.h | 19 +-
drivers/gpu/drm/i915/i915_irq.c | 58 +---
drivers/gpu/drm/i915/i915_request.c | 1 -
drivers/gpu/drm/i915/i915_sysfs.c | 10 +
drivers/gpu/drm/i915/intel_gt_pm.c | 575 ++++++++++++++++++++----------------
drivers/gpu/drm/i915/intel_pm.c | 10 -
drivers/platform/x86/intel_ips.c | 14 +-
include/drm/i915_drm.h | 1 +
9 files changed, 357 insertions(+), 336 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index ccb01244e616..7c7afdac8c8c 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1421,6 +1421,7 @@ static int ironlake_drpc_info(struct seq_file *m)
yesno(rgvmodectl & MEMMODE_HWIDLE_EN));
seq_printf(m, "SW control enabled: %s\n",
yesno(rgvmodectl & MEMMODE_SWMODE_EN));
+ seq_printf(m, "RPS active? %s\n", yesno(dev_priv->gt.awake));
seq_printf(m, "Gated voltage change: %s\n",
yesno(rgvmodectl & MEMMODE_RCLK_GATE));
seq_printf(m, "Starting frequency: P%d\n",
@@ -2201,10 +2202,12 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
intel_gpu_freq(dev_priv, rps->freq),
intel_gpu_freq(dev_priv, rps->min),
intel_gpu_freq(dev_priv, rps->max));
- seq_printf(m, " min hard:%d, user:%d; max user:%d, hard:%d\n",
+ seq_printf(m, " min hard:%d, soft:%d user:%d; max user:%d, soft: %d hard:%d\n",
intel_gpu_freq(dev_priv, rps->min_freq_hw),
+ intel_gpu_freq(dev_priv, rps->min_freq_soft),
intel_gpu_freq(dev_priv, rps->min_freq_user),
intel_gpu_freq(dev_priv, rps->max_freq_user),
+ intel_gpu_freq(dev_priv, rps->max_freq_soft),
intel_gpu_freq(dev_priv, rps->max_freq_hw));
seq_printf(m, " idle:%d, efficient:%d, boost:%d\n",
intel_gpu_freq(dev_priv, rps->idle_freq),
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index cfbcaa8556e0..82e9a58bd65f 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -761,6 +761,8 @@ struct intel_rps {
u8 max_freq_hw; /* Maximum frequency, RP0 if not overclocking */
u8 min_freq_user; /* Minimum frequency permitted by the driver */
u8 max_freq_user; /* Max frequency permitted by the driver */
+ u8 min_freq_soft;
+ u8 max_freq_soft;
u8 idle_freq; /* Frequency to request when we are idle */
u8 efficient_freq; /* AKA RPe. Pre-determined balanced frequency */
@@ -788,18 +790,14 @@ struct intel_rps {
extern spinlock_t mchdev_lock;
struct intel_ips {
- u8 cur_delay;
- u8 min_delay;
- u8 max_delay;
- u8 fmax;
- u8 fstart;
-
- u64 last_count1;
- unsigned long last_time1;
unsigned long chipset_power;
- u64 last_count2;
- u64 last_time2;
unsigned long gfx_power;
+
+ ktime_t last_time1;
+ ktime_t last_time2;
+
+ u64 last_count1;
+ u32 last_count2;
u8 corr;
int c_m;
@@ -2698,7 +2696,6 @@ extern void intel_hangcheck_init(struct drm_i915_private *dev_priv);
extern unsigned long i915_chipset_val(struct drm_i915_private *dev_priv);
extern unsigned long i915_mch_val(struct drm_i915_private *dev_priv);
extern unsigned long i915_gfx_val(struct drm_i915_private *dev_priv);
-extern void i915_update_gfx_val(struct drm_i915_private *dev_priv);
int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool on);
int intel_engines_init_mmio(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 9a52692395f2..facaae27a969 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -850,45 +850,6 @@ int intel_get_crtc_scanline(struct intel_crtc *crtc)
return position;
}
-static void ironlake_rps_change_irq_handler(struct drm_i915_private *dev_priv)
-{
- struct intel_ips *ips = &dev_priv->gt_pm.ips;
- u32 busy_up, busy_down, max_avg, min_avg;
- u8 new_delay;
-
- spin_lock(&mchdev_lock);
-
- I915_WRITE16(MEMINTRSTS, I915_READ(MEMINTRSTS));
-
- new_delay = ips->cur_delay;
-
- I915_WRITE16(MEMINTRSTS, MEMINT_EVAL_CHG);
- busy_up = I915_READ(RCPREVBSYTUPAVG);
- busy_down = I915_READ(RCPREVBSYTDNAVG);
- max_avg = I915_READ(RCBMAXAVG);
- min_avg = I915_READ(RCBMINAVG);
-
- /* Handle RCS change request from hw */
- if (busy_up > max_avg) {
- if (ips->cur_delay != ips->max_delay)
- new_delay = ips->cur_delay - 1;
- if (new_delay < ips->max_delay)
- new_delay = ips->max_delay;
- } else if (busy_down < min_avg) {
- if (ips->cur_delay != ips->min_delay)
- new_delay = ips->cur_delay + 1;
- if (new_delay > ips->min_delay)
- new_delay = ips->min_delay;
- }
-
- if (ironlake_set_drps(dev_priv, new_delay))
- ips->cur_delay = new_delay;
-
- spin_unlock(&mchdev_lock);
-
- return;
-}
-
static void notify_ring(struct intel_engine_cs *engine)
{
struct i915_request *rq = NULL;
@@ -2047,8 +2008,12 @@ static void ilk_display_irq_handler(struct drm_i915_private *dev_priv,
I915_WRITE(SDEIIR, pch_iir);
}
- if (IS_GEN5(dev_priv) && de_iir & DE_PCU_EVENT)
- ironlake_rps_change_irq_handler(dev_priv);
+ if (IS_GEN5(dev_priv) && de_iir & DE_PCU_EVENT) {
+ struct intel_rps *rps = &dev_priv->gt_pm.rps;
+
+ rps->pm_iir = GEN6_PM_RP_DOWN_EI_EXPIRED;
+ schedule_work(&rps->work);
+ }
}
static void ivb_display_irq_handler(struct drm_i915_private *dev_priv,
@@ -3335,17 +3300,6 @@ static int ironlake_irq_postinstall(struct drm_device *dev)
ibx_irq_postinstall(dev);
- if (IS_IRONLAKE_M(dev_priv)) {
- /* Enable PCU event interrupts
- *
- * spinlocking not required here for correctness since interrupt
- * setup is guaranteed to run in single-threaded context. But we
- * need it to make the assert_spin_locked happy. */
- spin_lock_irq(&dev_priv->irq_lock);
- ilk_enable_display_irq(dev_priv, DE_PCU_EVENT);
- spin_unlock_irq(&dev_priv->irq_lock);
- }
-
return 0;
}
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 605770191ceb..5dbb1905f28a 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -275,7 +275,6 @@ static void mark_busy(struct drm_i915_private *i915)
i915->gt.epoch = 1;
intel_gt_pm_busy(i915);
- i915_update_gfx_val(i915);
i915_pmu_gt_unparked(i915);
diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
index 2d4c7f2e0878..063cd00d2aae 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -457,6 +457,14 @@ static ssize_t gt_rp_mhz_show(struct device *kdev, struct device_attribute *attr
return snprintf(buf, PAGE_SIZE, "%d\n", val);
}
+static const struct attribute *gen5_attrs[] = {
+ &dev_attr_gt_cur_freq_mhz.attr,
+ &dev_attr_gt_max_freq_mhz.attr,
+ &dev_attr_gt_min_freq_mhz.attr,
+ &dev_attr_gt_RP0_freq_mhz.attr,
+ &dev_attr_gt_RPn_freq_mhz.attr,
+ NULL,
+};
static const struct attribute *gen6_attrs[] = {
&dev_attr_gt_act_freq_mhz.attr,
&dev_attr_gt_cur_freq_mhz.attr,
@@ -593,6 +601,8 @@ void i915_setup_sysfs(struct drm_i915_private *dev_priv)
ret = sysfs_create_files(&kdev->kobj, vlv_attrs);
else if (INTEL_GEN(dev_priv) >= 6)
ret = sysfs_create_files(&kdev->kobj, gen6_attrs);
+ else if (INTEL_GEN(dev_priv) >= 5)
+ ret = sysfs_create_files(&kdev->kobj, gen5_attrs);
if (ret)
DRM_ERROR("RPS sysfs setup failed\n");
diff --git a/drivers/gpu/drm/i915/intel_gt_pm.c b/drivers/gpu/drm/i915/intel_gt_pm.c
index def292cfd181..6f5c14421c90 100644
--- a/drivers/gpu/drm/i915/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/intel_gt_pm.c
@@ -34,44 +34,62 @@
* which brings the most power savings; deeper states save more power, but
* require higher latency to switch to and wake up.
*/
+static void gen5_update_gfx_val(struct drm_i915_private *dev_priv);
/*
* Lock protecting IPS related data structures
*/
DEFINE_SPINLOCK(mchdev_lock);
-bool ironlake_set_drps(struct drm_i915_private *dev_priv, u8 val)
+static int __ironlake_wait_for_rps(struct drm_i915_private *dev_priv)
{
+ return wait_for_atomic((I915_READ16(MEMSWCTL) & MEMCTL_CMD_STS) == 0,
+ 10) == 0;
+}
+
+static int __ironlake_set_rps(struct drm_i915_private *dev_priv, u8 val)
+{
+ struct intel_rps *rps = &dev_priv->gt_pm.rps;
u16 rgvswctl;
lockdep_assert_held(&mchdev_lock);
- rgvswctl = I915_READ16(MEMSWCTL);
- if (rgvswctl & MEMCTL_CMD_STS) {
- DRM_DEBUG("gpu busy, RCS change rejected\n");
- return false; /* still busy with another command */
+ if (!__ironlake_wait_for_rps(dev_priv)) {
+ DRM_DEBUG_DRIVER("gpu busy, RCS change rejected\n");
+ return -EAGAIN; /* still busy with another command */
}
- rgvswctl = (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) |
- (val << MEMCTL_FREQ_SHIFT) | MEMCTL_SFCAVM;
+ val = rps->max_freq_hw - val + rps->min_freq_hw;
+
+ rgvswctl =
+ (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) |
+ (val << MEMCTL_FREQ_SHIFT) |
+ MEMCTL_SFCAVM;
I915_WRITE16(MEMSWCTL, rgvswctl);
POSTING_READ16(MEMSWCTL);
rgvswctl |= MEMCTL_CMD_STS;
I915_WRITE16(MEMSWCTL, rgvswctl);
- return true;
+ return 0;
+}
+
+static int ironlake_set_rps(struct drm_i915_private *dev_priv, u8 val)
+{
+ if (val != dev_priv->gt_pm.rps.freq) {
+ spin_lock_irq(&mchdev_lock);
+ __ironlake_set_rps(dev_priv, val);
+ spin_unlock_irq(&mchdev_lock);
+ }
+
+ return 0;
}
static void ironlake_enable_drps(struct drm_i915_private *dev_priv)
{
struct intel_ips *ips = &dev_priv->gt_pm.ips;
- u32 rgvmodectl;
- u8 fmax, fmin, fstart, vstart;
- spin_lock_irq(&mchdev_lock);
-
- rgvmodectl = I915_READ(MEMMODECTL);
+ spin_lock(&mchdev_lock);
/* Enable temp reporting */
I915_WRITE16(PMMISC, I915_READ(PMMISC) | MCPPCE_EN);
@@ -87,75 +105,67 @@ static void ironlake_enable_drps(struct drm_i915_private *dev_priv)
I915_WRITE(MEMIHYST, 1);
- /* Set up min, max, and cur for interrupt handling */
- fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT;
- fmin = (rgvmodectl & MEMMODE_FMIN_MASK);
- fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >>
- MEMMODE_FSTART_SHIFT;
-
- vstart = (I915_READ(PXVFREQ(fstart)) & PXVFREQ_PX_MASK) >>
- PXVFREQ_PX_SHIFT;
-
- ips->fmax = fmax; /* IPS callback will increase this */
- ips->fstart = fstart;
-
- ips->max_delay = fstart;
- ips->min_delay = fmin;
- ips->cur_delay = fstart;
-
- DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n",
- fmax, fmin, fstart);
-
I915_WRITE(MEMINTREN, MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN);
- /*
- * Interrupts will be enabled in ironlake_irq_postinstall
- */
-
- I915_WRITE(VIDSTART, vstart);
- POSTING_READ(VIDSTART);
-
- rgvmodectl |= MEMMODE_SWMODE_EN;
- I915_WRITE(MEMMODECTL, rgvmodectl);
-
- if (wait_for_atomic((I915_READ(MEMSWCTL) & MEMCTL_CMD_STS) == 0, 10))
+ I915_WRITE(MEMMODECTL, I915_READ(MEMMODECTL) | MEMMODE_SWMODE_EN);
+ if (!__ironlake_wait_for_rps(dev_priv))
DRM_ERROR("stuck trying to change perf mode\n");
mdelay(1);
- ironlake_set_drps(dev_priv, fstart);
+ ips->last_count1 = I915_READ(DMIEC);
+ ips->last_count1 += I915_READ(DDREC);
+ ips->last_count1 += I915_READ(CSIEC);
+ ips->last_time1 = ktime_get_raw();
- ips->last_count1 =
- I915_READ(DMIEC) + I915_READ(DDREC) + I915_READ(CSIEC);
- ips->last_time1 = jiffies_to_msecs(jiffies);
ips->last_count2 = I915_READ(GFXEC);
ips->last_time2 = ktime_get_raw_ns();
- spin_unlock_irq(&mchdev_lock);
+ spin_unlock(&mchdev_lock);
+}
+
+static void ironlake_init_drps(struct drm_i915_private *dev_priv)
+{
+ struct intel_rps *rps = &dev_priv->gt_pm.rps;
+ u32 rgvmodectl;
+ u8 fmax, fmin, fstart;
+
+ spin_lock(&mchdev_lock);
+ rgvmodectl = I915_READ(MEMMODECTL);
+ spin_unlock(&mchdev_lock);
+
+ /* Set up min, max, and cur for interrupt handling */
+ fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT;
+ fmin = (rgvmodectl & MEMMODE_FMIN_MASK);
+ fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >>
+ MEMMODE_FSTART_SHIFT;
+ DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n",
+ fmax, fmin, fstart);
+
+ rps->max_freq_hw = fmin;
+ rps->min_freq_hw = fmax;
+ rps->efficient_freq = fmin - fstart;
+
+ I915_WRITE(VIDSTART,
+ (I915_READ(PXVFREQ(fstart)) & PXVFREQ_PX_MASK) >> PXVFREQ_PX_SHIFT);
}
static void ironlake_disable_drps(struct drm_i915_private *dev_priv)
{
u16 rgvswctl;
- spin_lock_irq(&mchdev_lock);
+ spin_lock(&mchdev_lock);
rgvswctl = I915_READ16(MEMSWCTL);
/* Ack interrupts, disable EFC interrupt */
I915_WRITE(MEMINTREN, I915_READ(MEMINTREN) & ~MEMINT_EVAL_CHG_EN);
- I915_WRITE(MEMINTRSTS, MEMINT_EVAL_CHG);
- I915_WRITE(DEIER, I915_READ(DEIER) & ~DE_PCU_EVENT);
- I915_WRITE(DEIIR, DE_PCU_EVENT);
- I915_WRITE(DEIMR, I915_READ(DEIMR) | DE_PCU_EVENT);
+ I915_WRITE16(MEMINTRSTS, MEMINT_EVAL_CHG);
+ I915_WRITE16(MEMINTRSTS, I915_READ16(MEMINTRSTS));
- /* Go back to the starting frequency */
- ironlake_set_drps(dev_priv, dev_priv->gt_pm.ips.fstart);
- mdelay(1);
rgvswctl |= MEMCTL_CMD_STS;
- I915_WRITE(MEMSWCTL, rgvswctl);
- mdelay(1);
+ I915_WRITE16(MEMSWCTL, rgvswctl);
- spin_unlock_irq(&mchdev_lock);
+ spin_unlock(&mchdev_lock);
}
/*
@@ -376,6 +386,8 @@ static int __intel_set_rps(struct drm_i915_private *dev_priv, u8 val)
return valleyview_set_rps(dev_priv, val);
else if (INTEL_GEN(dev_priv) >= 6)
return gen6_set_rps(dev_priv, val);
+ else if (INTEL_GEN(dev_priv) >= 5)
+ return ironlake_set_rps(dev_priv, val);
else
return 0;
}
@@ -389,8 +401,12 @@ static int adjust_rps(struct drm_i915_private *dev_priv, int freq, int adj)
lockdep_assert_held(&rps->lock);
GEM_BUG_ON(!rps->active);
- min = rps->min_freq_user;
- max = rps->max_freq_user;
+ min = clamp_t(int,
+ rps->min_freq_soft,
+ rps->min_freq_user, rps->max_freq_user);
+ max = clamp_t(int,
+ rps->max_freq_soft,
+ min, rps->max_freq_user);
if (atomic_read(&rps->num_waiters) && max < rps->boost_freq)
max = rps->boost_freq;
@@ -480,7 +496,7 @@ static void gen6_disable_pm_irq(struct drm_i915_private *dev_priv,
lockdep_assert_held(&dev_priv->irq_lock);
dev_priv->gt_pm.ier &= ~disable_mask;
- gen6_update_pm_irq(dev_priv, disable_mask, 0);
+ gen6_mask_pm_irq(dev_priv, disable_mask);
I915_WRITE(gen6_pm_ier(dev_priv), dev_priv->gt_pm.ier);
/* though a barrier is missing here, but don't really need a one */
}
@@ -503,7 +519,10 @@ static void enable_rps_interrupts(struct drm_i915_private *dev_priv)
return;
spin_lock_irq(&dev_priv->irq_lock);
- gen6_enable_pm_irq(dev_priv, rps->pm_events);
+ if (INTEL_GEN(dev_priv) >= 6)
+ gen6_enable_pm_irq(dev_priv, rps->pm_events);
+ else if (IS_IRONLAKE_M(dev_priv))
+ ilk_enable_display_irq(dev_priv, DE_PCU_EVENT);
spin_unlock_irq(&dev_priv->irq_lock);
}
@@ -515,8 +534,13 @@ static void disable_rps_interrupts(struct drm_i915_private *dev_priv)
return;
spin_lock_irq(&dev_priv->irq_lock);
- I915_WRITE(GEN6_PMINTRMSK, gen6_sanitize_rps_pm_mask(dev_priv, ~0u));
- gen6_disable_pm_irq(dev_priv, rps->pm_events);
+ if (INTEL_GEN(dev_priv) >= 6) {
+ I915_WRITE(GEN6_PMINTRMSK,
+ gen6_sanitize_rps_pm_mask(dev_priv, ~0u));
+ gen6_disable_pm_irq(dev_priv, rps->pm_events);
+ } else if (IS_IRONLAKE_M(dev_priv)) {
+ ilk_disable_display_irq(dev_priv, DE_PCU_EVENT);
+ }
spin_unlock_irq(&dev_priv->irq_lock);
synchronize_irq(dev_priv->drm.irq);
@@ -570,6 +594,37 @@ static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir)
return events;
}
+static u32 ilk_compute_pm_iir(struct drm_i915_private *dev_priv, u32 pm_iir)
+{
+ if ((pm_iir & GEN6_PM_RP_DOWN_EI_EXPIRED) == 0)
+ return 0;
+
+ spin_lock(&mchdev_lock);
+ I915_WRITE16(MEMINTRSTS, I915_READ16(MEMINTRSTS));
+ I915_WRITE16(MEMINTRSTS, MEMINT_EVAL_CHG);
+
+#define busy_up I915_READ(RCPREVBSYTUPAVG)
+#define busy_down I915_READ(RCPREVBSYTDNAVG)
+#define max_avg I915_READ(RCBMAXAVG)
+#define min_avg I915_READ(RCBMINAVG)
+
+ if (busy_up > max_avg)
+ pm_iir = GEN6_PM_RP_UP_THRESHOLD;
+ else if (busy_down < min_avg)
+ pm_iir = GEN6_PM_RP_DOWN_THRESHOLD;
+ else
+ pm_iir = 0;
+
+#undef busy_up
+#undef busy_down
+#undef max_avg
+#undef min_avg
+
+ spin_unlock(&mchdev_lock);
+
+ return pm_iir;
+}
+
static void intel_rps_work(struct work_struct *work)
{
struct drm_i915_private *i915 =
@@ -579,8 +634,9 @@ static void intel_rps_work(struct work_struct *work)
int freq, adj;
u32 pm_iir;
- pm_iir = xchg(&rps->pm_iir, 0) & ~rps->pm_events;
+ pm_iir = xchg(&rps->pm_iir, 0);
pm_iir |= vlv_wa_c0_ei(i915, pm_iir);
+ pm_iir |= ilk_compute_pm_iir(i915, pm_iir);
client_boost = atomic_read(&rps->num_waiters);
@@ -620,7 +676,7 @@ static void intel_rps_work(struct work_struct *work)
if (adjust_rps(i915, freq, adj))
DRM_DEBUG_DRIVER("Failed to set new GPU frequency\n");
- if (pm_iir) {
+ if (pm_iir && INTEL_GEN(i915) >= 6) {
spin_lock_irq(&i915->irq_lock);
gen6_unmask_pm_irq(i915, rps->pm_events);
spin_unlock_irq(&i915->irq_lock);
@@ -663,10 +719,10 @@ void intel_gt_pm_busy(struct drm_i915_private *dev_priv)
*/
adjust_rps(dev_priv, max(rps->freq, rps->efficient_freq), 0);
- if (INTEL_GEN(dev_priv) >= 6) {
- memset(&rps->ei, 0, sizeof(rps->ei));
- enable_rps_interrupts(dev_priv);
- }
+ memset(&rps->ei, 0, sizeof(rps->ei));
+ enable_rps_interrupts(dev_priv);
+ if (IS_GEN5(dev_priv))
+ gen5_update_gfx_val(dev_priv);
mutex_unlock(&rps->lock);
}
@@ -720,7 +776,8 @@ void intel_gt_pm_idle(struct drm_i915_private *dev_priv)
* state of the worker can be discarded.
*/
cancel_work_sync(&rps->work);
- gen6_reset_rps_interrupts(dev_priv);
+ if (INTEL_GEN(dev_priv) >= 6)
+ gen6_reset_rps_interrupts(dev_priv);
}
void intel_rps_boost(struct i915_request *rq, struct intel_rps_client *client)
@@ -1531,6 +1588,110 @@ static void vlv_init_gpll_ref_freq(struct drm_i915_private *dev_priv)
dev_priv->gt_pm.rps.gpll_ref_freq);
}
+static unsigned long ilk_pxfreq(u32 vidfreq)
+{
+ int div = (vidfreq & 0x3f0000) >> 16;
+ int post = (vidfreq & 0x3000) >> 12;
+ int pre = (vidfreq & 0x7);
+
+ if (!pre)
+ return 0;
+
+ return (div * 133333) / (pre << post);
+}
+
+static void ilk_init_emon(struct drm_i915_private *dev_priv)
+{
+ u32 lcfuse;
+ u8 pxw[16];
+ int i;
+
+ /* Disable to program */
+ I915_WRITE(ECR, 0);
+ POSTING_READ(ECR);
+
+ /* Program energy weights for various events */
+ I915_WRITE(SDEW, 0x15040d00);
+ I915_WRITE(CSIEW0, 0x007f0000);
+ I915_WRITE(CSIEW1, 0x1e220004);
+ I915_WRITE(CSIEW2, 0x04000004);
+
+ for (i = 0; i < 5; i++)
+ I915_WRITE(PEW(i), 0);
+ for (i = 0; i < 3; i++)
+ I915_WRITE(DEW(i), 0);
+
+ /* Program P-state weights to account for frequency power adjustment */
+ for (i = 0; i < 16; i++) {
+ u32 pxvidfreq = I915_READ(PXVFREQ(i));
+ unsigned long freq = ilk_pxfreq(pxvidfreq);
+ unsigned long vid = (pxvidfreq & PXVFREQ_PX_MASK) >>
+ PXVFREQ_PX_SHIFT;
+ unsigned long val;
+
+ val = vid * vid;
+ val *= (freq / 1000);
+ val *= 255;
+ val /= (127*127*900);
+ if (val > 0xff)
+ DRM_ERROR("bad pxval: %ld\n", val);
+ pxw[i] = val;
+ }
+ /* Render standby states get 0 weight */
+ pxw[14] = 0;
+ pxw[15] = 0;
+
+ for (i = 0; i < 4; i++) {
+ u32 val = (pxw[i*4] << 24) | (pxw[(i*4)+1] << 16) |
+ (pxw[(i*4)+2] << 8) | (pxw[(i*4)+3]);
+ I915_WRITE(PXW(i), val);
+ }
+
+ /* Adjust magic regs to magic values (more experimental results) */
+ I915_WRITE(OGW0, 0);
+ I915_WRITE(OGW1, 0);
+ I915_WRITE(EG0, 0x00007f00);
+ I915_WRITE(EG1, 0x0000000e);
+ I915_WRITE(EG2, 0x000e0000);
+ I915_WRITE(EG3, 0x68000300);
+ I915_WRITE(EG4, 0x42000000);
+ I915_WRITE(EG5, 0x00140031);
+ I915_WRITE(EG6, 0);
+ I915_WRITE(EG7, 0);
+
+ for (i = 0; i < 8; i++)
+ I915_WRITE(PXWL(i), 0);
+
+ /* Enable PMON + select events */
+ I915_WRITE(ECR, 0x80000019);
+
+ lcfuse = I915_READ(LCFUSE02);
+
+ dev_priv->gt_pm.ips.corr = (lcfuse & LCFUSE_HIV_MASK);
+}
+
+
+static void ilk_init_frequencies(struct drm_i915_private *i915)
+{
+ struct intel_ips *ips = &i915->gt_pm.ips;
+
+ ips->r_t = i915->mem_freq;
+
+ if (i915->fsb_freq <= 3200)
+ ips->c_m = 0;
+ else if (i915->fsb_freq <= 4800)
+ ips->c_m = 1;
+ else
+ ips->c_m = 2;
+}
+
+static void gen5_init_gt_powersave(struct drm_i915_private *i915)
+{
+ ilk_init_frequencies(i915);
+ ilk_init_emon(i915);
+ ironlake_init_drps(i915);
+}
+
static void valleyview_init_gt_powersave(struct drm_i915_private *dev_priv)
{
struct intel_rps *rps = &dev_priv->gt_pm.rps;
@@ -1830,18 +1991,6 @@ static void valleyview_enable_rps(struct drm_i915_private *dev_priv)
intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
}
-static unsigned int intel_pxfreq(u32 vidfreq)
-{
- unsigned int div = (vidfreq & 0x3f0000) >> 16;
- unsigned int post = (vidfreq & 0x3000) >> 12;
- unsigned int pre = (vidfreq & 0x7);
-
- if (!pre)
- return 0;
-
- return (div * 133333) / (pre << post);
-}
-
static const struct cparams {
u16 i;
u16 t;
@@ -1859,14 +2008,19 @@ static const struct cparams {
static unsigned long __i915_chipset_val(struct drm_i915_private *dev_priv)
{
struct intel_ips *ips = &dev_priv->gt_pm.ips;
- u64 total_count, diff, ret;
- u32 count1, count2, count3, m = 0, c = 0;
- unsigned long now = jiffies_to_msecs(jiffies), diff1;
+ u64 total_count;
+ ktime_t dt, now;
+ u32 m = 0, c = 0;
int i;
lockdep_assert_held(&mchdev_lock);
- diff1 = now - ips->last_time1;
+ /* FIXME: handle per-counter overflow */
+
+ total_count = I915_READ(DMIEC);
+ total_count += I915_READ(DDREC);
+ total_count += I915_READ(CSIEC);
+ now = ktime_get_raw();
/*
* Prevent division-by-zero if we are asking too fast.
@@ -1874,23 +2028,10 @@ static unsigned long __i915_chipset_val(struct drm_i915_private *dev_priv)
* faster than once in 10ms, so just return the saved value
* in such cases.
*/
- if (diff1 <= 10)
+ dt = ktime_sub(now, ips->last_time1);
+ if (ktime_to_ms(dt) <= 10)
return ips->chipset_power;
- count1 = I915_READ(DMIEC);
- count2 = I915_READ(DDREC);
- count3 = I915_READ(CSIEC);
-
- total_count = count1 + count2 + count3;
-
- /* FIXME: handle per-counter overflow */
- if (total_count < ips->last_count1) {
- diff = ~0UL - ips->last_count1;
- diff += total_count;
- } else {
- diff = total_count - ips->last_count1;
- }
-
for (i = 0; i < ARRAY_SIZE(cparams); i++) {
if (cparams[i].i == ips->c_m && cparams[i].t == ips->r_t) {
m = cparams[i].m;
@@ -1899,16 +2040,13 @@ static unsigned long __i915_chipset_val(struct drm_i915_private *dev_priv)
}
}
- diff = div_u64(diff, diff1);
- ret = ((m * diff) + c);
- ret = div_u64(ret, 10);
+ ips->chipset_power = div_u64(m * (total_count - ips->last_count1) + c,
+ ktime_to_ms(dt) * 10);
ips->last_count1 = total_count;
ips->last_time1 = now;
- ips->chipset_power = ret;
-
- return ret;
+ return ips->chipset_power;
}
unsigned long i915_chipset_val(struct drm_i915_private *dev_priv)
@@ -1919,11 +2057,11 @@ unsigned long i915_chipset_val(struct drm_i915_private *dev_priv)
return 0;
intel_runtime_pm_get(dev_priv);
- spin_lock_irq(&mchdev_lock);
+ spin_lock(&mchdev_lock);
val = __i915_chipset_val(dev_priv);
- spin_unlock_irq(&mchdev_lock);
+ spin_unlock(&mchdev_lock);
intel_runtime_pm_put(dev_priv);
return val;
@@ -1941,7 +2079,7 @@ unsigned long i915_mch_val(struct drm_i915_private *dev_priv)
b = tsfs & TSFS_INTR_MASK;
- return ((m * x) / 127) - b;
+ return m * x / 127 - b;
}
static int _pxvid_to_vd(u8 pxvid)
@@ -1969,49 +2107,31 @@ static u32 pvid_to_extvid(struct drm_i915_private *dev_priv, u8 pxvid)
static void __i915_update_gfx_val(struct drm_i915_private *dev_priv)
{
struct intel_ips *ips = &dev_priv->gt_pm.ips;
- u64 now, diff, diffms;
+ ktime_t now, dt;
u32 count;
lockdep_assert_held(&mchdev_lock);
- now = ktime_get_raw_ns();
- diffms = now - ips->last_time2;
- do_div(diffms, NSEC_PER_MSEC);
+ count = I915_READ(GFXEC);
- /* Don't divide by 0 */
- if (!diffms)
+ now = ktime_get_raw();
+ dt = ktime_sub(now, ips->last_time2);
+ if (ktime_to_ms(dt) <= 10)
return;
- count = I915_READ(GFXEC);
-
- if (count < ips->last_count2) {
- diff = ~0UL - ips->last_count2;
- diff += count;
- } else {
- diff = count - ips->last_count2;
- }
+ /* More magic constants... */
+ ips->gfx_power = div_u64(1181ull * (count - ips->last_count2),
+ ktime_to_ms(dt) * 10);
ips->last_count2 = count;
ips->last_time2 = now;
-
- /* More magic constants... */
- diff = diff * 1181;
- diff = div_u64(diff, diffms * 10);
- ips->gfx_power = diff;
}
-void i915_update_gfx_val(struct drm_i915_private *dev_priv)
+static void gen5_update_gfx_val(struct drm_i915_private *dev_priv)
{
- if (INTEL_GEN(dev_priv) != 5)
- return;
-
- intel_runtime_pm_get(dev_priv);
- spin_lock_irq(&mchdev_lock);
-
+ spin_lock(&mchdev_lock);
__i915_update_gfx_val(dev_priv);
-
- spin_unlock_irq(&mchdev_lock);
- intel_runtime_pm_put(dev_priv);
+ spin_unlock(&mchdev_lock);
}
static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv)
@@ -2042,7 +2162,7 @@ static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv)
corr = corr * ((150142 * state1) / 10000 - 78642);
corr /= 100000;
- corr2 = (corr * ips->corr);
+ corr2 = corr * ips->corr;
state2 = (corr2 * state1) / 10000;
state2 /= 100; /* convert to mW */
@@ -2060,11 +2180,11 @@ unsigned long i915_gfx_val(struct drm_i915_private *dev_priv)
return 0;
intel_runtime_pm_get(dev_priv);
- spin_lock_irq(&mchdev_lock);
+ spin_lock(&mchdev_lock);
val = __i915_gfx_val(dev_priv);
- spin_unlock_irq(&mchdev_lock);
+ spin_unlock(&mchdev_lock);
intel_runtime_pm_put(dev_priv);
return val;
@@ -2102,8 +2222,10 @@ unsigned long i915_read_mch_val(void)
intel_runtime_pm_get(i915);
spin_lock_irq(&mchdev_lock);
+
chipset_val = __i915_chipset_val(i915);
graphics_val = __i915_gfx_val(i915);
+
spin_unlock_irq(&mchdev_lock);
intel_runtime_pm_put(i915);
@@ -2112,30 +2234,36 @@ unsigned long i915_read_mch_val(void)
}
EXPORT_SYMBOL_GPL(i915_read_mch_val);
-/**
- * i915_gpu_raise - raise GPU frequency limit
- *
- * Raise the limit; IPS indicates we have thermal headroom.
- */
-bool i915_gpu_raise(void)
+static bool ips_adjust(int dir)
{
struct drm_i915_private *i915;
- struct intel_ips *ips;
+ struct intel_rps *rps;
+ u8 old, new;
i915 = mchdev_get();
if (!i915)
return false;
- ips = &i915->gt_pm.ips;
+ rps = &i915->gt_pm.rps;
- spin_lock_irq(&mchdev_lock);
- if (ips->max_delay > ips->fmax)
- ips->max_delay--;
- spin_unlock_irq(&mchdev_lock);
+ old = READ_ONCE(rps->max_freq_soft);
+ new = clamp_t(int, old + dir, rps->min_freq_hw, rps->max_freq_hw);
+ if (cmpxchg(&rps->max_freq_soft, old, new) == old)
+ schedule_work(&rps->work);
drm_dev_put(&i915->drm);
return true;
}
+
+/**
+ * i915_gpu_raise - raise GPU frequency limit
+ *
+ * Raise the limit; IPS indicates we have thermal headroom.
+ */
+bool i915_gpu_raise(void)
+{
+ return ips_adjust(+1);
+}
EXPORT_SYMBOL_GPL(i915_gpu_raise);
/**
@@ -2146,22 +2274,7 @@ EXPORT_SYMBOL_GPL(i915_gpu_raise);
*/
bool i915_gpu_lower(void)
{
- struct drm_i915_private *i915;
- struct intel_ips *ips;
-
- i915 = mchdev_get();
- if (!i915)
- return false;
-
- ips = &i915->gt_pm.ips;
-
- spin_lock_irq(&mchdev_lock);
- if (ips->max_delay < ips->min_delay)
- ips->max_delay++;
- spin_unlock_irq(&mchdev_lock);
-
- drm_dev_put(&i915->drm);
- return true;
+ return ips_adjust(-1);
}
EXPORT_SYMBOL_GPL(i915_gpu_lower);
@@ -2172,16 +2285,13 @@ EXPORT_SYMBOL_GPL(i915_gpu_lower);
*/
bool i915_gpu_busy(void)
{
- struct drm_i915_private *i915;
- bool ret;
-
- i915 = mchdev_get();
- if (!i915)
- return false;
+ bool ret = false;
- ret = i915->gt.awake;
+ rcu_read_lock();
+ if (i915_mch_dev)
+ ret = READ_ONCE(i915_mch_dev)->gt.awake;
+ rcu_read_unlock();
- drm_dev_put(&i915->drm);
return ret;
}
EXPORT_SYMBOL_GPL(i915_gpu_busy);
@@ -2195,22 +2305,33 @@ EXPORT_SYMBOL_GPL(i915_gpu_busy);
bool i915_gpu_turbo_disable(void)
{
struct drm_i915_private *i915;
- bool ret;
i915 = mchdev_get();
if (!i915)
return false;
- spin_lock_irq(&mchdev_lock);
- i915->gt_pm.ips.max_delay = i915->gt_pm.ips.fstart;
- ret = ironlake_set_drps(i915, i915->gt_pm.ips.fstart);
- spin_unlock_irq(&mchdev_lock);
+ intel_gt_pm_disable_rps(i915);
drm_dev_put(&i915->drm);
- return ret;
+ return true;
}
EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable);
+bool i915_gpu_turbo_enable(void)
+{
+ struct drm_i915_private *i915;
+
+ i915 = mchdev_get();
+ if (!i915)
+ return false;
+
+ intel_gt_pm_enable_rps(i915);
+
+ drm_dev_put(&i915->drm);
+ return true;
+}
+EXPORT_SYMBOL_GPL(i915_gpu_turbo_enable);
+
/**
* Tells the intel_ips driver that the i915 driver is now loaded, if
* IPS got loaded first.
@@ -2247,85 +2368,15 @@ void intel_gpu_ips_teardown(void)
smp_store_mb(i915_mch_dev, NULL);
}
-static void intel_init_emon(struct drm_i915_private *dev_priv)
-{
- u32 lcfuse;
- u8 pxw[16];
- int i;
-
- /* Disable to program */
- I915_WRITE(ECR, 0);
- POSTING_READ(ECR);
-
- /* Program energy weights for various events */
- I915_WRITE(SDEW, 0x15040d00);
- I915_WRITE(CSIEW0, 0x007f0000);
- I915_WRITE(CSIEW1, 0x1e220004);
- I915_WRITE(CSIEW2, 0x04000004);
-
- for (i = 0; i < 5; i++)
- I915_WRITE(PEW(i), 0);
- for (i = 0; i < 3; i++)
- I915_WRITE(DEW(i), 0);
-
- /* Program P-state weights to account for frequency power adjustment */
- for (i = 0; i < 16; i++) {
- u32 pxvidfreq = I915_READ(PXVFREQ(i));
- unsigned long freq = intel_pxfreq(pxvidfreq);
- unsigned long vid = (pxvidfreq & PXVFREQ_PX_MASK) >>
- PXVFREQ_PX_SHIFT;
- unsigned long val;
-
- val = vid * vid;
- val *= freq / 1000;
- val *= 255;
- val /= 127*127*900;
- if (val > 0xff)
- DRM_ERROR("bad pxval: %ld\n", val);
- pxw[i] = val;
- }
- /* Render standby states get 0 weight */
- pxw[14] = 0;
- pxw[15] = 0;
-
- for (i = 0; i < 4; i++) {
- u32 val = (pxw[i*4] << 24) | (pxw[(i*4)+1] << 16) |
- (pxw[(i*4)+2] << 8) | (pxw[(i*4)+3]);
- I915_WRITE(PXW(i), val);
- }
-
- /* Adjust magic regs to magic values (more experimental results) */
- I915_WRITE(OGW0, 0);
- I915_WRITE(OGW1, 0);
- I915_WRITE(EG0, 0x00007f00);
- I915_WRITE(EG1, 0x0000000e);
- I915_WRITE(EG2, 0x000e0000);
- I915_WRITE(EG3, 0x68000300);
- I915_WRITE(EG4, 0x42000000);
- I915_WRITE(EG5, 0x00140031);
- I915_WRITE(EG6, 0);
- I915_WRITE(EG7, 0);
-
- for (i = 0; i < 8; i++)
- I915_WRITE(PXWL(i), 0);
-
- /* Enable PMON + select events */
- I915_WRITE(ECR, 0x80000019);
-
- lcfuse = I915_READ(LCFUSE02);
-
- dev_priv->gt_pm.ips.corr = (lcfuse & LCFUSE_HIV_MASK);
-}
-
void intel_gt_pm_sanitize(struct drm_i915_private *dev_priv)
{
intel_gt_pm_disable_rps(dev_priv);
intel_gt_pm_disable_rc6(dev_priv);
- if (INTEL_GEN(dev_priv) < 11)
- gen6_reset_rps_interrupts(dev_priv);
- else
+ if (INTEL_GEN(dev_priv) >= 11)
WARN_ON_ONCE(1);
+ else if (INTEL_GEN(dev_priv) >= 6)
+ gen6_reset_rps_interrupts(dev_priv);
}
void intel_gt_pm_init(struct drm_i915_private *dev_priv)
@@ -2377,6 +2428,8 @@ void intel_gt_pm_init(struct drm_i915_private *dev_priv)
valleyview_init_gt_powersave(dev_priv);
else if (INTEL_GEN(dev_priv) >= 6)
gen6_init_rps_frequencies(dev_priv);
+ else if (INTEL_GEN(dev_priv) >= 5)
+ gen5_init_gt_powersave(dev_priv);
/* Derive initial user preferences/limits from the hardware limits */
rps->idle_freq = rps->min_freq_hw;
@@ -2404,6 +2457,9 @@ void intel_gt_pm_init(struct drm_i915_private *dev_priv)
}
}
+ rps->max_freq_soft = rps->max_freq_hw;
+ rps->min_freq_soft = rps->min_freq_hw;
+
/* Finally allow us to boost to max by default */
rps->boost_freq = rps->max_freq_hw;
@@ -2453,7 +2509,6 @@ static void __enable_rps(struct drm_i915_private *dev_priv)
gen6_enable_rps(dev_priv);
} else if (INTEL_GEN(dev_priv) >= 5) {
ironlake_enable_drps(dev_priv);
- intel_init_emon(dev_priv);
}
WARN_ON(rps->max_freq_hw < rps->min_freq_hw);
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 1ad86ee668d8..027c87489397 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -186,8 +186,6 @@ static void i915_ironlake_get_mem_freq(struct drm_i915_private *dev_priv)
break;
}
- dev_priv->gt_pm.ips.r_t = dev_priv->mem_freq;
-
switch (csipll & 0x3ff) {
case 0x00c:
dev_priv->fsb_freq = 3200;
@@ -216,14 +214,6 @@ static void i915_ironlake_get_mem_freq(struct drm_i915_private *dev_priv)
dev_priv->fsb_freq = 0;
break;
}
-
- if (dev_priv->fsb_freq == 3200) {
- dev_priv->gt_pm.ips.c_m = 0;
- } else if (dev_priv->fsb_freq > 3200 && dev_priv->fsb_freq <= 4800) {
- dev_priv->gt_pm.ips.c_m = 1;
- } else {
- dev_priv->gt_pm.ips.c_m = 2;
- }
}
static const struct cxsr_latency cxsr_latency_table[] = {
diff --git a/drivers/platform/x86/intel_ips.c b/drivers/platform/x86/intel_ips.c
index a0c95853fd3f..da7443baff55 100644
--- a/drivers/platform/x86/intel_ips.c
+++ b/drivers/platform/x86/intel_ips.c
@@ -336,6 +336,7 @@ struct ips_driver {
bool (*gpu_lower)(void);
bool (*gpu_busy)(void);
bool (*gpu_turbo_disable)(void);
+ bool (*gpu_turbo_enable)(void);
/* For restoration at unload */
u64 orig_turbo_limit;
@@ -575,7 +576,11 @@ static void ips_enable_gpu_turbo(struct ips_driver *ips)
{
if (ips->__gpu_turbo_on)
return;
- ips->__gpu_turbo_on = true;
+
+ if (!ips->gpu_turbo_enable())
+ dev_err(ips->dev, "failed to enable graphics turbo\n");
+ else
+ ips->__gpu_turbo_on = true;
}
/**
@@ -1432,9 +1437,14 @@ static bool ips_get_i915_syms(struct ips_driver *ips)
ips->gpu_turbo_disable = symbol_get(i915_gpu_turbo_disable);
if (!ips->gpu_turbo_disable)
goto out_put_busy;
+ ips->gpu_turbo_enable = symbol_get(i915_gpu_turbo_enable);
+ if (!ips->gpu_turbo_enable)
+ goto out_put_disable;
return true;
+out_put_disable:
+ symbol_put(i915_gpu_turbo_disable);
out_put_busy:
symbol_put(i915_gpu_busy);
out_put_lower:
@@ -1676,6 +1686,8 @@ static void ips_remove(struct pci_dev *dev)
symbol_put(i915_gpu_busy);
if (ips->gpu_turbo_disable)
symbol_put(i915_gpu_turbo_disable);
+ if (ips->gpu_turbo_enable)
+ symbol_put(i915_gpu_turbo_enable);
rdmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
turbo_override &= ~(TURBO_TDC_OVR_EN | TURBO_TDP_OVR_EN);
diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
index c9e5a6621b95..6ee5d77cc923 100644
--- a/include/drm/i915_drm.h
+++ b/include/drm/i915_drm.h
@@ -35,6 +35,7 @@ extern bool i915_gpu_raise(void);
extern bool i915_gpu_lower(void);
extern bool i915_gpu_busy(void);
extern bool i915_gpu_turbo_disable(void);
+extern bool i915_gpu_turbo_enable(void);
/* Exported from arch/x86/kernel/early-quirks.c */
extern struct resource intel_graphics_stolen_res;
--
2.16.2
More information about the Intel-gfx mailing list