[PATCH 68/72] drm/i915,intel_ips: Enable GPU wait-boosting with IPS

Chris Wilson chris at chris-wilson.co.uk
Tue Feb 6 20:58:08 UTC 2018


Refactor the reclocking logic used by RPS on Ironlake to reuse the
infrastructure developed for RPS on Sandybridge+, along with the
wait-boosting support for stalled clients and missed frames.

Reported-by: dimon at gmx.net
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90137
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c     |   5 +-
 drivers/gpu/drm/i915/i915_drv.h         |  20 +-
 drivers/gpu/drm/i915/i915_gem_request.c |   1 -
 drivers/gpu/drm/i915/i915_irq.c         |  77 ++---
 drivers/gpu/drm/i915/i915_sysfs.c       |  10 +
 drivers/gpu/drm/i915/intel_gt_pm.c      | 530 +++++++++++++++++---------------
 drivers/platform/x86/intel_ips.c        |  14 +-
 include/drm/i915_drm.h                  |   1 +
 8 files changed, 337 insertions(+), 321 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index e0a931d36c9e..2dd9e472aacd 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1370,6 +1370,7 @@ static int ironlake_drpc_info(struct seq_file *m)
 		   yesno(rgvmodectl & MEMMODE_HWIDLE_EN));
 	seq_printf(m, "SW control enabled: %s\n",
 		   yesno(rgvmodectl & MEMMODE_SWMODE_EN));
+	seq_printf(m, "RPS active? %s\n", yesno(dev_priv->gt.awake));
 	seq_printf(m, "Gated voltage change: %s\n",
 		   yesno(rgvmodectl & MEMMODE_RCLK_GATE));
 	seq_printf(m, "Starting frequency: P%d\n",
@@ -2160,10 +2161,12 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
 		   intel_gpu_freq(dev_priv, rps->freq),
 		   intel_gpu_freq(dev_priv, rps->min),
 		   intel_gpu_freq(dev_priv, rps->max));
-	seq_printf(m, "  min hard:%d, user:%d; max user:%d, hard:%d\n",
+	seq_printf(m, "  min hard:%d, soft:%d user:%d; max user:%d, soft: %d hard:%d\n",
 		   intel_gpu_freq(dev_priv, rps->min_freq_hw),
+		   intel_gpu_freq(dev_priv, rps->min_freq_soft),
 		   intel_gpu_freq(dev_priv, rps->min_freq_user),
 		   intel_gpu_freq(dev_priv, rps->max_freq_user),
+		   intel_gpu_freq(dev_priv, rps->max_freq_soft),
 		   intel_gpu_freq(dev_priv, rps->max_freq_hw));
 	seq_printf(m, "  idle:%d, efficient:%d, boost:%d\n",
 		   intel_gpu_freq(dev_priv, rps->idle_freq),
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 659dc89bccf1..d10103a6cc6d 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -925,6 +925,8 @@ struct intel_rps {
 	u8 max_freq_hw;		/* Maximum frequency, RP0 if not overclocking */
 	u8 min_freq_user;	/* Minimum frequency permitted by the driver */
 	u8 max_freq_user;	/* Max frequency permitted by the driver */
+	u8 min_freq_soft;
+	u8 max_freq_soft;
 
 	u8 idle_freq;		/* Frequency to request when we are idle */
 	u8 efficient_freq;	/* AKA RPe. Pre-determined balanced frequency */
@@ -948,23 +950,18 @@ struct intel_rps {
 	struct intel_rps_ei ei;
 };
 
-
 /* defined intel_pm.c */
 extern spinlock_t mchdev_lock;
 
 struct intel_ips {
-	u8 cur_delay;
-	u8 min_delay;
-	u8 max_delay;
-	u8 fmax;
-	u8 fstart;
-
-	u64 last_count1;
-	unsigned long last_time1;
 	unsigned long chipset_power;
-	u64 last_count2;
-	u64 last_time2;
 	unsigned long gfx_power;
+
+	ktime_t last_time1;
+	ktime_t last_time2;
+
+	u64 last_count1;
+	u32 last_count2;
 	u8 corr;
 
 	int c_m;
@@ -2944,7 +2941,6 @@ extern void intel_hangcheck_init(struct drm_i915_private *dev_priv);
 extern unsigned long i915_chipset_val(struct drm_i915_private *dev_priv);
 extern unsigned long i915_mch_val(struct drm_i915_private *dev_priv);
 extern unsigned long i915_gfx_val(struct drm_i915_private *dev_priv);
-extern void i915_update_gfx_val(struct drm_i915_private *dev_priv);
 int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool on);
 
 int intel_engines_init_mmio(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index bda611744031..182544771a1f 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -275,7 +275,6 @@ static void mark_busy(struct drm_i915_private *i915)
 	if (unlikely(++i915->gt.epoch == 0)) /* keep 0 as invalid */
 		i915->gt.epoch = 1;
 
-	i915_update_gfx_val(i915);
 	intel_rps_busy(i915);
 	i915_pmu_gt_unparked(i915);
 
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index f507405e268e..b0e676c36c48 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -394,9 +394,14 @@ static void gen6_disable_pm_irq(struct drm_i915_private *dev_priv, u32 disable_m
 {
 	lockdep_assert_held(&dev_priv->irq_lock);
 
-	dev_priv->pm_ier &= ~disable_mask;
-	__gen6_mask_pm_irq(dev_priv, disable_mask);
-	I915_WRITE(gen6_pm_ier(dev_priv), dev_priv->pm_ier);
+	if (INTEL_GEN(dev_priv) >= 6) {
+		dev_priv->pm_ier &= ~disable_mask;
+		__gen6_mask_pm_irq(dev_priv, disable_mask);
+		I915_WRITE(gen6_pm_ier(dev_priv), dev_priv->pm_ier);
+	} else if (IS_IRONLAKE_M(dev_priv)) {
+		/* ILK-M: RPS events arrive as DE_PCU_EVENT; turn that off here */
+		ilk_disable_display_irq(dev_priv, DE_PCU_EVENT);
+	}
 	/* though a barrier is missing here, but don't really need a one */
 }
 
@@ -434,9 +438,12 @@ void gen6_disable_rps_interrupts(struct drm_i915_private *dev_priv)
 	spin_lock_irq(&dev_priv->irq_lock);
 	rps->interrupts_enabled = false;
 
-	I915_WRITE(GEN6_PMINTRMSK, gen6_sanitize_rps_pm_mask(dev_priv, ~0u));
-
-	gen6_disable_pm_irq(dev_priv, dev_priv->pm_rps_events);
+	if (INTEL_GEN(dev_priv) >= 6) {
+		I915_WRITE(GEN6_PMINTRMSK, gen6_sanitize_rps_pm_mask(dev_priv, ~0u));
+		gen6_disable_pm_irq(dev_priv, dev_priv->pm_rps_events);
+	} else {
+		ilk_disable_display_irq(dev_priv, DE_PCU_EVENT);
+	}
 
 	spin_unlock_irq(&dev_priv->irq_lock);
 	synchronize_irq(dev_priv->drm.irq);
@@ -1031,45 +1038,6 @@ int intel_get_crtc_scanline(struct intel_crtc *crtc)
 	return position;
 }
 
-static void ironlake_rps_change_irq_handler(struct drm_i915_private *dev_priv)
-{
-	struct intel_ips *ips = &dev_priv->gt_pm.ips;
-	u32 busy_up, busy_down, max_avg, min_avg;
-	u8 new_delay;
-
-	spin_lock(&mchdev_lock);
-
-	I915_WRITE16(MEMINTRSTS, I915_READ(MEMINTRSTS));
-
-	new_delay = ips->cur_delay;
-
-	I915_WRITE16(MEMINTRSTS, MEMINT_EVAL_CHG);
-	busy_up = I915_READ(RCPREVBSYTUPAVG);
-	busy_down = I915_READ(RCPREVBSYTDNAVG);
-	max_avg = I915_READ(RCBMAXAVG);
-	min_avg = I915_READ(RCBMINAVG);
-
-	/* Handle RCS change request from hw */
-	if (busy_up > max_avg) {
-		if (ips->cur_delay != ips->max_delay)
-			new_delay = ips->cur_delay - 1;
-		if (new_delay < ips->max_delay)
-			new_delay = ips->max_delay;
-	} else if (busy_down < min_avg) {
-		if (ips->cur_delay != ips->min_delay)
-			new_delay = ips->cur_delay + 1;
-		if (new_delay > ips->min_delay)
-			new_delay = ips->min_delay;
-	}
-
-	if (ironlake_set_drps(dev_priv, new_delay))
-		ips->cur_delay = new_delay;
-
-	spin_unlock(&mchdev_lock);
-
-	return;
-}
-
 static void notify_ring(struct intel_engine_cs *engine)
 {
 	const u32 seqno = intel_engine_get_seqno(engine);
@@ -2273,8 +2241,12 @@ static void ilk_display_irq_handler(struct drm_i915_private *dev_priv,
 		I915_WRITE(SDEIIR, pch_iir);
 	}
 
-	if (IS_GEN5(dev_priv) && de_iir & DE_PCU_EVENT)
-		ironlake_rps_change_irq_handler(dev_priv);
+	if (IS_GEN5(dev_priv) && de_iir & DE_PCU_EVENT) {
+		struct intel_rps *rps = &dev_priv->gt_pm.rps;
+
+		rps->pm_iir = GEN6_PM_RP_DOWN_EI_EXPIRED;
+		schedule_work(&rps->work);
+	}
 }
 
 static void ivb_display_irq_handler(struct drm_i915_private *dev_priv,
@@ -3367,17 +3339,6 @@ static int ironlake_irq_postinstall(struct drm_device *dev)
 
 	ibx_irq_postinstall(dev);
 
-	if (IS_IRONLAKE_M(dev_priv)) {
-		/* Enable PCU event interrupts
-		 *
-		 * spinlocking not required here for correctness since interrupt
-		 * setup is guaranteed to run in single-threaded context. But we
-		 * need it to make the assert_spin_locked happy. */
-		spin_lock_irq(&dev_priv->irq_lock);
-		ilk_enable_display_irq(dev_priv, DE_PCU_EVENT);
-		spin_unlock_irq(&dev_priv->irq_lock);
-	}
-
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
index 7ac6d7b4a722..4c341d758aa0 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -455,6 +455,14 @@ static ssize_t gt_rp_mhz_show(struct device *kdev, struct device_attribute *attr
 	return snprintf(buf, PAGE_SIZE, "%d\n", val);
 }
 
+static const struct attribute *gen5_attrs[] = {
+	&dev_attr_gt_cur_freq_mhz.attr,
+	&dev_attr_gt_max_freq_mhz.attr,
+	&dev_attr_gt_min_freq_mhz.attr,
+	&dev_attr_gt_RP0_freq_mhz.attr,
+	&dev_attr_gt_RPn_freq_mhz.attr,
+	NULL,
+};
 static const struct attribute *gen6_attrs[] = {
 	&dev_attr_gt_act_freq_mhz.attr,
 	&dev_attr_gt_cur_freq_mhz.attr,
@@ -591,6 +599,8 @@ void i915_setup_sysfs(struct drm_i915_private *dev_priv)
 		ret = sysfs_create_files(&kdev->kobj, vlv_attrs);
 	else if (INTEL_GEN(dev_priv) >= 6)
 		ret = sysfs_create_files(&kdev->kobj, gen6_attrs);
+	else if (INTEL_INFO(dev_priv)->gen >= 5)
+		ret = sysfs_create_files(&kdev->kobj, gen5_attrs);
 	if (ret)
 		DRM_ERROR("RPS sysfs setup failed\n");
 
diff --git a/drivers/gpu/drm/i915/intel_gt_pm.c b/drivers/gpu/drm/i915/intel_gt_pm.c
index 67952155f0d6..843057f875d9 100644
--- a/drivers/gpu/drm/i915/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/intel_gt_pm.c
@@ -50,44 +50,62 @@
  * which brings the most power savings; deeper states save more power, but
  * require higher latency to switch to and wake up.
  */
+static void gen5_update_gfx_val(struct drm_i915_private *dev_priv);
 
 /*
  * Lock protecting IPS related data structures
  */
 DEFINE_SPINLOCK(mchdev_lock);
 
-bool ironlake_set_drps(struct drm_i915_private *dev_priv, u8 val)
+static int __ironlake_wait_for_rps(struct drm_i915_private *dev_priv)
 {
+	return wait_for_atomic((I915_READ16(MEMSWCTL) & MEMCTL_CMD_STS) == 0,
+			       10) == 0;
+}
+
+static int __ironlake_set_rps(struct drm_i915_private *dev_priv, u8 val)
+{
+	struct intel_rps *rps = &dev_priv->gt_pm.rps;
 	u16 rgvswctl;
 
 	lockdep_assert_held(&mchdev_lock);
 
-	rgvswctl = I915_READ16(MEMSWCTL);
-	if (rgvswctl & MEMCTL_CMD_STS) {
-		DRM_DEBUG("gpu busy, RCS change rejected\n");
-		return false; /* still busy with another command */
+	if (!__ironlake_wait_for_rps(dev_priv)) {
+		DRM_DEBUG_DRIVER("gpu busy, RCS change rejected\n");
+		return -EAGAIN; /* still busy with another command */
 	}
 
-	rgvswctl = (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) |
-		(val << MEMCTL_FREQ_SHIFT) | MEMCTL_SFCAVM;
+	val = rps->max_freq_hw - val + rps->min_freq_hw;
+
+	rgvswctl =
+		(MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) |
+		(val << MEMCTL_FREQ_SHIFT) |
+		MEMCTL_SFCAVM;
 	I915_WRITE16(MEMSWCTL, rgvswctl);
 	POSTING_READ16(MEMSWCTL);
 
 	rgvswctl |= MEMCTL_CMD_STS;
 	I915_WRITE16(MEMSWCTL, rgvswctl);
 
-	return true;
+	return 0;
+}
+
+static int ironlake_set_rps(struct drm_i915_private *dev_priv, u8 val)
+{
+	if (val != dev_priv->gt_pm.rps.freq) {
+		spin_lock_irq(&mchdev_lock);
+		__ironlake_set_rps(dev_priv, val);
+		spin_unlock_irq(&mchdev_lock);
+	}
+
+	return 0;
 }
 
 static void ironlake_enable_drps(struct drm_i915_private *dev_priv)
 {
 	struct intel_ips *ips = &dev_priv->gt_pm.ips;
-	u32 rgvmodectl;
-	u8 fmax, fmin, fstart, vstart;
 
-	spin_lock_irq(&mchdev_lock);
-
-	rgvmodectl = I915_READ(MEMMODECTL);
+	spin_lock(&mchdev_lock);
 
 	/* Enable temp reporting */
 	I915_WRITE16(PMMISC, I915_READ(PMMISC) | MCPPCE_EN);
@@ -103,75 +121,67 @@ static void ironlake_enable_drps(struct drm_i915_private *dev_priv)
 
 	I915_WRITE(MEMIHYST, 1);
 
-	/* Set up min, max, and cur for interrupt handling */
-	fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT;
-	fmin = (rgvmodectl & MEMMODE_FMIN_MASK);
-	fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >>
-		MEMMODE_FSTART_SHIFT;
-
-	vstart = (I915_READ(PXVFREQ(fstart)) & PXVFREQ_PX_MASK) >>
-		PXVFREQ_PX_SHIFT;
-
-	ips->fmax = fmax; /* IPS callback will increase this */
-	ips->fstart = fstart;
-
-	ips->max_delay = fstart;
-	ips->min_delay = fmin;
-	ips->cur_delay = fstart;
-
-	DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n",
-			 fmax, fmin, fstart);
-
 	I915_WRITE(MEMINTREN, MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN);
 
-	/*
-	 * Interrupts will be enabled in ironlake_irq_postinstall
-	 */
-
-	I915_WRITE(VIDSTART, vstart);
-	POSTING_READ(VIDSTART);
-
-	rgvmodectl |= MEMMODE_SWMODE_EN;
-	I915_WRITE(MEMMODECTL, rgvmodectl);
-
-	if (wait_for_atomic((I915_READ(MEMSWCTL) & MEMCTL_CMD_STS) == 0, 10))
+	I915_WRITE(MEMMODECTL, I915_READ(MEMMODECTL) | MEMMODE_SWMODE_EN);
+	if (!__ironlake_wait_for_rps(dev_priv))
 		DRM_ERROR("stuck trying to change perf mode\n");
 	mdelay(1);
 
-	ironlake_set_drps(dev_priv, fstart);
+	ips->last_count1 = I915_READ(DMIEC);
+	ips->last_count1 += I915_READ(DDREC);
+	ips->last_count1 += I915_READ(CSIEC);
+	ips->last_time1 = ktime_get_raw();
 
-	ips->last_count1 =
-		I915_READ(DMIEC) + I915_READ(DDREC) + I915_READ(CSIEC);
-	ips->last_time1 = jiffies_to_msecs(jiffies);
 	ips->last_count2 = I915_READ(GFXEC);
 	ips->last_time2 = ktime_get_raw_ns();
 
-	spin_unlock_irq(&mchdev_lock);
+	spin_unlock(&mchdev_lock);
+}
+
+static void ironlake_init_drps(struct drm_i915_private *dev_priv)
+{
+	struct intel_rps *rps = &dev_priv->gt_pm.rps;
+	u32 rgvmodectl;
+	u8 fmax, fmin, fstart;
+
+	spin_lock(&mchdev_lock);
+	rgvmodectl = I915_READ(MEMMODECTL);
+	spin_unlock(&mchdev_lock);
+
+	/* Set up min, max, and cur for interrupt handling */
+	fmax = (rgvmodectl & MEMMODE_FMAX_MASK) >> MEMMODE_FMAX_SHIFT;
+	fmin = (rgvmodectl & MEMMODE_FMIN_MASK);
+	fstart = (rgvmodectl & MEMMODE_FSTART_MASK) >>
+		MEMMODE_FSTART_SHIFT;
+	DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n",
+			 fmax, fmin, fstart);
+
+	rps->max_freq_hw = fmin;
+	rps->min_freq_hw = fmax;
+	rps->efficient_freq = fmin - fstart;
+
+	I915_WRITE(VIDSTART,
+		   (I915_READ(PXVFREQ(fstart)) & PXVFREQ_PX_MASK) >> PXVFREQ_PX_SHIFT);
 }
 
 static void ironlake_disable_drps(struct drm_i915_private *dev_priv)
 {
 	u16 rgvswctl;
 
-	spin_lock_irq(&mchdev_lock);
+	spin_lock(&mchdev_lock);
 
 	rgvswctl = I915_READ16(MEMSWCTL);
 
 	/* Ack interrupts, disable EFC interrupt */
 	I915_WRITE(MEMINTREN, I915_READ(MEMINTREN) & ~MEMINT_EVAL_CHG_EN);
-	I915_WRITE(MEMINTRSTS, MEMINT_EVAL_CHG);
-	I915_WRITE(DEIER, I915_READ(DEIER) & ~DE_PCU_EVENT);
-	I915_WRITE(DEIIR, DE_PCU_EVENT);
-	I915_WRITE(DEIMR, I915_READ(DEIMR) | DE_PCU_EVENT);
+	I915_WRITE16(MEMINTRSTS, MEMINT_EVAL_CHG);
+	I915_WRITE16(MEMINTRSTS, I915_READ16(MEMINTRSTS));
 
-	/* Go back to the starting frequency */
-	ironlake_set_drps(dev_priv, dev_priv->gt_pm.ips.fstart);
-	mdelay(1);
 	rgvswctl |= MEMCTL_CMD_STS;
-	I915_WRITE(MEMSWCTL, rgvswctl);
-	mdelay(1);
+	I915_WRITE16(MEMSWCTL, rgvswctl);
 
-	spin_unlock_irq(&mchdev_lock);
+	spin_unlock(&mchdev_lock);
 }
 
 /* There's a funny hw issue where the hw returns all 0 when reading from
@@ -383,6 +393,8 @@ static int __intel_set_rps(struct drm_i915_private *dev_priv, u8 val)
 		return valleyview_set_rps(dev_priv, val);
 	else if (INTEL_GEN(dev_priv) >= 6)
 		return gen6_set_rps(dev_priv, val);
+	else if (INTEL_GEN(dev_priv) >= 5)
+		return ironlake_set_rps(dev_priv, val);
 	else
 		return 0;
 }
@@ -459,6 +471,37 @@ static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir)
 	return events;
 }
 
+static u32 ilk_compute_pm_iir(struct drm_i915_private *dev_priv, u32 pm_iir)
+{
+	if ((pm_iir & GEN6_PM_RP_DOWN_EI_EXPIRED) == 0)
+		return 0;
+
+	spin_lock(&mchdev_lock);
+	I915_WRITE16(MEMINTRSTS, I915_READ16(MEMINTRSTS));
+	I915_WRITE16(MEMINTRSTS, MEMINT_EVAL_CHG);
+
+#define busy_up I915_READ(RCPREVBSYTUPAVG)
+#define busy_down I915_READ(RCPREVBSYTDNAVG)
+#define max_avg I915_READ(RCBMAXAVG)
+#define min_avg I915_READ(RCBMINAVG)
+
+	if (busy_up > max_avg)
+		pm_iir = GEN6_PM_RP_UP_THRESHOLD;
+	else if (busy_down < min_avg)
+		pm_iir = GEN6_PM_RP_DOWN_THRESHOLD;
+	else
+		pm_iir = 0;
+
+#undef busy_up
+#undef busy_down
+#undef max_avg
+#undef min_avg
+
+	spin_unlock(&mchdev_lock);
+
+	return pm_iir;
+}
+
 static void intel_rps_work(struct work_struct *work)
 {
 	struct drm_i915_private *i915 =
@@ -468,15 +511,20 @@ static void intel_rps_work(struct work_struct *work)
 	bool client_boost;
 	u32 pm_iir;
 
-	pm_iir = xchg(&rps->pm_iir, 0) & ~i915->pm_rps_events;
+	pm_iir = xchg(&rps->pm_iir, 0);
 	pm_iir |= vlv_wa_c0_ei(i915, pm_iir);
+	pm_iir |= ilk_compute_pm_iir(i915, pm_iir);
 
 	client_boost = atomic_read(&rps->num_waiters);
 
 	mutex_lock(&rps->lock);
 
-	min = rps->min_freq_user;
-	max = rps->max_freq_user;
+	min = clamp_t(int,
+		      rps->min_freq_soft,
+		      rps->min_freq_user, rps->max_freq_user);
+	max = clamp_t(int,
+		      rps->max_freq_soft,
+		      min, rps->max_freq_user);
 	if (client_boost && max < rps->boost_freq)
 		max = rps->boost_freq;
 
@@ -562,6 +610,8 @@ void intel_rps_busy(struct drm_i915_private *dev_priv)
 	if (INTEL_GEN(dev_priv) >= 6) {
 		memset(&rps->ei, 0, sizeof(rps->ei));
 		gen6_enable_rps_interrupts(dev_priv);
+	} else if (INTEL_GEN(dev_priv) >= 5) {
+		gen5_update_gfx_val(dev_priv);
 	}
 }
 
@@ -1398,6 +1448,94 @@ static void vlv_init_gpll_ref_freq(struct drm_i915_private *dev_priv)
 			 dev_priv->gt_pm.rps.gpll_ref_freq);
 }
 
+static unsigned long ilk_pxfreq(u32 vidfreq)
+{
+	int div = (vidfreq & 0x3f0000) >> 16;
+	int post = (vidfreq & 0x3000) >> 12;
+	int pre = (vidfreq & 0x7);
+
+	if (!pre)
+		return 0;
+
+	return (div * 133333) / (pre << post);
+}
+
+static void ilk_init_emon(struct drm_i915_private *dev_priv)
+{
+	u32 lcfuse;
+	u8 pxw[16];
+	int i;
+
+	/* Disable to program */
+	I915_WRITE(ECR, 0);
+	POSTING_READ(ECR);
+
+	/* Program energy weights for various events */
+	I915_WRITE(SDEW, 0x15040d00);
+	I915_WRITE(CSIEW0, 0x007f0000);
+	I915_WRITE(CSIEW1, 0x1e220004);
+	I915_WRITE(CSIEW2, 0x04000004);
+
+	for (i = 0; i < 5; i++)
+		I915_WRITE(PEW(i), 0);
+	for (i = 0; i < 3; i++)
+		I915_WRITE(DEW(i), 0);
+
+	/* Program P-state weights to account for frequency power adjustment */
+	for (i = 0; i < 16; i++) {
+		u32 pxvidfreq = I915_READ(PXVFREQ(i));
+		unsigned long freq = ilk_pxfreq(pxvidfreq);
+		unsigned long vid = (pxvidfreq & PXVFREQ_PX_MASK) >>
+			PXVFREQ_PX_SHIFT;
+		unsigned long val;
+
+		val = vid * vid;
+		val *= (freq / 1000);
+		val *= 255;
+		val /= (127*127*900);
+		if (val > 0xff)
+			DRM_ERROR("bad pxval: %ld\n", val);
+		pxw[i] = val;
+	}
+	/* Render standby states get 0 weight */
+	pxw[14] = 0;
+	pxw[15] = 0;
+
+	for (i = 0; i < 4; i++) {
+		u32 val = (pxw[i*4] << 24) | (pxw[(i*4)+1] << 16) |
+			(pxw[(i*4)+2] << 8) | (pxw[(i*4)+3]);
+		I915_WRITE(PXW(i), val);
+	}
+
+	/* Adjust magic regs to magic values (more experimental results) */
+	I915_WRITE(OGW0, 0);
+	I915_WRITE(OGW1, 0);
+	I915_WRITE(EG0, 0x00007f00);
+	I915_WRITE(EG1, 0x0000000e);
+	I915_WRITE(EG2, 0x000e0000);
+	I915_WRITE(EG3, 0x68000300);
+	I915_WRITE(EG4, 0x42000000);
+	I915_WRITE(EG5, 0x00140031);
+	I915_WRITE(EG6, 0);
+	I915_WRITE(EG7, 0);
+
+	for (i = 0; i < 8; i++)
+		I915_WRITE(PXWL(i), 0);
+
+	/* Enable PMON + select events */
+	I915_WRITE(ECR, 0x80000019);
+
+	lcfuse = I915_READ(LCFUSE02);
+
+	dev_priv->gt_pm.ips.corr = (lcfuse & LCFUSE_HIV_MASK);
+}
+
+static void gen5_init_gt_powersave(struct drm_i915_private *i915)
+{
+	ilk_init_emon(i915);
+	ironlake_init_drps(i915);
+}
+
 static void valleyview_init_gt_powersave(struct drm_i915_private *dev_priv)
 {
 	struct intel_rps *rps = &dev_priv->gt_pm.rps;
@@ -1695,21 +1833,6 @@ static void valleyview_enable_rps(struct drm_i915_private *dev_priv)
 	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
 }
 
-static unsigned long intel_pxfreq(u32 vidfreq)
-{
-	unsigned long freq;
-	int div = (vidfreq & 0x3f0000) >> 16;
-	int post = (vidfreq & 0x3000) >> 12;
-	int pre = (vidfreq & 0x7);
-
-	if (!pre)
-		return 0;
-
-	freq = ((div * 133333) / ((1<<post) * pre));
-
-	return freq;
-}
-
 static const struct cparams {
 	u16 i;
 	u16 t;
@@ -1727,37 +1850,30 @@ static const struct cparams {
 static unsigned long __i915_chipset_val(struct drm_i915_private *dev_priv)
 {
 	struct intel_ips *ips = &dev_priv->gt_pm.ips;
-	u64 total_count, diff, ret;
-	u32 count1, count2, count3, m = 0, c = 0;
-	unsigned long now = jiffies_to_msecs(jiffies), diff1;
+	u64 total_count;
+	ktime_t dt, now;
+	u32 m = 0, c = 0;
 	int i;
 
 	lockdep_assert_held(&mchdev_lock);
 
-	diff1 = now - ips->last_time1;
+	/* FIXME: handle per-counter overflow */
+
+	total_count = I915_READ(DMIEC);
+	total_count += I915_READ(DDREC);
+	total_count += I915_READ(CSIEC);
+	now = ktime_get_raw();
 
-	/* Prevent division-by-zero if we are asking too fast.
+	/*
+	 * Prevent division-by-zero if we are asking too fast.
 	 * Also, we don't get interesting results if we are polling
 	 * faster than once in 10ms, so just return the saved value
 	 * in such cases.
 	 */
-	if (diff1 <= 10)
+	dt = ktime_sub(now, ips->last_time1);
+	if (ktime_to_ms(dt) <= 10)
 		return ips->chipset_power;
 
-	count1 = I915_READ(DMIEC);
-	count2 = I915_READ(DDREC);
-	count3 = I915_READ(CSIEC);
-
-	total_count = count1 + count2 + count3;
-
-	/* FIXME: handle per-counter overflow */
-	if (total_count < ips->last_count1) {
-		diff = ~0UL - ips->last_count1;
-		diff += total_count;
-	} else {
-		diff = total_count - ips->last_count1;
-	}
-
 	for (i = 0; i < ARRAY_SIZE(cparams); i++) {
 		if (cparams[i].i == ips->c_m && cparams[i].t == ips->r_t) {
 			m = cparams[i].m;
@@ -1766,31 +1882,28 @@ static unsigned long __i915_chipset_val(struct drm_i915_private *dev_priv)
 		}
 	}
 
-	diff = div_u64(diff, diff1);
-	ret = ((m * diff) + c);
-	ret = div_u64(ret, 10);
+	ips->chipset_power = div_u64(m * (total_count - ips->last_count1) + c,
+				     ktime_to_ms(dt) * 10);
 
 	ips->last_count1 = total_count;
 	ips->last_time1 = now;
 
-	ips->chipset_power = ret;
-
-	return ret;
+	return ips->chipset_power;
 }
 
 unsigned long i915_chipset_val(struct drm_i915_private *dev_priv)
 {
 	unsigned long val;
 
-	if (INTEL_INFO(dev_priv)->gen != 5)
+	if (INTEL_GEN(dev_priv) != 5)
 		return 0;
 
 	intel_runtime_pm_get(dev_priv);
-	spin_lock_irq(&mchdev_lock);
+	spin_lock(&mchdev_lock);
 
 	val = __i915_chipset_val(dev_priv);
 
-	spin_unlock_irq(&mchdev_lock);
+	spin_unlock(&mchdev_lock);
 	intel_runtime_pm_put(dev_priv);
 
 	return val;
@@ -1808,7 +1921,7 @@ unsigned long i915_mch_val(struct drm_i915_private *dev_priv)
 
 	b = tsfs & TSFS_INTR_MASK;
 
-	return ((m * x) / 127) - b;
+	return m * x / 127 - b;
 }
 
 static int _pxvid_to_vd(u8 pxvid)
@@ -1836,48 +1949,34 @@ static u32 pvid_to_extvid(struct drm_i915_private *dev_priv, u8 pxvid)
 static void __i915_update_gfx_val(struct drm_i915_private *dev_priv)
 {
 	struct intel_ips *ips = &dev_priv->gt_pm.ips;
-	u64 now, diff, diffms;
+	ktime_t now, dt;
 	u32 count;
 
 	lockdep_assert_held(&mchdev_lock);
 
-	now = ktime_get_raw_ns();
-	diffms = now - ips->last_time2;
-	do_div(diffms, NSEC_PER_MSEC);
+	count = I915_READ(GFXEC);
 
-	/* Don't divide by 0 */
-	if (!diffms)
+	now = ktime_get_raw();
+	dt = ktime_sub(now, ips->last_time2);
+	if (ktime_to_ms(dt) <= 10)
 		return;
 
-	count = I915_READ(GFXEC);
-
-	if (count < ips->last_count2) {
-		diff = ~0UL - ips->last_count2;
-		diff += count;
-	} else {
-		diff = count - ips->last_count2;
-	}
+	/* More magic constants... */
+	ips->gfx_power = div_u64(1181ull * (count - ips->last_count2),
+				 ktime_to_ms(dt) * 10);
 
 	ips->last_count2 = count;
 	ips->last_time2 = now;
-
-	/* More magic constants... */
-	diff = diff * 1181;
-	diff = div_u64(diff, diffms * 10);
-	ips->gfx_power = diff;
 }
 
-void i915_update_gfx_val(struct drm_i915_private *dev_priv)
+static void gen5_update_gfx_val(struct drm_i915_private *dev_priv)
 {
-	if (INTEL_INFO(dev_priv)->gen != 5)
-		return;
-
 	intel_runtime_pm_get(dev_priv);
-	spin_lock_irq(&mchdev_lock);
+	spin_lock(&mchdev_lock);
 
 	__i915_update_gfx_val(dev_priv);
 
-	spin_unlock_irq(&mchdev_lock);
+	spin_unlock(&mchdev_lock);
 	intel_runtime_pm_put(dev_priv);
 }
 
@@ -1909,7 +2008,7 @@ static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv)
 
 	corr = corr * ((150142 * state1) / 10000 - 78642);
 	corr /= 100000;
-	corr2 = (corr * ips->corr);
+	corr2 = corr * ips->corr;
 
 	state2 = (corr2 * state1) / 10000;
 	state2 /= 100; /* convert to mW */
@@ -1927,11 +2026,11 @@ unsigned long i915_gfx_val(struct drm_i915_private *dev_priv)
 		return 0;
 
 	intel_runtime_pm_get(dev_priv);
-	spin_lock_irq(&mchdev_lock);
+	spin_lock(&mchdev_lock);
 
 	val = __i915_gfx_val(dev_priv);
 
-	spin_unlock_irq(&mchdev_lock);
+	spin_unlock(&mchdev_lock);
 	intel_runtime_pm_put(dev_priv);
 
 	return val;
@@ -1969,8 +2068,10 @@ unsigned long i915_read_mch_val(void)
 
 	intel_runtime_pm_get(i915);
 	spin_lock_irq(&mchdev_lock);
+
 	chipset_val = __i915_chipset_val(i915);
 	graphics_val = __i915_gfx_val(i915);
+
 	spin_unlock_irq(&mchdev_lock);
 	intel_runtime_pm_put(i915);
 
@@ -1979,30 +2080,36 @@ unsigned long i915_read_mch_val(void)
 }
 EXPORT_SYMBOL_GPL(i915_read_mch_val);
 
-/**
- * i915_gpu_raise - raise GPU frequency limit
- *
- * Raise the limit; IPS indicates we have thermal headroom.
- */
-bool i915_gpu_raise(void)
+static bool ips_adjust(int dir)
 {
 	struct drm_i915_private *i915;
-	struct intel_ips *ips;
+	struct intel_rps *rps;
+	u8 old, new;
 
 	i915 = mchdev_get();
 	if (!i915)
 		return false;
 
-	ips = &i915->gt_pm.ips;
+	rps = &i915->gt_pm.rps;
 
-	spin_lock_irq(&mchdev_lock);
-	if (ips->max_delay > ips->fmax)
-		ips->max_delay--;
-	spin_unlock_irq(&mchdev_lock);
+	old = READ_ONCE(rps->max_freq_soft);
+	new = clamp_t(int, old + dir, rps->min_freq_hw, rps->max_freq_hw);
+	if (cmpxchg(&rps->max_freq_soft, old, new) == old)
+		schedule_work(&rps->work);
 
 	drm_dev_put(&i915->drm);
 	return true;
 }
+
+/**
+ * i915_gpu_raise - raise GPU frequency limit
+ *
+ * Raise the limit; IPS indicates we have thermal headroom.
+ */
+bool i915_gpu_raise(void)
+{
+	return ips_adjust(+1);
+}
 EXPORT_SYMBOL_GPL(i915_gpu_raise);
 
 /**
@@ -2013,22 +2120,7 @@ EXPORT_SYMBOL_GPL(i915_gpu_raise);
  */
 bool i915_gpu_lower(void)
 {
-	struct drm_i915_private *i915;
-	struct intel_ips *ips;
-
-	i915 = mchdev_get();
-	if (!i915)
-		return false;
-
-	ips = &i915->gt_pm.ips;
-
-	spin_lock_irq(&mchdev_lock);
-	if (ips->max_delay < ips->min_delay)
-		ips->max_delay++;
-	spin_unlock_irq(&mchdev_lock);
-
-	drm_dev_put(&i915->drm);
-	return true;
+	return ips_adjust(-1);
 }
 EXPORT_SYMBOL_GPL(i915_gpu_lower);
 
@@ -2039,16 +2131,19 @@ EXPORT_SYMBOL_GPL(i915_gpu_lower);
  */
 bool i915_gpu_busy(void)
 {
 	struct drm_i915_private *i915;
-	bool ret;
-
-	i915 = mchdev_get();
-	if (!i915)
-		return false;
+	bool ret = false;
 
-	ret = i915->gt.awake;
+	/*
+	 * Sample the pointer exactly once: intel_gpu_ips_teardown() may
+	 * clear i915_mch_dev between a NULL check and a second load.
+	 */
+	rcu_read_lock();
+	i915 = READ_ONCE(i915_mch_dev);
+	if (i915)
+		ret = i915->gt.awake;
+	rcu_read_unlock();
 
-	drm_dev_put(&i915->drm);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(i915_gpu_busy);
@@ -2062,22 +2151,33 @@ EXPORT_SYMBOL_GPL(i915_gpu_busy);
 bool i915_gpu_turbo_disable(void)
 {
 	struct drm_i915_private *i915;
-	bool ret;
 
 	i915 = mchdev_get();
 	if (!i915)
 		return false;
 
-	spin_lock_irq(&mchdev_lock);
-	i915->gt_pm.ips.max_delay = i915->gt_pm.ips.fstart;
-	ret = ironlake_set_drps(i915, i915->gt_pm.ips.fstart);
-	spin_unlock_irq(&mchdev_lock);
+	intel_gt_disable_rps(i915);
 
 	drm_dev_put(&i915->drm);
-	return ret;
+	return true;
 }
 EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable);
 
+bool i915_gpu_turbo_enable(void)
+{
+	struct drm_i915_private *i915;
+
+	i915 = mchdev_get();
+	if (!i915)
+		return false;
+
+	intel_gt_enable_rps(i915);
+
+	drm_dev_put(&i915->drm);
+	return true;
+}
+EXPORT_SYMBOL_GPL(i915_gpu_turbo_enable);
+
 /**
  * Tells the intel_ips driver that the i915 driver is now loaded, if
  * IPS got loaded first.
@@ -2112,76 +2212,6 @@ void intel_gpu_ips_teardown(void)
 	smp_store_mb(i915_mch_dev, NULL);
 }
 
-static void intel_init_emon(struct drm_i915_private *dev_priv)
-{
-	u32 lcfuse;
-	u8 pxw[16];
-	int i;
-
-	/* Disable to program */
-	I915_WRITE(ECR, 0);
-	POSTING_READ(ECR);
-
-	/* Program energy weights for various events */
-	I915_WRITE(SDEW, 0x15040d00);
-	I915_WRITE(CSIEW0, 0x007f0000);
-	I915_WRITE(CSIEW1, 0x1e220004);
-	I915_WRITE(CSIEW2, 0x04000004);
-
-	for (i = 0; i < 5; i++)
-		I915_WRITE(PEW(i), 0);
-	for (i = 0; i < 3; i++)
-		I915_WRITE(DEW(i), 0);
-
-	/* Program P-state weights to account for frequency power adjustment */
-	for (i = 0; i < 16; i++) {
-		u32 pxvidfreq = I915_READ(PXVFREQ(i));
-		unsigned long freq = intel_pxfreq(pxvidfreq);
-		unsigned long vid = (pxvidfreq & PXVFREQ_PX_MASK) >>
-			PXVFREQ_PX_SHIFT;
-		unsigned long val;
-
-		val = vid * vid;
-		val *= (freq / 1000);
-		val *= 255;
-		val /= (127*127*900);
-		if (val > 0xff)
-			DRM_ERROR("bad pxval: %ld\n", val);
-		pxw[i] = val;
-	}
-	/* Render standby states get 0 weight */
-	pxw[14] = 0;
-	pxw[15] = 0;
-
-	for (i = 0; i < 4; i++) {
-		u32 val = (pxw[i*4] << 24) | (pxw[(i*4)+1] << 16) |
-			(pxw[(i*4)+2] << 8) | (pxw[(i*4)+3]);
-		I915_WRITE(PXW(i), val);
-	}
-
-	/* Adjust magic regs to magic values (more experimental results) */
-	I915_WRITE(OGW0, 0);
-	I915_WRITE(OGW1, 0);
-	I915_WRITE(EG0, 0x00007f00);
-	I915_WRITE(EG1, 0x0000000e);
-	I915_WRITE(EG2, 0x000e0000);
-	I915_WRITE(EG3, 0x68000300);
-	I915_WRITE(EG4, 0x42000000);
-	I915_WRITE(EG5, 0x00140031);
-	I915_WRITE(EG6, 0);
-	I915_WRITE(EG7, 0);
-
-	for (i = 0; i < 8; i++)
-		I915_WRITE(PXWL(i), 0);
-
-	/* Enable PMON + select events */
-	I915_WRITE(ECR, 0x80000019);
-
-	lcfuse = I915_READ(LCFUSE02);
-
-	dev_priv->gt_pm.ips.corr = (lcfuse & LCFUSE_HIV_MASK);
-}
-
 void intel_gt_pm_sanitize(struct drm_i915_private *dev_priv)
 {
 	intel_gt_disable_rps(dev_priv);
@@ -2215,6 +2245,8 @@ void intel_gt_pm_init(struct drm_i915_private *dev_priv)
 		valleyview_init_gt_powersave(dev_priv);
 	else if (INTEL_GEN(dev_priv) >= 6)
 		gen6_init_rps_frequencies(dev_priv);
+	else if (INTEL_GEN(dev_priv) >= 5)
+		gen5_init_gt_powersave(dev_priv);
 
 	/* Derive initial user preferences/limits from the hardware limits */
 	rps->idle_freq = rps->min_freq_hw;
@@ -2242,6 +2274,9 @@ void intel_gt_pm_init(struct drm_i915_private *dev_priv)
 		}
 	}
 
+	rps->max_freq_soft = rps->max_freq_hw;
+	rps->min_freq_soft = rps->min_freq_hw;
+
 	/* Finally allow us to boost to max by default */
 	rps->boost_freq = rps->max_freq_hw;
 
@@ -2289,7 +2324,6 @@ static void __enable_rps(struct drm_i915_private *dev_priv)
 		gen6_enable_rps(dev_priv);
 	} else if (INTEL_GEN(dev_priv) >= 5) {
 		ironlake_enable_drps(dev_priv);
-		intel_init_emon(dev_priv);
 	}
 
 	WARN_ON(rps->max_freq_hw < rps->min_freq_hw);
diff --git a/drivers/platform/x86/intel_ips.c b/drivers/platform/x86/intel_ips.c
index a0c95853fd3f..da7443baff55 100644
--- a/drivers/platform/x86/intel_ips.c
+++ b/drivers/platform/x86/intel_ips.c
@@ -336,6 +336,7 @@ struct ips_driver {
 	bool (*gpu_lower)(void);
 	bool (*gpu_busy)(void);
 	bool (*gpu_turbo_disable)(void);
+	bool (*gpu_turbo_enable)(void);
 
 	/* For restoration at unload */
 	u64 orig_turbo_limit;
@@ -575,7 +576,11 @@ static void ips_enable_gpu_turbo(struct ips_driver *ips)
 {
 	if (ips->__gpu_turbo_on)
 		return;
-	ips->__gpu_turbo_on = true;
+
+	if (!ips->gpu_turbo_enable())
+		dev_err(ips->dev, "failed to enable graphics turbo\n");
+	else
+		ips->__gpu_turbo_on = true;
 }
 
 /**
@@ -1432,9 +1437,14 @@ static bool ips_get_i915_syms(struct ips_driver *ips)
 	ips->gpu_turbo_disable = symbol_get(i915_gpu_turbo_disable);
 	if (!ips->gpu_turbo_disable)
 		goto out_put_busy;
+	ips->gpu_turbo_enable = symbol_get(i915_gpu_turbo_enable);
+	if (!ips->gpu_turbo_enable)
+		goto out_put_disable;
 
 	return true;
 
+out_put_disable:
+	symbol_put(i915_gpu_turbo_disable);
 out_put_busy:
 	symbol_put(i915_gpu_busy);
 out_put_lower:
@@ -1676,6 +1686,8 @@ static void ips_remove(struct pci_dev *dev)
 		symbol_put(i915_gpu_busy);
 	if (ips->gpu_turbo_disable)
 		symbol_put(i915_gpu_turbo_disable);
+	if (ips->gpu_turbo_enable)
+		symbol_put(i915_gpu_turbo_enable);
 
 	rdmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
 	turbo_override &= ~(TURBO_TDC_OVR_EN | TURBO_TDP_OVR_EN);
diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
index c9e5a6621b95..6ee5d77cc923 100644
--- a/include/drm/i915_drm.h
+++ b/include/drm/i915_drm.h
@@ -35,6 +35,7 @@ extern bool i915_gpu_raise(void);
 extern bool i915_gpu_lower(void);
 extern bool i915_gpu_busy(void);
 extern bool i915_gpu_turbo_disable(void);
+extern bool i915_gpu_turbo_enable(void);
 
 /* Exported from arch/x86/kernel/early-quirks.c */
 extern struct resource intel_graphics_stolen_res;
-- 
2.16.1



More information about the Intel-gfx-trybot mailing list