[PATCH 26/41] drm/i915: Move rps worker to intel_gt_pm.c

Chris Wilson <chris@chris-wilson.co.uk>
Fri Mar 9 11:34:59 UTC 2018

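The RPS worker is tightly coupled to the RPS state that now lives in
intel_gt_pm.c, so move it there from i915_irq.c (as intel_rps_work) and
make intel_set_rps() static in the process.

This lets the sysfs min/max frequency stores simply update the
softlimits and kick the worker (flushing it before returning to
userspace) instead of calling intel_set_rps() themselves, and lets the
worker claim its pending events with xchg() rather than taking the irq
spinlock. gen6_rps_reset_ei() is absorbed into gen6_rps_busy().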

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h    |   1 -
 drivers/gpu/drm/i915/i915_irq.c    | 141 ---------------------------
 drivers/gpu/drm/i915/i915_sysfs.c  |  38 ++------
 drivers/gpu/drm/i915/intel_gt_pm.c | 190 ++++++++++++++++++++++++++++++-------
 drivers/gpu/drm/i915/intel_gt_pm.h |   1 -
 5 files changed, 166 insertions(+), 205 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index d7b34fe8ca39..b3f79a43cd97 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3735,7 +3735,6 @@ extern void i915_redisable_vga(struct drm_i915_private *dev_priv);
 extern void i915_redisable_vga_power_on(struct drm_i915_private *dev_priv);
 extern bool ironlake_set_drps(struct drm_i915_private *dev_priv, u8 val);
 extern void intel_init_pch_refclk(struct drm_i915_private *dev_priv);
-extern int intel_set_rps(struct drm_i915_private *dev_priv, u8 val);
 extern bool intel_set_memory_cxsr(struct drm_i915_private *dev_priv,
 				  bool enable);
 
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 4d13ecd954cd..5a817b7e3212 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1130,145 +1130,6 @@ static void notify_ring(struct intel_engine_cs *engine)
 	trace_intel_engine_notify(engine, wait);
 }
 
-static void vlv_c0_read(struct drm_i915_private *dev_priv,
-			struct intel_rps_ei *ei)
-{
-	ei->ktime = ktime_get_raw();
-	ei->render_c0 = I915_READ(VLV_RENDER_C0_COUNT);
-	ei->media_c0 = I915_READ(VLV_MEDIA_C0_COUNT);
-}
-
-void gen6_rps_reset_ei(struct drm_i915_private *dev_priv)
-{
-	memset(&dev_priv->gt_pm.rps.ei, 0, sizeof(dev_priv->gt_pm.rps.ei));
-}
-
-static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir)
-{
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-	const struct intel_rps_ei *prev = &rps->ei;
-	struct intel_rps_ei now;
-	u32 events = 0;
-
-	if ((pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) == 0)
-		return 0;
-
-	vlv_c0_read(dev_priv, &now);
-
-	if (prev->ktime) {
-		u64 time, c0;
-		u32 render, media;
-
-		time = ktime_us_delta(now.ktime, prev->ktime);
-
-		time *= dev_priv->czclk_freq;
-
-		/* Workload can be split between render + media,
-		 * e.g. SwapBuffers being blitted in X after being rendered in
-		 * mesa. To account for this we need to combine both engines
-		 * into our activity counter.
-		 */
-		render = now.render_c0 - prev->render_c0;
-		media = now.media_c0 - prev->media_c0;
-		c0 = max(render, media);
-		c0 *= 1000 * 100 << 8; /* to usecs and scale to threshold% */
-
-		if (c0 > time * rps->up_threshold)
-			events = GEN6_PM_RP_UP_THRESHOLD;
-		else if (c0 < time * rps->down_threshold)
-			events = GEN6_PM_RP_DOWN_THRESHOLD;
-	}
-
-	rps->ei = now;
-	return events;
-}
-
-static void gen6_pm_rps_work(struct work_struct *work)
-{
-	struct drm_i915_private *dev_priv =
-		container_of(work, struct drm_i915_private, gt_pm.rps.work);
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-	bool client_boost = false;
-	int new_delay, adj, min, max;
-	u32 pm_iir = 0;
-
-	spin_lock_irq(&dev_priv->irq_lock);
-	if (rps->interrupts_enabled) {
-		pm_iir = fetch_and_zero(&rps->pm_iir);
-		client_boost = atomic_read(&rps->num_waiters);
-	}
-	spin_unlock_irq(&dev_priv->irq_lock);
-
-	/* Make sure we didn't queue anything we're not going to process. */
-	WARN_ON(pm_iir & ~dev_priv->pm_rps_events);
-	if ((pm_iir & dev_priv->pm_rps_events) == 0 && !client_boost)
-		goto out;
-
-	mutex_lock(&rps->lock);
-
-	pm_iir |= vlv_wa_c0_ei(dev_priv, pm_iir);
-
-	adj = rps->last_adj;
-	new_delay = rps->cur_freq;
-	min = rps->min_freq_softlimit;
-	max = rps->max_freq_softlimit;
-	if (client_boost)
-		max = rps->max_freq;
-	if (client_boost && new_delay < rps->boost_freq) {
-		new_delay = rps->boost_freq;
-		adj = 0;
-	} else if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) {
-		if (adj > 0)
-			adj *= 2;
-		else /* CHV needs even encode values */
-			adj = IS_CHERRYVIEW(dev_priv) ? 2 : 1;
-
-		if (new_delay >= rps->max_freq_softlimit)
-			adj = 0;
-	} else if (client_boost) {
-		adj = 0;
-	} else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) {
-		if (rps->cur_freq > rps->efficient_freq)
-			new_delay = rps->efficient_freq;
-		else if (rps->cur_freq > rps->min_freq_softlimit)
-			new_delay = rps->min_freq_softlimit;
-		adj = 0;
-	} else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) {
-		if (adj < 0)
-			adj *= 2;
-		else /* CHV needs even encode values */
-			adj = IS_CHERRYVIEW(dev_priv) ? -2 : -1;
-
-		if (new_delay <= rps->min_freq_softlimit)
-			adj = 0;
-	} else { /* unknown event */
-		adj = 0;
-	}
-
-	rps->last_adj = adj;
-
-	/* sysfs frequency interfaces may have snuck in while servicing the
-	 * interrupt
-	 */
-	new_delay += adj;
-	new_delay = clamp_t(int, new_delay, min, max);
-
-	if (intel_set_rps(dev_priv, new_delay)) {
-		DRM_DEBUG_DRIVER("Failed to set new GPU frequency\n");
-		rps->last_adj = 0;
-	}
-
-	mutex_unlock(&rps->lock);
-
-out:
-	/* Make sure not to corrupt PMIMR state used by ringbuffer on GEN6 */
-	spin_lock_irq(&dev_priv->irq_lock);
-	if (rps->interrupts_enabled)
-		gen6_unmask_pm_irq(dev_priv, dev_priv->pm_rps_events);
-	spin_unlock_irq(&dev_priv->irq_lock);
-}
-
-
 /**
  * ivybridge_parity_work - Workqueue called when a parity error interrupt
  * occurred.
@@ -4267,8 +4128,6 @@ void intel_irq_init(struct drm_i915_private *dev_priv)
 
 	intel_hpd_init_work(dev_priv);
 
-	INIT_WORK(&rps->work, gen6_pm_rps_work);
-
 	INIT_WORK(&dev_priv->l3_parity.error_work, ivybridge_parity_work);
 	for (i = 0; i < MAX_L3_SLICES; ++i)
 		dev_priv->l3_parity.remap_info[i] = NULL;
diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
index fde5f0139ca1..a72aab28399f 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -355,17 +355,16 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev,
 {
 	struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
 	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-	u32 val;
 	ssize_t ret;
+	u32 val;
 
 	ret = kstrtou32(buf, 0, &val);
 	if (ret)
 		return ret;
 
-	intel_runtime_pm_get(dev_priv);
-	mutex_lock(&rps->lock);
-
 	val = intel_freq_opcode(dev_priv, val);
+
+	mutex_lock(&rps->lock);
 	if (val < rps->min_freq ||
 	    val > rps->max_freq ||
 	    val < rps->min_freq_softlimit) {
@@ -378,19 +377,11 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev,
 			  intel_gpu_freq(dev_priv, val));
 
 	rps->max_freq_softlimit = val;
-
-	val = clamp_t(int, rps->cur_freq,
-		      rps->min_freq_softlimit,
-		      rps->max_freq_softlimit);
-
-	/* We still need *_set_rps to process the new max_delay and
-	 * update the interrupt limits and PMINTRMSK even though
-	 * frequency request may be unchanged. */
-	ret = intel_set_rps(dev_priv, val);
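+
+	/*
+	 * Kick the worker to reclamp the current frequency against the new
+	 * limits and to update the interrupt limits and PMINTRMSK, even if
+	 * the frequency request itself is unchanged.
+	 */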
+	schedule_work(&rps->work);
 
 unlock:
 	mutex_unlock(&rps->lock);
-	intel_runtime_pm_put(dev_priv);
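+	/* Serialise with the worker so the new limits apply on return */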
+	flush_work(&rps->work);
 
 	return ret ?: count;
 }
@@ -410,17 +401,16 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev,
 {
 	struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
 	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-	u32 val;
 	ssize_t ret;
+	u32 val;
 
 	ret = kstrtou32(buf, 0, &val);
 	if (ret)
 		return ret;
 
-	intel_runtime_pm_get(dev_priv);
-	mutex_lock(&rps->lock);
-
 	val = intel_freq_opcode(dev_priv, val);
+
+	mutex_lock(&rps->lock);
 	if (val < rps->min_freq ||
 	    val > rps->max_freq ||
 	    val > rps->max_freq_softlimit) {
@@ -429,19 +419,11 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev,
 	}
 
 	rps->min_freq_softlimit = val;
-
-	val = clamp_t(int, rps->cur_freq,
-		      rps->min_freq_softlimit,
-		      rps->max_freq_softlimit);
-
-	/* We still need *_set_rps to process the new min_delay and
-	 * update the interrupt limits and PMINTRMSK even though
-	 * frequency request may be unchanged. */
-	ret = intel_set_rps(dev_priv, val);
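+
+	/*
+	 * Kick the worker to reclamp the current frequency against the new
+	 * limits and to update the interrupt limits and PMINTRMSK, even if
+	 * the frequency request itself is unchanged.
+	 */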
+	schedule_work(&rps->work);
 
 unlock:
 	mutex_unlock(&rps->lock);
-	intel_runtime_pm_put(dev_priv);
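+	/* Serialise with the worker so the new limits apply on return */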
+	flush_work(&rps->work);
 
 	return ret ?: count;
 }
diff --git a/drivers/gpu/drm/i915/intel_gt_pm.c b/drivers/gpu/drm/i915/intel_gt_pm.c
index 22ac33218e87..64572d66b45f 100644
--- a/drivers/gpu/drm/i915/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/intel_gt_pm.c
@@ -336,12 +336,7 @@ static int gen6_set_rps(struct drm_i915_private *dev_priv, u8 val)
 {
 	struct intel_rps *rps = &dev_priv->gt_pm.rps;
 
-	/* min/max delay may still have been modified so be sure to
-	 * write the limits value.
-	 */
 	if (val != rps->cur_freq) {
-		gen6_set_rps_thresholds(dev_priv, val);
-
 		if (INTEL_GEN(dev_priv) >= 9)
 			I915_WRITE(GEN6_RPNSWREQ,
 				   GEN9_FREQUENCY(val));
@@ -355,7 +350,10 @@ static int gen6_set_rps(struct drm_i915_private *dev_priv, u8 val)
 				   GEN6_AGGRESSIVE_TURBO);
 	}
 
-	/* Make sure we continue to get interrupts
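+	/*
+	 * Always reprogram the RP thresholds; the min/max limits may have
+	 * been modified even when the frequency request is unchanged.
+	 */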
+	gen6_set_rps_thresholds(dev_priv, val);
+
+	/*
+	 * Make sure we continue to get interrupts
 	 * until we hit the minimum or maximum frequencies.
 	 */
 	I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, intel_rps_limits(dev_priv, val));
@@ -375,18 +373,17 @@ static int valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val)
 		      "Odd GPU freq value\n"))
 		val &= ~1;
 
-	I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
-
 	if (val != dev_priv->gt_pm.rps.cur_freq) {
 		vlv_punit_get(dev_priv);
 		err = vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val);
 		vlv_punit_put(dev_priv);
 		if (err)
 			return err;
-
-		gen6_set_rps_thresholds(dev_priv, val);
 	}
 
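+	/*
+	 * Always update the thresholds and the interrupt mask; the limits
+	 * may have changed even if the frequency request has not.
+	 */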
+	gen6_set_rps_thresholds(dev_priv, val);
+	I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
+
 	dev_priv->gt_pm.rps.cur_freq = val;
 	trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));
 
@@ -429,6 +426,153 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
 		DRM_ERROR("Failed to set RPS for idle\n");
 }
 
+static int intel_set_rps(struct drm_i915_private *dev_priv, u8 val)
+{
+	struct intel_rps *rps = &dev_priv->gt_pm.rps;
+	int err;
+
+	lockdep_assert_held(&rps->lock);
+	GEM_BUG_ON(val > rps->max_freq);
+	GEM_BUG_ON(val < rps->min_freq);
+
+	if (!rps->enabled) {
+		rps->cur_freq = val;
+		return 0;
+	}
+
+	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
+		err = valleyview_set_rps(dev_priv, val);
+	else
+		err = gen6_set_rps(dev_priv, val);
+
+	return err;
+}
+
+static void vlv_c0_read(struct drm_i915_private *dev_priv,
+			struct intel_rps_ei *ei)
+{
+	ei->ktime = ktime_get_raw();
+	ei->render_c0 = I915_READ(VLV_RENDER_C0_COUNT);
+	ei->media_c0 = I915_READ(VLV_MEDIA_C0_COUNT);
+}
+
+static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir)
+{
+	struct intel_rps *rps = &dev_priv->gt_pm.rps;
+	const struct intel_rps_ei *prev = &rps->ei;
+	struct intel_rps_ei now;
+	u32 events = 0;
+
+	if ((pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) == 0)
+		return 0;
+
+	vlv_c0_read(dev_priv, &now);
+
+	if (prev->ktime) {
+		u64 time, c0;
+		u32 render, media;
+
+		time = ktime_us_delta(now.ktime, prev->ktime);
+
+		time *= dev_priv->czclk_freq;
+
+		/* Workload can be split between render + media,
+		 * e.g. SwapBuffers being blitted in X after being rendered in
+		 * mesa. To account for this we need to combine both engines
+		 * into our activity counter.
+		 */
+		render = now.render_c0 - prev->render_c0;
+		media = now.media_c0 - prev->media_c0;
+		c0 = max(render, media);
+		c0 *= 1000 * 100 << 8; /* to usecs and scale to threshold% */
+
+		if (c0 > time * rps->up_threshold)
+			events = GEN6_PM_RP_UP_THRESHOLD;
+		else if (c0 < time * rps->down_threshold)
+			events = GEN6_PM_RP_DOWN_THRESHOLD;
+	}
+
+	rps->ei = now;
+	return events;
+}
+
+static void intel_rps_work(struct work_struct *work)
+{
+	struct drm_i915_private *i915 =
+		container_of(work, struct drm_i915_private, gt_pm.rps.work);
+	struct intel_rps *rps = &i915->gt_pm.rps;
+	int freq, adj, min, max;
+	bool client_boost;
+	u32 pm_iir;
+
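+	/*
+	 * Atomically claim the RPS events accumulated by the interrupt
+	 * handler; using xchg() avoids taking the irq spinlock here.
+	 */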
+	pm_iir = xchg(&rps->pm_iir, 0) & i915->pm_rps_events;
+	pm_iir |= vlv_wa_c0_ei(i915, pm_iir);
+
+	client_boost = atomic_read(&rps->num_waiters);
+
+	mutex_lock(&rps->lock);
+
+	min = rps->min_freq_softlimit;
+	max = rps->max_freq_softlimit;
+	if (client_boost && max < rps->boost_freq)
+		max = rps->boost_freq;
+
+	GEM_BUG_ON(min < rps->min_freq);
+	GEM_BUG_ON(max > rps->max_freq);
+	GEM_BUG_ON(max < min);
+
+	adj = rps->last_adj;
+	freq = rps->cur_freq;
+	if (client_boost && freq < rps->boost_freq) {
+		freq = rps->boost_freq;
+		adj = 0;
+	} else if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) {
+		if (adj > 0)
+			adj *= 2;
+		else /* CHV needs even encode values */
+			adj = IS_CHERRYVIEW(i915) ? 2 : 1;
+
+		if (freq >= max)
+			adj = 0;
+	} else if (client_boost) {
+		adj = 0;
+	} else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) {
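+		/*
+		 * Step down towards the efficient frequency first, then
+		 * towards the minimum; the result is clamped to [min, max]
+		 * below.
+		 */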
+		if (freq > max_t(int, rps->efficient_freq, min))
+			freq = max_t(int, rps->efficient_freq, min);
+		else if (freq > min_t(int, rps->efficient_freq, min))
+			freq = min_t(int, rps->efficient_freq, min);
+
+		adj = 0;
+	} else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) {
+		if (adj < 0)
+			adj *= 2;
+		else /* CHV needs even encode values */
+			adj = IS_CHERRYVIEW(i915) ? -2 : -1;
+
+		if (freq <= min)
+			adj = 0;
+	} else { /* unknown/external event */
+		adj = 0;
+	}
+
+	if (pm_iir)
+		rps->last_adj = adj;
+
+	if (intel_set_rps(i915, clamp_t(int, freq + adj, min, max))) {
+		DRM_DEBUG_DRIVER("Failed to set new GPU frequency\n");
+		rps->last_adj = 0;
+	}
+
+	mutex_unlock(&rps->lock);
+
+	if (pm_iir) {
+		spin_lock_irq(&i915->irq_lock);
+		if (rps->interrupts_enabled)
+			gen6_unmask_pm_irq(i915, i915->pm_rps_events);
+		spin_unlock_irq(&i915->irq_lock);
+	}
+}
+
 void gen6_rps_busy(struct drm_i915_private *dev_priv)
 {
 	struct intel_rps *rps = &dev_priv->gt_pm.rps;
@@ -437,12 +581,11 @@ void gen6_rps_busy(struct drm_i915_private *dev_priv)
 	if (rps->enabled) {
 		u8 freq;
 
-		if (dev_priv->pm_rps_events & GEN6_PM_RP_UP_EI_EXPIRED)
-			gen6_rps_reset_ei(dev_priv);
 		I915_WRITE(GEN6_PMINTRMSK,
 			   gen6_rps_pm_mask(dev_priv, rps->cur_freq));
 
 		gen6_enable_rps_interrupts(dev_priv);
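+		/* Restart C0 residency sampling for the VLV C0 workaround */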
+		memset(&rps->ei, 0, sizeof(rps->ei));
 
 		/* Use the user's desired frequency as a guide, but for better
 		 * performance, jump directly to RPe as our starting frequency.
@@ -515,28 +658,6 @@ void gen6_rps_boost(struct i915_request *rq, struct intel_rps_client *client)
 	atomic_inc(client ? &client->boosts : &rps->boosts);
 }
 
-int intel_set_rps(struct drm_i915_private *dev_priv, u8 val)
-{
-	struct intel_rps *rps = &dev_priv->gt_pm.rps;
-	int err;
-
-	lockdep_assert_held(&rps->lock);
-	GEM_BUG_ON(val > rps->max_freq);
-	GEM_BUG_ON(val < rps->min_freq);
-
-	if (!rps->enabled) {
-		rps->cur_freq = val;
-		return 0;
-	}
-
-	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
-		err = valleyview_set_rps(dev_priv, val);
-	else
-		err = gen6_set_rps(dev_priv, val);
-
-	return err;
-}
-
 static void gen9_disable_rc6(struct drm_i915_private *dev_priv)
 {
 	I915_WRITE(GEN6_RC_CONTROL, 0);
@@ -2092,6 +2213,7 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv)
 	struct intel_rps *rps = &dev_priv->gt_pm.rps;
 
 	mutex_init(&rps->lock);
+	INIT_WORK(&rps->work, intel_rps_work);
 
 	/*
 	 * RPM depends on RC6 to save restore the GT HW context, so make RC6 a
diff --git a/drivers/gpu/drm/i915/intel_gt_pm.h b/drivers/gpu/drm/i915/intel_gt_pm.h
index ab4f73a39ce6..f760226e5048 100644
--- a/drivers/gpu/drm/i915/intel_gt_pm.h
+++ b/drivers/gpu/drm/i915/intel_gt_pm.h
@@ -39,7 +39,6 @@ void intel_disable_gt_powersave(struct drm_i915_private *dev_priv);
 void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv);
 
 void gen6_rps_busy(struct drm_i915_private *dev_priv);
-void gen6_rps_reset_ei(struct drm_i915_private *dev_priv);
 void gen6_rps_idle(struct drm_i915_private *dev_priv);
 void gen6_rps_boost(struct i915_request *rq, struct intel_rps_client *rps);
 
-- 
2.16.2
