[Intel-gfx] [PATCH 22/36] drm/i915: Move rps worker to intel_gt_pm.c
Sagar Arun Kamble
sagar.a.kamble at intel.com
Fri Mar 16 07:12:02 UTC 2018
On 3/14/2018 3:07 PM, Chris Wilson wrote:
> The RPS worker exists to do the bidding of the GT powermanagement, so
> move it from i915_irq to intel_gt_pm.c where it can be hidden from the
> rest of the world. The goal being that the RPS worker is the one true
> way through which all RPS updates are coordinated.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Reviewed-by: Sagar Arun Kamble <sagar.a.kamble at intel.com>
> ---
> drivers/gpu/drm/i915/i915_drv.h | 1 -
> drivers/gpu/drm/i915/i915_irq.c | 141 ----------------------------
> drivers/gpu/drm/i915/i915_sysfs.c | 38 ++------
> drivers/gpu/drm/i915/intel_gt_pm.c | 186 ++++++++++++++++++++++++++++++-------
> drivers/gpu/drm/i915/intel_gt_pm.h | 1 -
> 5 files changed, 162 insertions(+), 205 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 5c10acf767a8..a57b20f95cdc 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -3406,7 +3406,6 @@ extern void i915_redisable_vga(struct drm_i915_private *dev_priv);
> extern void i915_redisable_vga_power_on(struct drm_i915_private *dev_priv);
> extern bool ironlake_set_drps(struct drm_i915_private *dev_priv, u8 val);
> extern void intel_init_pch_refclk(struct drm_i915_private *dev_priv);
> -extern int intel_set_rps(struct drm_i915_private *dev_priv, u8 val);
> extern bool intel_set_memory_cxsr(struct drm_i915_private *dev_priv,
> bool enable);
>
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index f815da0dd991..d9cf4f81979e 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1130,145 +1130,6 @@ static void notify_ring(struct intel_engine_cs *engine)
> trace_intel_engine_notify(engine, wait);
> }
>
> -static void vlv_c0_read(struct drm_i915_private *dev_priv,
> - struct intel_rps_ei *ei)
> -{
> - ei->ktime = ktime_get_raw();
> - ei->render_c0 = I915_READ(VLV_RENDER_C0_COUNT);
> - ei->media_c0 = I915_READ(VLV_MEDIA_C0_COUNT);
> -}
> -
> -void gen6_rps_reset_ei(struct drm_i915_private *dev_priv)
> -{
> - memset(&dev_priv->gt_pm.rps.ei, 0, sizeof(dev_priv->gt_pm.rps.ei));
> -}
> -
> -static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir)
> -{
> - struct intel_rps *rps = &dev_priv->gt_pm.rps;
> - const struct intel_rps_ei *prev = &rps->ei;
> - struct intel_rps_ei now;
> - u32 events = 0;
> -
> - if ((pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) == 0)
> - return 0;
> -
> - vlv_c0_read(dev_priv, &now);
> -
> - if (prev->ktime) {
> - u64 time, c0;
> - u32 render, media;
> -
> - time = ktime_us_delta(now.ktime, prev->ktime);
> -
> - time *= dev_priv->czclk_freq;
> -
> - /* Workload can be split between render + media,
> - * e.g. SwapBuffers being blitted in X after being rendered in
> - * mesa. To account for this we need to combine both engines
> - * into our activity counter.
> - */
> - render = now.render_c0 - prev->render_c0;
> - media = now.media_c0 - prev->media_c0;
> - c0 = max(render, media);
> - c0 *= 1000 * 100 << 8; /* to usecs and scale to threshold% */
> -
> - if (c0 > time * rps->up_threshold)
> - events = GEN6_PM_RP_UP_THRESHOLD;
> - else if (c0 < time * rps->down_threshold)
> - events = GEN6_PM_RP_DOWN_THRESHOLD;
> - }
> -
> - rps->ei = now;
> - return events;
> -}
> -
> -static void gen6_pm_rps_work(struct work_struct *work)
> -{
> - struct drm_i915_private *dev_priv =
> - container_of(work, struct drm_i915_private, gt_pm.rps.work);
> - struct intel_rps *rps = &dev_priv->gt_pm.rps;
> - bool client_boost = false;
> - int new_delay, adj, min, max;
> - u32 pm_iir = 0;
> -
> - spin_lock_irq(&dev_priv->irq_lock);
> - if (rps->interrupts_enabled) {
> - pm_iir = fetch_and_zero(&rps->pm_iir);
> - client_boost = atomic_read(&rps->num_waiters);
> - }
> - spin_unlock_irq(&dev_priv->irq_lock);
> -
> - /* Make sure we didn't queue anything we're not going to process. */
> - WARN_ON(pm_iir & ~dev_priv->pm_rps_events);
> - if ((pm_iir & dev_priv->pm_rps_events) == 0 && !client_boost)
> - goto out;
> -
> - mutex_lock(&rps->lock);
> -
> - pm_iir |= vlv_wa_c0_ei(dev_priv, pm_iir);
> -
> - adj = rps->last_adj;
> - new_delay = rps->cur_freq;
> - min = rps->min_freq_softlimit;
> - max = rps->max_freq_softlimit;
> - if (client_boost)
> - max = rps->max_freq;
> - if (client_boost && new_delay < rps->boost_freq) {
> - new_delay = rps->boost_freq;
> - adj = 0;
> - } else if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) {
> - if (adj > 0)
> - adj *= 2;
> - else /* CHV needs even encode values */
> - adj = IS_CHERRYVIEW(dev_priv) ? 2 : 1;
> -
> - if (new_delay >= rps->max_freq_softlimit)
> - adj = 0;
> - } else if (client_boost) {
> - adj = 0;
> - } else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) {
> - if (rps->cur_freq > rps->efficient_freq)
> - new_delay = rps->efficient_freq;
> - else if (rps->cur_freq > rps->min_freq_softlimit)
> - new_delay = rps->min_freq_softlimit;
> - adj = 0;
> - } else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) {
> - if (adj < 0)
> - adj *= 2;
> - else /* CHV needs even encode values */
> - adj = IS_CHERRYVIEW(dev_priv) ? -2 : -1;
> -
> - if (new_delay <= rps->min_freq_softlimit)
> - adj = 0;
> - } else { /* unknown event */
> - adj = 0;
> - }
> -
> - rps->last_adj = adj;
> -
> - /* sysfs frequency interfaces may have snuck in while servicing the
> - * interrupt
> - */
> - new_delay += adj;
> - new_delay = clamp_t(int, new_delay, min, max);
> -
> - if (intel_set_rps(dev_priv, new_delay)) {
> - DRM_DEBUG_DRIVER("Failed to set new GPU frequency\n");
> - rps->last_adj = 0;
> - }
> -
> - mutex_unlock(&rps->lock);
> -
> -out:
> - /* Make sure not to corrupt PMIMR state used by ringbuffer on GEN6 */
> - spin_lock_irq(&dev_priv->irq_lock);
> - if (rps->interrupts_enabled)
> - gen6_unmask_pm_irq(dev_priv, dev_priv->pm_rps_events);
> - spin_unlock_irq(&dev_priv->irq_lock);
> -}
> -
> -
> /**
> * ivybridge_parity_work - Workqueue called when a parity error interrupt
> * occurred.
> @@ -4239,8 +4100,6 @@ void intel_irq_init(struct drm_i915_private *dev_priv)
>
> intel_hpd_init_work(dev_priv);
>
> - INIT_WORK(&rps->work, gen6_pm_rps_work);
> -
> INIT_WORK(&dev_priv->l3_parity.error_work, ivybridge_parity_work);
> for (i = 0; i < MAX_L3_SLICES; ++i)
> dev_priv->l3_parity.remap_info[i] = NULL;
> diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
> index fde5f0139ca1..a72aab28399f 100644
> --- a/drivers/gpu/drm/i915/i915_sysfs.c
> +++ b/drivers/gpu/drm/i915/i915_sysfs.c
> @@ -355,17 +355,16 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev,
> {
> struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
> struct intel_rps *rps = &dev_priv->gt_pm.rps;
> - u32 val;
> ssize_t ret;
> + u32 val;
>
> ret = kstrtou32(buf, 0, &val);
> if (ret)
> return ret;
>
> - intel_runtime_pm_get(dev_priv);
> - mutex_lock(&rps->lock);
> -
> val = intel_freq_opcode(dev_priv, val);
> +
> + mutex_lock(&rps->lock);
> if (val < rps->min_freq ||
> val > rps->max_freq ||
> val < rps->min_freq_softlimit) {
> @@ -378,19 +377,11 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev,
> intel_gpu_freq(dev_priv, val));
>
> rps->max_freq_softlimit = val;
> -
> - val = clamp_t(int, rps->cur_freq,
> - rps->min_freq_softlimit,
> - rps->max_freq_softlimit);
> -
> - /* We still need *_set_rps to process the new max_delay and
> - * update the interrupt limits and PMINTRMSK even though
> - * frequency request may be unchanged. */
> - ret = intel_set_rps(dev_priv, val);
> + schedule_work(&rps->work);
>
> unlock:
> mutex_unlock(&rps->lock);
> - intel_runtime_pm_put(dev_priv);
> + flush_work(&rps->work);
>
> return ret ?: count;
> }
> @@ -410,17 +401,16 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev,
> {
> struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
> struct intel_rps *rps = &dev_priv->gt_pm.rps;
> - u32 val;
> ssize_t ret;
> + u32 val;
>
> ret = kstrtou32(buf, 0, &val);
> if (ret)
> return ret;
>
> - intel_runtime_pm_get(dev_priv);
> - mutex_lock(&rps->lock);
> -
> val = intel_freq_opcode(dev_priv, val);
> +
> + mutex_lock(&rps->lock);
> if (val < rps->min_freq ||
> val > rps->max_freq ||
> val > rps->max_freq_softlimit) {
> @@ -429,19 +419,11 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev,
> }
>
> rps->min_freq_softlimit = val;
> -
> - val = clamp_t(int, rps->cur_freq,
> - rps->min_freq_softlimit,
> - rps->max_freq_softlimit);
> -
> - /* We still need *_set_rps to process the new min_delay and
> - * update the interrupt limits and PMINTRMSK even though
> - * frequency request may be unchanged. */
> - ret = intel_set_rps(dev_priv, val);
> + schedule_work(&rps->work);
>
> unlock:
> mutex_unlock(&rps->lock);
> - intel_runtime_pm_put(dev_priv);
> + flush_work(&rps->work);
>
> return ret ?: count;
> }
> diff --git a/drivers/gpu/drm/i915/intel_gt_pm.c b/drivers/gpu/drm/i915/intel_gt_pm.c
> index 763bf9378ae8..293cea1221af 100644
> --- a/drivers/gpu/drm/i915/intel_gt_pm.c
> +++ b/drivers/gpu/drm/i915/intel_gt_pm.c
> @@ -328,13 +328,7 @@ static int gen6_set_rps(struct drm_i915_private *dev_priv, u8 val)
> {
> struct intel_rps *rps = &dev_priv->gt_pm.rps;
>
> - /*
> - * min/max delay may still have been modified so be sure to
> - * write the limits value.
> - */
> if (val != rps->cur_freq) {
> - gen6_set_rps_thresholds(dev_priv, val);
> -
> if (INTEL_GEN(dev_priv) >= 9)
> I915_WRITE(GEN6_RPNSWREQ,
> GEN9_FREQUENCY(val));
> @@ -348,6 +342,8 @@ static int gen6_set_rps(struct drm_i915_private *dev_priv, u8 val)
> GEN6_AGGRESSIVE_TURBO);
> }
>
> + gen6_set_rps_thresholds(dev_priv, val);
> +
> /*
> * Make sure we continue to get interrupts
> * until we hit the minimum or maximum frequencies.
> @@ -369,18 +365,17 @@ static int valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val)
> "Odd GPU freq value\n"))
> val &= ~1;
>
> - I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
> -
> if (val != dev_priv->gt_pm.rps.cur_freq) {
> vlv_punit_get(dev_priv);
> err = vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val);
> vlv_punit_put(dev_priv);
> if (err)
> return err;
> -
> - gen6_set_rps_thresholds(dev_priv, val);
> }
>
> + gen6_set_rps_thresholds(dev_priv, val);
> + I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
> +
> dev_priv->gt_pm.rps.cur_freq = val;
> trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val));
>
> @@ -425,6 +420,151 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
> DRM_ERROR("Failed to set RPS for idle\n");
> }
>
> +static int intel_set_rps(struct drm_i915_private *dev_priv, u8 val)
> +{
> + struct intel_rps *rps = &dev_priv->gt_pm.rps;
> + int err;
> +
> + lockdep_assert_held(&rps->lock);
> + GEM_BUG_ON(val > rps->max_freq);
> + GEM_BUG_ON(val < rps->min_freq);
> +
> + if (!rps->enabled) {
> + rps->cur_freq = val;
> + return 0;
> + }
> +
> + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
> + err = valleyview_set_rps(dev_priv, val);
> + else
> + err = gen6_set_rps(dev_priv, val);
> +
> + return err;
> +}
> +
> +static void vlv_c0_read(struct drm_i915_private *dev_priv,
> + struct intel_rps_ei *ei)
> +{
> + ei->ktime = ktime_get_raw();
> + ei->render_c0 = I915_READ(VLV_RENDER_C0_COUNT);
> + ei->media_c0 = I915_READ(VLV_MEDIA_C0_COUNT);
> +}
> +
> +static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir)
> +{
> + struct intel_rps *rps = &dev_priv->gt_pm.rps;
> + const struct intel_rps_ei *prev = &rps->ei;
> + struct intel_rps_ei now;
> + u32 events = 0;
> +
> + if ((pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) == 0)
> + return 0;
> +
> + vlv_c0_read(dev_priv, &now);
> +
> + if (prev->ktime) {
> + u64 time, c0;
> + u32 render, media;
> +
> + time = ktime_us_delta(now.ktime, prev->ktime);
> +
> + time *= dev_priv->czclk_freq;
> +
> + /* Workload can be split between render + media,
> + * e.g. SwapBuffers being blitted in X after being rendered in
> + * mesa. To account for this we need to combine both engines
> + * into our activity counter.
> + */
> + render = now.render_c0 - prev->render_c0;
> + media = now.media_c0 - prev->media_c0;
> + c0 = max(render, media);
> + c0 *= 1000 * 100 << 8; /* to usecs and scale to threshold% */
> +
> + if (c0 > time * rps->up_threshold)
> + events = GEN6_PM_RP_UP_THRESHOLD;
> + else if (c0 < time * rps->down_threshold)
> + events = GEN6_PM_RP_DOWN_THRESHOLD;
> + }
> +
> + rps->ei = now;
> + return events;
> +}
> +
> +static void intel_rps_work(struct work_struct *work)
> +{
> + struct drm_i915_private *i915 =
> + container_of(work, struct drm_i915_private, gt_pm.rps.work);
> + struct intel_rps *rps = &i915->gt_pm.rps;
> + int freq, adj, min, max;
> + bool client_boost;
> + u32 pm_iir;
> +
> + pm_iir = xchg(&rps->pm_iir, 0) & ~i915->pm_rps_events;
> + pm_iir |= vlv_wa_c0_ei(i915, pm_iir);
> +
> + client_boost = atomic_read(&rps->num_waiters);
> +
> + mutex_lock(&rps->lock);
> +
> + min = rps->min_freq_softlimit;
> + max = rps->max_freq_softlimit;
> + if (client_boost && max < rps->boost_freq)
> + max = rps->boost_freq;
> +
> + GEM_BUG_ON(min < rps->min_freq);
> + GEM_BUG_ON(max > rps->max_freq);
> + GEM_BUG_ON(max < min);
> +
> + adj = rps->last_adj;
> + freq = rps->cur_freq;
> + if (client_boost && freq < rps->boost_freq) {
> + freq = rps->boost_freq;
> + adj = 0;
> + } else if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) {
> + if (adj > 0)
> + adj *= 2;
> + else /* CHV needs even encode values */
> + adj = IS_CHERRYVIEW(i915) ? 2 : 1;
> +
> + if (freq >= max)
> + adj = 0;
> + } else if (client_boost) {
> + adj = 0;
> + } else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) {
> + if (freq > max_t(int, rps->efficient_freq, min))
> + freq = max_t(int, rps->efficient_freq, min);
> + else if (freq > min_t(int, rps->efficient_freq, min))
> + freq = min_t(int, rps->efficient_freq, min);
> +
> + adj = 0;
> + } else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) {
> + if (adj < 0)
> + adj *= 2;
> + else /* CHV needs even encode values */
> + adj = IS_CHERRYVIEW(i915) ? -2 : -1;
> +
> + if (freq <= min)
> + adj = 0;
> + } else { /* unknown/external event */
> + adj = 0;
> + }
> +
> + if (intel_set_rps(i915, clamp_t(int, freq + adj, min, max))) {
> + DRM_DEBUG_DRIVER("Failed to set new GPU frequency\n");
> + adj = 0;
> + }
> +
> + mutex_unlock(&rps->lock);
> +
> + if (pm_iir) {
> + spin_lock_irq(&i915->irq_lock);
> + if (rps->interrupts_enabled)
> + gen6_unmask_pm_irq(i915, i915->pm_rps_events);
> + spin_unlock_irq(&i915->irq_lock);
> + rps->last_adj = adj;
> + }
> +}
> +
> void gen6_rps_busy(struct drm_i915_private *dev_priv)
> {
> struct intel_rps *rps = &dev_priv->gt_pm.rps;
> @@ -433,12 +573,11 @@ void gen6_rps_busy(struct drm_i915_private *dev_priv)
> if (rps->enabled) {
> u8 freq;
>
> - if (dev_priv->pm_rps_events & GEN6_PM_RP_UP_EI_EXPIRED)
> - gen6_rps_reset_ei(dev_priv);
> I915_WRITE(GEN6_PMINTRMSK,
> gen6_rps_pm_mask(dev_priv, rps->cur_freq));
>
> gen6_enable_rps_interrupts(dev_priv);
> + memset(&rps->ei, 0, sizeof(rps->ei));
>
> /*
> * Use the user's desired frequency as a guide, but for better
> @@ -514,28 +653,6 @@ void gen6_rps_boost(struct i915_request *rq, struct intel_rps_client *client)
> atomic_inc(client ? &client->boosts : &rps->boosts);
> }
>
> -int intel_set_rps(struct drm_i915_private *dev_priv, u8 val)
> -{
> - struct intel_rps *rps = &dev_priv->gt_pm.rps;
> - int err;
> -
> - lockdep_assert_held(&rps->lock);
> - GEM_BUG_ON(val > rps->max_freq);
> - GEM_BUG_ON(val < rps->min_freq);
> -
> - if (!rps->enabled) {
> - rps->cur_freq = val;
> - return 0;
> - }
> -
> - if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
> - err = valleyview_set_rps(dev_priv, val);
> - else
> - err = gen6_set_rps(dev_priv, val);
> -
> - return err;
> -}
> -
> static void gen9_disable_rc6(struct drm_i915_private *dev_priv)
> {
> I915_WRITE(GEN6_RC_CONTROL, 0);
> @@ -2119,6 +2236,7 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv)
> struct intel_rps *rps = &dev_priv->gt_pm.rps;
>
> mutex_init(&rps->lock);
> + INIT_WORK(&rps->work, intel_rps_work);
>
> /*
> * RPM depends on RC6 to save restore the GT HW context, so make RC6 a
> diff --git a/drivers/gpu/drm/i915/intel_gt_pm.h b/drivers/gpu/drm/i915/intel_gt_pm.h
> index ab4f73a39ce6..f760226e5048 100644
> --- a/drivers/gpu/drm/i915/intel_gt_pm.h
> +++ b/drivers/gpu/drm/i915/intel_gt_pm.h
> @@ -39,7 +39,6 @@ void intel_disable_gt_powersave(struct drm_i915_private *dev_priv);
> void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv);
>
> void gen6_rps_busy(struct drm_i915_private *dev_priv);
> -void gen6_rps_reset_ei(struct drm_i915_private *dev_priv);
> void gen6_rps_idle(struct drm_i915_private *dev_priv);
> void gen6_rps_boost(struct i915_request *rq, struct intel_rps_client *rps);
>
--
Thanks,
Sagar
More information about the Intel-gfx
mailing list