[Intel-gfx] [PATCH 15/16] drm/i915, intel_ips: Enable GPU wait-boosting with IPS
Chris Wilson
chris at chris-wilson.co.uk
Mon Apr 27 05:41:26 PDT 2015
Refactor the reclocking logic used by RPS on Ironlake to reuse the
infrastructure developed for RPS on Sandybridge+, along with the
waitboosting support for stalled clients and missed frames.
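The one subtlety is that Ironlake's MEMSWCTL frequency field is a delay (a smaller value runs the GPU faster), while the rps bookkeeping treats larger values as faster, so the value is inverted within [rps.min_freq, rps.max_freq] before it is written to the register. A minimal sketch of that mapping, for illustration only (ilk_freq_to_rgvswctl() is a hypothetical helper, not added by this patch; the MEMCTL_* defines are the existing ones from i915_reg.h):

	static u16 ilk_freq_to_rgvswctl(u8 min_freq, u8 max_freq, u8 val)
	{
		/* rps values run [min_freq, max_freq] with larger = faster;
		 * the hardware delay field runs the other way, so invert
		 * within the range before programming MEMSWCTL.
		 */
		u8 delay = max_freq - val + min_freq;

		return (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) |
		       (delay << MEMCTL_FREQ_SHIFT) |
		       MEMCTL_SFCAVM;
	}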
Reported-by: dimon at gmx.net
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90137
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Jesse Barnes <jesse at virtuousgeek.org>
---
drivers/gpu/drm/i915/i915_drv.h | 8 ---
drivers/gpu/drm/i915/i915_irq.c | 83 +++++++++++------------
drivers/gpu/drm/i915/i915_sysfs.c | 10 +++
drivers/gpu/drm/i915/intel_display.c | 1 -
drivers/gpu/drm/i915/intel_pm.c | 124 ++++++++++++++++++++++++-----------
drivers/platform/x86/intel_ips.c | 14 +++-
include/drm/i915_drm.h | 1 +
7 files changed, 149 insertions(+), 92 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 7a260da815ad..c35723ace814 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1088,12 +1088,6 @@ struct intel_gen6_power_mgmt {
extern spinlock_t mchdev_lock;
struct intel_ilk_power_mgmt {
- u8 cur_delay;
- u8 min_delay;
- u8 max_delay;
- u8 fmax;
- u8 fstart;
-
u64 last_count1;
unsigned long last_time1;
unsigned long chipset_power;
@@ -2533,7 +2527,6 @@ extern int i915_reset(struct drm_device *dev);
extern unsigned long i915_chipset_val(struct drm_i915_private *dev_priv);
extern unsigned long i915_mch_val(struct drm_i915_private *dev_priv);
extern unsigned long i915_gfx_val(struct drm_i915_private *dev_priv);
-extern void i915_update_gfx_val(struct drm_i915_private *dev_priv);
int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool on);
void intel_hpd_cancel_work(struct drm_i915_private *dev_priv);
@@ -3175,7 +3168,6 @@ extern void intel_modeset_setup_hw_state(struct drm_device *dev,
bool force_restore);
extern void i915_redisable_vga(struct drm_device *dev);
extern void i915_redisable_vga_power_on(struct drm_device *dev);
-extern bool ironlake_set_drps(struct drm_device *dev, u8 val);
extern void intel_init_pch_refclk(struct drm_device *dev);
extern void intel_set_rps(struct drm_i915_private *dev_priv, u8 val);
extern void intel_set_memory_cxsr(struct drm_i915_private *dev_priv,
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 375633356ac0..234a6e004a4d 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -952,45 +952,6 @@ static void i915_hotplug_work_func(struct work_struct *work)
drm_kms_helper_hotplug_event(dev);
}
-static void ironlake_rps_change_irq_handler(struct drm_device *dev)
-{
- struct drm_i915_private *dev_priv = dev->dev_private;
- u32 busy_up, busy_down, max_avg, min_avg;
- u8 new_delay;
-
- spin_lock(&mchdev_lock);
-
- I915_WRITE16(MEMINTRSTS, I915_READ(MEMINTRSTS));
-
- new_delay = dev_priv->ips.cur_delay;
-
- I915_WRITE16(MEMINTRSTS, MEMINT_EVAL_CHG);
- busy_up = I915_READ(RCPREVBSYTUPAVG);
- busy_down = I915_READ(RCPREVBSYTDNAVG);
- max_avg = I915_READ(RCBMAXAVG);
- min_avg = I915_READ(RCBMINAVG);
-
- /* Handle RCS change request from hw */
- if (busy_up > max_avg) {
- if (dev_priv->ips.cur_delay != dev_priv->ips.max_delay)
- new_delay = dev_priv->ips.cur_delay - 1;
- if (new_delay < dev_priv->ips.max_delay)
- new_delay = dev_priv->ips.max_delay;
- } else if (busy_down < min_avg) {
- if (dev_priv->ips.cur_delay != dev_priv->ips.min_delay)
- new_delay = dev_priv->ips.cur_delay + 1;
- if (new_delay > dev_priv->ips.min_delay)
- new_delay = dev_priv->ips.min_delay;
- }
-
- if (ironlake_set_drps(dev, new_delay))
- dev_priv->ips.cur_delay = new_delay;
-
- spin_unlock(&mchdev_lock);
-
- return;
-}
-
static void notify_ring(struct intel_engine_cs *ring)
{
if (!intel_ring_initialized(ring))
@@ -1039,6 +1000,36 @@ void gen6_rps_reset_ei(struct drm_i915_private *dev_priv)
dev_priv->rps.up_ei = dev_priv->rps.down_ei;
}
+static u32 ilk_compute_pm_iir(struct drm_i915_private *dev_priv)
+{
+ u32 pm_iir;
+
+ spin_lock(&mchdev_lock);
+ I915_WRITE16(MEMINTRSTS, I915_READ(MEMINTRSTS));
+ I915_WRITE16(MEMINTRSTS, MEMINT_EVAL_CHG);
+
+#define busy_up I915_READ(RCPREVBSYTUPAVG)
+#define busy_down I915_READ(RCPREVBSYTDNAVG)
+#define max_avg I915_READ(RCBMAXAVG)
+#define min_avg I915_READ(RCBMINAVG)
+
+ if (busy_up > max_avg)
+ pm_iir = GEN6_PM_RP_UP_THRESHOLD;
+ else if (busy_down < min_avg)
+ pm_iir = GEN6_PM_RP_DOWN_THRESHOLD;
+ else
+ pm_iir = 0;
+
+#undef busy_up
+#undef busy_down
+#undef max_avg
+#undef min_avg
+
+ spin_unlock(&mchdev_lock);
+
+ return pm_iir;
+}
+
static u32 vlv_wa_c0_ei(struct drm_i915_private *dev_priv, u32 pm_iir)
{
struct intel_rps_ei now;
@@ -1095,10 +1086,14 @@ static void gen6_pm_rps_work(struct work_struct *work)
spin_unlock_irq(&dev_priv->irq_lock);
return;
}
- pm_iir = dev_priv->rps.pm_iir;
- dev_priv->rps.pm_iir = 0;
- /* Make sure not to corrupt PMIMR state used by ringbuffer on GEN6 */
- gen6_enable_pm_irq(dev_priv, dev_priv->pm_rps_events);
+ if (IS_GEN5(dev_priv)) {
+ pm_iir = ilk_compute_pm_iir(dev_priv);
+ } else {
+ pm_iir = dev_priv->rps.pm_iir;
+ dev_priv->rps.pm_iir = 0;
+ /* Make sure not to corrupt PMIMR state used by ringbuffer */
+ gen6_enable_pm_irq(dev_priv, dev_priv->pm_rps_events);
+ }
spin_unlock_irq(&dev_priv->irq_lock);
/* Make sure we didn't queue anything we're not going to process. */
@@ -2045,7 +2040,7 @@ static void ilk_display_irq_handler(struct drm_device *dev, u32 de_iir)
}
if (IS_GEN5(dev) && de_iir & DE_PCU_EVENT)
- ironlake_rps_change_irq_handler(dev);
+ queue_work(dev_priv->wq, &dev_priv->rps.work);
}
static void ivb_display_irq_handler(struct drm_device *dev, u32 de_iir)
diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
index df5636093397..6f770e7f92db 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -522,6 +522,14 @@ static ssize_t gt_rp_mhz_show(struct device *kdev, struct device_attribute *attr
return snprintf(buf, PAGE_SIZE, "%d\n", val);
}
+static const struct attribute *gen5_attrs[] = {
+ &dev_attr_gt_cur_freq_mhz.attr,
+ &dev_attr_gt_max_freq_mhz.attr,
+ &dev_attr_gt_min_freq_mhz.attr,
+ &dev_attr_gt_RP0_freq_mhz.attr,
+ &dev_attr_gt_RPn_freq_mhz.attr,
+ NULL,
+};
static const struct attribute *gen6_attrs[] = {
&dev_attr_gt_act_freq_mhz.attr,
&dev_attr_gt_cur_freq_mhz.attr,
@@ -652,6 +660,8 @@ void i915_setup_sysfs(struct drm_device *dev)
ret = sysfs_create_files(&dev->primary->kdev->kobj, vlv_attrs);
else if (INTEL_INFO(dev)->gen >= 6)
ret = sysfs_create_files(&dev->primary->kdev->kobj, gen6_attrs);
+ else if (INTEL_INFO(dev)->gen >= 5)
+ ret = sysfs_create_files(&dev->primary->kdev->kobj, gen5_attrs);
if (ret)
DRM_ERROR("RPS sysfs setup failed\n");
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index f63f194141db..e228070031ed 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -10081,7 +10081,6 @@ void intel_mark_busy(struct drm_device *dev)
return;
intel_runtime_pm_get(dev_priv);
- i915_update_gfx_val(dev_priv);
intel_rps_busy(dev_priv);
dev_priv->mm.busy = true;
}
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 0dcc7bb47f71..0cc9e95f70d3 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3727,21 +3727,29 @@ DEFINE_SPINLOCK(mchdev_lock);
* mchdev_lock. */
static struct drm_i915_private *i915_mch_dev;
-bool ironlake_set_drps(struct drm_device *dev, u8 val)
+static bool __ironlake_set_rps(struct drm_i915_private *dev_priv, u8 val)
{
- struct drm_i915_private *dev_priv = dev->dev_private;
u16 rgvswctl;
+ if (WARN_ON(val < dev_priv->rps.min_freq))
+ return false;
+ if (WARN_ON(val > dev_priv->rps.max_freq))
+ return false;
+
assert_spin_locked(&mchdev_lock);
- rgvswctl = I915_READ16(MEMSWCTL);
- if (rgvswctl & MEMCTL_CMD_STS) {
+ if (wait_for_atomic((I915_READ(MEMSWCTL) & MEMCTL_CMD_STS) == 0, 10)) {
DRM_DEBUG("gpu busy, RCS change rejected\n");
return false; /* still busy with another command */
}
- rgvswctl = (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) |
- (val << MEMCTL_FREQ_SHIFT) | MEMCTL_SFCAVM;
+ dev_priv->rps.cur_freq = val;
+ val = dev_priv->rps.max_freq - val + dev_priv->rps.min_freq;
+
+ rgvswctl =
+ (MEMCTL_CMD_CHFREQ << MEMCTL_CMD_SHIFT) |
+ (val << MEMCTL_FREQ_SHIFT) |
+ MEMCTL_SFCAVM;
I915_WRITE16(MEMSWCTL, rgvswctl);
POSTING_READ16(MEMSWCTL);
@@ -3751,6 +3759,13 @@ bool ironlake_set_drps(struct drm_device *dev, u8 val)
return true;
}
+static void ironlake_set_rps(struct drm_i915_private *dev_priv, u8 val)
+{
+ spin_lock_irq(&mchdev_lock);
+ __ironlake_set_rps(dev_priv, val);
+ spin_unlock_irq(&mchdev_lock);
+}
+
static void ironlake_enable_drps(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
@@ -3782,16 +3797,18 @@ static void ironlake_enable_drps(struct drm_device *dev)
vstart = (I915_READ(PXVFREQ_BASE + (fstart * 4)) & PXVFREQ_PX_MASK) >>
PXVFREQ_PX_SHIFT;
- dev_priv->ips.fmax = fmax; /* IPS callback will increase this */
- dev_priv->ips.fstart = fstart;
-
- dev_priv->ips.max_delay = fstart;
- dev_priv->ips.min_delay = fmin;
- dev_priv->ips.cur_delay = fstart;
+ dev_priv->rps.max_freq = fmin;
+ dev_priv->rps.min_freq = fmax;
+ dev_priv->rps.cur_freq = fmin - fstart;
DRM_DEBUG_DRIVER("fmax: %d, fmin: %d, fstart: %d\n",
fmax, fmin, fstart);
+ dev_priv->rps.max_freq_softlimit = dev_priv->rps.min_freq;
+ dev_priv->rps.min_freq_softlimit = dev_priv->rps.min_freq;
+ dev_priv->rps.efficient_freq = dev_priv->rps.cur_freq;
+ dev_priv->rps.idle_freq = dev_priv->rps.min_freq;
+
I915_WRITE(MEMINTREN, MEMINT_CX_SUPR_EN | MEMINT_EVAL_CHG_EN);
/*
@@ -3808,7 +3825,7 @@ static void ironlake_enable_drps(struct drm_device *dev)
DRM_ERROR("stuck trying to change perf mode\n");
mdelay(1);
- ironlake_set_drps(dev, fstart);
+ __ironlake_set_rps(dev_priv, dev_priv->rps.cur_freq);
dev_priv->ips.last_count1 = I915_READ(0x112e4) + I915_READ(0x112e8) +
I915_READ(0x112e0);
@@ -3836,7 +3853,7 @@ static void ironlake_disable_drps(struct drm_device *dev)
I915_WRITE(DEIMR, I915_READ(DEIMR) | DE_PCU_EVENT);
/* Go back to the starting frequency */
- ironlake_set_drps(dev, dev_priv->ips.fstart);
+ __ironlake_set_rps(dev_priv, dev_priv->rps.efficient_freq);
mdelay(1);
rgvswctl |= MEMCTL_CMD_STS;
I915_WRITE(MEMSWCTL, rgvswctl);
@@ -4086,29 +4103,37 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)
I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val));
}
+static void i915_update_gfx_val(struct drm_i915_private *dev_priv);
+
void intel_rps_busy(struct drm_i915_private *dev_priv)
{
- if (INTEL_INFO(dev_priv)->gen < 6)
+ if (INTEL_INFO(dev_priv)->gen < 5)
return;
- mutex_lock(&dev_priv->rps.hw_lock);
- if (dev_priv->rps.enabled) {
- if (dev_priv->pm_rps_events & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED))
- gen6_rps_reset_ei(dev_priv);
- I915_WRITE(GEN6_PMINTRMSK,
- gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq));
+ if (INTEL_INFO(dev_priv)->gen >= 6) {
+ mutex_lock(&dev_priv->rps.hw_lock);
+ if (dev_priv->rps.enabled) {
+ if (dev_priv->pm_rps_events & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED))
+ gen6_rps_reset_ei(dev_priv);
+ I915_WRITE(GEN6_PMINTRMSK,
+ gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq));
+ }
+ mutex_unlock(&dev_priv->rps.hw_lock);
}
- mutex_unlock(&dev_priv->rps.hw_lock);
+
+ i915_update_gfx_val(dev_priv);
}
void intel_rps_idle(struct drm_i915_private *dev_priv)
{
- if (INTEL_INFO(dev_priv)->gen < 6)
+ if (INTEL_INFO(dev_priv)->gen < 5)
return;
mutex_lock(&dev_priv->rps.hw_lock);
if (dev_priv->rps.enabled) {
- if (IS_VALLEYVIEW(dev_priv))
+ if (IS_GEN5(dev_priv))
+ ironlake_set_rps(dev_priv, dev_priv->rps.idle_freq);
+ else if (IS_VALLEYVIEW(dev_priv))
vlv_set_rps_idle(dev_priv);
else
gen6_set_rps(dev_priv, dev_priv->rps.idle_freq);
@@ -4119,6 +4144,8 @@ void intel_rps_idle(struct drm_i915_private *dev_priv)
while (!list_empty(&dev_priv->rps.clients))
list_del_init(dev_priv->rps.clients.next);
mutex_unlock(&dev_priv->rps.hw_lock);
+
+ i915_update_gfx_val(dev_priv);
}
void intel_rps_boost(struct drm_i915_private *dev_priv,
@@ -4153,7 +4180,9 @@ void intel_rps_boost(struct drm_i915_private *dev_priv,
void intel_set_rps(struct drm_i915_private *dev_priv, u8 val)
{
- if (IS_VALLEYVIEW(dev_priv))
+ if (IS_GEN5(dev_priv))
+ ironlake_set_rps(dev_priv, val);
+ else if (IS_VALLEYVIEW(dev_priv))
valleyview_set_rps(dev_priv, val);
else if (INTEL_INFO(dev_priv)->gen > 6)
gen6_set_rps(dev_priv, val);
@@ -5366,11 +5395,9 @@ static void __i915_update_gfx_val(struct drm_i915_private *dev_priv)
dev_priv->ips.gfx_power = diff;
}
-void i915_update_gfx_val(struct drm_i915_private *dev_priv)
+static void i915_update_gfx_val(struct drm_i915_private *dev_priv)
{
- struct drm_device *dev = dev_priv->dev;
-
- if (INTEL_INFO(dev)->gen != 5)
+ if (INTEL_INFO(dev_priv)->gen != 5)
return;
spin_lock_irq(&mchdev_lock);
@@ -5419,10 +5446,9 @@ static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv)
unsigned long i915_gfx_val(struct drm_i915_private *dev_priv)
{
- struct drm_device *dev = dev_priv->dev;
unsigned long val;
- if (INTEL_INFO(dev)->gen != 5)
+ if (INTEL_INFO(dev_priv)->gen != 5)
return 0;
spin_lock_irq(&mchdev_lock);
@@ -5479,8 +5505,8 @@ bool i915_gpu_raise(void)
}
dev_priv = i915_mch_dev;
- if (dev_priv->ips.max_delay > dev_priv->ips.fmax)
- dev_priv->ips.max_delay--;
+ if (dev_priv->rps.max_freq_softlimit < dev_priv->rps.max_freq)
+ dev_priv->rps.max_freq_softlimit++;
out_unlock:
spin_unlock_irq(&mchdev_lock);
@@ -5507,8 +5533,8 @@ bool i915_gpu_lower(void)
}
dev_priv = i915_mch_dev;
- if (dev_priv->ips.max_delay < dev_priv->ips.min_delay)
- dev_priv->ips.max_delay++;
+ if (dev_priv->rps.max_freq_softlimit > dev_priv->rps.min_freq)
+ dev_priv->rps.max_freq_softlimit--;
out_unlock:
spin_unlock_irq(&mchdev_lock);
@@ -5562,9 +5588,10 @@ bool i915_gpu_turbo_disable(void)
}
dev_priv = i915_mch_dev;
- dev_priv->ips.max_delay = dev_priv->ips.fstart;
+ dev_priv->rps.max_freq_softlimit = dev_priv->rps.min_freq;
+ dev_priv->rps.enabled = false;
- if (!ironlake_set_drps(dev_priv->dev, dev_priv->ips.fstart))
+ if (!__ironlake_set_rps(dev_priv, dev_priv->rps.min_freq))
ret = false;
out_unlock:
@@ -5574,6 +5601,27 @@ out_unlock:
}
EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable);
+bool i915_gpu_turbo_enable(void)
+{
+ struct drm_i915_private *dev_priv;
+ bool ret = true;
+
+ spin_lock_irq(&mchdev_lock);
+ if (!i915_mch_dev) {
+ ret = false;
+ goto out_unlock;
+ }
+ dev_priv = i915_mch_dev;
+
+ dev_priv->rps.max_freq_softlimit = dev_priv->rps.max_freq;
+ dev_priv->rps.enabled = true;
+
+out_unlock:
+ spin_unlock_irq(&mchdev_lock);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(i915_gpu_turbo_enable);
+
/**
* Tells the intel_ips driver that the i915 driver is now loaded, if
* IPS got loaded first.
@@ -6844,7 +6892,7 @@ void intel_queue_rps_boost_for_request(struct drm_device *dev,
{
struct request_boost *boost;
- if (rq == NULL || INTEL_INFO(dev)->gen < 6)
+ if (rq == NULL || INTEL_INFO(dev)->gen < 5)
return;
if (i915_gem_request_completed(rq, true))
diff --git a/drivers/platform/x86/intel_ips.c b/drivers/platform/x86/intel_ips.c
index e2065e06a3f3..16030fbbd611 100644
--- a/drivers/platform/x86/intel_ips.c
+++ b/drivers/platform/x86/intel_ips.c
@@ -338,6 +338,7 @@ struct ips_driver {
bool (*gpu_lower)(void);
bool (*gpu_busy)(void);
bool (*gpu_turbo_disable)(void);
+ bool (*gpu_turbo_enable)(void);
/* For restoration at unload */
u64 orig_turbo_limit;
@@ -577,7 +578,11 @@ static void ips_enable_gpu_turbo(struct ips_driver *ips)
{
if (ips->__gpu_turbo_on)
return;
- ips->__gpu_turbo_on = true;
+
+ if (!ips->gpu_turbo_enable())
+ dev_err(&ips->dev->dev, "failed to enable graphics turbo\n");
+ else
+ ips->__gpu_turbo_on = true;
}
/**
@@ -1438,9 +1443,14 @@ static bool ips_get_i915_syms(struct ips_driver *ips)
ips->gpu_turbo_disable = symbol_get(i915_gpu_turbo_disable);
if (!ips->gpu_turbo_disable)
goto out_put_busy;
+ ips->gpu_turbo_enable = symbol_get(i915_gpu_turbo_enable);
+ if (!ips->gpu_turbo_enable)
+ goto out_put_disable;
return true;
+out_put_disable:
+ symbol_put(i915_gpu_turbo_disable);
out_put_busy:
symbol_put(i915_gpu_busy);
out_put_lower:
@@ -1702,6 +1712,8 @@ static void ips_remove(struct pci_dev *dev)
symbol_put(i915_gpu_busy);
if (ips->gpu_turbo_disable)
symbol_put(i915_gpu_turbo_disable);
+ if (ips->gpu_turbo_enable)
+ symbol_put(i915_gpu_turbo_enable);
rdmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
turbo_override &= ~(TURBO_TDC_OVR_EN | TURBO_TDP_OVR_EN);
diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
index 595f85c392ac..406710c30658 100644
--- a/include/drm/i915_drm.h
+++ b/include/drm/i915_drm.h
@@ -35,6 +35,7 @@ extern bool i915_gpu_raise(void);
extern bool i915_gpu_lower(void);
extern bool i915_gpu_busy(void);
extern bool i915_gpu_turbo_disable(void);
+extern bool i915_gpu_turbo_enable(void);
/*
* The Bridge device's PCI config space has information about the
--
2.1.4