[Intel-gfx] [PATCH v3 4/7] drm/i915: add support for checking if we hold an RPM reference

Imre Deak imre.deak at intel.com
Thu Nov 12 08:40:18 PST 2015


Atm, we assert that the device is not suspended until the point when the
HW is truly put to a suspended state. This is fine, but we can catch
more problems if we check the RPM refcount. After that one drops to zero
we shouldn't access the HW any more, although the actual suspend may be
delayed. Based on this change the assert_rpm_wakelock_held helper to
check the refcount instead of the device suspended state.

After this change we need to annotate every place explicitly in the code
where we expect that the HW is in D0 state. Atm in the driver load
function the D0 state is implicit until we enable runtime PM, but for
these asserts to work we need to add explicit RPM get/put calls, so fix
this up.

Another place where the D0 state is implicit even with a 0 RPM refcount
is the system and runtime sudpend/resume handlers and the hangcheck
work. In the former case the susend/resume handlers themselves determine
at which exact spot the HW is truly on/off and in the latter case the
work will be flushed in the suspend handler before turning off the HW.
We regard these cases special and disable the RPM asserts for their
duration. In the hangcheck work we can nevertheless check that the
device is not suspended. Fix up these things.

These explicit annotations also have the positive side effect of
documenting our assumptions better.

This caught additional WARNs from the atomic modeset path, those should
be fixed separately.

v2:
- remove the redundant HAS_RUNTIME_PM check (moved to patch 1) (Ville)
v3:
- use a new dedicated RPM wakelock refcount to also catch cases where
  our own RPM get/put functions were not called (Chris)
- assert also that the new RPM wakelock refcount is 0 in the RPM
  suspend handler (Chris)
- change the assert error message to be more meaningful (Chris)
- prevent false assert errors and check that the RPM wakelock is 0 in
  the RPM resume handler too
- prevent false assert errors in the hangcheck work too
- add a device not suspended assert check to the hangcheck work

Signed-off-by: Imre Deak <imre.deak at intel.com>
---
 drivers/gpu/drm/i915/i915_dma.c         |  7 ++++++
 drivers/gpu/drm/i915/i915_drv.c         | 39 +++++++++++++++++++++++++++++----
 drivers/gpu/drm/i915/i915_drv.h         |  1 +
 drivers/gpu/drm/i915/i915_irq.c         | 12 ++++++++--
 drivers/gpu/drm/i915/intel_drv.h        | 39 +++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/intel_pm.c         |  1 +
 drivers/gpu/drm/i915/intel_runtime_pm.c |  6 +++++
 7 files changed, 99 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index a7289be..780cbcd 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -896,6 +896,8 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
 
 	intel_pm_setup(dev);
 
+	intel_runtime_pm_get(dev_priv);
+
 	intel_display_crc_init(dev);
 
 	i915_dump_device_info(dev_priv);
@@ -1085,6 +1087,8 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
 
 	i915_audio_component_init(dev_priv);
 
+	intel_runtime_pm_put(dev_priv);
+
 	return 0;
 
 out_power_well:
@@ -1120,6 +1124,9 @@ free_priv:
 	kmem_cache_destroy(dev_priv->requests);
 	kmem_cache_destroy(dev_priv->vmas);
 	kmem_cache_destroy(dev_priv->objects);
+
+	intel_runtime_pm_put(dev_priv);
+
 	kfree(dev_priv);
 	return ret;
 }
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 77d183d..24d21bf 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -636,6 +636,8 @@ static int i915_drm_suspend(struct drm_device *dev)
 	dev_priv->modeset_restore = MODESET_SUSPENDED;
 	mutex_unlock(&dev_priv->modeset_restore_lock);
 
+	disable_rpm_asserts(dev_priv);
+
 	/* We do a lot of poking in a lot of registers, make sure they work
 	 * properly. */
 	intel_display_set_init_power(dev_priv, true);
@@ -648,7 +650,7 @@ static int i915_drm_suspend(struct drm_device *dev)
 	if (error) {
 		dev_err(&dev->pdev->dev,
 			"GEM idle failed, resume might fail\n");
-		return error;
+		goto out;
 	}
 
 	intel_guc_suspend(dev);
@@ -695,7 +697,10 @@ static int i915_drm_suspend(struct drm_device *dev)
 	if (HAS_CSR(dev_priv))
 		flush_work(&dev_priv->csr.work);
 
-	return 0;
+out:
+	enable_rpm_asserts(dev_priv);
+
+	return error;
 }
 
 static int i915_drm_suspend_late(struct drm_device *drm_dev, bool hibernation)
@@ -703,6 +708,8 @@ static int i915_drm_suspend_late(struct drm_device *drm_dev, bool hibernation)
 	struct drm_i915_private *dev_priv = drm_dev->dev_private;
 	int ret;
 
+	disable_rpm_asserts(dev_priv);
+
 	intel_power_domains_suspend(dev_priv);
 
 	ret = intel_suspend_complete(dev_priv);
@@ -710,7 +717,7 @@ static int i915_drm_suspend_late(struct drm_device *drm_dev, bool hibernation)
 	if (ret) {
 		DRM_ERROR("Suspend complete failed: %d\n", ret);
 
-		return ret;
+		goto out;
 	}
 
 	pci_disable_device(drm_dev->pdev);
@@ -729,7 +736,10 @@ static int i915_drm_suspend_late(struct drm_device *drm_dev, bool hibernation)
 	if (!(hibernation && INTEL_INFO(dev_priv)->gen < 6))
 		pci_set_power_state(drm_dev->pdev, PCI_D3hot);
 
-	return 0;
+out:
+	enable_rpm_asserts(dev_priv);
+
+	return ret;
 }
 
 int i915_suspend_switcheroo(struct drm_device *dev, pm_message_t state)
@@ -760,6 +770,8 @@ static int i915_drm_resume(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
+	disable_rpm_asserts(dev_priv);
+
 	mutex_lock(&dev->struct_mutex);
 	i915_gem_restore_gtt_mappings(dev);
 	mutex_unlock(&dev->struct_mutex);
@@ -824,6 +836,8 @@ static int i915_drm_resume(struct drm_device *dev)
 
 	drm_kms_helper_poll_enable(dev);
 
+	enable_rpm_asserts(dev_priv);
+
 	return 0;
 }
 
@@ -846,6 +860,8 @@ static int i915_drm_resume_early(struct drm_device *dev)
 
 	pci_set_master(dev->pdev);
 
+	disable_rpm_asserts(dev_priv);
+
 	if (IS_VALLEYVIEW(dev_priv))
 		ret = vlv_resume_prepare(dev_priv, false);
 	if (ret)
@@ -862,6 +878,8 @@ static int i915_drm_resume_early(struct drm_device *dev)
 	intel_uncore_sanitize(dev);
 	intel_power_domains_init_hw(dev_priv, true);
 
+	enable_rpm_asserts(dev_priv);
+
 	return ret;
 }
 
@@ -1494,6 +1512,9 @@ static int intel_runtime_suspend(struct device *device)
 
 		return -EAGAIN;
 	}
+
+	disable_rpm_asserts(dev_priv);
+
 	/*
 	 * We are safe here against re-faults, since the fault handler takes
 	 * an RPM reference.
@@ -1511,11 +1532,16 @@ static int intel_runtime_suspend(struct device *device)
 		DRM_ERROR("Runtime suspend failed, disabling it (%d)\n", ret);
 		intel_runtime_pm_enable_interrupts(dev_priv);
 
+		enable_rpm_asserts(dev_priv);
+
 		return ret;
 	}
 
 	cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
 	intel_uncore_forcewake_reset(dev, false);
+
+	enable_rpm_asserts(dev_priv);
+	WARN_ON_ONCE(atomic_read(&dev_priv->pm.wakelock_count));
 	dev_priv->pm.suspended = true;
 
 	/*
@@ -1559,6 +1585,9 @@ static int intel_runtime_resume(struct device *device)
 
 	DRM_DEBUG_KMS("Resuming device\n");
 
+	WARN_ON_ONCE(atomic_read(&dev_priv->pm.wakelock_count));
+	disable_rpm_asserts(dev_priv);
+
 	intel_opregion_notify_adapter(dev, PCI_D0);
 	dev_priv->pm.suspended = false;
 
@@ -1593,6 +1622,8 @@ static int intel_runtime_resume(struct device *device)
 
 	intel_enable_gt_powersave(dev);
 
+	enable_rpm_asserts(dev_priv);
+
 	if (ret)
 		DRM_ERROR("Runtime resume failed, disabling it (%d)\n", ret);
 	else
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 5628c5a..658cb9b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1599,6 +1599,7 @@ struct skl_wm_level {
  * For more, read the Documentation/power/runtime_pm.txt.
  */
 struct i915_runtime_pm {
+	atomic_t wakelock_count;
 	bool suspended;
 	bool irqs_enabled;
 };
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 825114a..ee3ef69 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -2962,6 +2962,9 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
 	if (!i915.enable_hangcheck)
 		return;
 
+	assert_rpm_device_not_suspended(dev_priv);
+	disable_rpm_asserts(dev_priv);
+
 	for_each_ring(ring, dev_priv, i) {
 		u64 acthd;
 		u32 seqno;
@@ -3053,13 +3056,18 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
 		}
 	}
 
-	if (rings_hung)
-		return i915_handle_error(dev, true, "Ring hung");
+	if (rings_hung) {
+		i915_handle_error(dev, true, "Ring hung");
+		goto out;
+	}
 
 	if (busy_count)
 		/* Reset timer case chip hangs without another request
 		 * being added */
 		i915_queue_hangcheck(dev);
+
+out:
+	enable_rpm_asserts(dev_priv);
 }
 
 void i915_queue_hangcheck(struct drm_device *dev)
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index fabf639..ee403d7 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -1439,6 +1439,45 @@ static inline void
 assert_rpm_wakelock_held(struct drm_i915_private *dev_priv)
 {
 	assert_rpm_device_not_suspended(dev_priv);
+	WARN_ONCE(!atomic_read(&dev_priv->pm.wakelock_count),
+		  "RPM wakelock not held during HW access");
+}
+
+/**
+ * disable_rpm_asserts - disable the RPM assert checks
+ * @dev_priv: i915 device instance
+ *
+ * This function disables all the RPM assert checks. It's meant to be used
+ * only in special circumstances where our rule about the RPM refcount wrt.
+ * the device power state doesn't hold. According to this rule at any point
+ * where we access the HW or want to keep the HW in an active state we must
+ * hold an RPM reference acquired via one of the intel_runtime_pm_get()
+ * helpers. Currently there are a few special spots where this rule doesn't
+ * hold: the suspend/resume handlers, the forcewake release timer, and the
+ * GPU hangcheck work. All other users should avoid using this function.
+ *
+ * Any calls to this function must have a symmetric call to
+ * enable_rpm_asserts().
+ */
+static inline void disable_rpm_asserts(struct drm_i915_private *dev_priv)
+{
+	atomic_inc(&dev_priv->pm.wakelock_count);
+}
+
+/**
+ * enable_rpm_asserts - re-enable the RPM assert checks
+ * @dev_priv: i915 device instance
+ *
+ * This function re-enables the RPM assert checks after disabling them with
+ * disable_rpm_asserts. It's meant to be used only in special circumstances
+ * otherwise its use should be avoided.
+ *
+ * Any calls to this function must have a symmetric call to
+ * disable_rpm_asserts().
+ */
+static inline void enable_rpm_asserts(struct drm_i915_private *dev_priv)
+{
+	atomic_dec(&dev_priv->pm.wakelock_count);
 }
 
 void intel_runtime_pm_get(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 647c0ff..6d74d32 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -7214,4 +7214,5 @@ void intel_pm_setup(struct drm_device *dev)
 	INIT_LIST_HEAD(&dev_priv->rps.mmioflips.link);
 
 	dev_priv->pm.suspended = false;
+	atomic_set(&dev_priv->pm.wakelock_count, 0);
 }
diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c
index 3d7ddc3..64da5af 100644
--- a/drivers/gpu/drm/i915/intel_runtime_pm.c
+++ b/drivers/gpu/drm/i915/intel_runtime_pm.c
@@ -2130,6 +2130,8 @@ void intel_runtime_pm_get(struct drm_i915_private *dev_priv)
 	struct device *device = &dev->pdev->dev;
 
 	pm_runtime_get_sync(device);
+
+	atomic_inc(&dev_priv->pm.wakelock_count);
 	assert_rpm_wakelock_held(dev_priv);
 }
 
@@ -2157,6 +2159,8 @@ void intel_runtime_pm_get_noresume(struct drm_i915_private *dev_priv)
 
 	assert_rpm_wakelock_held(dev_priv);
 	pm_runtime_get_noresume(device);
+
+	atomic_inc(&dev_priv->pm.wakelock_count);
 }
 
 /**
@@ -2172,6 +2176,8 @@ void intel_runtime_pm_put(struct drm_i915_private *dev_priv)
 	struct drm_device *dev = dev_priv->dev;
 	struct device *device = &dev->pdev->dev;
 
+	atomic_dec(&dev_priv->pm.wakelock_count);
+
 	pm_runtime_mark_last_busy(device);
 	pm_runtime_put_autosuspend(device);
 }
-- 
2.5.0



More information about the Intel-gfx mailing list