[Intel-gfx] [PATCH 8/8] drm/i915: irq-drive the dp aux communication

Sun Sep 9 11:00:47 CEST 2012

At least on the platforms that have a dp aux irq and also have it
enabled - vlv/hsw should have one, too. But I don't have a machine to
test this on, and the current code doesn't support dp yet anyway on
those platforms. Judging from docs there's no dp aux interrupt for gm45.

Also, I only have an ivb cpu edp machine, so the dp aux A code for
snb/ilk is untested.

For dpcd probing when nothing is connected it slashes about 5ms of cpu
time (cpu time is now negligible), which agrees with 3 * 5 400 usec
timeouts.

A previous version of this patch increases the time required to go
through the dp_detect cycle (which includes reading the edid) from
around 33 ms to around 40 ms. Experiments indicated that this is
purely due to the irq latency - the hw doesn't allow us to queue up
dp aux transactions and hence irq latency directly affects throughput.
gmbus is much better, there we have a 8 byte buffer, and we get the
irq once another 4 bytes can be queued up.

But by using the pm_qos interface to request the lowest possible cpu
wake-up latency this slowdown completely disappeared.

Since all our output detection logic is single-threaded with the
mode_config mutex right now anyway, I've decide not ot play fancy and
to just reuse the gmbus wait queue. But this would definitely prep the
way to run dp detection on different ports in parallel

v2: Add a timeout for dp aux transfers when using interrupts - the hw
_does_  prevent this with the hw-based 400 usec timeout, but if the
irq somehow doesn't arrive we're screwed. Lesson learned while
developing this ;-)

v3: While at it also convert the busy-loop to wait_for_atomic, so that
we don't run the risk of an infinite loop any more.

v4: Ensure we have the smallest possible irq latency by using the
pm_qos interface.

Signed-off-by: Daniel Vetter <daniel.vetter at ffwll.ch>
---
 drivers/gpu/drm/i915/i915_dma.c |  1 +
 drivers/gpu/drm/i915/i915_drv.h |  4 +++
 drivers/gpu/drm/i915/i915_irq.c |  6 +++++
 drivers/gpu/drm/i915/intel_dp.c | 55 +++++++++++++++++++++++++++++++----------
 4 files changed, 53 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 068b49b..568913b 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1761,6 +1761,7 @@ int i915_driver_unload(struct drm_device *dev)
 	intel_teardown_mchbar(dev);
 
 	destroy_workqueue(dev_priv->wq);
+	pm_qos_remove_request(&dev_priv->pm_qos);
 
 	pci_dev_put(dev_priv->bridge_dev);
 	kfree(dev->dev_private);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 13b9e6a..50c6edf 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -40,6 +40,7 @@
 #include <linux/backlight.h>
 #include <linux/intel-iommu.h>
 #include <linux/kref.h>
+#include <linux/pm_qos.h>
 
 /* General customization:
  */
@@ -437,6 +438,9 @@ typedef struct drm_i915_private {
 	/* protects the irq masks */
 	spinlock_t irq_lock;
 
+	/* To control wakeup latency, e.g. for irq-driven dp aux transfers. */
+	struct pm_qos_request pm_qos;
+
 	/* DPIO indirect register protection */
 	spinlock_t dpio_lock;
 
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index c483531..bcb2624 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -607,7 +607,11 @@ static void gmbus_irq_handler(struct drm_device *dev)
 
 static void dp_aux_irq_handler(struct drm_device *dev)
 {
+	struct drm_i915_private *dev_priv = (drm_i915_private_t *) dev->dev_private;
+
 	DRM_DEBUG_DRIVER("AUX channel interrupt\n");
+
+	wake_up_all(&dev_priv->gmbus_wait_queue);
 }
 
 static void ibx_irq_handler(struct drm_device *dev, u32 pch_iir)
@@ -2718,6 +2722,8 @@ void intel_irq_init(struct drm_device *dev)
 	INIT_WORK(&dev_priv->rps.work, gen6_pm_rps_work);
 	INIT_WORK(&dev_priv->parity_error_work, ivybridge_parity_work);
 
+	pm_qos_add_request(&dev_priv->pm_qos, PM_QOS_CPU_DMA_LATENCY, 0);
+
 	dev->driver->get_vblank_counter = i915_get_vblank_counter;
 	dev->max_vblank_count = 0xffffff; /* only 24 bits of frame count */
 	if (IS_G4X(dev) || INTEL_INFO(dev)->gen >= 5) {
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index f28353d..5d98a53 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -340,6 +340,28 @@ intel_dp_check_edp(struct intel_dp *intel_dp)
 	}
 }
 
+static uint32_t
+intel_dp_aux_wait_done(struct intel_dp *intel_dp, bool has_aux_irq)
+{
+	struct drm_device *dev = intel_dp->base.base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	uint32_t ch_ctl = intel_dp->output_reg + 0x10;
+	uint32_t status;
+	bool done;
+
+#define C (((status = I915_READ(ch_ctl)) & DP_AUX_CH_CTL_SEND_BUSY) == 0)
+	if (has_aux_irq)
+		done = wait_event_timeout(dev_priv->gmbus_wait_queue, C, 10);
+	else
+		done = wait_for_atomic(C, 10) == 0;
+	if (!done)
+		DRM_ERROR("dp aux hw did not signal timeout (has irq: %i)!\n",
+			  has_aux_irq);
+#undef C
+
+	return status;
+}
+
 static int
 intel_dp_aux_ch(struct intel_dp *intel_dp,
 		uint8_t *send, int send_bytes,
@@ -350,11 +372,13 @@ intel_dp_aux_ch(struct intel_dp *intel_dp,
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	uint32_t ch_ctl = output_reg + 0x10;
 	uint32_t ch_data = ch_ctl + 4;
-	int i;
-	int recv_bytes;
+	int i, ret, recv_bytes;
 	uint32_t status;
 	uint32_t aux_clock_divider;
 	int try, precharge;
+	bool has_aux_irq = INTEL_INFO(dev)->gen >= 5;
+
+	pm_qos_update_request(&dev_priv->pm_qos, 0);
 
 	intel_dp_check_edp(intel_dp);
 	/* The clock divider is based off the hrawclk,
@@ -390,7 +414,8 @@ intel_dp_aux_ch(struct intel_dp *intel_dp,
 	if (try == 3) {
 		WARN(1, "dp_aux_ch not started status 0x%08x\n",
 		     I915_READ(ch_ctl));
-		return -EBUSY;
+		ret = -EBUSY;
+		goto out;
 	}
 
 	/* Must try at least 3 times according to DP spec */
@@ -403,6 +428,7 @@ intel_dp_aux_ch(struct intel_dp *intel_dp,
 		/* Send the command and wait for it to complete */
 		I915_WRITE(ch_ctl,
 			   DP_AUX_CH_CTL_SEND_BUSY |
+			   (has_aux_irq ? DP_AUX_CH_CTL_INTERRUPT : 0) |
 			   DP_AUX_CH_CTL_TIME_OUT_400us |
 			   (send_bytes << DP_AUX_CH_CTL_MESSAGE_SIZE_SHIFT) |
 			   (precharge << DP_AUX_CH_CTL_PRECHARGE_2US_SHIFT) |
@@ -410,12 +436,8 @@ intel_dp_aux_ch(struct intel_dp *intel_dp,
 			   DP_AUX_CH_CTL_DONE |
 			   DP_AUX_CH_CTL_TIME_OUT_ERROR |
 			   DP_AUX_CH_CTL_RECEIVE_ERROR);
-		for (;;) {
-			status = I915_READ(ch_ctl);
-			if ((status & DP_AUX_CH_CTL_SEND_BUSY) == 0)
-				break;
-			udelay(100);
-		}
+
+		status = intel_dp_aux_wait_done(intel_dp, has_aux_irq);
 
 		/* Clear done status and any errors */
 		I915_WRITE(ch_ctl,
@@ -433,7 +455,8 @@ intel_dp_aux_ch(struct intel_dp *intel_dp,
 
 	if ((status & DP_AUX_CH_CTL_DONE) == 0) {
 		DRM_ERROR("dp_aux_ch not done status 0x%08x\n", status);
-		return -EBUSY;
+		ret = -EBUSY;
+		goto out;
 	}
 
 	/* Check for timeout or receive error.
@@ -441,14 +464,16 @@ intel_dp_aux_ch(struct intel_dp *intel_dp,
 	 */
 	if (status & DP_AUX_CH_CTL_RECEIVE_ERROR) {
 		DRM_ERROR("dp_aux_ch receive error status 0x%08x\n", status);
-		return -EIO;
+		ret = -EIO;
+		goto out;
 	}
 
 	/* Timeouts occur when the device isn't connected, so they're
 	 * "normal" -- don't fill the kernel log with these */
 	if (status & DP_AUX_CH_CTL_TIME_OUT_ERROR) {
 		DRM_DEBUG_KMS("dp_aux_ch timeout status 0x%08x\n", status);
-		return -ETIMEDOUT;
+		ret = -ETIMEDOUT;
+		goto out;
 	}
 
 	/* Unload any bytes sent back from the other side */
@@ -461,7 +486,11 @@ intel_dp_aux_ch(struct intel_dp *intel_dp,
 		unpack_aux(I915_READ(ch_data + i),
 			   recv + i, recv_bytes - i);
 
-	return recv_bytes;
+	ret = recv_bytes;
+out:
+	pm_qos_update_request(&dev_priv->pm_qos, PM_QOS_DEFAULT_VALUE);
+
+	return ret;
 }
 
 /* Write data to the aux channel in native mode */
-- 
1.7.11.2