[Intel-gfx] [PATCH v2] Avoid pageflipping freeze when we apparently miss the flip prepare interrupt

Thu Sep 2 16:04:29 CEST 2010

When we miss the flip prepare interrupt, we never get into the
software state needed to restart userspace, resulting in a freeze of a
full-screen OpenGL application (such as a compositor).

Work around this by checking DSPxSURF/DSPxBASE to see if the page flip
has actually happened. If it has, do the work we would have done when
the flip prepare interrupt comes in.

Also, add debugfs information to tell us what's going on (based on the
patch from Chris Wilson attached to bugs.fdo bug #29798).

Signed-off-by: Simon Farnsworth <simon.farnsworth at onelan.co.uk>
---

After sending the previous patch, I thought about the problem a lot
more. We have two interesting cases when a full-screen application
is running:

1) The GPU is able to complete rendering in the time between the page
flip being queued and the next vblank. This should result in a
pageflip prepared interrupt before the vblank interrupt, but if it
doesn't, the stall check will detect that the page flip has happened
and recover without a visual glitch.

2) The rendering is queued late, or is too hefty for the GPU to
complete before the next vblank. In this case, we pay a small CPU
penalty checking to see if the page flip has happened, while the GPU
hasn't got far enough through the ring to have seen that we want it to
flip.

My intution is that case 2 is rare - in the case of my application (an
OpenGL compositor for digital signage), I expect to be woken up as
soon as the page flip has occured, queue some rendering and follow it
with a page flip request. Further, except in cases of extreme
overload, I expect to be able to queue everything up to the page flip
and have the GPU process it well before the next vblank.

Hence the stall check handler aims to short-circuit as soon as I can
determine that we've not got a page flip queued (don't waste time), or
if we're in case 1, and have received the page flip prepared interrupt
before vblank.

It only reads a hardware register (hopefully not a painful cost, but
still more expensive than checking two CPU-side variables rather than
just one) if we've hit the problem (when we have to check the GPU
register to detect it), or if we're in case 2.

 drivers/gpu/drm/i915/i915_debugfs.c  |   50 +++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_irq.c      |   54 ++++++++++++++++++++++++++++++++-
 drivers/gpu/drm/i915/intel_display.c |   14 ++------
 drivers/gpu/drm/i915/intel_drv.h     |   10 ++++++
 4 files changed, 116 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 92d5605..b05ed83 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -33,6 +33,7 @@
 #include "drm.h"
 #include "i915_drm.h"
 #include "i915_drv.h"
+#include "intel_drv.h"
 
 #define DRM_I915_RING_DEBUG 1
 
@@ -121,6 +122,54 @@ static int i915_gem_object_list_info(struct seq_file *m, void *data)
 	return 0;
 }
 
+static int i915_gem_pageflip_info(struct seq_file *m, void *data)
+{
+	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_device *dev = node->minor->dev;
+	unsigned long flags;
+	struct intel_crtc *crtc;
+
+	list_for_each_entry(crtc, &dev->mode_config.crtc_list, base.head) {
+		const char *pipe = crtc->pipe ? "B" : "A";
+		const char *plane = crtc->plane ? "B" : "A";
+		struct intel_unpin_work *work;
+
+		spin_lock_irqsave(&dev->event_lock, flags);
+		work = crtc->unpin_work;
+		if (work == NULL) {
+			seq_printf(m, "No flip due on pipe %s (plane %s)\n",
+				   pipe, plane);
+		} else {
+			if (!work->pending) {
+				seq_printf(m, "Flip queued on pipe %s (plane %s)\n",
+					   pipe, plane);
+			} else {
+				seq_printf(m, "Flip pending (waiting for vsync) on pipe %s (plane %s)\n",
+					   pipe, plane);
+			}
+			if (work->enable_stall_check)
+				seq_printf(m, "Stall check enabled, ");
+			else
+				seq_printf(m, "Stall check waiting for page flip ioctl, ");
+			seq_printf(m, "%d prepares\n", work->pending);
+
+			if (work->old_fb_obj) {
+				struct drm_i915_gem_object *obj_priv = to_intel_bo(work->old_fb_obj);
+				if(obj_priv)
+					seq_printf(m, "Old framebuffer gtt_offset 0x%08x\n", obj_priv->gtt_offset );
+			}
+			if (work->pending_flip_obj) {
+				struct drm_i915_gem_object *obj_priv = to_intel_bo(work->pending_flip_obj);
+				if(obj_priv)
+					seq_printf(m, "New framebuffer gtt_offset 0x%08x\n", obj_priv->gtt_offset );
+			}
+		}
+		spin_unlock_irqrestore(&dev->event_lock, flags);
+	}
+
+	return 0;
+}
+
 static int i915_gem_request_info(struct seq_file *m, void *data)
 {
 	struct drm_info_node *node = (struct drm_info_node *) m->private;
@@ -777,6 +826,7 @@ static struct drm_info_list i915_debugfs_list[] = {
 	{"i915_gem_active", i915_gem_object_list_info, 0, (void *) ACTIVE_LIST},
 	{"i915_gem_flushing", i915_gem_object_list_info, 0, (void *) FLUSHING_LIST},
 	{"i915_gem_inactive", i915_gem_object_list_info, 0, (void *) INACTIVE_LIST},
+	{"i915_gem_pageflip", i915_gem_pageflip_info, 0},
 	{"i915_gem_request", i915_gem_request_info, 0},
 	{"i915_gem_seqno", i915_gem_seqno_info, 0},
 	{"i915_gem_fence_regs", i915_gem_fence_regs_info, 0},
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 16861b8..662e6eb 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -887,6 +887,52 @@ static void i915_handle_error(struct drm_device *dev, bool wedged)
 	queue_work(dev_priv->wq, &dev_priv->error_work);
 }
 
+static void i915_pageflip_stall_check(struct drm_device *dev, int pipe)
+{
+	drm_i915_private_t *dev_priv = dev->dev_private;
+	struct drm_crtc *crtc = dev_priv->pipe_to_crtc_mapping[pipe];
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	int plane = intel_crtc->plane;
+	struct intel_unpin_work *work;
+	unsigned long flags;
+	bool stall_detected = false;
+	struct drm_i915_gem_object *obj_priv;
+
+	/* Ignore early vblank irqs */
+	if (intel_crtc == NULL)
+		return;
+
+	spin_lock_irqsave(&dev->event_lock, flags);
+	work = intel_crtc->unpin_work;
+
+	if (work == NULL || work->pending || !work->enable_stall_check) {
+		/* Either the pending flip IRQ arrived, or we're too early. Don't check */
+		spin_unlock_irqrestore(&dev->event_lock, flags);
+		return;
+	}
+
+	/* Potential stall - if we see that the flip has happened, assume a missed interrupt */
+	obj_priv = to_intel_bo(work->pending_flip_obj);
+	if(IS_I965G(dev)) {
+		int dspsurf = (plane == 0 ? DSPASURF : DSPBSURF);
+		stall_detected = I915_READ(dspsurf) == obj_priv->gtt_offset;
+	}
+	else {
+		int dspaddr = (plane == 0 ? DSPAADDR : DSPBADDR);
+		stall_detected = I915_READ(dspaddr) == (obj_priv->gtt_offset +
+							crtc->y * crtc->fb->pitch +
+							crtc->x * crtc->fb->bits_per_pixel/8);
+	}
+
+	spin_unlock_irqrestore(&dev->event_lock, flags);
+
+	if (stall_detected) {
+		DRM_DEBUG_DRIVER("Pageflip stall detected\n");
+		intel_prepare_page_flip(dev, plane);
+	}
+	return;
+}
+
 irqreturn_t i915_driver_irq_handler(DRM_IRQ_ARGS)
 {
 	struct drm_device *dev = (struct drm_device *) arg;
@@ -1004,15 +1050,19 @@ irqreturn_t i915_driver_irq_handler(DRM_IRQ_ARGS)
 		if (pipea_stats & vblank_status) {
 			vblank++;
 			drm_handle_vblank(dev, 0);
-			if (!dev_priv->flip_pending_is_done)
+			if (!dev_priv->flip_pending_is_done) {
+				i915_pageflip_stall_check(dev, 0);
 				intel_finish_page_flip(dev, 0);
+			}
 		}
 
 		if (pipeb_stats & vblank_status) {
 			vblank++;
 			drm_handle_vblank(dev, 1);
-			if (!dev_priv->flip_pending_is_done)
+			if (!dev_priv->flip_pending_is_done) {
+				i915_pageflip_stall_check(dev, 1);
 				intel_finish_page_flip(dev, 1);
+			}
 		}
 
 		if ((pipea_stats & PIPE_LEGACY_BLC_EVENT_STATUS) ||
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 23157e1..d0067c6 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -4911,15 +4911,6 @@ static void intel_crtc_destroy(struct drm_crtc *crtc)
 	kfree(intel_crtc);
 }
 
-struct intel_unpin_work {
-	struct work_struct work;
-	struct drm_device *dev;
-	struct drm_gem_object *old_fb_obj;
-	struct drm_gem_object *pending_flip_obj;
-	struct drm_pending_vblank_event *event;
-	int pending;
-};
-
 static void intel_unpin_work_fn(struct work_struct *__work)
 {
 	struct intel_unpin_work *work =
@@ -5007,7 +4998,8 @@ void intel_prepare_page_flip(struct drm_device *dev, int plane)
 
 	spin_lock_irqsave(&dev->event_lock, flags);
 	if (intel_crtc->unpin_work) {
-		intel_crtc->unpin_work->pending = 1;
+		if ((++intel_crtc->unpin_work->pending) > 1)
+			DRM_ERROR("Prepared flip multiple times\n");
 	} else {
 		DRM_DEBUG_DRIVER("preparing flip with no unpin work?\n");
 	}
@@ -5089,6 +5081,8 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
 		ADVANCE_LP_RING();
 	}
 
+	work->enable_stall_check = true;
+
 	/* Offset into the new buffer for cases of shared fbs between CRTCs */
 	offset = obj_priv->gtt_offset;
 	offset += (crtc->y * fb->pitch) + (crtc->x * (fb->bits_per_pixel) / 8);
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 0e92aa0..ad312ca 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -176,6 +176,16 @@ struct intel_crtc {
 #define enc_to_intel_encoder(x) container_of(x, struct intel_encoder, enc)
 #define to_intel_framebuffer(x) container_of(x, struct intel_framebuffer, base)
 
+struct intel_unpin_work {
+	struct work_struct work;
+	struct drm_device *dev;
+	struct drm_gem_object *old_fb_obj;
+	struct drm_gem_object *pending_flip_obj;
+	struct drm_pending_vblank_event *event;
+	int pending;
+	bool enable_stall_check;
+};
+
 struct i2c_adapter *intel_i2c_create(struct drm_device *dev, const u32 reg,
 				     const char *name);
 void intel_i2c_destroy(struct i2c_adapter *adapter);
-- 
1.7.0.1