[Intel-gfx] [PATCH 5/8] drm/i915: Implement GPU reset

Sat Aug 8 22:34:44 CEST 2009

This patch puts in place the machinery to attempt to reset the GPU. This
will be used when attempting to recover from a GPU hang.

Signed-off-by: Jesse Barnes <jbarnes at virtuousgeek.org>
Signed-off-by: Ben Gamari <bgamari.foss at gmail.com>
---
 drivers/gpu/drm/i915/i915_dma.c |    8 +++
 drivers/gpu/drm/i915/i915_drv.c |  134 +++++++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_drv.h |    1 +
 drivers/gpu/drm/i915/i915_irq.c |    8 +++
 drivers/gpu/drm/i915/i915_reg.h |    4 +
 5 files changed, 155 insertions(+), 0 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index e1489be..f8144fe 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1006,6 +1006,9 @@ static int i915_load_modeset_init(struct drm_device *dev,
 	/* Basic memrange allocator for stolen space (aka vram) */
 	drm_mm_init(&dev_priv->vram, 0, prealloc_size);
 
+	/* We're off and running w/KMS */
+	dev_priv->mm.suspended = 0;
+
 	/* Let GEM Manage from end of prealloc space to end of aperture.
 	 *
 	 * However, leave one page at the end still bound to the scratch page.
@@ -1017,7 +1020,9 @@ static int i915_load_modeset_init(struct drm_device *dev,
 	 */
 	i915_gem_do_init(dev, prealloc_size, agp_size - 4096);
 
+	mutex_lock(&dev->struct_mutex);
 	ret = i915_gem_init_ringbuffer(dev);
+	mutex_unlock(&dev->struct_mutex);
 	if (ret)
 		goto out;
 
@@ -1248,6 +1253,9 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
 		return ret;
 	}
 
+	/* Start out suspended */
+	dev_priv->mm.suspended = 1;
+
 	if (drm_core_check_feature(dev, DRIVER_MODESET)) {
 		ret = i915_load_modeset_init(dev, prealloc_size, agp_size);
 		if (ret < 0) {
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index fc4b68a..a88efb5 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -37,6 +37,7 @@
 #include <linux/console.h>
 #include "drm_crtc_helper.h"
 
+
 static unsigned int i915_modeset = -1;
 module_param_named(modeset, i915_modeset, int, 0400);
 
@@ -124,6 +125,139 @@ static int i915_resume(struct drm_device *dev)
 	return ret;
 }
 
+/**
+ * i965_reset - reset chip after a hang
+ * @dev: drm device to reset
+ * @flags: reset domains
+ *
+ * Reset the chip.  Useful if a hang is detected.
+ *
+ * Procedure is fairly simple:
+ *   - reset the chip using the reset reg
+ *   - re-init context state
+ *   - re-init hardware status page
+ *   - re-init ring buffer
+ *   - re-init interrupt state
+ *   - re-init display
+ */
+void i965_reset(struct drm_device *dev, u8 flags)
+{
+	drm_i915_private_t *dev_priv = dev->dev_private;
+	unsigned long timeout;
+	u8 gdrst;
+	bool need_display = true; //!(flags & (GDRST_RENDER | GDRST_MEDIA));
+
+#if defined(CONFIG_SMP)
+       timeout = jiffies + msecs_to_jiffies(500);
+	do {
+		udelay(100);
+	} while (mutex_is_locked(&dev->struct_mutex) && time_after(timeout, jiffies));
+
+	if (mutex_is_locked(&dev->struct_mutex)) {
+#if 1
+		DRM_ERROR("i915 struct_mutex lock is still held by %s. Giving on up reset.\n", dev->struct_mutex.owner->task->comm);
+		return;
+#else
+		struct task_struct *task = dev->struct_mutex.owner->task;
+		DRM_ERROR("Killing process %d (%s) for holding i915 device mutex\n", 
+				task->pid, task->comm);
+		force_sig(SIGILL, task);
+#endif
+	}
+#else
+	BUG_ON(mutex_is_locked(&dev->struct_mutex));
+#endif
+
+	debug_show_all_locks();
+	mutex_lock(&dev->struct_mutex);
+
+	/*
+	 * Clear request list
+	 */
+	i915_gem_retire_requests(dev);
+
+	if (need_display)
+		i915_save_display(dev);
+
+	if (IS_I965G(dev) || IS_G4X(dev)) {
+		/*
+		 * Set the domains we want to reset, then the reset bit (bit 0), 
+		 * and finally wait for hardware to clear it.
+		 */
+		pci_read_config_byte(dev->pdev, GDRST, &gdrst);
+		pci_write_config_byte(dev->pdev, GDRST, gdrst | flags);
+		udelay(50);
+		pci_write_config_byte(dev->pdev, GDRST, gdrst & 0x1);
+
+		/* ...we don't want to loop forever though, 500ms should be plenty */
+	       timeout = jiffies + msecs_to_jiffies(500);
+		do {
+			udelay(100);
+			pci_read_config_byte(dev->pdev, GDRST, &gdrst);
+		} while ((gdrst & 0x1) && time_after(timeout, jiffies));
+	} else {
+		DRM_ERROR("Error occurred. Don't know how to reset this chip.\n");
+		return;
+	}
+
+	/* Ok, now get things going again... */
+
+	/*
+	 * Everything depends on having the GTT running, so we need to start
+	 * there.  Fortunately we don't need to do this unless we reset the
+	 * chip at a PCI level.
+	 *
+	 * Next we need to restore the context, but we don't use those
+	 * yet either...
+	 *
+	 * Ring buffer needs to be re-initialized in the KMS case, or if X
+	 * was running at the time of the reset (i.e. we weren't VT
+	 * switched away).
+	 */
+	if (drm_core_check_feature(dev, DRIVER_MODESET) ||
+	    !dev_priv->mm.suspended) {
+		drm_i915_ring_buffer_t *ring = &dev_priv->ring;
+		struct drm_gem_object *obj = ring->ring_obj;
+		struct drm_i915_gem_object *obj_priv = obj->driver_private;
+		dev_priv->mm.suspended = 0;
+
+		/* Stop the ring if it's running. */
+		I915_WRITE(PRB0_CTL, 0);
+		I915_WRITE(PRB0_TAIL, 0);
+		I915_WRITE(PRB0_HEAD, 0);
+
+		/* Initialize the ring. */
+		I915_WRITE(PRB0_START, obj_priv->gtt_offset);
+		I915_WRITE(PRB0_CTL,
+			   ((obj->size - 4096) & RING_NR_PAGES) |
+			   RING_NO_REPORT |
+			   RING_VALID);
+		if (!drm_core_check_feature(dev, DRIVER_MODESET))
+			i915_kernel_lost_context(dev);
+		else {
+			ring->head = I915_READ(PRB0_HEAD) & HEAD_ADDR;
+			ring->tail = I915_READ(PRB0_TAIL) & TAIL_ADDR;
+			ring->space = ring->head - (ring->tail + 8);
+			if (ring->space < 0)
+				ring->space += ring->Size;
+		}
+
+		mutex_unlock(&dev->struct_mutex);
+		drm_irq_uninstall(dev);
+		drm_irq_install(dev);
+		mutex_lock(&dev->struct_mutex);
+	}
+
+	/*
+	 * Display needs restore too...
+	 */
+	if (need_display)
+		i915_restore_display(dev);
+
+	mutex_unlock(&dev->struct_mutex);
+}
+
+
 static int __devinit
 i915_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index e3634d2..6c7c790 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -596,6 +596,7 @@ extern long i915_compat_ioctl(struct file *filp, unsigned int cmd,
 extern int i915_emit_box(struct drm_device *dev,
 			 struct drm_clip_rect *boxes,
 			 int i, int DR1, int DR4);
+extern void i965_reset(struct drm_device *dev, u8 flags);
 
 /* i915_irq.c */
 void i915_hangcheck_elapsed(unsigned long data);
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index c47d5b4..6e234ad 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -482,6 +482,14 @@ static void i915_handle_error(struct drm_device *dev)
 		I915_WRITE(IIR, I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT);
 	}
 
+	if (dev_priv->mm.wedged) {
+		/*
+		 * Wakeup waiting processes so they don't hang
+		 */
+		printk("i915: Waking up sleeping processes\n");
+		DRM_WAKEUP(&dev_priv->irq_queue);
+	}
+
 	queue_work(dev_priv->wq, &dev_priv->error_work);
 }
 
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 2955083..6b6113d 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -66,6 +66,10 @@
 #define   GC_DISPLAY_CLOCK_333_MHZ	(4 << 4)
 #define   GC_DISPLAY_CLOCK_MASK		(7 << 4)
 #define LBB	0xf4
+#define GDRST 0xc0
+#define  GDRST_FULL	(0<<2)
+#define  GDRST_RENDER	(1<<2)
+#define  GDRST_MEDIA	(3<<2)
 
 /* VGA stuff */
 
-- 
1.6.3.3