[Intel-gfx] [PATCH 5/6] drm/i915: Implement GPU reset

Sun Sep 13 16:51:01 CEST 2009

This patch puts in place the machinery to attempt to reset the GPU. This
will be used when attempting to recover from a GPU hang.

Signed-off-by: Jesse Barnes <jbarnes at virtuousgeek.org>
Signed-off-by: Ben Gamari <bgamari.foss at gmail.com>
---
 drivers/gpu/drm/i915/i915_dma.c |    8 ++
 drivers/gpu/drm/i915/i915_drv.c |  141 +++++++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_drv.h |    1 +
 drivers/gpu/drm/i915/i915_irq.c |    8 ++
 drivers/gpu/drm/i915/i915_reg.h |    4 +
 5 files changed, 162 insertions(+), 0 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index aabe41b..25f8e75 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1173,6 +1173,9 @@ static int i915_load_modeset_init(struct drm_device *dev,
 	drm_mm_init(&dev_priv->vram, 0, prealloc_size);
 	DRM_INFO("set up %ldM of stolen space\n", prealloc_size / (1024*1024));
 
+	/* We're off and running w/KMS */
+	dev_priv->mm.suspended = 0;
+
 	/* Let GEM Manage from end of prealloc space to end of aperture.
 	 *
 	 * However, leave one page at the end still bound to the scratch page.
@@ -1184,7 +1187,9 @@ static int i915_load_modeset_init(struct drm_device *dev,
 	 */
 	i915_gem_do_init(dev, prealloc_size, agp_size - 4096);
 
+	mutex_lock(&dev->struct_mutex);
 	ret = i915_gem_init_ringbuffer(dev);
+	mutex_unlock(&dev->struct_mutex);
 	if (ret)
 		goto out;
 
@@ -1433,6 +1438,9 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
 		return ret;
 	}
 
+	/* Start out suspended */
+	dev_priv->mm.suspended = 1;
+
 	if (drm_core_check_feature(dev, DRIVER_MODESET)) {
 		ret = i915_load_modeset_init(dev, prealloc_start,
 					     prealloc_size, agp_size);
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index dbe568c..0c03a2a 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -127,6 +127,147 @@ static int i915_resume(struct drm_device *dev)
 	return ret;
 }
 
+/**
+ * i965_reset - reset chip after a hang
+ * @dev: drm device to reset
+ * @flags: reset domains
+ *
+ * Reset the chip.  Useful if a hang is detected.
+ *
+ * Procedure is fairly simple:
+ *   - reset the chip using the reset reg
+ *   - re-init context state
+ *   - re-init hardware status page
+ *   - re-init ring buffer
+ *   - re-init interrupt state
+ *   - re-init display
+ */
+void i965_reset(struct drm_device *dev, u8 flags)
+{
+	drm_i915_private_t *dev_priv = dev->dev_private;
+	unsigned long timeout;
+	u8 gdrst;
+	bool need_display = true; //!(flags & (GDRST_RENDER | GDRST_MEDIA));
+
+#if defined(CONFIG_SMP)
+       timeout = jiffies + msecs_to_jiffies(500);
+	do {
+		udelay(100);
+	} while (mutex_is_locked(&dev->struct_mutex) && time_after(timeout, jiffies));
+
+	if (mutex_is_locked(&dev->struct_mutex)) {
+#if 1
+		DRM_ERROR("i915 struct_mutex lock is still held by %s. Giving on up reset.\n", dev->struct_mutex.owner->task->comm);
+		return;
+#else
+		struct task_struct *task = dev->struct_mutex.owner->task;
+		DRM_ERROR("Killing process %d (%s) for holding i915 device mutex\n", 
+				task->pid, task->comm);
+		force_sig(SIGILL, task);
+#endif
+	}
+#else
+	BUG_ON(mutex_is_locked(&dev->struct_mutex));
+#endif
+
+	debug_show_all_locks();
+	mutex_lock(&dev->struct_mutex);
+
+	/*
+	 * Clear request list
+	 */
+	i915_gem_retire_requests(dev);
+
+	if (need_display)
+		i915_save_display(dev);
+
+	if (IS_I965G(dev) || IS_G4X(dev)) {
+		/*
+		 * Set the domains we want to reset, then the reset bit (bit 0).
+		 * Clear the reset bit after a while and wait for hardware status
+		 * bit (bit 1) to be set
+		 */
+		pci_read_config_byte(dev->pdev, GDRST, &gdrst);
+		//TODO: Set domains
+		pci_write_config_byte(dev->pdev, GDRST, gdrst | flags | ((flags == GDRST_FULL) ? 0x1 : 0x0));
+		udelay(50);
+		pci_write_config_byte(dev->pdev, GDRST, gdrst & 0xfe);
+
+		/* ...we don't want to loop forever though, 500ms should be plenty */
+	       timeout = jiffies + msecs_to_jiffies(500);
+		do {
+			udelay(100);
+			pci_read_config_byte(dev->pdev, GDRST, &gdrst);
+		} while ((gdrst & 0x1) && time_after(timeout, jiffies));
+
+		if (gdrst & 0x1) {
+			DRM_ERROR("Failed to reset chip. You're screwed.");
+			mutex_unlock(&dev->struct_mutex);
+			return;
+		}
+	} else {
+		DRM_ERROR("Error occurred. Don't know how to reset this chip.\n");
+		return;
+	}
+
+	/* Ok, now get things going again... */
+
+	/*
+	 * Everything depends on having the GTT running, so we need to start
+	 * there.  Fortunately we don't need to do this unless we reset the
+	 * chip at a PCI level.
+	 *
+	 * Next we need to restore the context, but we don't use those
+	 * yet either...
+	 *
+	 * Ring buffer needs to be re-initialized in the KMS case, or if X
+	 * was running at the time of the reset (i.e. we weren't VT
+	 * switched away).
+	 */
+	if (drm_core_check_feature(dev, DRIVER_MODESET) ||
+	    !dev_priv->mm.suspended) {
+		drm_i915_ring_buffer_t *ring = &dev_priv->ring;
+		struct drm_gem_object *obj = ring->ring_obj;
+		struct drm_i915_gem_object *obj_priv = obj->driver_private;
+		dev_priv->mm.suspended = 0;
+
+		/* Stop the ring if it's running. */
+		I915_WRITE(PRB0_CTL, 0);
+		I915_WRITE(PRB0_TAIL, 0);
+		I915_WRITE(PRB0_HEAD, 0);
+
+		/* Initialize the ring. */
+		I915_WRITE(PRB0_START, obj_priv->gtt_offset);
+		I915_WRITE(PRB0_CTL,
+			   ((obj->size - 4096) & RING_NR_PAGES) |
+			   RING_NO_REPORT |
+			   RING_VALID);
+		if (!drm_core_check_feature(dev, DRIVER_MODESET))
+			i915_kernel_lost_context(dev);
+		else {
+			ring->head = I915_READ(PRB0_HEAD) & HEAD_ADDR;
+			ring->tail = I915_READ(PRB0_TAIL) & TAIL_ADDR;
+			ring->space = ring->head - (ring->tail + 8);
+			if (ring->space < 0)
+				ring->space += ring->Size;
+		}
+
+		mutex_unlock(&dev->struct_mutex);
+		drm_irq_uninstall(dev);
+		drm_irq_install(dev);
+		mutex_lock(&dev->struct_mutex);
+	}
+
+	/*
+	 * Display needs restore too...
+	 */
+	if (need_display)
+		i915_restore_display(dev);
+
+	mutex_unlock(&dev->struct_mutex);
+}
+
+
 static int __devinit
 i915_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index afbcaa9..8797777 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -624,6 +624,7 @@ extern long i915_compat_ioctl(struct file *filp, unsigned int cmd,
 extern int i915_emit_box(struct drm_device *dev,
 			 struct drm_clip_rect *boxes,
 			 int i, int DR1, int DR4);
+extern void i965_reset(struct drm_device *dev, u8 flags);
 
 /* i915_irq.c */
 void i915_hangcheck_elapsed(unsigned long data);
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 126696a..dbfcf0a 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -482,6 +482,14 @@ static void i915_handle_error(struct drm_device *dev)
 		I915_WRITE(IIR, I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT);
 	}
 
+	if (dev_priv->mm.wedged) {
+		/*
+		 * Wakeup waiting processes so they don't hang
+		 */
+		printk("i915: Waking up sleeping processes\n");
+		DRM_WAKEUP(&dev_priv->irq_queue);
+	}
+
 	queue_work(dev_priv->wq, &dev_priv->error_work);
 }
 
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index f7c2de8..99981a0 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -85,6 +85,10 @@
 #define   I915_GC_RENDER_CLOCK_200_MHZ	(1 << 0)
 #define   I915_GC_RENDER_CLOCK_333_MHZ	(4 << 0)
 #define LBB	0xf4
+#define GDRST 0xc0
+#define  GDRST_FULL	(0<<2)
+#define  GDRST_RENDER	(1<<2)
+#define  GDRST_MEDIA	(3<<2)
 
 /* VGA stuff */
 
-- 
1.6.3.3