[Intel-gfx] [PATCH] drm/i915: Attempt to recover from page table errors.

Eric Anholt eric at anholt.net
Fri Dec 12 04:59:12 CET 2008


This doesn't appear to actually work at the moment -- the chip comes back with
the ESR bit still set and the ring wedged.  I fear that we may need to hit the
big red switch.
---
 drivers/gpu/drm/i915/Makefile     |    1 +
 drivers/gpu/drm/i915/i915_drv.h   |   10 ++
 drivers/gpu/drm/i915/i915_error.c |  187 +++++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_gem.c   |  101 +++++++++++---------
 drivers/gpu/drm/i915/i915_irq.c   |   83 ----------------
 drivers/gpu/drm/i915/i915_reg.h   |    6 +
 6 files changed, 260 insertions(+), 128 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/i915_error.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index dd57a5b..6088da2 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -4,6 +4,7 @@
 
 ccflags-y := -Iinclude/drm
 i915-y := i915_drv.o i915_dma.o i915_irq.o i915_mem.o \
+	  i915_error.o \
           i915_suspend.o \
 	  i915_gem.o \
 	  i915_gem_debug.o \
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 467572a..6584b4a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -470,6 +470,8 @@ enum intel_chip_family {
 	CHIP_I965 = 0x08,
 };
 
+#define I915_GEM_GPU_DOMAINS	(~(I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT))
+
 extern struct drm_ioctl_desc i915_ioctls[];
 extern int i915_max_ioctl;
 extern unsigned int i915_fbpercrtc;
@@ -537,6 +539,9 @@ extern int i915_mem_destroy_heap(struct drm_device *dev, void *data,
 extern void i915_mem_takedown(struct mem_block **heap);
 extern void i915_mem_release(struct drm_device * dev,
 			     struct drm_file *file_priv, struct mem_block *heap);
+/* i915_error.c */
+void i915_report_and_clear_errors(struct drm_device *dev);
+
 /* i915_gem.c */
 int i915_gem_init_ioctl(struct drm_device *dev, void *data,
 			struct drm_file *file_priv);
@@ -591,11 +596,16 @@ int i915_gem_object_set_domain(struct drm_gem_object *obj,
 			       uint32_t write_domain);
 int i915_gem_init_ringbuffer(struct drm_device *dev);
 void i915_gem_cleanup_ringbuffer(struct drm_device *dev);
+int i915_gem_ring_reset(struct drm_device *dev);
 int i915_gem_do_init(struct drm_device *dev, unsigned long start,
 		     unsigned long end);
 int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
 int i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj,
 				      int write);
+void i915_gem_flush(struct drm_device *dev,
+		    uint32_t invalidate_domains,
+		    uint32_t flush_domains);
+uint32_t i915_add_request(struct drm_device *dev, uint32_t flush_domains);
 
 /* i915_gem_tiling.c */
 void i915_gem_detect_bit_6_swizzle(struct drm_device *dev);
diff --git a/drivers/gpu/drm/i915/i915_error.c b/drivers/gpu/drm/i915/i915_error.c
new file mode 100644
index 0000000..15133eb
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_error.c
@@ -0,0 +1,187 @@
+/*
+ * Copyright © 2008 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric at anholt.net>
+ *
+ */
+
+/** @file i915_error.c
+ *
+ * Graphics chip error logging and recovery.
+ */
+
+#include "drmP.h"
+#include "drm.h"
+#include "i915_drm.h"
+#include "i915_drv.h"
+#include "intel_drv.h"
+
+static struct {	/* PGTBL_ER decode table walked by i915_report_and_clear_errors() */
+	u32 bit;	/* mask tested against the value read from PGTBL_ER */
+	char *name;	/* text logged after "PGTBL_ER: invalid " */
+	int is_render;	/* nonzero: a hit makes the caller attempt the render/ring reset */
+} pgtbl_er_bits[] = {
+	{I915_PGTBL_CURSORA_INVALID_GTT_PTE, "cursor A PTE", 0},
+	{I915_PGTBL_CURSORB_INVALID_GTT_PTE, "cursor B PTE", 0},
+	{I915_PGTBL_MT_INVALID_GTT_PTE, "sampler PTE", 1},
+	{I915_PGTBL_LC_INVALID_GTT_PTE, "render cache PTE", 1},
+	{I915_PGTBL_ISC_INVALID_GTT_PTE, "instruction/state cache PTE", 1},
+	{I915_PGTBL_CS_VERTEX_DATA_INVALID_GTT_PTE, "vertex data PTE", 1},
+	{I915_PGTBL_CS_INSTRUCTION_INVALID_GTT_PTE, "command data PTE", 1},
+	{I915_PGTBL_CS_INVALID_GTT_PTE, "command streamer PTE", 1},
+	{I915_PGTBL_OVERLAY_INVALID_TILING, "overlay tiling", 0},
+	{I915_PGTBL_OVERLAY_INVALID_GTT_PTE, "overlay PTE", 0},
+	{I915_PGTBL_DISPC_INVALID_TILING, "display C tiling", 0},
+	{I915_PGTBL_DISPC_INVALID_GTT_PTE, "display C PTE", 0},
+	{I915_PGTBL_DISPB_INVALID_TILING, "display B tiling", 0},
+	{I915_PGTBL_DISPB_INVALID_GTT_PTE, "display B PTE", 0},
+	{I915_PGTBL_DISPA_INVALID_TILING, "display A tiling", 0},
+	{I915_PGTBL_DISPA_INVALID_GTT_PTE, "display A PTE", 0},
+	{I915_PGTBL_HOST_INVALID_PTE_DATA, "host PTE memory location", 0},
+	{I915_PGTBL_HOST_INVALID_GTT_PTE, "host PTE", 0},
+};
+
+/* Request a render-engine reset through the GDRST PCI config byte, then
+ * reprogram the ring and flush out the old requests.  Returns 0, -EINVAL
+ * (non-GEM or non-965), -EBUSY (reset already pending) or the error from
+ * i915_gem_ring_reset().  XXX: need more locking */
+static int
+i915_reset_render_and_ring(struct drm_device *dev)
+{
+	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
+	int ret, i;
+	u8 gdrst;
+
+	/* We need GEM control if we're to do this recovery.  Otherwise, the
+	 * X Server has its own idea of what the ring state is.
+	 */
+	if (dev_priv->ring.ring_obj == NULL) {
+		DRM_ERROR("Can't reset the device in non-GEM mode\n");
+		return -EINVAL;
+	}
+
+	/* Only one we've done reset for so far. */
+	if (!IS_I965G(dev)) {
+		DRM_ERROR("Can't reset non-G965-class devices\n");
+		return -EINVAL;
+	}
+
+	pci_read_config_byte(dev->pdev, GDRST, &gdrst);
+	if (gdrst & GDRST_ENABLE) {
+		DRM_ERROR("Reset started with reset bit already set!\n");
+		return -EBUSY;
+	}
+
+	pci_write_config_byte(dev->pdev, GDRST, GDRST_ENABLE | GDRST_RENDER);
+	for (i = 0; i < 100000; i++) {	/* bounded poll for GDRST_ENABLE to drop */
+		pci_read_config_byte(dev->pdev, GDRST, &gdrst);
+		if ((gdrst & GDRST_ENABLE) == 0)
+			break;
+	}
+	if (gdrst & GDRST_ENABLE)	/* test the bit, not the loop counter */
+		DRM_ERROR("Failed to reset the render engine\n");
+
+	ret = i915_gem_ring_reset(dev);
+	if (ret != 0) {
+		DRM_ERROR("Failed to reset ringbuffer: %d\n", ret);
+		return ret;
+	}
+	DRM_ERROR("Reset the render engine and ringbuffer\n");
+
+#if 0
+	/* XXX: what's a lock? */
+	i915_gem_flush(dev, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
+	(void)i915_add_request(dev, ~I915_GEM_DOMAIN_CPU);
+#else
+	/* Flush out all the old requests */
+	dev_priv->mm.wedged = 1;
+	i915_gem_retire_requests(dev);
+	dev_priv->mm.wedged = 0;
+#endif
+
+	return 0;
+}
+
+void
+i915_report_and_clear_errors(struct drm_device *dev) /* log EIR errors, then ack */
+{
+	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
+	u16 eir;	/* snapshot of EIR; each set bit is reported below */
+
+	eir = I915_READ16(EIR);
+
+	if (eir & I915_ERROR_INSTRUCTION) {
+		u32 ipehr;
+
+		DRM_ERROR("Encountered instruction error.  Clearing.\n");
+		if (IS_I965G(dev))	/* 965-class parts use a different IPEHR register */
+			ipehr = I915_READ(IPEHR_I965);
+		else
+			ipehr = I915_READ(IPEHR);
+		DRM_INFO("Instruction header: 0x%08x\n", ipehr);
+	}
+
+	if (eir & I915_ERROR_MAIN_MEMORY_REFRESH)
+		DRM_ERROR("Encountered main memory refresh error.  "
+			  "Clearing.\n");
+
+	if (eir & I915_ERROR_DISPLAY_UNDERRUN) {
+		DRM_ERROR("Encountered display underrun error.  Clearing.\n");
+		/* XXX: clear it in the pipestat instead of suppressing it.
+		 * NOTE(review): 32-bit read feeding a 16-bit write -- confirm. */
+		I915_WRITE16(EMR, I915_READ(EMR) | I915_ERROR_DISPLAY_UNDERRUN);
+	}
+
+	if (eir & I915_ERROR_PAGE_TABLE) {
+		u32 pgtbl_er = I915_READ(PGTBL_ER);
+		int i;
+		int is_render = 0;	/* set once any matched entry has is_render */
+
+		DRM_ERROR("Encountered page table error.\n");
+		DRM_INFO("Can't be cleared without device reset, disabling.\n");
+
+		for (i = 0; i < ARRAY_SIZE(pgtbl_er_bits); i++) {
+			if (pgtbl_er & pgtbl_er_bits[i].bit) {
+				DRM_INFO("PGTBL_ER: invalid %s\n",
+					 pgtbl_er_bits[i].name);
+				/* drop decoded bits so only unknown ones remain */
+				pgtbl_er &= ~pgtbl_er_bits[i].bit;
+				is_render |= pgtbl_er_bits[i].is_render;
+			}
+		}
+		if (pgtbl_er != 0) {
+			DRM_INFO("PGTBL_ER: unknown bits: 0x%04x\n",
+				 pgtbl_er);
+		}
+		if (is_render)	/* NOTE(review): reset result is ignored here */
+			i915_reset_render_and_ring(dev);
+
+		I915_WRITE16(EMR, I915_READ(EMR) | I915_ERROR_PAGE_TABLE);
+	}
+
+	if (eir & I915_ERROR_MAX_VERTEX)
+		DRM_ERROR("Encountered max vertex error.  Clearing.\n");
+
+	I915_WRITE(EIR, eir);	/* write the snapshot back; presumably write-1-to-clear -- confirm */
+	eir = I915_READ16(EIR); /* posting read; also re-checks what is still set */
+	if (eir != 0)
+		DRM_INFO("Potential un-cleared error bits: 0x%04x\n", eir);
+}
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 76056ca..de2d880 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -868,7 +868,7 @@ i915_gem_object_move_to_inactive(struct drm_gem_object *obj)
  *
  * Returned sequence numbers are nonzero on success.
  */
-static uint32_t
+uint32_t
 i915_add_request(struct drm_device *dev, uint32_t flush_domains)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
@@ -1099,7 +1099,7 @@ i915_wait_request(struct drm_device *dev, uint32_t seqno)
 	return ret;
 }
 
-static void
+void
 i915_gem_flush(struct drm_device *dev,
 	       uint32_t invalidate_domains,
 	       uint32_t flush_domains)
@@ -3060,59 +3060,21 @@ i915_gem_init_hws(struct drm_device *dev)
 }
 
 int
-i915_gem_init_ringbuffer(struct drm_device *dev)
+i915_gem_ring_reset(struct drm_device *dev)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
-	struct drm_gem_object *obj;
-	struct drm_i915_gem_object *obj_priv;
 	drm_i915_ring_buffer_t *ring = &dev_priv->ring;
-	int ret;
+	struct drm_gem_object *ring_obj = ring->ring_obj;
+	struct drm_i915_gem_object *ring_obj_priv = ring_obj->driver_private;
 	u32 head;
 
-	ret = i915_gem_init_hws(dev);
-	if (ret != 0)
-		return ret;
-
-	obj = drm_gem_object_alloc(dev, 128 * 1024);
-	if (obj == NULL) {
-		DRM_ERROR("Failed to allocate ringbuffer\n");
-		return -ENOMEM;
-	}
-	obj_priv = obj->driver_private;
-
-	ret = i915_gem_object_pin(obj, 4096);
-	if (ret != 0) {
-		drm_gem_object_unreference(obj);
-		return ret;
-	}
-
-	/* Set up the kernel mapping for the ring. */
-	ring->Size = obj->size;
-	ring->tail_mask = obj->size - 1;
-
-	ring->map.offset = dev->agp->base + obj_priv->gtt_offset;
-	ring->map.size = obj->size;
-	ring->map.type = 0;
-	ring->map.flags = 0;
-	ring->map.mtrr = 0;
-
-	drm_core_ioremap_wc(&ring->map, dev);
-	if (ring->map.handle == NULL) {
-		DRM_ERROR("Failed to map ringbuffer.\n");
-		memset(&dev_priv->ring, 0, sizeof(dev_priv->ring));
-		drm_gem_object_unreference(obj);
-		return -EINVAL;
-	}
-	ring->ring_obj = obj;
-	ring->virtual_start = ring->map.handle;
-
 	/* Stop the ring if it's running. */
 	I915_WRITE(PRB0_CTL, 0);
 	I915_WRITE(PRB0_TAIL, 0);
 	I915_WRITE(PRB0_HEAD, 0);
 
 	/* Initialize the ring. */
-	I915_WRITE(PRB0_START, obj_priv->gtt_offset);
+	I915_WRITE(PRB0_START, ring_obj_priv->gtt_offset);
 	head = I915_READ(PRB0_HEAD) & HEAD_ADDR;
 
 	/* G45 ring initialization fails to reset head to zero */
@@ -3134,7 +3096,7 @@ i915_gem_init_ringbuffer(struct drm_device *dev)
 	}
 
 	I915_WRITE(PRB0_CTL,
-		   ((obj->size - 4096) & RING_NR_PAGES) |
+		   ((ring_obj->size - 4096) & RING_NR_PAGES) |
 		   RING_NO_REPORT |
 		   RING_VALID);
 
@@ -3165,6 +3127,55 @@ i915_gem_init_ringbuffer(struct drm_device *dev)
 	return 0;
 }
 
+int
+i915_gem_init_ringbuffer(struct drm_device *dev) /* allocate, pin and map the ring */
+{
+	drm_i915_private_t *dev_priv = dev->dev_private;
+	struct drm_gem_object *obj;	/* GEM object backing the ring */
+	struct drm_i915_gem_object *obj_priv;
+	drm_i915_ring_buffer_t *ring = &dev_priv->ring;
+	int ret;
+
+	ret = i915_gem_init_hws(dev);
+	if (ret != 0)
+		return ret;
+
+	obj = drm_gem_object_alloc(dev, 128 * 1024);	/* 128KB ring */
+	if (obj == NULL) {
+		DRM_ERROR("Failed to allocate ringbuffer\n");
+		return -ENOMEM;
+	}
+	obj_priv = obj->driver_private;
+
+	ret = i915_gem_object_pin(obj, 4096);	/* presumably 4096 is the alignment -- confirm */
+	if (ret != 0) {
+		drm_gem_object_unreference(obj);
+		return ret;
+	}
+
+	/* Set up the kernel mapping for the ring. */
+	ring->Size = obj->size;
+	ring->tail_mask = obj->size - 1;
+
+	ring->map.offset = dev->agp->base + obj_priv->gtt_offset;
+	ring->map.size = obj->size;
+	ring->map.type = 0;
+	ring->map.flags = 0;
+	ring->map.mtrr = 0;
+
+	drm_core_ioremap_wc(&ring->map, dev);
+	if (ring->map.handle == NULL) {
+		DRM_ERROR("Failed to map ringbuffer.\n");
+		memset(&dev_priv->ring, 0, sizeof(dev_priv->ring));
+		/* NOTE(review): obj was pinned above and is not unpinned here -- confirm */
+		drm_gem_object_unreference(obj);
+		return -EINVAL;
+	}
+	ring->ring_obj = obj;	/* i915_gem_ring_reset() reads ring->ring_obj */
+	ring->virtual_start = ring->map.handle;
+	/* Hardware ring registers are programmed by the shared reset path. */
+	return i915_gem_ring_reset(dev);
+}
+
 void
 i915_gem_cleanup_ringbuffer(struct drm_device *dev)
 {
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 0e6c0ec..4b11290 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -175,89 +175,6 @@ u32 i915_get_vblank_counter(struct drm_device *dev, int pipe)
 	return count;
 }
 
-static struct {
-	u32 bit;
-	char *name;
-} pgtbl_er_bits[] = {
-	{I915_PGTBL_CURSORA_INVALID_GTT_PTE, "cursor A PTE"},
-	{I915_PGTBL_CURSORB_INVALID_GTT_PTE, "cursor B PTE"},
-	{I915_PGTBL_MT_INVALID_GTT_PTE, "sampler PTE"},
-	{I915_PGTBL_LC_INVALID_GTT_PTE, "render cache PTE"},
-	{I915_PGTBL_ISC_INVALID_GTT_PTE, "instruction/state cache PTE"},
-	{I915_PGTBL_CS_VERTEX_DATA_INVALID_GTT_PTE, "vertex data PTE"},
-	{I915_PGTBL_CS_INSTRUCTION_INVALID_GTT_PTE, "command data PTE"},
-	{I915_PGTBL_CS_INVALID_GTT_PTE, "command streamer PTE"},
-	{I915_PGTBL_OVERLAY_INVALID_TILING, "overlay tiling"},
-	{I915_PGTBL_OVERLAY_INVALID_GTT_PTE, "overlay PTE"},
-	{I915_PGTBL_DISPC_INVALID_TILING, "display C tiling"},
-	{I915_PGTBL_DISPC_INVALID_GTT_PTE, "display C PTE"},
-	{I915_PGTBL_DISPB_INVALID_TILING, "display B tiling"},
-	{I915_PGTBL_DISPB_INVALID_GTT_PTE, "display B PTE"},
-	{I915_PGTBL_DISPA_INVALID_TILING, "display A tiling"},
-	{I915_PGTBL_DISPA_INVALID_GTT_PTE, "display A PTE"},
-	{I915_PGTBL_HOST_INVALID_PTE_DATA, "host PTE memory location"},
-	{I915_PGTBL_HOST_INVALID_GTT_PTE, "host PTE"},
-};
-
-static void
-i915_report_and_clear_errors(struct drm_device *dev)
-{
-	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
-	u16 eir;
-
-	eir = I915_READ16(EIR);
-
-	if (eir & I915_ERROR_INSTRUCTION) {
-		u32 ipehr;
-
-		DRM_ERROR("Encountered instruction error.  Clearing.\n");
-		if (IS_I965G(dev))
-			ipehr = I915_READ(IPEHR_I965);
-		else
-			ipehr = I915_READ(IPEHR);
-		DRM_INFO("Instruction header: 0x%08x\n", ipehr);
-	}
-
-	if (eir & I915_ERROR_MAIN_MEMORY_REFRESH)
-		DRM_ERROR("Encountered main memory refresh error.  "
-			  "Clearing.\n");
-
-	if (eir & I915_ERROR_DISPLAY_UNDERRUN) {
-		DRM_ERROR("Encountered display underrun error.  Clearing.\n");
-		/* XXX clear it in the pipestat instead of suppressing it*/
-		I915_WRITE16(EMR, I915_READ(EMR) | I915_ERROR_DISPLAY_UNDERRUN);
-	}
-
-	if (eir & I915_ERROR_PAGE_TABLE) {
-		u32 pgtbl_er = I915_READ(PGTBL_ER);
-		int i;
-
-		DRM_ERROR("Encountered page table error.\n");
-		DRM_INFO("Can't be cleared without device reset, disabling.\n");
-		I915_WRITE16(EMR, I915_READ(EMR) | I915_ERROR_PAGE_TABLE);
-
-		for (i = 0; i < ARRAY_SIZE(pgtbl_er_bits); i++) {
-			if (pgtbl_er & pgtbl_er_bits[i].bit) {
-				DRM_INFO("PGTBL_ER: invalid %s\n",
-					 pgtbl_er_bits[i].name);
-				pgtbl_er &= ~pgtbl_er_bits[i].bit;
-			}
-		}
-		if (pgtbl_er != 0) {
-			DRM_INFO("PGTBL_ER: unknown bits: 0x%04x\n",
-				 pgtbl_er);
-		}
-	}
-
-	if (eir & I915_ERROR_MAX_VERTEX)
-		DRM_ERROR("Encountered max vertex error.  Clearing.\n");
-
-	I915_WRITE(EIR, eir);
-	eir = I915_READ16(EIR); /* posting read */
-	if (eir != 0)
-		DRM_INFO("Potential un-cleared error bits: 0x%04x\n", eir);
-}
-
 irqreturn_t i915_driver_irq_handler(DRM_IRQ_ARGS)
 {
 	struct drm_device *dev = (struct drm_device *) arg;
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 477ea04..a5970ac 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -48,6 +48,12 @@
 
 /* PCI config space */
 
+#define GDRST			0xc0
+#define   GDRST_ENABLE			(1 << 0)
+#define   GDRST_FULL			(0 << 2)
+#define   GDRST_RENDER			(1 << 2)
+#define   GDRST_MEDIA			(2 << 2)
+
 #define HPLLCC	0xc0 /* 855 only */
 #define   GC_CLOCK_CONTROL_MASK		(3 << 0)
 #define   GC_CLOCK_133_200		(0 << 0)
-- 
1.5.6.5




More information about the Intel-gfx mailing list