[Intel-gfx] [PATCH] drm/i915: Attempt to recover from page table errors.
Eric Anholt
eric at anholt.net
Fri Dec 12 04:59:12 CET 2008
This doesn't appear to actually work at the moment -- the chip comes back with
the ESR bit still set and the ring wedged. I fear that we may need to hit the
big red switch.
---
drivers/gpu/drm/i915/Makefile | 1 +
drivers/gpu/drm/i915/i915_drv.h | 10 ++
drivers/gpu/drm/i915/i915_error.c | 187 +++++++++++++++++++++++++++++++++++++
drivers/gpu/drm/i915/i915_gem.c | 101 +++++++++++---------
drivers/gpu/drm/i915/i915_irq.c | 83 ----------------
drivers/gpu/drm/i915/i915_reg.h | 6 +
6 files changed, 260 insertions(+), 128 deletions(-)
create mode 100644 drivers/gpu/drm/i915/i915_error.c
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index dd57a5b..6088da2 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -4,6 +4,7 @@
ccflags-y := -Iinclude/drm
i915-y := i915_drv.o i915_dma.o i915_irq.o i915_mem.o \
+ i915_error.o \
i915_suspend.o \
i915_gem.o \
i915_gem_debug.o \
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 467572a..6584b4a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -470,6 +470,8 @@ enum intel_chip_family {
CHIP_I965 = 0x08,
};
+#define I915_GEM_GPU_DOMAINS (~(I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT))
+
extern struct drm_ioctl_desc i915_ioctls[];
extern int i915_max_ioctl;
extern unsigned int i915_fbpercrtc;
@@ -537,6 +539,9 @@ extern int i915_mem_destroy_heap(struct drm_device *dev, void *data,
extern void i915_mem_takedown(struct mem_block **heap);
extern void i915_mem_release(struct drm_device * dev,
struct drm_file *file_priv, struct mem_block *heap);
+/* i915_error.c */
+void i915_report_and_clear_errors(struct drm_device *dev);
+
/* i915_gem.c */
int i915_gem_init_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv);
@@ -591,11 +596,16 @@ int i915_gem_object_set_domain(struct drm_gem_object *obj,
uint32_t write_domain);
int i915_gem_init_ringbuffer(struct drm_device *dev);
void i915_gem_cleanup_ringbuffer(struct drm_device *dev);
+int i915_gem_ring_reset(struct drm_device *dev);
int i915_gem_do_init(struct drm_device *dev, unsigned long start,
unsigned long end);
int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
int i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj,
int write);
+void i915_gem_flush(struct drm_device *dev,
+ uint32_t invalidate_domains,
+ uint32_t flush_domains);
+uint32_t i915_add_request(struct drm_device *dev, uint32_t flush_domains);
/* i915_gem_tiling.c */
void i915_gem_detect_bit_6_swizzle(struct drm_device *dev);
diff --git a/drivers/gpu/drm/i915/i915_error.c b/drivers/gpu/drm/i915/i915_error.c
new file mode 100644
index 0000000..15133eb
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_error.c
@@ -0,0 +1,187 @@
+/*
+ * Copyright © 2008 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Eric Anholt <eric at anholt.net>
+ *
+ */
+
+/** @file i915_error.c
+ *
+ * Graphics chip error logging and recovery.
+ */
+
+#include "drmP.h"
+#include "drm.h"
+#include "i915_drm.h"
+#include "i915_drv.h"
+#include "intel_drv.h"
+
+/*
+ * Decode table for the page table error register (PGTBL_ER).
+ *
+ * Each entry pairs one PGTBL_ER bit with the name printed in the log and a
+ * flag telling whether that error source is treated as a render-side client.
+ * i915_report_and_clear_errors() attempts a render engine/ring reset only
+ * when at least one of the bits that are set has is_render != 0.
+ *
+ * The table is only ever read, so both it and its strings are const.
+ */
+static const struct {
+	u32 bit;		/* mask of the bit within PGTBL_ER */
+	const char *name;	/* printed as "PGTBL_ER: invalid <name>" */
+	int is_render;		/* nonzero: a render reset is attempted */
+} pgtbl_er_bits[] = {
+	{I915_PGTBL_CURSORA_INVALID_GTT_PTE, "cursor A PTE", 0},
+	{I915_PGTBL_CURSORB_INVALID_GTT_PTE, "cursor B PTE", 0},
+	{I915_PGTBL_MT_INVALID_GTT_PTE, "sampler PTE", 1},
+	{I915_PGTBL_LC_INVALID_GTT_PTE, "render cache PTE", 1},
+	{I915_PGTBL_ISC_INVALID_GTT_PTE, "instruction/state cache PTE", 1},
+	{I915_PGTBL_CS_VERTEX_DATA_INVALID_GTT_PTE, "vertex data PTE", 1},
+	{I915_PGTBL_CS_INSTRUCTION_INVALID_GTT_PTE, "command data PTE", 1},
+	{I915_PGTBL_CS_INVALID_GTT_PTE, "command streamer PTE", 1},
+	{I915_PGTBL_OVERLAY_INVALID_TILING, "overlay tiling", 0},
+	{I915_PGTBL_OVERLAY_INVALID_GTT_PTE, "overlay PTE", 0},
+	{I915_PGTBL_DISPC_INVALID_TILING, "display C tiling", 0},
+	{I915_PGTBL_DISPC_INVALID_GTT_PTE, "display C PTE", 0},
+	{I915_PGTBL_DISPB_INVALID_TILING, "display B tiling", 0},
+	{I915_PGTBL_DISPB_INVALID_GTT_PTE, "display B PTE", 0},
+	{I915_PGTBL_DISPA_INVALID_TILING, "display A tiling", 0},
+	{I915_PGTBL_DISPA_INVALID_GTT_PTE, "display A PTE", 0},
+	{I915_PGTBL_HOST_INVALID_PTE_DATA, "host PTE memory location", 0},
+	{I915_PGTBL_HOST_INVALID_GTT_PTE, "host PTE", 0},
+};
+
+/*
+ * Reset the render engine through the GDRST PCI config byte, then reprogram
+ * the ringbuffer and retire all outstanding requests.
+ *
+ * Only attempted when GEM owns the ring (dev_priv->ring.ring_obj is set) and
+ * on 965-class hardware.
+ *
+ * Returns 0 on success, -EINVAL when the mode or chip is unsupported, -EBUSY
+ * when GDRST_ENABLE is already set on entry, or the error returned by
+ * i915_gem_ring_reset().  A reset that never completes is logged, but the
+ * ring reset is still attempted afterwards.
+ */
+static int
+i915_reset_render_and_ring(struct drm_device *dev)
+{
+	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
+	int ret, i;
+	u8 gdrst;
+
+	/* XXX: need more locking */
+
+	/* We need GEM control if we're to do this recovery. Otherwise, the
+	 * X Server has its own idea of what the ring state is.
+	 */
+	if (dev_priv->ring.ring_obj == NULL) {
+		DRM_ERROR("Can't reset the device in non-GEM mode\n");
+		return -EINVAL;
+	}
+
+	/* Only one we've done reset for so far. */
+	if (!IS_I965G(dev)) {
+		DRM_ERROR("Can't reset non-G965-class devices\n");
+		return -EINVAL;
+	}
+
+	pci_read_config_byte(dev->pdev, GDRST, &gdrst);
+	if (gdrst & GDRST_ENABLE) {
+		DRM_ERROR("Reset started with reset bit already set!\n");
+		return -EBUSY;
+	}
+
+	pci_write_config_byte(dev->pdev, GDRST, GDRST_ENABLE | GDRST_RENDER);
+
+	/* Poll until GDRST_ENABLE reads back clear, or give up. */
+	for (i = 0; i < 100000; i++) {
+		pci_read_config_byte(dev->pdev, GDRST, &gdrst);
+		if ((gdrst & GDRST_ENABLE) == 0)
+			break;
+	}
+	/*
+	 * Judge the timeout by the last value read, not by the loop counter:
+	 * the previous test compared i against 1000000 while the loop stops
+	 * at 100000, so a reset that never completed was never reported.
+	 */
+	if (gdrst & GDRST_ENABLE)
+		DRM_ERROR("Failed to reset the render engine\n");
+
+	ret = i915_gem_ring_reset(dev);
+	if (ret != 0) {
+		DRM_ERROR("Failed to reset ringbuffer: %d\n", ret);
+		return ret;
+	}
+
+	DRM_ERROR("Reset the render engine and ringbuffer\n");
+
+#if 0
+	/* XXX: what's a lock? */
+	i915_gem_flush(dev, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
+	(void)i915_add_request(dev, ~I915_GEM_DOMAIN_CPU);
+#else
+	/* Flush out all the old requests */
+	dev_priv->mm.wedged = 1;
+	i915_gem_retire_requests(dev);
+	dev_priv->mm.wedged = 0;
+#endif
+
+	return 0;
+}
+/*
+ * Log each error condition flagged in EIR, write the flagged bits back to
+ * EIR (the log messages call this "clearing"), and report any bits that
+ * still read back as set.
+ *
+ * Display underrun and page table errors also get their bit set in EMR,
+ * which the comment and message below describe as suppressing/disabling
+ * them.  For a page table error, PGTBL_ER is decoded with pgtbl_er_bits[]
+ * and i915_reset_render_and_ring() is called when any decoded bit is marked
+ * is_render; its return value is not checked here.
+ *
+ * NOTE(review): EIR is read with I915_READ16 but written with I915_WRITE,
+ * and EMR is read with I915_READ but written with I915_WRITE16 -- confirm
+ * the intended access width.
+ */
+void
+i915_report_and_clear_errors(struct drm_device *dev)
+{
+ drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
+ u16 eir;
+
+ eir = I915_READ16(EIR);
+
+ if (eir & I915_ERROR_INSTRUCTION) {
+ u32 ipehr;
+
+ DRM_ERROR("Encountered instruction error. Clearing.\n");
+ if (IS_I965G(dev)) /* 965-class chips use a separate IPEHR define */
+ ipehr = I915_READ(IPEHR_I965);
+ else
+ ipehr = I915_READ(IPEHR);
+ DRM_INFO("Instruction header: 0x%08x\n", ipehr);
+ }
+
+ if (eir & I915_ERROR_MAIN_MEMORY_REFRESH)
+ DRM_ERROR("Encountered main memory refresh error. "
+ "Clearing.\n");
+
+ if (eir & I915_ERROR_DISPLAY_UNDERRUN) {
+ DRM_ERROR("Encountered display underrun error. Clearing.\n");
+ /* XXX clear it in the pipestat instead of suppressing it*/
+ I915_WRITE16(EMR, I915_READ(EMR) | I915_ERROR_DISPLAY_UNDERRUN);
+ }
+
+ if (eir & I915_ERROR_PAGE_TABLE) {
+ u32 pgtbl_er = I915_READ(PGTBL_ER);
+ int i;
+ int is_render = 0; /* set once any decoded bit is flagged is_render */
+
+ DRM_ERROR("Encountered page table error.\n");
+ DRM_INFO("Can't be cleared without device reset, disabling.\n");
+
+ for (i = 0; i < ARRAY_SIZE(pgtbl_er_bits); i++) {
+ if (pgtbl_er & pgtbl_er_bits[i].bit) {
+ DRM_INFO("PGTBL_ER: invalid %s\n",
+ pgtbl_er_bits[i].name);
+ pgtbl_er &= ~pgtbl_er_bits[i].bit; /* leftovers are reported as unknown */
+ is_render |= pgtbl_er_bits[i].is_render;
+ }
+ }
+ if (pgtbl_er != 0) {
+ DRM_INFO("PGTBL_ER: unknown bits: 0x%04x\n",
+ pgtbl_er);
+ }
+ if (is_render)
+ i915_reset_render_and_ring(dev);
+
+ I915_WRITE16(EMR, I915_READ(EMR) | I915_ERROR_PAGE_TABLE);
+ }
+
+ if (eir & I915_ERROR_MAX_VERTEX)
+ DRM_ERROR("Encountered max vertex error. Clearing.\n");
+
+ I915_WRITE(EIR, eir);
+ eir = I915_READ16(EIR); /* posting read */
+ if (eir != 0)
+ DRM_INFO("Potential un-cleared error bits: 0x%04x\n", eir);
+}
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 76056ca..de2d880 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -868,7 +868,7 @@ i915_gem_object_move_to_inactive(struct drm_gem_object *obj)
*
* Returned sequence numbers are nonzero on success.
*/
-static uint32_t
+uint32_t
i915_add_request(struct drm_device *dev, uint32_t flush_domains)
{
drm_i915_private_t *dev_priv = dev->dev_private;
@@ -1099,7 +1099,7 @@ i915_wait_request(struct drm_device *dev, uint32_t seqno)
return ret;
}
-static void
+void
i915_gem_flush(struct drm_device *dev,
uint32_t invalidate_domains,
uint32_t flush_domains)
@@ -3060,59 +3060,21 @@ i915_gem_init_hws(struct drm_device *dev)
}
int
-i915_gem_init_ringbuffer(struct drm_device *dev)
+i915_gem_ring_reset(struct drm_device *dev)
{
drm_i915_private_t *dev_priv = dev->dev_private;
- struct drm_gem_object *obj;
- struct drm_i915_gem_object *obj_priv;
drm_i915_ring_buffer_t *ring = &dev_priv->ring;
- int ret;
+ struct drm_gem_object *ring_obj = ring->ring_obj;
+ struct drm_i915_gem_object *ring_obj_priv = ring_obj->driver_private;
u32 head;
- ret = i915_gem_init_hws(dev);
- if (ret != 0)
- return ret;
-
- obj = drm_gem_object_alloc(dev, 128 * 1024);
- if (obj == NULL) {
- DRM_ERROR("Failed to allocate ringbuffer\n");
- return -ENOMEM;
- }
- obj_priv = obj->driver_private;
-
- ret = i915_gem_object_pin(obj, 4096);
- if (ret != 0) {
- drm_gem_object_unreference(obj);
- return ret;
- }
-
- /* Set up the kernel mapping for the ring. */
- ring->Size = obj->size;
- ring->tail_mask = obj->size - 1;
-
- ring->map.offset = dev->agp->base + obj_priv->gtt_offset;
- ring->map.size = obj->size;
- ring->map.type = 0;
- ring->map.flags = 0;
- ring->map.mtrr = 0;
-
- drm_core_ioremap_wc(&ring->map, dev);
- if (ring->map.handle == NULL) {
- DRM_ERROR("Failed to map ringbuffer.\n");
- memset(&dev_priv->ring, 0, sizeof(dev_priv->ring));
- drm_gem_object_unreference(obj);
- return -EINVAL;
- }
- ring->ring_obj = obj;
- ring->virtual_start = ring->map.handle;
-
/* Stop the ring if it's running. */
I915_WRITE(PRB0_CTL, 0);
I915_WRITE(PRB0_TAIL, 0);
I915_WRITE(PRB0_HEAD, 0);
/* Initialize the ring. */
- I915_WRITE(PRB0_START, obj_priv->gtt_offset);
+ I915_WRITE(PRB0_START, ring_obj_priv->gtt_offset);
head = I915_READ(PRB0_HEAD) & HEAD_ADDR;
/* G45 ring initialization fails to reset head to zero */
@@ -3134,7 +3096,7 @@ i915_gem_init_ringbuffer(struct drm_device *dev)
}
I915_WRITE(PRB0_CTL,
- ((obj->size - 4096) & RING_NR_PAGES) |
+ ((ring_obj->size - 4096) & RING_NR_PAGES) |
RING_NO_REPORT |
RING_VALID);
@@ -3165,6 +3127,55 @@ i915_gem_init_ringbuffer(struct drm_device *dev)
return 0;
}
+/*
+ * One-time ring setup: run i915_gem_init_hws(), allocate and pin a
+ * 128 * 1024 byte GEM object for the ring, map it write-combined, record
+ * it in dev_priv->ring, and finally program the hardware through
+ * i915_gem_ring_reset().
+ *
+ * Returns 0 on success or a negative errno.  When pinning or mapping fails
+ * the reference on the ring object is dropped before returning.
+ */
+int
+i915_gem_init_ringbuffer(struct drm_device *dev)
+{
+	drm_i915_private_t *dev_priv = dev->dev_private;
+	drm_i915_ring_buffer_t *ring = &dev_priv->ring;
+	struct drm_i915_gem_object *ring_priv;
+	struct drm_gem_object *ring_obj;
+	int err;
+
+	err = i915_gem_init_hws(dev);
+	if (err)
+		return err;
+
+	ring_obj = drm_gem_object_alloc(dev, 128 * 1024);
+	if (!ring_obj) {
+		DRM_ERROR("Failed to allocate ringbuffer\n");
+		return -ENOMEM;
+	}
+	ring_priv = ring_obj->driver_private;
+
+	err = i915_gem_object_pin(ring_obj, 4096);
+	if (err)
+		goto drop_ref;
+
+	/* Set up the kernel mapping for the ring. */
+	ring->Size = ring_obj->size;
+	ring->tail_mask = ring_obj->size - 1;
+
+	ring->map.offset = dev->agp->base + ring_priv->gtt_offset;
+	ring->map.size = ring_obj->size;
+	ring->map.type = 0;
+	ring->map.flags = 0;
+	ring->map.mtrr = 0;
+
+	drm_core_ioremap_wc(&ring->map, dev);
+	if (!ring->map.handle) {
+		DRM_ERROR("Failed to map ringbuffer.\n");
+		/* Wipe the partially filled-in ring state before bailing. */
+		memset(ring, 0, sizeof(*ring));
+		err = -EINVAL;
+		goto drop_ref;
+	}
+
+	ring->ring_obj = ring_obj;
+	ring->virtual_start = ring->map.handle;
+
+	return i915_gem_ring_reset(dev);
+
+drop_ref:
+	drm_gem_object_unreference(ring_obj);
+	return err;
+}
+
void
i915_gem_cleanup_ringbuffer(struct drm_device *dev)
{
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 0e6c0ec..4b11290 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -175,89 +175,6 @@ u32 i915_get_vblank_counter(struct drm_device *dev, int pipe)
return count;
}
-static struct {
- u32 bit;
- char *name;
-} pgtbl_er_bits[] = {
- {I915_PGTBL_CURSORA_INVALID_GTT_PTE, "cursor A PTE"},
- {I915_PGTBL_CURSORB_INVALID_GTT_PTE, "cursor B PTE"},
- {I915_PGTBL_MT_INVALID_GTT_PTE, "sampler PTE"},
- {I915_PGTBL_LC_INVALID_GTT_PTE, "render cache PTE"},
- {I915_PGTBL_ISC_INVALID_GTT_PTE, "instruction/state cache PTE"},
- {I915_PGTBL_CS_VERTEX_DATA_INVALID_GTT_PTE, "vertex data PTE"},
- {I915_PGTBL_CS_INSTRUCTION_INVALID_GTT_PTE, "command data PTE"},
- {I915_PGTBL_CS_INVALID_GTT_PTE, "command streamer PTE"},
- {I915_PGTBL_OVERLAY_INVALID_TILING, "overlay tiling"},
- {I915_PGTBL_OVERLAY_INVALID_GTT_PTE, "overlay PTE"},
- {I915_PGTBL_DISPC_INVALID_TILING, "display C tiling"},
- {I915_PGTBL_DISPC_INVALID_GTT_PTE, "display C PTE"},
- {I915_PGTBL_DISPB_INVALID_TILING, "display B tiling"},
- {I915_PGTBL_DISPB_INVALID_GTT_PTE, "display B PTE"},
- {I915_PGTBL_DISPA_INVALID_TILING, "display A tiling"},
- {I915_PGTBL_DISPA_INVALID_GTT_PTE, "display A PTE"},
- {I915_PGTBL_HOST_INVALID_PTE_DATA, "host PTE memory location"},
- {I915_PGTBL_HOST_INVALID_GTT_PTE, "host PTE"},
-};
-
-static void
-i915_report_and_clear_errors(struct drm_device *dev)
-{
- drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
- u16 eir;
-
- eir = I915_READ16(EIR);
-
- if (eir & I915_ERROR_INSTRUCTION) {
- u32 ipehr;
-
- DRM_ERROR("Encountered instruction error. Clearing.\n");
- if (IS_I965G(dev))
- ipehr = I915_READ(IPEHR_I965);
- else
- ipehr = I915_READ(IPEHR);
- DRM_INFO("Instruction header: 0x%08x\n", ipehr);
- }
-
- if (eir & I915_ERROR_MAIN_MEMORY_REFRESH)
- DRM_ERROR("Encountered main memory refresh error. "
- "Clearing.\n");
-
- if (eir & I915_ERROR_DISPLAY_UNDERRUN) {
- DRM_ERROR("Encountered display underrun error. Clearing.\n");
- /* XXX clear it in the pipestat instead of suppressing it*/
- I915_WRITE16(EMR, I915_READ(EMR) | I915_ERROR_DISPLAY_UNDERRUN);
- }
-
- if (eir & I915_ERROR_PAGE_TABLE) {
- u32 pgtbl_er = I915_READ(PGTBL_ER);
- int i;
-
- DRM_ERROR("Encountered page table error.\n");
- DRM_INFO("Can't be cleared without device reset, disabling.\n");
- I915_WRITE16(EMR, I915_READ(EMR) | I915_ERROR_PAGE_TABLE);
-
- for (i = 0; i < ARRAY_SIZE(pgtbl_er_bits); i++) {
- if (pgtbl_er & pgtbl_er_bits[i].bit) {
- DRM_INFO("PGTBL_ER: invalid %s\n",
- pgtbl_er_bits[i].name);
- pgtbl_er &= ~pgtbl_er_bits[i].bit;
- }
- }
- if (pgtbl_er != 0) {
- DRM_INFO("PGTBL_ER: unknown bits: 0x%04x\n",
- pgtbl_er);
- }
- }
-
- if (eir & I915_ERROR_MAX_VERTEX)
- DRM_ERROR("Encountered max vertex error. Clearing.\n");
-
- I915_WRITE(EIR, eir);
- eir = I915_READ16(EIR); /* posting read */
- if (eir != 0)
- DRM_INFO("Potential un-cleared error bits: 0x%04x\n", eir);
-}
-
irqreturn_t i915_driver_irq_handler(DRM_IRQ_ARGS)
{
struct drm_device *dev = (struct drm_device *) arg;
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 477ea04..a5970ac 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -48,6 +48,12 @@
/* PCI config space */
+#define GDRST 0xc0 /* PCI config byte used to reset the graphics device */
+#define GDRST_ENABLE (1 << 0) /* written to start a reset; polled until it reads back 0 */
+#define GDRST_FULL (0 << 2) /* presumably selects a full device reset -- TODO confirm */
+#define GDRST_RENDER (1 << 2) /* written by i915_reset_render_and_ring() */
+#define GDRST_MEDIA (2 << 2) /* presumably selects the media engine -- TODO confirm */
+
#define HPLLCC 0xc0 /* 855 only */
#define GC_CLOCK_CONTROL_MASK (3 << 0)
#define GC_CLOCK_133_200 (0 << 0)
--
1.5.6.5
More information about the Intel-gfx
mailing list