[Intel-gfx] [PATCH 5/6] drm/i915: Implement GPU reset
Daniel J Blueman
daniel.blueman at gmail.com
Sun Sep 13 19:27:48 CEST 2009
A couple of things just caught my eye while looking through the patch,
perhaps to consider tweaking?
On Sun, Sep 13, 2009 at 3:51 PM, Ben Gamari <bgamari.foss at gmail.com> wrote:
> This patch puts in place the machinery to attempt to reset the GPU. This
> will be used when attempting to recover from a GPU hang.
>
> Signed-off-by: Jesse Barnes <jbarnes at virtuousgeek.org>
> Signed-off-by: Ben Gamari <bgamari.foss at gmail.com>
> ---
> drivers/gpu/drm/i915/i915_dma.c | 8 ++
> drivers/gpu/drm/i915/i915_drv.c | 141 +++++++++++++++++++++++++++++++++++++++
> drivers/gpu/drm/i915/i915_drv.h | 1 +
> drivers/gpu/drm/i915/i915_irq.c | 8 ++
> drivers/gpu/drm/i915/i915_reg.h | 4 +
> 5 files changed, 162 insertions(+), 0 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> index aabe41b..25f8e75 100644
> --- a/drivers/gpu/drm/i915/i915_dma.c
> +++ b/drivers/gpu/drm/i915/i915_dma.c
> @@ -1173,6 +1173,9 @@ static int i915_load_modeset_init(struct drm_device *dev,
> drm_mm_init(&dev_priv->vram, 0, prealloc_size);
> DRM_INFO("set up %ldM of stolen space\n", prealloc_size / (1024*1024));
>
> + /* We're off and running w/KMS */
> + dev_priv->mm.suspended = 0;
> +
> /* Let GEM Manage from end of prealloc space to end of aperture.
> *
> * However, leave one page at the end still bound to the scratch page.
> @@ -1184,7 +1187,9 @@ static int i915_load_modeset_init(struct drm_device *dev,
> */
> i915_gem_do_init(dev, prealloc_size, agp_size - 4096);
>
> + mutex_lock(&dev->struct_mutex);
> ret = i915_gem_init_ringbuffer(dev);
> + mutex_unlock(&dev->struct_mutex);
> if (ret)
> goto out;
>
> @@ -1433,6 +1438,9 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
> return ret;
> }
>
> + /* Start out suspended */
> + dev_priv->mm.suspended = 1;
> +
> if (drm_core_check_feature(dev, DRIVER_MODESET)) {
> ret = i915_load_modeset_init(dev, prealloc_start,
> prealloc_size, agp_size);
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index dbe568c..0c03a2a 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -127,6 +127,147 @@ static int i915_resume(struct drm_device *dev)
> return ret;
> }
>
> +/**
> + * i965_reset - reset chip after a hang
> + * @dev: drm device to reset
> + * @flags: reset domains
> + *
> + * Reset the chip. Useful if a hang is detected.
> + *
> + * Procedure is fairly simple:
> + * - reset the chip using the reset reg
> + * - re-init context state
> + * - re-init hardware status page
> + * - re-init ring buffer
> + * - re-init interrupt state
> + * - re-init display
> + */
> +void i965_reset(struct drm_device *dev, u8 flags)
> +{
> + drm_i915_private_t *dev_priv = dev->dev_private;
> + unsigned long timeout;
> + u8 gdrst;
> + bool need_display = true; //!(flags & (GDRST_RENDER | GDRST_MEDIA));
As the kernel's coding style isn't C99, is it worth using /* foo */
here to be compliant?
> +
> +#if defined(CONFIG_SMP)
> + timeout = jiffies + msecs_to_jiffies(500);
> + do {
> + udelay(100);
> + } while (mutex_is_locked(&dev->struct_mutex) && time_after(timeout, jiffies));
> +
> + if (mutex_is_locked(&dev->struct_mutex)) {
> +#if 1
> + DRM_ERROR("i915 struct_mutex lock is still held by %s. Giving on up reset.\n", dev->struct_mutex.owner->task->comm);
> + return;
> +#else
> + struct task_struct *task = dev->struct_mutex.owner->task;
> + DRM_ERROR("Killing process %d (%s) for holding i915 device mutex\n",
> + task->pid, task->comm);
> + force_sig(SIGILL, task);
Should be SIGKILL here?
> +#endif
> + }
> +#else
> + BUG_ON(mutex_is_locked(&dev->struct_mutex));
> +#endif
> +
> + debug_show_all_locks();
> + mutex_lock(&dev->struct_mutex);
> +
> + /*
> + * Clear request list
> + */
> + i915_gem_retire_requests(dev);
> +
> + if (need_display)
> + i915_save_display(dev);
> +
> + if (IS_I965G(dev) || IS_G4X(dev)) {
> + /*
> + * Set the domains we want to reset, then the reset bit (bit 0).
> + * Clear the reset bit after a while and wait for hardware status
> + * bit (bit 1) to be set
> + */
> + pci_read_config_byte(dev->pdev, GDRST, &gdrst);
> + //TODO: Set domains
Consider using kernel coding style perhaps?
> + pci_write_config_byte(dev->pdev, GDRST, gdrst | flags | ((flags == GDRST_FULL) ? 0x1 : 0x0));
> + udelay(50);
> + pci_write_config_byte(dev->pdev, GDRST, gdrst & 0xfe);
> +
> + /* ...we don't want to loop forever though, 500ms should be plenty */
> + timeout = jiffies + msecs_to_jiffies(500);
> + do {
> + udelay(100);
> + pci_read_config_byte(dev->pdev, GDRST, &gdrst);
> + } while ((gdrst & 0x1) && time_after(timeout, jiffies));
> +
> + if (gdrst & 0x1) {
> + DRM_ERROR("Failed to reset chip. You're screwed.");
Perhaps drop the second sentence, to keep things looking neat.
> + mutex_unlock(&dev->struct_mutex);
> + return;
> + }
> + } else {
> + DRM_ERROR("Error occurred. Don't know how to reset this chip.\n");
> + return;
> + }
> +
> + /* Ok, now get things going again... */
> +
> + /*
> + * Everything depends on having the GTT running, so we need to start
> + * there. Fortunately we don't need to do this unless we reset the
> + * chip at a PCI level.
> + *
> + * Next we need to restore the context, but we don't use those
> + * yet either...
> + *
> + * Ring buffer needs to be re-initialized in the KMS case, or if X
> + * was running at the time of the reset (i.e. we weren't VT
> + * switched away).
> + */
> + if (drm_core_check_feature(dev, DRIVER_MODESET) ||
> + !dev_priv->mm.suspended) {
> + drm_i915_ring_buffer_t *ring = &dev_priv->ring;
> + struct drm_gem_object *obj = ring->ring_obj;
> + struct drm_i915_gem_object *obj_priv = obj->driver_private;
> + dev_priv->mm.suspended = 0;
> +
> + /* Stop the ring if it's running. */
> + I915_WRITE(PRB0_CTL, 0);
> + I915_WRITE(PRB0_TAIL, 0);
> + I915_WRITE(PRB0_HEAD, 0);
> +
> + /* Initialize the ring. */
> + I915_WRITE(PRB0_START, obj_priv->gtt_offset);
> + I915_WRITE(PRB0_CTL,
> + ((obj->size - 4096) & RING_NR_PAGES) |
> + RING_NO_REPORT |
> + RING_VALID);
> + if (!drm_core_check_feature(dev, DRIVER_MODESET))
> + i915_kernel_lost_context(dev);
> + else {
> + ring->head = I915_READ(PRB0_HEAD) & HEAD_ADDR;
> + ring->tail = I915_READ(PRB0_TAIL) & TAIL_ADDR;
> + ring->space = ring->head - (ring->tail + 8);
> + if (ring->space < 0)
> + ring->space += ring->Size;
> + }
> +
> + mutex_unlock(&dev->struct_mutex);
> + drm_irq_uninstall(dev);
> + drm_irq_install(dev);
> + mutex_lock(&dev->struct_mutex);
> + }
> +
> + /*
> + * Display needs restore too...
> + */
> + if (need_display)
> + i915_restore_display(dev);
> +
> + mutex_unlock(&dev->struct_mutex);
> +}
> +
> +
> static int __devinit
> i915_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
> {
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index afbcaa9..8797777 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -624,6 +624,7 @@ extern long i915_compat_ioctl(struct file *filp, unsigned int cmd,
> extern int i915_emit_box(struct drm_device *dev,
> struct drm_clip_rect *boxes,
> int i, int DR1, int DR4);
> +extern void i965_reset(struct drm_device *dev, u8 flags);
>
> /* i915_irq.c */
> void i915_hangcheck_elapsed(unsigned long data);
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 126696a..dbfcf0a 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -482,6 +482,14 @@ static void i915_handle_error(struct drm_device *dev)
> I915_WRITE(IIR, I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT);
> }
>
> + if (dev_priv->mm.wedged) {
> + /*
> + * Wakeup waiting processes so they don't hang
> + */
> + printk("i915: Waking up sleeping processes\n");
> + DRM_WAKEUP(&dev_priv->irq_queue);
> + }
> +
> queue_work(dev_priv->wq, &dev_priv->error_work);
> }
>
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index f7c2de8..99981a0 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -85,6 +85,10 @@
> #define I915_GC_RENDER_CLOCK_200_MHZ (1 << 0)
> #define I915_GC_RENDER_CLOCK_333_MHZ (4 << 0)
> #define LBB 0xf4
> +#define GDRST 0xc0
> +#define GDRST_FULL (0<<2)
> +#define GDRST_RENDER (1<<2)
> +#define GDRST_MEDIA (3<<2)
>
> /* VGA stuff */
>
--
Daniel J Blueman
More information about the Intel-gfx
mailing list