[Intel-gfx] [PATCH] drm/i915: Try to stop rings before reset

Daniel Vetter daniel.vetter at ffwll.ch
Tue Apr 29 18:07:13 CEST 2014


On Tue, Apr 29, 2014 at 4:38 PM, Daniel Vetter <daniel.vetter at ffwll.ch> wrote:
> This seems to make the hard machine hangs provoked by running
> gem_reset_stats tests a lot less likely. At least on my snb here.
>
> v2: Don't DRM_ERROR when a reset is in progress in the stop_ring
> function.
>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=74100
> Cc: Mika Kuoppala <mika.kuoppala at linux.intel.com>
> Signed-off-by: Daniel Vetter <daniel.vetter at ffwll.ch>

A bit of a terse commit message ...

The theory that led me to this patch is that these hard hangs only
started to show up with the endless batch chaining that
gem_reset_stats does. All previous gpu reset testcases we have
essentially reset the gpu while it is completely idle. I figured that
trying to idle it first (by stopping the CS) can't hurt, and indeed it
seems to help a lot.
-Daniel

> ---
>  drivers/gpu/drm/i915/intel_ringbuffer.c | 13 ++++++++-----
>  drivers/gpu/drm/i915/intel_ringbuffer.h |  1 +
>  drivers/gpu/drm/i915/intel_uncore.c     |  7 +++++++
>  3 files changed, 16 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index ab22d70733bf..e8ca1dcb6a45 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -448,14 +448,17 @@ static void ring_setup_phys_status_page(struct intel_ring_buffer *ring)
>         I915_WRITE(HWS_PGA, addr);
>  }
>
> -static bool stop_ring(struct intel_ring_buffer *ring)
> +bool __intel_stop_ring_buffer(struct intel_ring_buffer *ring)
>  {
>         struct drm_i915_private *dev_priv = to_i915(ring->dev);
>
>         if (!IS_GEN2(ring->dev)) {
>                 I915_WRITE_MODE(ring, _MASKED_BIT_ENABLE(STOP_RING));
>                 if (wait_for_atomic((I915_READ_MODE(ring) & MODE_IDLE) != 0, 1000)) {
> -                       DRM_ERROR("%s :timed out trying to stop ring\n", ring->name);
> +                       if (!i915_reset_in_progress(&dev_priv->gpu_error)) {
> +                               DRM_ERROR("%s :timed out trying to stop ring\n",
> +                                         ring->name);
> +                       }
>                         return false;
>                 }
>         }
> @@ -481,7 +484,7 @@ static int init_ring_common(struct intel_ring_buffer *ring)
>
>         gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL);
>
> -       if (!stop_ring(ring)) {
> +       if (!__intel_stop_ring_buffer(ring)) {
>                 /* G45 ring initialization often fails to reset head to zero */
>                 DRM_DEBUG_KMS("%s head not reset to zero "
>                               "ctl %08x head %08x tail %08x start %08x\n",
> @@ -491,7 +494,7 @@ static int init_ring_common(struct intel_ring_buffer *ring)
>                               I915_READ_TAIL(ring),
>                               I915_READ_START(ring));
>
> -               if (!stop_ring(ring)) {
> +               if (!__intel_stop_ring_buffer(ring)) {
>                         DRM_ERROR("failed to set %s head to zero "
>                                   "ctl %08x head %08x tail %08x start %08x\n",
>                                   ring->name,
> @@ -2338,5 +2341,5 @@ intel_stop_ring_buffer(struct intel_ring_buffer *ring)
>                 DRM_ERROR("failed to quiesce %s whilst cleaning up: %d\n",
>                           ring->name, ret);
>
> -       stop_ring(ring);
> +       __intel_stop_ring_buffer(ring);
>  }
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 13e398f17fb2..f3c25bd5c68d 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -266,6 +266,7 @@ intel_write_status_page(struct intel_ring_buffer *ring,
>  #define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
>
>  void intel_stop_ring_buffer(struct intel_ring_buffer *ring);
> +bool __intel_stop_ring_buffer(struct intel_ring_buffer *ring);
>  void intel_cleanup_ring_buffer(struct intel_ring_buffer *ring);
>
>  int __must_check intel_ring_begin(struct intel_ring_buffer *ring, int n);
> diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
> index 76dc185793ce..18d41d04cbd8 100644
> --- a/drivers/gpu/drm/i915/intel_uncore.c
> +++ b/drivers/gpu/drm/i915/intel_uncore.c
> @@ -1034,6 +1034,13 @@ static int gen6_do_reset(struct drm_device *dev)
>
>  int intel_gpu_reset(struct drm_device *dev)
>  {
> +       struct drm_i915_private *dev_priv = dev->dev_private;
> +       struct intel_ring_buffer *ring;
> +       int i;
> +
> +       for_each_ring(ring, dev_priv, i)
> +               __intel_stop_ring_buffer(ring);
> +
>         switch (INTEL_INFO(dev)->gen) {
>         case 8:
>         case 7:
> --
> 1.8.1.4
>



-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch



More information about the Intel-gfx mailing list