[Intel-gfx] [PATCH 2/3] drm/i915/tdr: Prepare error handler to accept mask of hung engines
Mika Kuoppala
mika.kuoppala at linux.intel.com
Tue Mar 22 12:04:56 UTC 2016
Arun Siluvery <arun.siluvery at linux.intel.com> writes:
> [ text/plain ]
> In preparation for engine reset, the wedged argument of i915_handle_error()
> is extended to be a mask of the engines that are hung. This mask is further
> passed down to the error state capture functions, which are also updated.
>
> The engine reset recovery mechanism uses this mask and schedules recovery
> work for those particular engines.
>
> Cc: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Mika Kuoppala <mika.kuoppala at intel.com>
> Signed-off-by: Tomas Elf <tomas.elf at intel.com>
> Signed-off-by: Arun Siluvery <arun.siluvery at linux.intel.com>
> ---
> drivers/gpu/drm/i915/i915_drv.h | 4 ++--
> drivers/gpu/drm/i915/i915_gpu_error.c | 8 ++++----
> drivers/gpu/drm/i915/i915_irq.c | 16 ++++++++--------
> 3 files changed, 14 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 549a232..49ac065 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2735,7 +2735,7 @@ bool intel_hpd_pin_to_port(enum hpd_pin pin, enum port *port);
> /* i915_irq.c */
> void i915_queue_hangcheck(struct drm_device *dev);
> __printf(3, 4)
> -void i915_handle_error(struct drm_device *dev, bool wedged,
> +void i915_handle_error(struct drm_device *dev, u32 engine_mask,
> const char *fmt, ...);
>
> extern void intel_irq_init(struct drm_i915_private *dev_priv);
> @@ -3321,7 +3321,7 @@ static inline void i915_error_state_buf_release(
> {
> kfree(eb->buf);
> }
> -void i915_capture_error_state(struct drm_device *dev, bool wedge,
> +void i915_capture_error_state(struct drm_device *dev, u32 engine_mask,
> const char *error_msg);
> void i915_error_state_get(struct drm_device *dev,
> struct i915_error_state_file_priv *error_priv);
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index db8600a..1f8ff06 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -1301,7 +1301,7 @@ static void i915_capture_reg_state(struct drm_i915_private *dev_priv,
>
> static void i915_error_capture_msg(struct drm_device *dev,
> struct drm_i915_error_state *error,
> - bool wedged,
> + u32 engine_mask,
> const char *error_msg)
> {
> struct drm_i915_private *dev_priv = dev->dev_private;
> @@ -1324,7 +1324,7 @@ static void i915_error_capture_msg(struct drm_device *dev,
> scnprintf(error->error_msg + len, sizeof(error->error_msg) - len,
> ", reason: %s, action: %s",
> error_msg,
> - wedged ? "reset" : "continue");
> + engine_mask ? "reset" : "continue");
> }
>
> static void i915_capture_gen_state(struct drm_i915_private *dev_priv,
> @@ -1347,7 +1347,7 @@ static void i915_capture_gen_state(struct drm_i915_private *dev_priv,
> * out a structure which becomes available in debugfs for user level tools
> * to pick up.
> */
> -void i915_capture_error_state(struct drm_device *dev, bool wedged,
> +void i915_capture_error_state(struct drm_device *dev, u32 engine_mask,
> const char *error_msg)
> {
> static bool warned;
> @@ -1375,7 +1375,7 @@ void i915_capture_error_state(struct drm_device *dev, bool wedged,
> error->overlay = intel_overlay_capture_error_state(dev);
> error->display = intel_display_capture_error_state(dev);
>
> - i915_error_capture_msg(dev, error, wedged, error_msg);
> + i915_error_capture_msg(dev, error, engine_mask, error_msg);
> DRM_INFO("%s\n", error->error_msg);
>
> spin_lock_irqsave(&dev_priv->gpu_error.lock, flags);
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 8f3e330..a55a7cc 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -2653,14 +2653,14 @@ static void i915_report_and_clear_eir(struct drm_device *dev)
> /**
> * i915_handle_error - handle a gpu error
> * @dev: drm device
> - *
> + * @engine_mask: mask representing engines that are hung
> * Do some basic checking of register state at error time and
> * dump it to the syslog. Also call i915_capture_error_state() to make
> * sure we get a record and make it available in debugfs. Fire a uevent
> * so userspace knows something bad happened (should trigger collection
> * of a ring dump etc.).
> */
> -void i915_handle_error(struct drm_device *dev, bool wedged,
> +void i915_handle_error(struct drm_device *dev, u32 engine_mask,
> const char *fmt, ...)
> {
> struct drm_i915_private *dev_priv = dev->dev_private;
> @@ -2671,10 +2671,10 @@ void i915_handle_error(struct drm_device *dev, bool wedged,
> vscnprintf(error_msg, sizeof(error_msg), fmt, args);
> va_end(args);
>
> - i915_capture_error_state(dev, wedged, error_msg);
> + i915_capture_error_state(dev, engine_mask, error_msg);
> i915_report_and_clear_eir(dev);
>
> - if (wedged) {
> + if (engine_mask) {
> atomic_or(I915_RESET_IN_PROGRESS_FLAG,
> &dev_priv->gpu_error.reset_counter);
>
> @@ -3033,7 +3033,7 @@ ring_stuck(struct intel_engine_cs *engine, u64 acthd)
> */
> tmp = I915_READ_CTL(engine);
> if (tmp & RING_WAIT) {
> - i915_handle_error(dev, false,
> + i915_handle_error(dev, 0,
> "Kicking stuck wait on %s",
> engine->name);
> I915_WRITE_CTL(engine, tmp);
> @@ -3045,7 +3045,7 @@ ring_stuck(struct intel_engine_cs *engine, u64 acthd)
> default:
> return HANGCHECK_HUNG;
> case 1:
> - i915_handle_error(dev, false,
> + i915_handle_error(dev, 0,
> "Kicking stuck semaphore on %s",
> engine->name);
> I915_WRITE_CTL(engine, tmp);
> @@ -3189,12 +3189,12 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
> DRM_INFO("%s on %s\n",
> stuck[i] ? "stuck" : "no progress",
> engine->name);
> - rings_hung++;
> + rings_hung |= intel_engine_flag(engine);
We can change the int to u32 when we rename rings_hung to engines_hung.
Reviewed-by: Mika Kuoppala <mika.kuoppala at intel.com>
> }
> }
>
> if (rings_hung) {
> - i915_handle_error(dev, true, "Ring hung");
> + i915_handle_error(dev, rings_hung, "Engine(s) hung");
> goto out;
> }
>
> --
> 1.9.1
More information about the Intel-gfx mailing list