[PATCH 3/7] watchdog
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Thu Jan 14 10:44:07 UTC 2021
From: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
---
drivers/gpu/drm/i915/gt/intel_context_types.h | 4 ++
.../drm/i915/gt/intel_execlists_submission.h | 2 +
drivers/gpu/drm/i915/gt/intel_gt.c | 3 ++
drivers/gpu/drm/i915/gt/intel_gt.h | 2 +
drivers/gpu/drm/i915/gt/intel_gt_requests.c | 21 ++++++++
drivers/gpu/drm/i915/gt/intel_gt_types.h | 7 +++
drivers/gpu/drm/i915/i915_request.c | 52 +++++++++++++++++++
drivers/gpu/drm/i915/i915_request.h | 8 +++
8 files changed, 99 insertions(+)
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index e10d78601bbd..b457d6c49325 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -97,6 +97,10 @@ struct intel_context {
#define CONTEXT_FORCE_SINGLE_SUBMISSION 7
#define CONTEXT_NOPREEMPT 8
+ struct {
+ u64 timeout_us;
+ } watchdog;
+
u32 *lrc_reg_state;
union {
struct {
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.h b/drivers/gpu/drm/i915/gt/intel_execlists_submission.h
index a8fd7adefd82..fd61dae820e9 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.h
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.h
@@ -6,6 +6,7 @@
#ifndef __INTEL_EXECLISTS_SUBMISSION_H__
#define __INTEL_EXECLISTS_SUBMISSION_H__
+#include <linux/llist.h>
#include <linux/types.h>
struct drm_printer;
@@ -13,6 +14,7 @@ struct drm_printer;
struct i915_request;
struct intel_context;
struct intel_engine_cs;
+struct intel_gt;
enum {
INTEL_CONTEXT_SCHEDULE_IN = 0,
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index d8e1ab412634..ff63034cff9c 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -29,6 +29,9 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
INIT_LIST_HEAD(&gt->closed_vma);
spin_lock_init(&gt->closed_lock);
+ init_llist_head(&gt->watchdog.list);
+ INIT_WORK(&gt->watchdog.work, intel_gt_watchdog_work);
+
intel_gt_init_buffer_pool(gt);
intel_gt_init_reset(gt);
intel_gt_init_requests(gt);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h
index 9157c7411f60..35d3bb13372f 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt.h
@@ -77,4 +77,6 @@ static inline bool intel_gt_is_wedged(const struct intel_gt *gt)
void intel_gt_info_print(const struct intel_gt_info *info,
struct drm_printer *p);
+void intel_gt_watchdog_work(struct work_struct *work);
+
#endif /* __INTEL_GT_H__ */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c
index dc06c78c9eeb..71991b1eec4a 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c
@@ -9,6 +9,7 @@
#include "i915_drv.h" /* for_each_engine() */
#include "i915_request.h"
#include "intel_engine_heartbeat.h"
+#include "intel_execlists_submission.h"
#include "intel_gt.h"
#include "intel_gt_pm.h"
#include "intel_gt_requests.h"
@@ -243,4 +244,24 @@ void intel_gt_fini_requests(struct intel_gt *gt)
{
/* Wait until the work is marked as finished before unloading! */
cancel_delayed_work_sync(&gt->requests.retire_work);
+
+ flush_work(&gt->watchdog.work);
+}
+
+void intel_gt_watchdog_work(struct work_struct *work) /* cancels requests queued by expired watchdog timers */
+{
+ struct intel_gt *gt =
+ container_of(work, typeof(*gt), watchdog.work);
+ struct i915_request *rq, *rn;
+ struct llist_node *first;
+
+ first = llist_del_all(&gt->watchdog.list); /* claim every queued request at once */
+ if (!first)
+ return;
+
+ llist_for_each_entry_safe(rq, rn, first, watchdog.link) {
+ if (!i915_request_completed(rq)) /* may have completed since the timer fired */
+ i915_request_cancel(rq, -EINTR);
+ i915_request_put(rq); /* release the reference handed over by the timer */
+ }
}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index a83d3e18254d..b17fa14603f2 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -8,10 +8,12 @@
#include <linux/ktime.h>
#include <linux/list.h>
+#include <linux/llist.h>
#include <linux/mutex.h>
#include <linux/notifier.h>
#include <linux/spinlock.h>
#include <linux/types.h>
+#include <linux/workqueue.h>
#include "uc/intel_uc.h"
@@ -56,6 +58,11 @@ struct intel_gt {
struct delayed_work retire_work;
} requests;
+ struct {
+ struct llist_head list;
+ struct work_struct work;
+ } watchdog;
+
struct intel_wakeref wakeref;
atomic_t user_wakeref;
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 7660a0a4602a..49aca80a8ebc 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -277,6 +277,53 @@ static void remove_from_engine(struct i915_request *rq)
__notify_execute_cb_imm(rq);
}
+static void __rq_init_watchdog(struct i915_request *rq)
+{
+ rq->watchdog.timer.function = NULL; /* NULL == never armed; tested by __rq_cancel_watchdog() */
+}
+
+static enum hrtimer_restart __rq_watchdog_expired(struct hrtimer *hrtimer)
+{
+ struct i915_request *rq =
+ container_of(hrtimer, struct i915_request, watchdog.timer);
+ struct intel_gt *gt = rq->engine->gt;
+
+ if (!i915_request_completed(rq)) { /* still running: queue it for intel_gt_watchdog_work() */
+ if (llist_add(&rq->watchdog.link, &gt->watchdog.list)) /* true only if the list was empty */
+ schedule_work(&gt->watchdog.work);
+ } else {
+ i915_request_put(rq); /* already done: drop the timer's reference here */
+ }
+
+ return HRTIMER_NORESTART;
+}
+
+static void __rq_arm_watchdog(struct i915_request *rq)
+{
+ struct i915_request_watchdog *wdg = &rq->watchdog;
+ struct intel_context *ce = rq->context;
+
+ if (!ce->watchdog.timeout_us)
+ return;
+
+ i915_request_get(rq); /* the timer owns this reference; take it before the timer can fire */
+ hrtimer_init(&wdg->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ wdg->timer.function = __rq_watchdog_expired;
+ hrtimer_start_range_ns(&wdg->timer,
+ ns_to_ktime(ce->watchdog.timeout_us *
+ NSEC_PER_USEC),
+ NSEC_PER_MSEC, /* slack: expiry may be up to 1ms late, never early */
+ HRTIMER_MODE_REL);
+}
+
+static void __rq_cancel_watchdog(struct i915_request *rq)
+{
+ struct i915_request_watchdog *wdg = &rq->watchdog;
+
+ if (wdg->timer.function && hrtimer_try_to_cancel(&wdg->timer) > 0) /* armed and still pending */
+ i915_request_put(rq); /* callback will not run, so drop the reference taken when arming */
+}
+
bool i915_request_retire(struct i915_request *rq)
{
if (!i915_request_completed(rq))
@@ -288,6 +335,8 @@ bool i915_request_retire(struct i915_request *rq)
trace_i915_request_retire(rq);
i915_request_mark_complete(rq);
+ __rq_cancel_watchdog(rq);
+
/*
* We know the GPU must have read the request to have
* sent us the seqno + interrupt, so use the position
@@ -747,6 +796,8 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
if (unlikely(fence->error))
i915_request_set_error_once(request, fence->error);
+ else
+ __rq_arm_watchdog(request);
/*
* We need to serialize use of the submit_request() callback
@@ -934,6 +985,7 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
/* No zalloc, everything must be cleared after use */
rq->batch = NULL;
+ __rq_init_watchdog(rq);
GEM_BUG_ON(rq->capture_list);
GEM_BUG_ON(!llist_empty(&rq->execute_cb));
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 92f07c8cf0b8..3c7521badddc 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -26,7 +26,9 @@
#define I915_REQUEST_H
#include <linux/dma-fence.h>
+#include <linux/hrtimer.h>
#include <linux/irq_work.h>
+#include <linux/llist.h>
#include <linux/lockdep.h>
#include "gem/i915_gem_context_types.h"
@@ -287,6 +289,12 @@ struct i915_request {
/** timeline->request entry for this request */
struct list_head link;
+ /** Watchdog support fields. */
+ struct i915_request_watchdog {
+ struct llist_node link;
+ struct hrtimer timer;
+ } watchdog;
+
I915_SELFTEST_DECLARE(struct {
struct list_head link;
unsigned long delay;
--
2.27.0
More information about the Intel-gfx-trybot
mailing list