[PATCH 01/14] drm/i915: Add smp_rmb() to busy ioctl's RCU dance
Chris Wilson
chris at chris-wilson.co.uk
Sat Aug 6 13:33:55 UTC 2016
In the debate as to whether the second read of active->request is
ordered after the dependent reads of the first read of active->request,
just give in and throw a smp_rmb() in there so that ordering of loads is
assured.
v2: Explain the manual smp_rmb()
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Daniel Vetter <daniel.vetter at ffwll.ch>
Reviewed-by: Daniel Vetter <daniel.vetter at ffwll.ch>
---
drivers/gpu/drm/i915/Kconfig | 1 +
drivers/gpu/drm/i915/i915_debugfs.c | 21 +--
drivers/gpu/drm/i915/i915_drv.h | 13 +-
drivers/gpu/drm/i915/i915_gem.c | 35 ++--
drivers/gpu/drm/i915/i915_gem_execbuffer.c | 10 +-
drivers/gpu/drm/i915/i915_gem_request.c | 49 ++++--
drivers/gpu/drm/i915/i915_gem_request.h | 22 ++-
drivers/gpu/drm/i915/i915_gpu_error.c | 256 +++++++++++++----------------
drivers/gpu/drm/i915/i915_guc_submission.c | 124 +++++++-------
drivers/gpu/drm/i915/i915_irq.c | 26 +--
drivers/gpu/drm/i915/intel_breadcrumbs.c | 71 +++++---
drivers/gpu/drm/i915/intel_engine_cs.c | 1 +
drivers/gpu/drm/i915/intel_guc.h | 9 +-
drivers/gpu/drm/i915/intel_guc_loader.c | 7 +-
drivers/gpu/drm/i915/intel_ringbuffer.c | 7 +-
drivers/gpu/drm/i915/intel_ringbuffer.h | 27 +--
16 files changed, 349 insertions(+), 330 deletions(-)
diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
index 7769e469118f..7badcee88ebf 100644
--- a/drivers/gpu/drm/i915/Kconfig
+++ b/drivers/gpu/drm/i915/Kconfig
@@ -4,6 +4,7 @@ config DRM_I915
depends on X86 && PCI
select INTEL_GTT
select INTERVAL_TREE
+ select STOP_MACHINE
# we need shmfs for the swappable backing store, and in particular
# the shmem_readpage() which depends upon tmpfs
select SHMEM
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 9bd41581b592..40173d5b1d5d 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -787,8 +787,6 @@ static void i915_ring_seqno_info(struct seq_file *m,
seq_printf(m, "Current sequence (%s): %x\n",
engine->name, intel_engine_get_seqno(engine));
- seq_printf(m, "Current user interrupts (%s): %lx\n",
- engine->name, READ_ONCE(engine->breadcrumbs.irq_wakeups));
spin_lock(&b->lock);
for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) {
@@ -1434,11 +1432,10 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
engine->hangcheck.seqno,
seqno[id],
engine->last_submitted_seqno);
- seq_printf(m, "\twaiters? %d\n",
- intel_engine_has_waiter(engine));
- seq_printf(m, "\tuser interrupts = %lx [current %lx]\n",
- engine->hangcheck.user_interrupts,
- READ_ONCE(engine->breadcrumbs.irq_wakeups));
+ seq_printf(m, "\twaiters? %s, fake irq active? %s\n",
+ yesno(intel_engine_has_waiter(engine)),
+ yesno(test_bit(engine->id,
+ &dev_priv->gpu_error.missed_irq_rings)));
seq_printf(m, "\tACTHD = 0x%08llx [current 0x%08llx]\n",
(long long)engine->hangcheck.acthd,
(long long)acthd[id]);
@@ -2625,15 +2622,15 @@ static int i915_guc_log_dump(struct seq_file *m, void *data)
struct drm_info_node *node = m->private;
struct drm_device *dev = node->minor->dev;
struct drm_i915_private *dev_priv = to_i915(dev);
- struct drm_i915_gem_object *log_obj = dev_priv->guc.log_obj;
- u32 *log;
+ struct drm_i915_gem_object *obj;
int i = 0, pg;
- if (!log_obj)
+ if (!dev_priv->guc.log)
return 0;
- for (pg = 0; pg < log_obj->base.size / PAGE_SIZE; pg++) {
- log = kmap_atomic(i915_gem_object_get_page(log_obj, pg));
+ obj = dev_priv->guc.log->obj;
+ for (pg = 0; pg < obj->base.size / PAGE_SIZE; pg++) {
+ u32 *log = kmap_atomic(i915_gem_object_get_page(obj, pg));
for (i = 0; i < PAGE_SIZE / sizeof(u32); i += 4)
seq_printf(m, "0x%08x 0x%08x 0x%08x 0x%08x\n",
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index feec00f769e1..826486d03e8e 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -481,6 +481,8 @@ struct drm_i915_error_state {
struct kref ref;
struct timeval time;
+ struct drm_i915_private *i915;
+
char error_msg[128];
bool simulated;
int iommu;
@@ -517,6 +519,7 @@ struct drm_i915_error_state {
int num_waiters;
int hangcheck_score;
enum intel_engine_hangcheck_action hangcheck_action;
+ struct i915_address_space *vm;
int num_requests;
/* our own tracking of ring head and tail */
@@ -586,17 +589,15 @@ struct drm_i915_error_state {
u32 read_domains;
u32 write_domain;
s32 fence_reg:I915_MAX_NUM_FENCE_BITS;
- s32 pinned:2;
u32 tiling:2;
u32 dirty:1;
u32 purgeable:1;
u32 userptr:1;
s32 engine:4;
u32 cache_level:3;
- } **active_bo, **pinned_bo;
-
- u32 *active_bo_count, *pinned_bo_count;
- u32 vm_count;
+ } *active_bo[I915_NUM_ENGINES], *pinned_bo;
+ u32 active_bo_count[I915_NUM_ENGINES], pinned_bo_count;
+ struct i915_address_space *active_vm[I915_NUM_ENGINES];
};
struct intel_connector;
@@ -3848,7 +3849,7 @@ static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req)
* is woken.
*/
if (engine->irq_seqno_barrier &&
- READ_ONCE(engine->breadcrumbs.irq_seqno_bh) == current &&
+ rcu_access_pointer(engine->breadcrumbs.irq_seqno_bh) == current &&
cmpxchg_relaxed(&engine->breadcrumbs.irq_posted, 1, 0)) {
struct task_struct *tsk;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index f4f8eaa90f2a..cf94c6ed0ff5 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2526,7 +2526,6 @@ i915_gem_idle_work_handler(struct work_struct *work)
container_of(work, typeof(*dev_priv), gt.idle_work.work);
struct drm_device *dev = &dev_priv->drm;
struct intel_engine_cs *engine;
- unsigned int stuck_engines;
bool rearm_hangcheck;
if (!READ_ONCE(dev_priv->gt.awake))
@@ -2556,15 +2555,6 @@ i915_gem_idle_work_handler(struct work_struct *work)
dev_priv->gt.awake = false;
rearm_hangcheck = false;
- /* As we have disabled hangcheck, we need to unstick any waiters still
- * hanging around. However, as we may be racing against the interrupt
- * handler or the waiters themselves, we skip enabling the fake-irq.
- */
- stuck_engines = intel_kick_waiters(dev_priv);
- if (unlikely(stuck_engines))
- DRM_DEBUG_DRIVER("kicked stuck waiters (%x)...missed irq?\n",
- stuck_engines);
-
if (INTEL_GEN(dev_priv) >= 6)
gen6_rps_idle(dev_priv);
intel_runtime_pm_put(dev_priv);
@@ -3735,7 +3725,7 @@ i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj,
i915_vma_unpin(i915_gem_obj_to_ggtt_view(obj, view));
}
-static __always_inline unsigned __busy_read_flag(unsigned int id)
+static __always_inline unsigned int __busy_read_flag(unsigned int id)
{
/* Note that we could alias engines in the execbuf API, but
* that would be very unwise as it prevents userspace from
@@ -3753,7 +3743,7 @@ static __always_inline unsigned int __busy_write_id(unsigned int id)
return id;
}
-static __always_inline unsigned
+static __always_inline unsigned int
__busy_set_if_active(const struct i915_gem_active *active,
unsigned int (*flag)(unsigned int id))
{
@@ -3770,19 +3760,34 @@ __busy_set_if_active(const struct i915_gem_active *active,
id = request->engine->exec_id;
- /* Check that the pointer wasn't reassigned and overwritten. */
+ /* Check that the pointer wasn't reassigned and overwritten.
+ *
+ * In __i915_gem_active_get_rcu(), we enforce ordering between
+ * the first rcu pointer dereference (imposing a
+ * read-dependency only on access through the pointer) and
+ * the second lockless access through the memory barrier
+ * following a successful atomic_inc_not_zero(). Here there
+ * is no such barrier, and so we must manually insert an
+ * explicit read barrier to ensure that the following
+ * access occurs after all the loads through the first
+ * pointer.
+ *
+ * The corresponding write barrier is part of
+ * rcu_assign_pointer().
+ */
+ smp_rmb();
if (request == rcu_access_pointer(active->request))
return flag(id);
} while (1);
}
-static inline unsigned
+static __always_inline unsigned int
busy_check_reader(const struct i915_gem_active *active)
{
return __busy_set_if_active(active, __busy_read_flag);
}
-static inline unsigned
+static __always_inline unsigned int
busy_check_writer(const struct i915_gem_active *active)
{
return __busy_set_if_active(active, __busy_write_id);
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index c494b79ded20..c8d13fea4b25 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1702,6 +1702,14 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
goto err_batch_unpin;
}
+ /* Whilst this request exists, batch_obj will be on the
+ * active_list, and so will hold the active reference. Only when this
+ * request is retired will the the batch_obj be moved onto the
+ * inactive_list and lose its active reference. Hence we do not need
+ * to explicitly hold another reference here.
+ */
+ params->request->batch_obj = params->batch->obj;
+
ret = i915_gem_request_add_to_client(params->request, file);
if (ret)
goto err_request;
@@ -1720,7 +1728,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
ret = execbuf_submit(params, args, &eb->vmas);
err_request:
- __i915_add_request(params->request, params->batch->obj, ret == 0);
+ __i915_add_request(params->request, ret == 0);
err_batch_unpin:
/*
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 6a1661643d3d..c6f523e2879c 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -355,7 +355,35 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
if (req && i915_gem_request_completed(req))
i915_gem_request_retire(req);
- req = kmem_cache_zalloc(dev_priv->requests, GFP_KERNEL);
+ /* Beware: Dragons be flying overhead.
+ *
+ * We use RCU to look up requests in flight. The lookups may
+ * race with the request being allocated from the slab freelist.
+ * That is the request we are writing to here, may be in the process
+ * of being read by __i915_gem_active_get_request_rcu(). As such,
+ * we have to be very careful when overwriting the contents. During
+ * the RCU lookup, we change chase the request->engine pointer,
+ * read the request->fence.seqno and increment the reference count.
+ *
+ * The reference count is incremented atomically. If it is zero,
+ * the lookup knows the request is unallocated and complete. Otherwise,
+ * it is either still in use, or has been reallocated and reset
+ * with fence_init(). This increment is safe for release as we check
+ * that the request we have a reference to and matches the active
+ * request.
+ *
+ * Before we increment the refcount, we chase the request->engine
+ * pointer. We must not call kmem_cache_zalloc() or else we set
+ * that pointer to NULL and cause a crash during the lookup. If
+ * we see the request is completed (based on the value of the
+ * old engine and seqno), the lookup is complete and reports NULL.
+ * If we decide the request is not completed (new engine or seqno),
+ * then we grab a reference and double check that it is still the
+ * active request - which it won't be and restart the lookup.
+ *
+ * Do not use kmem_cache_zalloc() here!
+ */
+ req = kmem_cache_alloc(dev_priv->requests, GFP_KERNEL);
if (!req)
return ERR_PTR(-ENOMEM);
@@ -375,6 +403,13 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
req->engine = engine;
req->ctx = i915_gem_context_get(ctx);
+ /* No zalloc, must clear what we need by hand */
+ req->signaling.wait.tsk = NULL;
+ req->previous_context = NULL;
+ req->file_priv = NULL;
+ req->batch_obj = NULL;
+ req->elsp_submitted = 0;
+
/*
* Reserve space in the ring buffer for all the commands required to
* eventually emit this request. This is to guarantee that the
@@ -426,9 +461,7 @@ static void i915_gem_mark_busy(const struct intel_engine_cs *engine)
* request is not being tracked for completion but the work itself is
* going to happen on the hardware. This would be a Bad Thing(tm).
*/
-void __i915_add_request(struct drm_i915_gem_request *request,
- struct drm_i915_gem_object *obj,
- bool flush_caches)
+void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
{
struct intel_engine_cs *engine;
struct intel_ring *ring;
@@ -469,14 +502,6 @@ void __i915_add_request(struct drm_i915_gem_request *request,
request->head = request_start;
- /* Whilst this request exists, batch_obj will be on the
- * active_list, and so will hold the active reference. Only when this
- * request is retired will the the batch_obj be moved onto the
- * inactive_list and lose its active reference. Hence we do not need
- * to explicitly hold another reference here.
- */
- request->batch_obj = obj;
-
/* Seal the request and mark it as pending execution. Note that
* we may inspect this state, without holding any locks, during
* hangcheck. Hence we apply the barrier to ensure that we do not
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index 3496e28785e7..d5176f9cc22f 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -51,6 +51,13 @@ struct intel_signal_node {
* emission time to be associated with the request for tracking how far ahead
* of the GPU the submission is.
*
+ * When modifying this structure be very aware that we perform a lockless
+ * RCU lookup of it that may race against reallocation of the struct
+ * from the slab freelist. We intentionally do not zero the structure on
+ * allocation so that the lookup can use the dangling pointers (and is
+ * cogniscent that those pointers may be wrong). Instead, everything that
+ * needs to be initialised must be done so explicitly.
+ *
* The requests are reference counted.
*/
struct drm_i915_gem_request {
@@ -218,13 +225,11 @@ static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst,
*pdst = src;
}
-void __i915_add_request(struct drm_i915_gem_request *req,
- struct drm_i915_gem_object *batch_obj,
- bool flush_caches);
+void __i915_add_request(struct drm_i915_gem_request *req, bool flush_caches);
#define i915_add_request(req) \
- __i915_add_request(req, NULL, true)
+ __i915_add_request(req, true)
#define i915_add_request_no_flush(req) \
- __i915_add_request(req, NULL, false)
+ __i915_add_request(req, false)
struct intel_rps_client;
#define NO_WAITBOOST ERR_PTR(-1)
@@ -465,6 +470,10 @@ __i915_gem_active_get_rcu(const struct i915_gem_active *active)
* just report the active tracker is idle. If the new request is
* incomplete, then we acquire a reference on it and check that
* it remained the active request.
+ *
+ * It is then imperative that we do not zero the request on
+ * reallocation, so that we can chase the dangling pointers!
+ * See i915_gem_request_alloc().
*/
do {
struct drm_i915_gem_request *request;
@@ -497,6 +506,9 @@ __i915_gem_active_get_rcu(const struct i915_gem_active *active)
* incremented) then the following read for rcu_access_pointer()
* must occur after the atomic operation and so confirm
* that this request is the one currently being tracked.
+ *
+ * The corresponding write barrier is part of
+ * rcu_assign_pointer().
*/
if (!request || request == rcu_access_pointer(active->request))
return rcu_pointer_handoff(request);
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index eecb87063c88..ef8be7e05cbd 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -28,6 +28,7 @@
*/
#include <generated/utsrelease.h>
+#include <linux/stop_machine.h>
#include "i915_drv.h"
static const char *engine_str(int engine)
@@ -42,16 +43,6 @@ static const char *engine_str(int engine)
}
}
-static const char *pin_flag(int pinned)
-{
- if (pinned > 0)
- return " P";
- else if (pinned < 0)
- return " p";
- else
- return "";
-}
-
static const char *tiling_flag(int tiling)
{
switch (tiling) {
@@ -189,7 +180,7 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m,
{
int i;
- err_printf(m, " %s [%d]:\n", name, count);
+ err_printf(m, "%s [%d]:\n", name, count);
while (count--) {
err_printf(m, " %08x_%08x %8u %02x %02x [ ",
@@ -202,7 +193,6 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m,
err_printf(m, "%02x ", err->rseqno[i]);
err_printf(m, "] %02x", err->wseqno);
- err_puts(m, pin_flag(err->pinned));
err_puts(m, tiling_flag(err->tiling));
err_puts(m, dirty_flag(err->dirty));
err_puts(m, purgeable_flag(err->purgeable));
@@ -414,18 +404,25 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
error_print_engine(m, &error->engine[i]);
}
- for (i = 0; i < error->vm_count; i++) {
- err_printf(m, "vm[%d]\n", i);
+ for (i = 0; i < I915_NUM_ENGINES; i++) {
+ if (!error->active_vm[i])
+ break;
- print_error_buffers(m, "Active",
+ err_printf(m, "Active vm[%d]\n", i);
+ for (j = 0; j < I915_NUM_ENGINES; j++) {
+ if (error->engine[j].vm == error->active_vm[i])
+ err_printf(m, " %s\n",
+ dev_priv->engine[j].name);
+ }
+ print_error_buffers(m, " Buffers",
error->active_bo[i],
error->active_bo_count[i]);
-
- print_error_buffers(m, "Pinned",
- error->pinned_bo[i],
- error->pinned_bo_count[i]);
}
+ print_error_buffers(m, "Pinned (global)",
+ error->pinned_bo,
+ error->pinned_bo_count);
+
for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
struct drm_i915_error_engine *ee = &error->engine[i];
@@ -626,13 +623,10 @@ static void i915_error_state_free(struct kref *error_ref)
i915_error_object_free(error->semaphore_obj);
- for (i = 0; i < error->vm_count; i++)
+ for (i = 0; i < ARRAY_SIZE(error->active_bo); i++)
kfree(error->active_bo[i]);
-
- kfree(error->active_bo);
- kfree(error->active_bo_count);
kfree(error->pinned_bo);
- kfree(error->pinned_bo_count);
+
kfree(error->overlay);
kfree(error->display);
kfree(error);
@@ -691,14 +685,12 @@ i915_error_object_create(struct drm_i915_private *dev_priv,
dst->page_count = num_pages;
while (num_pages--) {
- unsigned long flags;
void *d;
d = kmalloc(PAGE_SIZE, GFP_ATOMIC);
if (d == NULL)
goto unwind;
- local_irq_save(flags);
if (use_ggtt) {
void __iomem *s;
@@ -717,15 +709,10 @@ i915_error_object_create(struct drm_i915_private *dev_priv,
page = i915_gem_object_get_page(src, i);
- drm_clflush_pages(&page, 1);
-
s = kmap_atomic(page);
memcpy(d, s, PAGE_SIZE);
kunmap_atomic(s);
-
- drm_clflush_pages(&page, 1);
}
- local_irq_restore(flags);
dst->pages[i++] = d;
reloc_offset += PAGE_SIZE;
@@ -778,9 +765,6 @@ static void capture_bo(struct drm_i915_error_buffer *err,
err->read_domains = obj->base.read_domains;
err->write_domain = obj->base.write_domain;
err->fence_reg = obj->fence_reg;
- err->pinned = 0;
- if (i915_gem_obj_is_pinned(obj))
- err->pinned = 1;
err->tiling = i915_gem_object_get_tiling(obj);
err->dirty = obj->dirty;
err->purgeable = obj->madv != I915_MADV_WILLNEED;
@@ -788,13 +772,17 @@ static void capture_bo(struct drm_i915_error_buffer *err,
err->cache_level = obj->cache_level;
}
-static u32 capture_active_bo(struct drm_i915_error_buffer *err,
- int count, struct list_head *head)
+static u32 capture_error_bo(struct drm_i915_error_buffer *err,
+ int count, struct list_head *head,
+ bool pinned_only)
{
struct i915_vma *vma;
int i = 0;
list_for_each_entry(vma, head, vm_link) {
+ if (pinned_only && !i915_vma_is_pinned(vma))
+ continue;
+
capture_bo(err++, vma);
if (++i == count)
break;
@@ -803,28 +791,6 @@ static u32 capture_active_bo(struct drm_i915_error_buffer *err,
return i;
}
-static u32 capture_pinned_bo(struct drm_i915_error_buffer *err,
- int count, struct list_head *head,
- struct i915_address_space *vm)
-{
- struct drm_i915_gem_object *obj;
- struct drm_i915_error_buffer * const first = err;
- struct drm_i915_error_buffer * const last = err + count;
-
- list_for_each_entry(obj, head, global_list) {
- struct i915_vma *vma;
-
- if (err == last)
- break;
-
- list_for_each_entry(vma, &obj->vma_list, obj_link)
- if (vma->vm == vm && i915_vma_is_pinned(vma))
- capture_bo(err++, vma);
- }
-
- return err - first;
-}
-
/* Generate a semi-unique error code. The code is not meant to have meaning, The
* code's only purpose is to try to prevent false duplicated bug reports by
* grossly estimating a GPU error state.
@@ -1062,24 +1028,21 @@ static void error_record_engine_registers(struct drm_i915_error_state *error,
}
}
-
static void i915_gem_record_active_context(struct intel_engine_cs *engine,
struct drm_i915_error_state *error,
struct drm_i915_error_engine *ee)
{
struct drm_i915_private *dev_priv = engine->i915;
- struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
/* Currently render ring is the only HW context user */
if (engine->id != RCS || !error->ccid)
return;
- list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
- if (!i915_gem_obj_ggtt_bound(obj))
- continue;
-
- if ((error->ccid & PAGE_MASK) == i915_gem_obj_ggtt_offset(obj)) {
- ee->ctx = i915_error_ggtt_object_create(dev_priv, obj);
+ list_for_each_entry(vma, &dev_priv->ggtt.base.active_list, vm_link) {
+ if ((error->ccid & PAGE_MASK) == vma->node.start) {
+ ee->ctx = i915_error_object_create(dev_priv,
+ vma->obj, vma->vm);
break;
}
}
@@ -1115,10 +1078,9 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
request = i915_gem_find_active_request(engine);
if (request) {
- struct i915_address_space *vm;
struct intel_ring *ring;
- vm = request->ctx->ppgtt ?
+ ee->vm = request->ctx->ppgtt ?
&request->ctx->ppgtt->base : &ggtt->base;
/* We need to copy these to an anonymous buffer
@@ -1128,7 +1090,7 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
ee->batchbuffer =
i915_error_object_create(dev_priv,
request->batch_obj,
- vm);
+ ee->vm);
if (HAS_BROKEN_CS_TLB(dev_priv))
ee->wa_batchbuffer =
@@ -1210,89 +1172,85 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
}
}
-/* FIXME: Since pin count/bound list is global, we duplicate what we capture per
- * VM.
- */
static void i915_gem_capture_vm(struct drm_i915_private *dev_priv,
struct drm_i915_error_state *error,
struct i915_address_space *vm,
const int ndx)
{
- struct drm_i915_error_buffer *active_bo = NULL, *pinned_bo = NULL;
- struct drm_i915_gem_object *obj;
+ struct drm_i915_error_buffer *active_bo;
struct i915_vma *vma;
int i;
i = 0;
list_for_each_entry(vma, &vm->active_list, vm_link)
i++;
- error->active_bo_count[ndx] = i;
- list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
- list_for_each_entry(vma, &obj->vma_list, obj_link)
- if (vma->vm == vm && i915_vma_is_pinned(vma))
- i++;
- }
- error->pinned_bo_count[ndx] = i - error->active_bo_count[ndx];
-
- if (i) {
+ active_bo = NULL;
+ if (i)
active_bo = kcalloc(i, sizeof(*active_bo), GFP_ATOMIC);
- if (active_bo)
- pinned_bo = active_bo + error->active_bo_count[ndx];
- }
-
if (active_bo)
- error->active_bo_count[ndx] =
- capture_active_bo(active_bo,
- error->active_bo_count[ndx],
- &vm->active_list);
-
- if (pinned_bo)
- error->pinned_bo_count[ndx] =
- capture_pinned_bo(pinned_bo,
- error->pinned_bo_count[ndx],
- &dev_priv->mm.bound_list, vm);
+ i = capture_error_bo(active_bo, i, &vm->active_list, false);
+ else
+ i = 0;
+
error->active_bo[ndx] = active_bo;
- error->pinned_bo[ndx] = pinned_bo;
+ error->active_bo_count[ndx] = i;
+ error->active_vm[ndx] = vm;
}
-static void i915_gem_capture_buffers(struct drm_i915_private *dev_priv,
- struct drm_i915_error_state *error)
+static void i915_capture_active_buffers(struct drm_i915_private *dev_priv,
+ struct drm_i915_error_state *error)
{
- struct i915_address_space *vm;
- int cnt = 0, i = 0;
-
- list_for_each_entry(vm, &dev_priv->vm_list, global_link)
- cnt++;
-
- error->active_bo = kcalloc(cnt, sizeof(*error->active_bo), GFP_ATOMIC);
- error->pinned_bo = kcalloc(cnt, sizeof(*error->pinned_bo), GFP_ATOMIC);
- error->active_bo_count = kcalloc(cnt, sizeof(*error->active_bo_count),
- GFP_ATOMIC);
- error->pinned_bo_count = kcalloc(cnt, sizeof(*error->pinned_bo_count),
- GFP_ATOMIC);
-
- if (error->active_bo == NULL ||
- error->pinned_bo == NULL ||
- error->active_bo_count == NULL ||
- error->pinned_bo_count == NULL) {
- kfree(error->active_bo);
- kfree(error->active_bo_count);
- kfree(error->pinned_bo);
- kfree(error->pinned_bo_count);
-
- error->active_bo = NULL;
- error->active_bo_count = NULL;
- error->pinned_bo = NULL;
- error->pinned_bo_count = NULL;
- } else {
- list_for_each_entry(vm, &dev_priv->vm_list, global_link)
- i915_gem_capture_vm(dev_priv, error, vm, i++);
+ int cnt = 0, i, j;
+
+ BUILD_BUG_ON(ARRAY_SIZE(error->engine) > ARRAY_SIZE(error->active_bo));
+ BUILD_BUG_ON(ARRAY_SIZE(error->active_bo) != ARRAY_SIZE(error->active_vm));
+ BUILD_BUG_ON(ARRAY_SIZE(error->active_bo) != ARRAY_SIZE(error->active_bo_count));
+
+ for (i = 0; i < I915_NUM_ENGINES; i++) {
+ struct drm_i915_error_engine *ee = &error->engine[i];
+
+ if (!ee->vm)
+ continue;
+
+ for (j = 0; j < i; j++)
+ if (error->engine[j].vm == ee->vm)
+ break;
+ if (j != i)
+ continue;
- error->vm_count = cnt;
+ i915_gem_capture_vm(dev_priv, error, ee->vm, cnt++);
}
}
+static void i915_capture_pinned_buffers(struct drm_i915_private *dev_priv,
+ struct drm_i915_error_state *error)
+{
+ struct i915_address_space *vm = &dev_priv->ggtt.base;
+ struct drm_i915_error_buffer *bo;
+ struct i915_vma *vma;
+ int i, j;
+
+ i = 0;
+ list_for_each_entry(vma, &vm->active_list, vm_link)
+ i++;
+
+ j = 0;
+ list_for_each_entry(vma, &vm->inactive_list, vm_link)
+ j++;
+
+ bo = NULL;
+ if (i + j)
+ bo = kcalloc(i + j, sizeof(*bo), GFP_ATOMIC);
+ if (!bo)
+ return;
+
+ i = capture_error_bo(bo, i, &vm->active_list, true);
+ j = capture_error_bo(bo + i, j, &vm->inactive_list, true);
+ error->pinned_bo_count = i + j;
+ error->pinned_bo = bo;
+}
+
/* Capture all registers which don't fit into another category. */
static void i915_capture_reg_state(struct drm_i915_private *dev_priv,
struct drm_i915_error_state *error)
@@ -1405,6 +1363,32 @@ static void i915_capture_gen_state(struct drm_i915_private *dev_priv,
error->suspend_count = dev_priv->suspend_count;
}
+static int capture(void *data)
+{
+ struct drm_i915_error_state *error = data;
+
+ /* Ensure that what we readback from memory matches what the GPU sees */
+ wbinvd();
+
+ i915_capture_gen_state(error->i915, error);
+ i915_capture_reg_state(error->i915, error);
+ i915_gem_record_fences(error->i915, error);
+ i915_gem_record_rings(error->i915, error);
+
+ i915_capture_active_buffers(error->i915, error);
+ i915_capture_pinned_buffers(error->i915, error);
+
+ do_gettimeofday(&error->time);
+
+ error->overlay = intel_overlay_capture_error_state(error->i915);
+ error->display = intel_display_capture_error_state(error->i915);
+
+ /* And make sure we don't leave trash in the CPU cache */
+ wbinvd();
+
+ return 0;
+}
+
/**
* i915_capture_error_state - capture an error record for later analysis
* @dev: drm device
@@ -1433,17 +1417,9 @@ void i915_capture_error_state(struct drm_i915_private *dev_priv,
}
kref_init(&error->ref);
+ error->i915 = dev_priv;
- i915_capture_gen_state(dev_priv, error);
- i915_capture_reg_state(dev_priv, error);
- i915_gem_capture_buffers(dev_priv, error);
- i915_gem_record_fences(dev_priv, error);
- i915_gem_record_rings(dev_priv, error);
-
- do_gettimeofday(&error->time);
-
- error->overlay = intel_overlay_capture_error_state(dev_priv);
- error->display = intel_display_capture_error_state(dev_priv);
+ stop_machine(capture, error, NULL);
i915_error_capture_msg(dev_priv, error, engine_mask, error_msg);
DRM_INFO("%s\n", error->error_msg);
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 03a5cef353eb..b29e85880995 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -183,7 +183,7 @@ static int guc_update_doorbell_id(struct intel_guc *guc,
struct i915_guc_client *client,
u16 new_id)
{
- struct sg_table *sg = guc->ctx_pool_obj->pages;
+ struct sg_table *sg = guc->ctx_pool->obj->pages;
void *doorbell_bitmap = guc->doorbell_bitmap;
struct guc_doorbell_info *doorbell;
struct guc_context_desc desc;
@@ -325,8 +325,8 @@ static void guc_init_proc_desc(struct intel_guc *guc,
static void guc_init_ctx_desc(struct intel_guc *guc,
struct i915_guc_client *client)
{
- struct drm_i915_gem_object *client_obj = client->client_obj;
struct drm_i915_private *dev_priv = guc_to_i915(guc);
+ struct drm_i915_gem_object *client_obj = client->client->obj;
struct intel_engine_cs *engine;
struct i915_gem_context *ctx = client->owner;
struct guc_context_desc desc;
@@ -380,7 +380,7 @@ static void guc_init_ctx_desc(struct intel_guc *guc,
* The doorbell, process descriptor, and workqueue are all parts
* of the client object, which the GuC will reference via the GGTT
*/
- gfx_addr = i915_gem_obj_ggtt_offset(client_obj);
+ gfx_addr = client->client->node.start;
desc.db_trigger_phy = sg_dma_address(client_obj->pages->sgl) +
client->doorbell_offset;
desc.db_trigger_cpu = (uintptr_t)client->client_base +
@@ -397,7 +397,7 @@ static void guc_init_ctx_desc(struct intel_guc *guc,
desc.desc_private = (uintptr_t)client;
/* Pool context is pinned already */
- sg = guc->ctx_pool_obj->pages;
+ sg = guc->ctx_pool->obj->pages;
sg_pcopy_from_buffer(sg->sgl, sg->nents, &desc, sizeof(desc),
sizeof(desc) * client->ctx_index);
}
@@ -410,7 +410,7 @@ static void guc_fini_ctx_desc(struct intel_guc *guc,
memset(&desc, 0, sizeof(desc));
- sg = guc->ctx_pool_obj->pages;
+ sg = guc->ctx_pool->obj->pages;
sg_pcopy_from_buffer(sg->sgl, sg->nents, &desc, sizeof(desc),
sizeof(desc) * client->ctx_index);
}
@@ -492,7 +492,7 @@ static void guc_add_workqueue_item(struct i915_guc_client *gc,
/* WQ starts from the page after doorbell / process_desc */
wq_page = (wq_off + GUC_DB_SIZE) >> PAGE_SHIFT;
wq_off &= PAGE_SIZE - 1;
- base = kmap_atomic(i915_gem_object_get_page(gc->client_obj, wq_page));
+ base = kmap_atomic(i915_gem_object_get_page(gc->client->obj, wq_page));
wqi = (struct guc_wq_item *)((char *)base + wq_off);
/* Now fill in the 4-word work queue item */
@@ -611,8 +611,8 @@ static void i915_guc_submit(struct drm_i915_gem_request *rq)
*/
/**
- * gem_allocate_guc_obj() - Allocate gem object for GuC usage
- * @dev_priv: driver private data structure
+ * guc_allocate_vma() - Allocate a GGTT VMA for GuC usage
+ * @guc: the guc
* @size: size of object
*
* This is a wrapper to create a gem obj. In order to use it inside GuC, the
@@ -621,45 +621,40 @@ static void i915_guc_submit(struct drm_i915_gem_request *rq)
*
* Return: A drm_i915_gem_object if successful, otherwise NULL.
*/
-static struct drm_i915_gem_object *
-gem_allocate_guc_obj(struct drm_i915_private *dev_priv, u32 size)
+static struct i915_vma *guc_allocate_vma(struct intel_guc *guc, u32 size)
{
+ struct drm_i915_private *dev_priv = guc_to_i915(guc);
struct drm_i915_gem_object *obj;
+ int ret;
obj = i915_gem_object_create(&dev_priv->drm, size);
if (IS_ERR(obj))
- return NULL;
-
- if (i915_gem_object_get_pages(obj)) {
- i915_gem_object_put(obj);
- return NULL;
- }
+ return ERR_CAST(obj);
- if (i915_gem_object_ggtt_pin(obj, NULL, 0, PAGE_SIZE,
- PIN_OFFSET_BIAS | GUC_WOPCM_TOP)) {
+ ret = i915_gem_object_ggtt_pin(obj, NULL, 0, PAGE_SIZE,
+ PIN_OFFSET_BIAS | GUC_WOPCM_TOP);
+ if (ret) {
i915_gem_object_put(obj);
- return NULL;
+ return ERR_PTR(ret);
}
/* Invalidate GuC TLB to let GuC take the latest updates to GTT. */
I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE);
- return obj;
+ return i915_gem_obj_to_ggtt(obj);
}
/**
- * gem_release_guc_obj() - Release gem object allocated for GuC usage
- * @obj: gem obj to be released
+ * guc_release_vma() - Release gem object allocated for GuC usage
+ * @vma: gem obj to be released
*/
-static void gem_release_guc_obj(struct drm_i915_gem_object *obj)
+static void guc_release_vma(struct i915_vma *vma)
{
- if (!obj)
+ if (!vma)
return;
- if (i915_gem_obj_is_pinned(obj))
- i915_gem_object_ggtt_unpin(obj);
-
- i915_gem_object_put(obj);
+ i915_vma_unpin(vma);
+ i915_gem_object_put(vma->obj);
}
static void
@@ -686,7 +681,7 @@ guc_client_free(struct drm_i915_private *dev_priv,
kunmap(kmap_to_page(client->client_base));
}
- gem_release_guc_obj(client->client_obj);
+ guc_release_vma(client->client);
if (client->ctx_index != GUC_INVALID_CTX_ID) {
guc_fini_ctx_desc(guc, client);
@@ -757,7 +752,7 @@ guc_client_alloc(struct drm_i915_private *dev_priv,
{
struct i915_guc_client *client;
struct intel_guc *guc = &dev_priv->guc;
- struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
uint16_t db_id;
client = kzalloc(sizeof(*client), GFP_KERNEL);
@@ -777,13 +772,13 @@ guc_client_alloc(struct drm_i915_private *dev_priv,
}
/* The first page is doorbell/proc_desc. Two followed pages are wq. */
- obj = gem_allocate_guc_obj(dev_priv, GUC_DB_SIZE + GUC_WQ_SIZE);
- if (!obj)
+ vma = guc_allocate_vma(guc, GUC_DB_SIZE + GUC_WQ_SIZE);
+ if (IS_ERR(vma))
goto err;
/* We'll keep just the first (doorbell/proc) page permanently kmap'd. */
- client->client_obj = obj;
- client->client_base = kmap(i915_gem_object_get_page(obj, 0));
+ client->client = vma;
+ client->client_base = kmap(i915_gem_object_get_page(vma->obj, 0));
client->wq_offset = GUC_DB_SIZE;
client->wq_size = GUC_WQ_SIZE;
@@ -825,8 +820,7 @@ err:
static void guc_create_log(struct intel_guc *guc)
{
- struct drm_i915_private *dev_priv = guc_to_i915(guc);
- struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
unsigned long offset;
uint32_t size, flags;
@@ -842,16 +836,16 @@ static void guc_create_log(struct intel_guc *guc)
GUC_LOG_ISR_PAGES + 1 +
GUC_LOG_CRASH_PAGES + 1) << PAGE_SHIFT;
- obj = guc->log_obj;
- if (!obj) {
- obj = gem_allocate_guc_obj(dev_priv, size);
- if (!obj) {
+ vma = guc->log;
+ if (!vma) {
+ vma = guc_allocate_vma(guc, size);
+ if (IS_ERR(vma)) {
/* logging will be off */
i915.guc_log_level = -1;
return;
}
- guc->log_obj = obj;
+ guc->log = vma;
}
/* each allocated unit is a page */
@@ -860,7 +854,7 @@ static void guc_create_log(struct intel_guc *guc)
(GUC_LOG_ISR_PAGES << GUC_LOG_ISR_SHIFT) |
(GUC_LOG_CRASH_PAGES << GUC_LOG_CRASH_SHIFT);
- offset = i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT; /* in pages */
+ offset = vma->node.start >> PAGE_SHIFT; /* in pages */
guc->log_flags = (offset << GUC_LOG_BUF_ADDR_SHIFT) | flags;
}
@@ -889,7 +883,7 @@ static void init_guc_policies(struct guc_policies *policies)
static void guc_create_ads(struct intel_guc *guc)
{
struct drm_i915_private *dev_priv = guc_to_i915(guc);
- struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
struct guc_ads *ads;
struct guc_policies *policies;
struct guc_mmio_reg_state *reg_state;
@@ -902,16 +896,16 @@ static void guc_create_ads(struct intel_guc *guc)
sizeof(struct guc_mmio_reg_state) +
GUC_S3_SAVE_SPACE_PAGES * PAGE_SIZE;
- obj = guc->ads_obj;
- if (!obj) {
- obj = gem_allocate_guc_obj(dev_priv, PAGE_ALIGN(size));
- if (!obj)
+ vma = guc->ads;
+ if (!vma) {
+ vma = guc_allocate_vma(guc, PAGE_ALIGN(size));
+ if (IS_ERR(vma))
return;
- guc->ads_obj = obj;
+ guc->ads = vma;
}
- page = i915_gem_object_get_page(obj, 0);
+ page = i915_gem_object_get_page(vma->obj, 0);
ads = kmap(page);
/*
@@ -931,8 +925,7 @@ static void guc_create_ads(struct intel_guc *guc)
policies = (void *)ads + sizeof(struct guc_ads);
init_guc_policies(policies);
- ads->scheduler_policies = i915_gem_obj_ggtt_offset(obj) +
- sizeof(struct guc_ads);
+ ads->scheduler_policies = vma->node.start + sizeof(struct guc_ads);
/* MMIO reg state */
reg_state = (void *)policies + sizeof(struct guc_policies);
@@ -960,10 +953,9 @@ static void guc_create_ads(struct intel_guc *guc)
*/
int i915_guc_submission_init(struct drm_i915_private *dev_priv)
{
- const size_t ctxsize = sizeof(struct guc_context_desc);
- const size_t poolsize = GUC_MAX_GPU_CONTEXTS * ctxsize;
- const size_t gemsize = round_up(poolsize, PAGE_SIZE);
struct intel_guc *guc = &dev_priv->guc;
+ struct i915_vma *vma;
+ u32 size;
/* Wipe bitmap & delete client in case of reinitialisation */
bitmap_clear(guc->doorbell_bitmap, 0, GUC_MAX_DOORBELLS);
@@ -972,13 +964,15 @@ int i915_guc_submission_init(struct drm_i915_private *dev_priv)
if (!i915.enable_guc_submission)
return 0; /* not enabled */
- if (guc->ctx_pool_obj)
+ if (guc->ctx_pool)
return 0; /* already allocated */
- guc->ctx_pool_obj = gem_allocate_guc_obj(dev_priv, gemsize);
- if (!guc->ctx_pool_obj)
- return -ENOMEM;
+ size = PAGE_ALIGN(GUC_MAX_GPU_CONTEXTS*sizeof(struct guc_context_desc));
+ vma = guc_allocate_vma(guc, size);
+ if (IS_ERR(vma))
+ return PTR_ERR(vma);
+ guc->ctx_pool = vma;
ida_init(&guc->ctx_ids);
guc_create_log(guc);
guc_create_ads(guc);
@@ -1030,16 +1024,16 @@ void i915_guc_submission_fini(struct drm_i915_private *dev_priv)
{
struct intel_guc *guc = &dev_priv->guc;
- gem_release_guc_obj(dev_priv->guc.ads_obj);
- guc->ads_obj = NULL;
+ guc_release_vma(guc->ads);
+ guc->ads = NULL;
- gem_release_guc_obj(dev_priv->guc.log_obj);
- guc->log_obj = NULL;
+ guc_release_vma(guc->log);
+ guc->log = NULL;
- if (guc->ctx_pool_obj)
+ if (guc->ctx_pool)
ida_destroy(&guc->ctx_ids);
- gem_release_guc_obj(guc->ctx_pool_obj);
- guc->ctx_pool_obj = NULL;
+ guc_release_vma(guc->ctx_pool);
+ guc->ctx_pool = NULL;
}
/**
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 591f452ece68..ebb83d5a448b 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -972,10 +972,8 @@ static void ironlake_rps_change_irq_handler(struct drm_i915_private *dev_priv)
static void notify_ring(struct intel_engine_cs *engine)
{
smp_store_mb(engine->breadcrumbs.irq_posted, true);
- if (intel_engine_wakeup(engine)) {
+ if (intel_engine_wakeup(engine))
trace_i915_gem_request_notify(engine);
- engine->breadcrumbs.irq_wakeups++;
- }
}
static void vlv_c0_read(struct drm_i915_private *dev_priv,
@@ -3044,22 +3042,6 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd)
return HANGCHECK_HUNG;
}
-static unsigned long kick_waiters(struct intel_engine_cs *engine)
-{
- struct drm_i915_private *i915 = engine->i915;
- unsigned long irq_count = READ_ONCE(engine->breadcrumbs.irq_wakeups);
-
- if (engine->hangcheck.user_interrupts == irq_count &&
- !test_and_set_bit(engine->id, &i915->gpu_error.missed_irq_rings)) {
- if (!test_bit(engine->id, &i915->gpu_error.test_irq_rings))
- DRM_ERROR("Hangcheck timer elapsed... %s idle\n",
- engine->name);
-
- intel_engine_enable_fake_irq(engine);
- }
-
- return irq_count;
-}
/*
* This is called when the chip hasn't reported back with completed
* batchbuffers in a long time. We keep track per ring seqno progress and
@@ -3097,7 +3079,6 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
bool busy = intel_engine_has_waiter(engine);
u64 acthd;
u32 seqno;
- unsigned user_interrupts;
semaphore_clear_deadlocks(dev_priv);
@@ -3114,15 +3095,11 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
acthd = intel_engine_get_active_head(engine);
seqno = intel_engine_get_seqno(engine);
- /* Reset stuck interrupts between batch advances */
- user_interrupts = 0;
-
if (engine->hangcheck.seqno == seqno) {
if (!intel_engine_is_active(engine)) {
engine->hangcheck.action = HANGCHECK_IDLE;
if (busy) {
/* Safeguard against driver failure */
- user_interrupts = kick_waiters(engine);
engine->hangcheck.score += BUSY;
}
} else {
@@ -3185,7 +3162,6 @@ static void i915_hangcheck_elapsed(struct work_struct *work)
engine->hangcheck.seqno = seqno;
engine->hangcheck.acthd = acthd;
- engine->hangcheck.user_interrupts = user_interrupts;
busy_count += busy;
}
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index 90867446f1a5..7552bd039565 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -26,6 +26,29 @@
#include "i915_drv.h"
+static void intel_breadcrumbs_hangcheck(unsigned long data)
+{
+ struct intel_engine_cs *engine = (struct intel_engine_cs *)data;
+ struct intel_breadcrumbs *b = &engine->breadcrumbs;
+
+ if (!b->irq_enabled)
+ return;
+
+ if (time_before(jiffies, b->timeout)) {
+ mod_timer(&b->hangcheck, b->timeout);
+ return;
+ }
+
+ DRM_DEBUG("Hangcheck timer elapsed... %s idle\n", engine->name);
+ set_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings);
+ mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1);
+}
+
+static unsigned long wait_timeout(void)
+{
+ return round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES);
+}
+
static void intel_breadcrumbs_fake_irq(unsigned long data)
{
struct intel_engine_cs *engine = (struct intel_engine_cs *)data;
@@ -37,10 +60,8 @@ static void intel_breadcrumbs_fake_irq(unsigned long data)
* every jiffie in order to kick the oldest waiter to do the
* coherent seqno check.
*/
- rcu_read_lock();
if (intel_engine_wakeup(engine))
mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1);
- rcu_read_unlock();
}
static void irq_enable(struct intel_engine_cs *engine)
@@ -51,13 +72,6 @@ static void irq_enable(struct intel_engine_cs *engine)
*/
engine->breadcrumbs.irq_posted = true;
- /* Make sure the current hangcheck doesn't falsely accuse a just
- * started irq handler from missing an interrupt (because the
- * interrupt count still matches the stale value from when
- * the irq handler was disabled, many hangchecks ago).
- */
- engine->breadcrumbs.irq_wakeups++;
-
spin_lock_irq(&engine->i915->irq_lock);
engine->irq_enable(engine);
spin_unlock_irq(&engine->i915->irq_lock);
@@ -98,8 +112,13 @@ static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b)
}
if (!b->irq_enabled ||
- test_bit(engine->id, &i915->gpu_error.missed_irq_rings))
+ test_bit(engine->id, &i915->gpu_error.missed_irq_rings)) {
mod_timer(&b->fake_irq, jiffies + 1);
+ } else {
+ /* Ensure we never sleep indefinitely */
+ GEM_BUG_ON(!time_after(b->timeout, jiffies));
+ mod_timer(&b->hangcheck, b->timeout);
+ }
/* Ensure that even if the GPU hangs, we get woken up.
*
@@ -211,7 +230,7 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine,
}
rb_link_node(&wait->node, parent, p);
rb_insert_color(&wait->node, &b->waiters);
- GEM_BUG_ON(!first && !b->irq_seqno_bh);
+ GEM_BUG_ON(!first && !rcu_access_pointer(b->irq_seqno_bh));
if (completed) {
struct rb_node *next = rb_next(completed);
@@ -219,8 +238,9 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine,
GEM_BUG_ON(!next && !first);
if (next && next != &wait->node) {
GEM_BUG_ON(first);
+ b->timeout = wait_timeout();
b->first_wait = to_wait(next);
- smp_store_mb(b->irq_seqno_bh, b->first_wait->tsk);
+ rcu_assign_pointer(b->irq_seqno_bh, b->first_wait->tsk);
/* As there is a delay between reading the current
* seqno, processing the completed tasks and selecting
* the next waiter, we may have missed the interrupt
@@ -245,8 +265,9 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine,
if (first) {
GEM_BUG_ON(rb_first(&b->waiters) != &wait->node);
+ b->timeout = wait_timeout();
b->first_wait = wait;
- smp_store_mb(b->irq_seqno_bh, wait->tsk);
+ rcu_assign_pointer(b->irq_seqno_bh, wait->tsk);
/* After assigning ourselves as the new bottom-half, we must
* perform a cursory check to prevent a missed interrupt.
* Either we miss the interrupt whilst programming the hardware,
@@ -257,7 +278,7 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine,
*/
__intel_breadcrumbs_enable_irq(b);
}
- GEM_BUG_ON(!b->irq_seqno_bh);
+ GEM_BUG_ON(!rcu_access_pointer(b->irq_seqno_bh));
GEM_BUG_ON(!b->first_wait);
GEM_BUG_ON(rb_first(&b->waiters) != &b->first_wait->node);
@@ -277,11 +298,6 @@ bool intel_engine_add_wait(struct intel_engine_cs *engine,
return first;
}
-void intel_engine_enable_fake_irq(struct intel_engine_cs *engine)
-{
- mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1);
-}
-
static inline bool chain_wakeup(struct rb_node *rb, int priority)
{
return rb && to_wait(rb)->tsk->prio <= priority;
@@ -317,7 +333,7 @@ void intel_engine_remove_wait(struct intel_engine_cs *engine,
const int priority = wakeup_priority(b, wait->tsk);
struct rb_node *next;
- GEM_BUG_ON(b->irq_seqno_bh != wait->tsk);
+ GEM_BUG_ON(rcu_access_pointer(b->irq_seqno_bh) != wait->tsk);
/* We are the current bottom-half. Find the next candidate,
* the first waiter in the queue on the remaining oldest
@@ -359,14 +375,15 @@ void intel_engine_remove_wait(struct intel_engine_cs *engine,
* the interrupt, or if we have to handle an
* exception rather than a seqno completion.
*/
+ b->timeout = wait_timeout();
b->first_wait = to_wait(next);
- smp_store_mb(b->irq_seqno_bh, b->first_wait->tsk);
+ rcu_assign_pointer(b->irq_seqno_bh, b->first_wait->tsk);
if (b->first_wait->seqno != wait->seqno)
__intel_breadcrumbs_enable_irq(b);
- wake_up_process(b->irq_seqno_bh);
+ wake_up_process(b->first_wait->tsk);
} else {
b->first_wait = NULL;
- WRITE_ONCE(b->irq_seqno_bh, NULL);
+ rcu_assign_pointer(b->irq_seqno_bh, NULL);
__intel_breadcrumbs_disable_irq(b);
}
} else {
@@ -380,7 +397,7 @@ out_unlock:
GEM_BUG_ON(b->first_wait == wait);
GEM_BUG_ON(rb_first(&b->waiters) !=
(b->first_wait ? &b->first_wait->node : NULL));
- GEM_BUG_ON(!b->irq_seqno_bh ^ RB_EMPTY_ROOT(&b->waiters));
+ GEM_BUG_ON(!rcu_access_pointer(b->irq_seqno_bh) ^ RB_EMPTY_ROOT(&b->waiters));
spin_unlock(&b->lock);
}
@@ -533,6 +550,9 @@ int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
struct task_struct *tsk;
spin_lock_init(&b->lock);
+ setup_timer(&b->hangcheck,
+ intel_breadcrumbs_hangcheck,
+ (unsigned long)engine);
setup_timer(&b->fake_irq,
intel_breadcrumbs_fake_irq,
(unsigned long)engine);
@@ -561,6 +581,7 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)
kthread_stop(b->signaler);
del_timer_sync(&b->fake_irq);
+ del_timer_sync(&b->hangcheck);
}
unsigned int intel_kick_waiters(struct drm_i915_private *i915)
@@ -573,11 +594,9 @@ unsigned int intel_kick_waiters(struct drm_i915_private *i915)
* RCU lock, i.e. as we call wake_up_process() we must be holding the
* rcu_read_lock().
*/
- rcu_read_lock();
for_each_engine(engine, i915)
if (unlikely(intel_engine_wakeup(engine)))
mask |= intel_engine_flag(engine);
- rcu_read_unlock();
return mask;
}
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index e9b301ae2d0c..0dd3d1de18aa 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -164,6 +164,7 @@ cleanup:
void intel_engine_init_hangcheck(struct intel_engine_cs *engine)
{
memset(&engine->hangcheck, 0, sizeof(engine->hangcheck));
+ clear_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings);
}
static void intel_engine_init_requests(struct intel_engine_cs *engine)
diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h
index 623cf26cd784..a8da563cadb7 100644
--- a/drivers/gpu/drm/i915/intel_guc.h
+++ b/drivers/gpu/drm/i915/intel_guc.h
@@ -63,7 +63,7 @@ struct drm_i915_gem_request;
* retcode: errno from last guc_submit()
*/
struct i915_guc_client {
- struct drm_i915_gem_object *client_obj;
+ struct i915_vma *client;
void *client_base; /* first page (only) of above */
struct i915_gem_context *owner;
struct intel_guc *guc;
@@ -125,11 +125,10 @@ struct intel_guc_fw {
struct intel_guc {
struct intel_guc_fw guc_fw;
uint32_t log_flags;
- struct drm_i915_gem_object *log_obj;
+ struct i915_vma *log;
- struct drm_i915_gem_object *ads_obj;
-
- struct drm_i915_gem_object *ctx_pool_obj;
+ struct i915_vma *ads;
+ struct i915_vma *ctx_pool;
struct ida ctx_ids;
struct i915_guc_client *execbuf_client;
diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c
index 3763e30cc165..58ef4418a2ef 100644
--- a/drivers/gpu/drm/i915/intel_guc_loader.c
+++ b/drivers/gpu/drm/i915/intel_guc_loader.c
@@ -181,16 +181,15 @@ static void set_guc_init_params(struct drm_i915_private *dev_priv)
i915.guc_log_level << GUC_LOG_VERBOSITY_SHIFT;
}
- if (guc->ads_obj) {
- u32 ads = (u32)i915_gem_obj_ggtt_offset(guc->ads_obj)
- >> PAGE_SHIFT;
+ if (guc->ads) {
+ u32 ads = (u32)guc->ads->node.start >> PAGE_SHIFT;
params[GUC_CTL_DEBUG] |= ads << GUC_ADS_ADDR_SHIFT;
params[GUC_CTL_DEBUG] |= GUC_ADS_ENABLED;
}
/* If GuC submission is enabled, set up additional parameters here */
if (i915.enable_guc_submission) {
- u32 pgs = i915_gem_obj_ggtt_offset(dev_priv->guc.ctx_pool_obj);
+ u32 pgs = dev_priv->guc.ctx_pool->node.start;
u32 ctx_in_16 = GUC_MAX_GPU_CONTEXTS / 16;
pgs >>= PAGE_SHIFT;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index e08a1e1b04e4..09f01c641c14 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -2093,7 +2093,7 @@ static int intel_ring_context_pin(struct i915_gem_context *ctx,
if (ce->state) {
ret = i915_gem_object_ggtt_pin(ce->state, NULL, 0,
- ctx->ggtt_alignment, 0);
+ ctx->ggtt_alignment, PIN_HIGH);
if (ret)
goto error;
}
@@ -2410,9 +2410,7 @@ void intel_engine_init_seqno(struct intel_engine_cs *engine, u32 seqno)
/* After manually advancing the seqno, fake the interrupt in case
* there are any waiters for that seqno.
*/
- rcu_read_lock();
intel_engine_wakeup(engine);
- rcu_read_unlock();
}
static void gen6_bsd_submit_request(struct drm_i915_gem_request *request)
@@ -2631,7 +2629,8 @@ static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv,
i915.semaphores = 0;
} else {
i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
- ret = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0);
+ ret = i915_gem_object_ggtt_pin(obj, NULL,
+ 0, 0, PIN_HIGH);
if (ret != 0) {
i915_gem_object_put(obj);
DRM_ERROR("Failed to pin semaphore bo. Disabling semaphores\n");
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 43e545e44352..66dc93469076 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -75,7 +75,6 @@ enum intel_engine_hangcheck_action {
struct intel_engine_hangcheck {
u64 acthd;
- unsigned long user_interrupts;
u32 seqno;
int score;
enum intel_engine_hangcheck_action action;
@@ -172,8 +171,7 @@ struct intel_engine_cs {
* the overhead of waking that client is much preferred.
*/
struct intel_breadcrumbs {
- struct task_struct *irq_seqno_bh; /* bh for user interrupts */
- unsigned long irq_wakeups;
+ struct task_struct __rcu *irq_seqno_bh; /* bh for interrupts */
bool irq_posted;
spinlock_t lock; /* protects the lists of requests */
@@ -183,6 +181,9 @@ struct intel_engine_cs {
struct task_struct *signaler; /* used for fence signalling */
struct drm_i915_gem_request *first_signal;
struct timer_list fake_irq; /* used after a missed interrupt */
+ struct timer_list hangcheck; /* detect missed interrupts */
+
+ unsigned long timeout;
bool irq_enabled : 1;
bool rpm_wakelock : 1;
@@ -540,27 +541,33 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request);
static inline bool intel_engine_has_waiter(struct intel_engine_cs *engine)
{
- return READ_ONCE(engine->breadcrumbs.irq_seqno_bh);
+ return rcu_access_pointer(engine->breadcrumbs.irq_seqno_bh);
}
static inline bool intel_engine_wakeup(struct intel_engine_cs *engine)
{
bool wakeup = false;
- struct task_struct *tsk = READ_ONCE(engine->breadcrumbs.irq_seqno_bh);
+
/* Note that for this not to dangerously chase a dangling pointer,
- * the caller is responsible for ensure that the task remain valid for
- * wake_up_process() i.e. that the RCU grace period cannot expire.
+ * we must hold the rcu_read_lock here.
*
* Also note that tsk is likely to be in !TASK_RUNNING state so an
* early test for tsk->state != TASK_RUNNING before wake_up_process()
* is unlikely to be beneficial.
*/
- if (tsk)
- wakeup = wake_up_process(tsk);
+ if (rcu_access_pointer(engine->breadcrumbs.irq_seqno_bh)) {
+ struct task_struct *tsk;
+
+ rcu_read_lock();
+ tsk = rcu_dereference(engine->breadcrumbs.irq_seqno_bh);
+ if (tsk)
+ wakeup = wake_up_process(tsk);
+ rcu_read_unlock();
+ }
+
return wakeup;
}
-void intel_engine_enable_fake_irq(struct intel_engine_cs *engine);
void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
unsigned int intel_kick_waiters(struct drm_i915_private *i915);
unsigned int intel_kick_signalers(struct drm_i915_private *i915);
--
2.8.1
More information about the Intel-gfx-trybot
mailing list