[PATCH 05/13] drm/radeon: rework gpu lockup detection and processing
Christian König
deathsimple at vodafone.de
Thu Apr 19 15:39:12 PDT 2012
Previously multiple rings could trigger multiple GPU
resets at the same time.
Signed-off-by: Christian König <deathsimple at vodafone.de>
---
drivers/gpu/drm/radeon/radeon.h | 3 +-
drivers/gpu/drm/radeon/radeon_fence.c | 146 +++++++++++++++++----------------
2 files changed, 75 insertions(+), 74 deletions(-)
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 8801657..85a3aa9 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -255,8 +255,7 @@ struct radeon_fence_driver {
volatile uint32_t *cpu_addr;
atomic_t seq;
uint32_t last_seq;
- unsigned long last_jiffies;
- unsigned long last_timeout;
+ unsigned long last_activity;
wait_queue_head_t queue;
struct list_head created;
struct list_head emitted;
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
index 36c411f..1a9765a 100644
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@@ -74,6 +74,10 @@ int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence *fence)
radeon_fence_ring_emit(rdev, fence->ring, fence);
trace_radeon_fence_emit(rdev->ddev, fence->seq);
fence->emitted = true;
+ /* are we the first fence on a previously idle ring? */
+ if (list_empty(&rdev->fence_drv[fence->ring].emitted)) {
+ rdev->fence_drv[fence->ring].last_activity = jiffies;
+ }
list_move_tail(&fence->list, &rdev->fence_drv[fence->ring].emitted);
write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
return 0;
@@ -85,34 +89,14 @@ static bool radeon_fence_poll_locked(struct radeon_device *rdev, int ring)
struct list_head *i, *n;
uint32_t seq;
bool wake = false;
- unsigned long cjiffies;
seq = radeon_fence_read(rdev, ring);
- if (seq != rdev->fence_drv[ring].last_seq) {
- rdev->fence_drv[ring].last_seq = seq;
- rdev->fence_drv[ring].last_jiffies = jiffies;
- rdev->fence_drv[ring].last_timeout = RADEON_FENCE_JIFFIES_TIMEOUT;
- } else {
- cjiffies = jiffies;
- if (time_after(cjiffies, rdev->fence_drv[ring].last_jiffies)) {
- cjiffies -= rdev->fence_drv[ring].last_jiffies;
- if (time_after(rdev->fence_drv[ring].last_timeout, cjiffies)) {
- /* update the timeout */
- rdev->fence_drv[ring].last_timeout -= cjiffies;
- } else {
- /* the 500ms timeout is elapsed we should test
- * for GPU lockup
- */
- rdev->fence_drv[ring].last_timeout = 1;
- }
- } else {
- /* wrap around update last jiffies, we will just wait
- * a little longer
- */
- rdev->fence_drv[ring].last_jiffies = cjiffies;
- }
+ if (seq == rdev->fence_drv[ring].last_seq)
return false;
- }
+
+ rdev->fence_drv[ring].last_seq = seq;
+ rdev->fence_drv[ring].last_activity = jiffies;
+
n = NULL;
list_for_each(i, &rdev->fence_drv[ring].emitted) {
fence = list_entry(i, struct radeon_fence, list);
@@ -207,66 +191,84 @@ int radeon_fence_wait(struct radeon_fence *fence, bool intr)
struct radeon_device *rdev;
unsigned long irq_flags, timeout;
u32 seq;
- int r;
+ int i, r;
+ bool signaled;
if (fence == NULL) {
WARN(1, "Querying an invalid fence : %p !\n", fence);
- return 0;
+ return -EINVAL;
}
+
rdev = fence->rdev;
- if (radeon_fence_signaled(fence)) {
- return 0;
- }
- timeout = rdev->fence_drv[fence->ring].last_timeout;
-retry:
- /* save current sequence used to check for GPU lockup */
- seq = rdev->fence_drv[fence->ring].last_seq;
- trace_radeon_fence_wait_begin(rdev->ddev, seq);
- if (intr) {
+ signaled = radeon_fence_signaled(fence);
+ while (!signaled) {
+ read_lock_irqsave(&rdev->fence_lock, irq_flags);
+ timeout = jiffies - RADEON_FENCE_JIFFIES_TIMEOUT;
+ if (time_after(rdev->fence_drv[fence->ring].last_activity, timeout)) {
+ /* the normal case, timeout is somewhere before last_activity */
+ timeout = rdev->fence_drv[fence->ring].last_activity - timeout;
+ } else {
+ /* either jiffies wrapped around, or no fence was signaled in the last 500ms
+ * anyway we will just wait for the minimum amount and then check for a lockup */
+ timeout = 1;
+ }
+ /* save current sequence value used to check for GPU lockups */
+ seq = rdev->fence_drv[fence->ring].last_seq;
+ read_unlock_irqrestore(&rdev->fence_lock, irq_flags);
+
+ trace_radeon_fence_wait_begin(rdev->ddev, seq);
radeon_irq_kms_sw_irq_get(rdev, fence->ring);
- r = wait_event_interruptible_timeout(rdev->fence_drv[fence->ring].queue,
- radeon_fence_signaled(fence), timeout);
+ if (intr) {
+ r = wait_event_interruptible_timeout(
+ rdev->fence_drv[fence->ring].queue,
+ (signaled = radeon_fence_signaled(fence)), timeout);
+ } else {
+ r = wait_event_timeout(
+ rdev->fence_drv[fence->ring].queue,
+ (signaled = radeon_fence_signaled(fence)), timeout);
+ }
radeon_irq_kms_sw_irq_put(rdev, fence->ring);
if (unlikely(r < 0)) {
return r;
}
- } else {
- radeon_irq_kms_sw_irq_get(rdev, fence->ring);
- r = wait_event_timeout(rdev->fence_drv[fence->ring].queue,
- radeon_fence_signaled(fence), timeout);
- radeon_irq_kms_sw_irq_put(rdev, fence->ring);
- }
- trace_radeon_fence_wait_end(rdev->ddev, seq);
- if (unlikely(!radeon_fence_signaled(fence))) {
- /* we were interrupted for some reason and fence isn't
- * isn't signaled yet, resume wait
- */
- if (r) {
- timeout = r;
- goto retry;
- }
- /* don't protect read access to rdev->fence_drv[t].last_seq
- * if we experiencing a lockup the value doesn't change
- */
- if (seq == rdev->fence_drv[fence->ring].last_seq &&
- radeon_ring_is_lockup(rdev, fence->ring, &rdev->ring[fence->ring])) {
-
- /* good news we believe it's a lockup */
- printk(KERN_WARNING "GPU lockup (waiting for 0x%08X last fence id 0x%08X)\n",
- fence->seq, seq);
-
- /* mark the ring as not ready any more */
- rdev->ring[fence->ring].ready = false;
- r = radeon_gpu_reset(rdev);
- if (r)
- return r;
+ trace_radeon_fence_wait_end(rdev->ddev, seq);
+
+ if (unlikely(!signaled)) {
+ /* we were interrupted for some reason and fence
+ * isn't signaled yet, resume waiting */
+ if (r) {
+ continue;
+ }
+
+ write_lock_irqsave(&rdev->fence_lock, irq_flags);
+ /* check if sequence value has changed since last_activity */
+ if (seq != rdev->fence_drv[fence->ring].last_seq) {
+ write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
+ continue;
+ }
+
+ /* change sequence value on all rings, so nobody else thinks there is a lockup */
+ for (i = 0; i < RADEON_NUM_RINGS; ++i)
+ rdev->fence_drv[i].last_seq -= 0x10000;
+ write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
+
+ if (radeon_ring_is_lockup(rdev, fence->ring, &rdev->ring[fence->ring])) {
+
+ /* good news we believe it's a lockup */
+ printk(KERN_WARNING "GPU lockup (waiting for 0x%08X last fence id 0x%08X)\n",
+ fence->seq, seq);
+
+ /* mark the ring as not ready any more */
+ rdev->ring[fence->ring].ready = false;
+ r = radeon_gpu_reset(rdev);
+ if (r)
+ return r;
+
+ write_lock_irqsave(&rdev->fence_lock, irq_flags);
+ rdev->fence_drv[fence->ring].last_activity = jiffies;
+ write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
+ }
}
- timeout = RADEON_FENCE_JIFFIES_TIMEOUT;
- write_lock_irqsave(&rdev->fence_lock, irq_flags);
- rdev->fence_drv[fence->ring].last_timeout = RADEON_FENCE_JIFFIES_TIMEOUT;
- rdev->fence_drv[fence->ring].last_jiffies = jiffies;
- write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
- goto retry;
}
return 0;
}
--
1.7.5.4
More information about the dri-devel
mailing list