[PATCH] hmm2

Chris Wilson chris at chris-wilson.co.uk
Tue Apr 4 13:59:46 UTC 2017


---
 drivers/gpu/drm/i915/i915_drv.h          |  9 +++----
 drivers/gpu/drm/i915/i915_gem_request.c  | 41 +++++++++++++++++++++++++++++---
 drivers/gpu/drm/i915/i915_irq.c          |  1 -
 drivers/gpu/drm/i915/intel_breadcrumbs.c | 11 +++------
 drivers/gpu/drm/i915/intel_ringbuffer.h  |  3 ---
 5 files changed, 44 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 54fbff598371..7babf809633c 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -4015,8 +4015,8 @@ __i915_request_irq_complete(const struct drm_i915_gem_request *req)
 	 * but it is easier and safer to do it every time the waiter
 	 * is woken.
 	 */
-	if (engine->irq_seqno_barrier &&
-	    test_and_clear_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted)) {
+	if (test_and_clear_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted) &&
+	    engine->irq_seqno_barrier) {
 		struct intel_breadcrumbs *b = &engine->breadcrumbs;
 
 		/* The ordering of irq_posted versus applying the barrier
@@ -4049,12 +4049,9 @@ __i915_request_irq_complete(const struct drm_i915_gem_request *req)
 			 */
 			wake_up_process(b->irq_wait->tsk);
 		spin_unlock_irq(&b->irq_lock);
-
-		if (__i915_gem_request_completed(req, seqno))
-			return true;
 	}
 
-	return false;
+	return __i915_gem_request_completed(req, seqno);
 }
 
 void i915_memcpy_init_early(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 685a3c79534c..1d354e2e1b17 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -1024,7 +1024,7 @@ bool __i915_spin_request(const struct drm_i915_gem_request *req,
 			 u32 seqno, int state, unsigned long timeout_us)
 {
 	struct intel_engine_cs *engine = req->engine;
-	unsigned int irq, cpu;
+	unsigned int cpu;
 
 	/* When waiting for high frequency requests, e.g. during synchronous
 	 * rendering split between the CPU and GPU, the finite amount of time
@@ -1036,7 +1036,42 @@ bool __i915_spin_request(const struct drm_i915_gem_request *req,
 	 * takes to sleep on a request, on the order of a microsecond.
 	 */
 
-	irq = atomic_read(&engine->irq_count);
+	if (test_and_clear_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted) &&
+	    engine->irq_seqno_barrier) {
+		struct intel_breadcrumbs *b = &engine->breadcrumbs;
+
+		/* The ordering of irq_posted versus applying the barrier
+		 * is crucial. The clearing of the current irq_posted must
+		 * be visible before we perform the barrier operation,
+		 * such that if a subsequent interrupt arrives, irq_posted
+	 * is reasserted and we notice it on the next iteration of
+	 * the spin loop (which causes us to reapply the barrier
+	 * before rechecking the seqno). Conversely, if the clear
+		 * occurs after the barrier, then an interrupt that arrived
+		 * whilst we waited on the barrier would not trigger a
+		 * barrier on the next pass, and the read may not see the
+		 * seqno update.
+		 */
+		engine->irq_seqno_barrier(engine);
+
+		/* If we consume the irq, but we are no longer the bottom-half,
+		 * the real bottom-half may not have serialised their own
+		 * seqno check with the irq-barrier (i.e. may have inspected
+		 * the seqno before we believe it coherent since they see
+		 * irq_posted == false but we are still running).
+		 */
+		spin_lock_irq(&b->irq_lock);
+		if (b->irq_wait && b->irq_wait->tsk != current)
+			/* Note that if the bottom-half is changed as we
+			 * are sending the wake-up, the new bottom-half will
+			 * be woken by whomever made the change. We only have
+			 * to worry about when we steal the irq-posted for
+			 * ourself.
+			 */
+			wake_up_process(b->irq_wait->tsk);
+		spin_unlock_irq(&b->irq_lock);
+	}
+
 	timeout_us += local_clock_us(&cpu);
 	do {
 		if (seqno != i915_gem_request_global_seqno(req))
@@ -1051,7 +1086,7 @@ bool __i915_spin_request(const struct drm_i915_gem_request *req,
 		 * assume we won't see one in the near future but require
 		 * the engine->seqno_barrier() to fixup coherency.
 		 */
-		if (atomic_read(&engine->irq_count) != irq)
+		if (test_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted))
 			break;
 
 		if (signal_pending_state(state, current))
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 4ddf70872111..1da5b85a4295 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1038,7 +1038,6 @@ static void notify_ring(struct intel_engine_cs *engine)
 	struct drm_i915_gem_request *rq = NULL;
 	struct intel_wait *wait;
 
-	atomic_inc(&engine->irq_count);
 	set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted);
 
 	spin_lock(&engine->breadcrumbs.irq_lock);
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index 9ccbf26124c6..659701002404 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -80,12 +80,6 @@ static void intel_breadcrumbs_hangcheck(unsigned long data)
 	if (!b->irq_armed)
 		return;
 
-	if (b->hangcheck_interrupts != atomic_read(&engine->irq_count)) {
-		b->hangcheck_interrupts = atomic_read(&engine->irq_count);
-		mod_timer(&b->hangcheck, wait_timeout());
-		return;
-	}
-
 	/* We keep the hangcheck timer alive until we disarm the irq, even
 	 * if there are no waiters at present.
 	 *
@@ -98,7 +92,8 @@ static void intel_breadcrumbs_hangcheck(unsigned long data)
 	 * but we still have a waiter. Assuming all batches complete within
 	 * DRM_I915_HANGCHECK_JIFFIES [1.5s]!
 	 */
-	if (intel_engine_wakeup(engine) & ENGINE_WAKEUP_ASLEEP) {
+	if (!test_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted) &&
+	    intel_engine_wakeup(engine) & ENGINE_WAKEUP_ASLEEP) {
 		missed_breadcrumb(engine);
 		mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1);
 	} else {
@@ -220,7 +215,7 @@ static bool use_fake_irq(const struct intel_breadcrumbs *b)
 	 * engine->seqno_barrier(), a timing error that should be transient
 	 * and unlikely to reoccur.
 	 */
-	return atomic_read(&engine->irq_count) == b->hangcheck_interrupts;
+	return test_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted);
 }
 
 static void enable_fake_irq(struct intel_breadcrumbs *b)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 678b7debe72a..461a8bfe5f6f 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -191,7 +191,6 @@ struct intel_engine_cs {
 
 	struct intel_render_state *render_state;
 
-	atomic_t irq_count;
 	unsigned long irq_posted;
 #define ENGINE_IRQ_BREADCRUMB 0
 #define ENGINE_IRQ_EXECLIST 1
@@ -224,8 +223,6 @@ struct intel_engine_cs {
 		struct timer_list fake_irq; /* used after a missed interrupt */
 		struct timer_list hangcheck; /* detect missed interrupts */
 
-		unsigned int hangcheck_interrupts;
-
 		bool irq_armed : 1;
 		bool irq_enabled : 1;
 		I915_SELFTEST_DECLARE(bool mock : 1);
-- 
2.11.0



More information about the Intel-gfx-trybot mailing list