[PATCH] drm/i915/oa: rmb ftw

Chris Wilson chris at chris-wilson.co.uk
Mon Feb 18 22:44:59 UTC 2019


---
 drivers/gpu/drm/i915/i915_drv.h  |  53 ------
 drivers/gpu/drm/i915/i915_perf.c | 269 +++----------------------------
 2 files changed, 25 insertions(+), 297 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 5c8d0489a1cd..7415e64e1fd3 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1899,59 +1899,6 @@ struct drm_i915_private {
 				int format;
 				int format_size;
 
-				/**
-				 * Locks reads and writes to all head/tail state
-				 *
-				 * Consider: the head and tail pointer state
-				 * needs to be read consistently from a hrtimer
-				 * callback (atomic context) and read() fop
-				 * (user context) with tail pointer updates
-				 * happening in atomic context and head updates
-				 * in user context and the (unlikely)
-				 * possibility of read() errors needing to
-				 * reset all head/tail state.
-				 *
-				 * Note: Contention or performance aren't
-				 * currently a significant concern here
-				 * considering the relatively low frequency of
-				 * hrtimer callbacks (5ms period) and that
-				 * reads typically only happen in response to a
-				 * hrtimer event and likely complete before the
-				 * next callback.
-				 *
-				 * Note: This lock is not held *while* reading
-				 * and copying data to userspace so the value
-				 * of head observed in htrimer callbacks won't
-				 * represent any partial consumption of data.
-				 */
-				spinlock_t ptr_lock;
-
-				/**
-				 * One 'aging' tail pointer and one 'aged'
-				 * tail pointer ready to used for reading.
-				 *
-				 * Initial values of 0xffffffff are invalid
-				 * and imply that an update is required
-				 * (and should be ignored by an attempted
-				 * read)
-				 */
-				struct {
-					u32 offset;
-				} tails[2];
-
-				/**
-				 * Index for the aged tail ready to read()
-				 * data up to.
-				 */
-				unsigned int aged_tail_idx;
-
-				/**
-				 * A monotonic timestamp for when the current
-				 * aging tail pointer was read; used to
-				 * determine when it is old enough to trust.
-				 */
-				u64 aging_timestamp;
-
 				/**
 				 * Although we can always read back the head
 				 * pointer register, we prefer to avoid
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 9ebf99f3d8d3..66ba61cbc747 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -217,53 +217,8 @@
  * that the overflow cases are unlikely in normal operation.
  */
 #define OA_BUFFER_SIZE		SZ_16M
-
 #define OA_TAKEN(tail, head)	((tail - head) & (OA_BUFFER_SIZE - 1))
 
-/**
- * DOC: OA Tail Pointer Race
- *
- * There's a HW race condition between OA unit tail pointer register updates and
- * writes to memory whereby the tail pointer can sometimes get ahead of what's
- * been written out to the OA buffer so far (in terms of what's visible to the
- * CPU).
- *
- * Although this can be observed explicitly while copying reports to userspace
- * by checking for a zeroed report-id field in tail reports, we want to account
- * for this earlier, as part of the oa_buffer_check to avoid lots of redundant
- * read() attempts.
- *
- * In effect we define a tail pointer for reading that lags the real tail
- * pointer by at least %OA_TAIL_MARGIN_NSEC nanoseconds, which gives enough
- * time for the corresponding reports to become visible to the CPU.
- *
- * To manage this we actually track two tail pointers:
- *  1) An 'aging' tail with an associated timestamp that is tracked until we
- *     can trust the corresponding data is visible to the CPU; at which point
- *     it is considered 'aged'.
- *  2) An 'aged' tail that can be used for read()ing.
- *
- * The two separate pointers let us decouple read()s from tail pointer aging.
- *
- * The tail pointers are checked and updated at a limited rate within a hrtimer
- * callback (the same callback that is used for delivering EPOLLIN events)
- *
- * Initially the tails are marked invalid with %INVALID_TAIL_PTR which
- * indicates that an updated tail pointer is needed.
- *
- * Most of the implementation details for this workaround are in
- * oa_buffer_check_unlocked() and _append_oa_reports()
- *
- * Note for posterity: previously the driver used to define an effective tail
- * pointer that lagged the real pointer by a 'tail margin' measured in bytes
- * derived from %OA_TAIL_MARGIN_NSEC and the configured sampling frequency.
- * This was flawed considering that the OA unit may also automatically generate
- * non-periodic reports (such as on context switch) or the OA unit may be
- * enabled without any periodic sampling.
- */
-#define OA_TAIL_MARGIN_NSEC	100000ULL
-#define INVALID_TAIL_PTR	0xffffffff
-
 /* frequency for checking whether the OA unit has written new reports to the
  * circular OA buffer...
  */
@@ -418,9 +373,7 @@ static u32 gen8_oa_hw_tail_read(struct drm_i915_private *dev_priv)
 
 static u32 gen7_oa_hw_tail_read(struct drm_i915_private *dev_priv)
 {
-	u32 oastatus1 = I915_READ(GEN7_OASTATUS1);
-
-	return oastatus1 & GEN7_OASTATUS1_TAIL_MASK;
+	return I915_READ(GEN7_OASTATUS1) & GEN7_OASTATUS1_TAIL_MASK;
 }
 
 /**
@@ -431,112 +384,17 @@ static u32 gen7_oa_hw_tail_read(struct drm_i915_private *dev_priv)
  * check hrtimer (atomic ctx) to check the OA buffer tail pointer and check
  * if there is data available for userspace to read.
  *
- * This function is central to providing a workaround for the OA unit tail
- * pointer having a race with respect to what data is visible to the CPU.
- * It is responsible for reading tail pointers from the hardware and giving
- * the pointers time to 'age' before they are made available for reading.
- * (See description of OA_TAIL_MARGIN_NSEC above for further details.)
- *
- * Besides returning true when there is data available to read() this function
- * also has the side effect of updating the oa_buffer.tails[], .aging_timestamp
- * and .aged_tail_idx state used for reading.
- *
- * Note: It's safe to read OA config state here unlocked, assuming that this is
- * only called while the stream is enabled, while the global OA configuration
- * can't be modified.
- *
  * Returns: %true if the OA buffer contains data, else %false
  */
 static bool oa_buffer_check_unlocked(struct drm_i915_private *dev_priv)
 {
 	int report_size = dev_priv->perf.oa.oa_buffer.format_size;
-	unsigned long flags;
-	unsigned int aged_idx;
-	u32 head, hw_tail, aged_tail, aging_tail;
-	u64 now;
-
-	/* We have to consider the (unlikely) possibility that read() errors
-	 * could result in an OA buffer reset which might reset the head,
-	 * tails[] and aged_tail state.
-	 */
-	spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
-
-	/* NB: The head we observe here might effectively be a little out of
-	 * date (between head and tails[aged_idx].offset if there is currently
-	 * a read() in progress.
-	 */
-	head = dev_priv->perf.oa.oa_buffer.head;
-
-	aged_idx = dev_priv->perf.oa.oa_buffer.aged_tail_idx;
-	aged_tail = dev_priv->perf.oa.oa_buffer.tails[aged_idx].offset;
-	aging_tail = dev_priv->perf.oa.oa_buffer.tails[!aged_idx].offset;
-
-	hw_tail = dev_priv->perf.oa.ops.oa_hw_tail_read(dev_priv);
-
-	/* The tail pointer increases in 64 byte increments,
-	 * not in report_size steps...
-	 */
-	hw_tail &= ~(report_size - 1);
-
-	now = ktime_get_mono_fast_ns();
-
-	/* Update the aged tail
-	 *
-	 * Flip the tail pointer available for read()s once the aging tail is
-	 * old enough to trust that the corresponding data will be visible to
-	 * the CPU...
-	 *
-	 * Do this before updating the aging pointer in case we may be able to
-	 * immediately start aging a new pointer too (if new data has become
-	 * available) without needing to wait for a later hrtimer callback.
-	 */
-	if (aging_tail != INVALID_TAIL_PTR &&
-	    ((now - dev_priv->perf.oa.oa_buffer.aging_timestamp) >
-	     OA_TAIL_MARGIN_NSEC)) {
-
-		aged_idx ^= 1;
-		dev_priv->perf.oa.oa_buffer.aged_tail_idx = aged_idx;
-
-		aged_tail = aging_tail;
-
-		/* Mark that we need a new pointer to start aging... */
-		dev_priv->perf.oa.oa_buffer.tails[!aged_idx].offset = INVALID_TAIL_PTR;
-		aging_tail = INVALID_TAIL_PTR;
-	}
-
-	/* Update the aging tail
-	 *
-	 * We throttle aging tail updates until we have a new tail that
-	 * represents >= one report more data than is already available for
-	 * reading. This ensures there will be enough data for a successful
-	 * read once this new pointer has aged and ensures we will give the new
-	 * pointer time to age.
-	 */
-	if (aging_tail == INVALID_TAIL_PTR &&
-	    (aged_tail == INVALID_TAIL_PTR ||
-	     OA_TAKEN(hw_tail, aged_tail) >= report_size)) {
-		struct i915_vma *vma = dev_priv->perf.oa.oa_buffer.vma;
-		u32 gtt_offset = i915_ggtt_offset(vma);
-
-		/* Be paranoid and do a bounds check on the pointer read back
-		 * from hardware, just in case some spurious hardware condition
-		 * could put the tail out of bounds...
-		 */
-		if (hw_tail >= gtt_offset &&
-		    hw_tail < (gtt_offset + OA_BUFFER_SIZE)) {
-			dev_priv->perf.oa.oa_buffer.tails[!aged_idx].offset =
-				aging_tail = hw_tail;
-			dev_priv->perf.oa.oa_buffer.aging_timestamp = now;
-		} else {
-			DRM_ERROR("Ignoring spurious out of range OA buffer tail pointer = %u\n",
-				  hw_tail);
-		}
-	}
+	u32 head, tail;
 
-	spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
+	head = READ_ONCE(dev_priv->perf.oa.oa_buffer.head);
+	tail = dev_priv->perf.oa.ops.oa_hw_tail_read(dev_priv);
 
-	return aged_tail == INVALID_TAIL_PTR ?
-		false : OA_TAKEN(aged_tail, head) >= report_size;
+	return OA_TAKEN(tail, head) >= report_size;
 }
 
 /**
@@ -654,8 +512,6 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
 	u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma);
 	u32 mask = (OA_BUFFER_SIZE - 1);
 	size_t start_offset = *offset;
-	unsigned long flags;
-	unsigned int aged_tail_idx;
 	u32 head, tail;
 	u32 taken;
 	int ret = 0;
@@ -663,20 +519,9 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
 	if (WARN_ON(!stream->enabled))
 		return -EIO;
 
-	spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
-
-	head = dev_priv->perf.oa.oa_buffer.head;
-	aged_tail_idx = dev_priv->perf.oa.oa_buffer.aged_tail_idx;
-	tail = dev_priv->perf.oa.oa_buffer.tails[aged_tail_idx].offset;
-
-	spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
-
-	/*
-	 * An invalid tail pointer here means we're still waiting for the poll
-	 * hrtimer callback to give us a pointer
-	 */
-	if (tail == INVALID_TAIL_PTR)
-		return -EAGAIN;
+	head = READ_ONCE(dev_priv->perf.oa.oa_buffer.head);
+	tail = gen8_oa_hw_tail_read(dev_priv);
+	rmb();
 
 	/*
 	 * NB: oa_buffer.head/tail include the gtt_offset which we don't want
@@ -685,22 +530,8 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
 	head -= gtt_offset;
 	tail -= gtt_offset;
 
-	/*
-	 * An out of bounds or misaligned head or tail pointer implies a driver
-	 * bug since we validate + align the tail pointers we read from the
-	 * hardware and we are in full control of the head pointer which should
-	 * only be incremented by multiples of the report size (notably also
-	 * all a power of two).
-	 */
-	if (WARN_ONCE(head > OA_BUFFER_SIZE || head % report_size ||
-		      tail > OA_BUFFER_SIZE || tail % report_size,
-		      "Inconsistent OA buffer pointers: head = %u, tail = %u\n",
-		      head, tail))
-		return -EIO;
-
-
 	for (/* none */;
-	     (taken = OA_TAKEN(tail, head));
+	     (taken = OA_TAKEN(tail, head)) >= report_size;
 	     head = (head + report_size) & mask) {
 		u8 *report = oa_buf_base + head;
 		u32 *report32 = (void *)report;
@@ -816,8 +647,6 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
 	}
 
 	if (start_offset != *offset) {
-		spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
-
 		/*
 		 * We removed the gtt_offset for the copy loop above, indexing
 		 * relative to oa_buf_base so put back here...
@@ -825,9 +654,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
 		head += gtt_offset;
 
 		I915_WRITE(GEN8_OAHEADPTR, head & GEN8_OAHEADPTR_MASK);
-		dev_priv->perf.oa.oa_buffer.head = head;
-
-		spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
+		WRITE_ONCE(dev_priv->perf.oa.oa_buffer.head, head);
 	}
 
 	return ret;
@@ -943,8 +770,6 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
 	u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma);
 	u32 mask = (OA_BUFFER_SIZE - 1);
 	size_t start_offset = *offset;
-	unsigned long flags;
-	unsigned int aged_tail_idx;
 	u32 head, tail;
 	u32 taken;
 	int ret = 0;
@@ -952,41 +777,19 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
 	if (WARN_ON(!stream->enabled))
 		return -EIO;
 
-	spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
-
-	head = dev_priv->perf.oa.oa_buffer.head;
-	aged_tail_idx = dev_priv->perf.oa.oa_buffer.aged_tail_idx;
-	tail = dev_priv->perf.oa.oa_buffer.tails[aged_tail_idx].offset;
+	head = READ_ONCE(dev_priv->perf.oa.oa_buffer.head);
+	tail = gen7_oa_hw_tail_read(dev_priv);
+	rmb();
 
-	spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
-
-	/* An invalid tail pointer here means we're still waiting for the poll
-	 * hrtimer callback to give us a pointer
-	 */
-	if (tail == INVALID_TAIL_PTR)
-		return -EAGAIN;
-
-	/* NB: oa_buffer.head/tail include the gtt_offset which we don't want
+	/*
+	 * NB: oa_buffer.head/tail include the gtt_offset which we don't want
 	 * while indexing relative to oa_buf_base.
 	 */
 	head -= gtt_offset;
 	tail -= gtt_offset;
 
-	/* An out of bounds or misaligned head or tail pointer implies a driver
-	 * bug since we validate + align the tail pointers we read from the
-	 * hardware and we are in full control of the head pointer which should
-	 * only be incremented by multiples of the report size (notably also
-	 * all a power of two).
-	 */
-	if (WARN_ONCE(head > OA_BUFFER_SIZE || head % report_size ||
-		      tail > OA_BUFFER_SIZE || tail % report_size,
-		      "Inconsistent OA buffer pointers: head = %u, tail = %u\n",
-		      head, tail))
-		return -EIO;
-
-
 	for (/* none */;
-	     (taken = OA_TAKEN(tail, head));
+	     (taken = OA_TAKEN(tail, head)) >= report_size;
 	     head = (head + report_size) & mask) {
 		u8 *report = oa_buf_base + head;
 		u32 *report32 = (void *)report;
@@ -1030,19 +833,15 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
 	}
 
 	if (start_offset != *offset) {
-		spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
-
 		/* We removed the gtt_offset for the copy loop above, indexing
 		 * relative to oa_buf_base so put back here...
 		 */
 		head += gtt_offset;
 
 		I915_WRITE(GEN7_OASTATUS2,
-			   ((head & GEN7_OASTATUS2_HEAD_MASK) |
-			    GEN7_OASTATUS2_MEM_SELECT_GGTT));
-		dev_priv->perf.oa.oa_buffer.head = head;
-
-		spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
+			   (head & GEN7_OASTATUS2_HEAD_MASK) |
+			   GEN7_OASTATUS2_MEM_SELECT_GGTT);
+		WRITE_ONCE(dev_priv->perf.oa.oa_buffer.head, head);
 	}
 
 	return ret;
@@ -1381,9 +1180,6 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
 static void gen7_init_oa_buffer(struct drm_i915_private *dev_priv)
 {
 	u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma);
-	unsigned long flags;
-
-	spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
 
 	/* Pre-DevBDW: OABUFFER must be set with counters off,
 	 * before OASTATUS1, but after OASTATUS2
@@ -1396,12 +1192,6 @@ static void gen7_init_oa_buffer(struct drm_i915_private *dev_priv)
 
 	I915_WRITE(GEN7_OASTATUS1, gtt_offset | OABUFFER_SIZE_16M); /* tail */
 
-	/* Mark that we need updated tail pointers to read from... */
-	dev_priv->perf.oa.oa_buffer.tails[0].offset = INVALID_TAIL_PTR;
-	dev_priv->perf.oa.oa_buffer.tails[1].offset = INVALID_TAIL_PTR;
-
-	spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
-
 	/* On Haswell we have to track which OASTATUS1 flags we've
 	 * already seen since they can't be cleared while periodic
 	 * sampling is enabled.
@@ -1430,9 +1220,6 @@ static void gen7_init_oa_buffer(struct drm_i915_private *dev_priv)
 static void gen8_init_oa_buffer(struct drm_i915_private *dev_priv)
 {
 	u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma);
-	unsigned long flags;
-
-	spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
 
 	I915_WRITE(GEN8_OASTATUS, 0);
 	I915_WRITE(GEN8_OAHEADPTR, gtt_offset);
@@ -1452,10 +1239,6 @@ static void gen8_init_oa_buffer(struct drm_i915_private *dev_priv)
 		   OABUFFER_SIZE_16M | GEN8_OABUFFER_MEM_SELECT_GGTT);
 	I915_WRITE(GEN8_OATAILPTR, gtt_offset & GEN8_OATAILPTR_MASK);
 
-	/* Mark that we need updated tail pointers to read from... */
-	dev_priv->perf.oa.oa_buffer.tails[0].offset = INVALID_TAIL_PTR;
-	dev_priv->perf.oa.oa_buffer.tails[1].offset = INVALID_TAIL_PTR;
-
 	/*
 	 * Reset state used to recognise context switches, affecting which
 	 * reports we will forward to userspace while filtering for a single
@@ -1463,8 +1246,6 @@ static void gen8_init_oa_buffer(struct drm_i915_private *dev_priv)
 	 */
 	dev_priv->perf.oa.oa_buffer.last_ctx_id = INVALID_CTX_ID;
 
-	spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
-
 	/*
 	 * NB: although the OA buffer will initially be allocated
 	 * zeroed via shmfs (and so this memset is redundant when
@@ -1865,8 +1646,7 @@ static void gen7_oa_enable(struct i915_perf_stream *stream)
 
 	I915_WRITE(GEN7_OACONTROL,
 		   (ctx_id & GEN7_OACONTROL_CTX_MASK) |
-		   (period_exponent <<
-		    GEN7_OACONTROL_TIMER_PERIOD_SHIFT) |
+		   (period_exponent << GEN7_OACONTROL_TIMER_PERIOD_SHIFT) |
 		   (periodic ? GEN7_OACONTROL_TIMER_ENABLE : 0) |
 		   (report_format << GEN7_OACONTROL_FORMAT_SHIFT) |
 		   (ctx ? GEN7_OACONTROL_PER_CTX_ENABLE : 0) |
@@ -1894,9 +1674,9 @@ static void gen8_oa_enable(struct i915_perf_stream *stream)
 	 * filtering and instead filter on the cpu based on the context-id
 	 * field of reports
 	 */
-	I915_WRITE(GEN8_OACONTROL, (report_format <<
-				    GEN8_OA_REPORT_FORMAT_SHIFT) |
-				   GEN8_OA_COUNTER_ENABLE);
+	I915_WRITE(GEN8_OACONTROL,
+		   (report_format << GEN8_OA_REPORT_FORMAT_SHIFT) |
+		   GEN8_OA_COUNTER_ENABLE);
 }
 
 /**
@@ -1929,6 +1709,7 @@ static void gen7_oa_disable(struct i915_perf_stream *stream)
 				    GEN7_OACONTROL, GEN7_OACONTROL_ENABLE, 0,
 				    50))
 		DRM_ERROR("wait for OA to be disabled timed out\n");
+	rmb();
 }
 
 static void gen8_oa_disable(struct i915_perf_stream *stream)
@@ -1940,6 +1721,7 @@ static void gen8_oa_disable(struct i915_perf_stream *stream)
 				    GEN8_OACONTROL, GEN8_OA_COUNTER_ENABLE, 0,
 				    50))
 		DRM_ERROR("wait for OA to be disabled timed out\n");
+	rmb();
 }
 
 /**
@@ -3473,7 +3255,6 @@ void i915_perf_init(struct drm_i915_private *dev_priv)
 
 		INIT_LIST_HEAD(&dev_priv->perf.streams);
 		mutex_init(&dev_priv->perf.lock);
-		spin_lock_init(&dev_priv->perf.oa.oa_buffer.ptr_lock);
 
 		oa_sample_rate_hard_limit = 1000 *
 			(RUNTIME_INFO(dev_priv)->cs_timestamp_frequency_khz / 2);
-- 
2.20.1



More information about the Intel-gfx-trybot mailing list