[Intel-gfx] [RFC 1/4] drm/i915/perf: rework aging tail workaround

Lionel Landwerlin lionel.g.landwerlin at intel.com
Wed Dec 19 14:37:44 UTC 2018


We're about to introduce an options to open the perf stream, giving
the user ability to configure how often it wants the kernel to pull
the OA registers for available data.

Right now the workaround against the OA tail pointer race condition
requires at least twice the internal kernel pulling timer to make any
data available.

This changes introduce checks on the OA data written into the circular
buffer to make as much data as possible available on the first
iteration of the pulling timer.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h  |  32 +++---
 drivers/gpu/drm/i915/i915_perf.c | 162 +++++++++++++++----------------
 2 files changed, 95 insertions(+), 99 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 0689e67c966e..09c4b5de141b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1845,6 +1845,12 @@ struct drm_i915_private {
 			 */
 			struct ratelimit_state spurious_report_rs;
 
+			/**
+			 * For rate limiting any notifications of tail pointer
+			 * race.
+			 */
+			struct ratelimit_state tail_pointer_race;
+
 			bool periodic;
 			int period_exponent;
 
@@ -1885,23 +1891,11 @@ struct drm_i915_private {
 				spinlock_t ptr_lock;
 
 				/**
-				 * One 'aging' tail pointer and one 'aged'
-				 * tail pointer ready to used for reading.
-				 *
-				 * Initial values of 0xffffffff are invalid
-				 * and imply that an update is required
-				 * (and should be ignored by an attempted
-				 * read)
-				 */
-				struct {
-					u32 offset;
-				} tails[2];
-
-				/**
-				 * Index for the aged tail ready to read()
-				 * data up to.
+				 * The last HW tail reported by HW. The data
+				 * might not have made it to memory yet
+				 * though.
 				 */
-				unsigned int aged_tail_idx;
+				u32 aging_tail;
 
 				/**
 				 * A monotonic timestamp for when the current
@@ -1920,6 +1914,12 @@ struct drm_i915_private {
 				 * data to userspace.
 				 */
 				u32 head;
+
+				/**
+				 * The last tail verified tail that can be
+				 * read by userspace.
+				 */
+				u32 tail;
 			} oa_buffer;
 
 			u32 gen7_latched_oastatus1;
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 4529edfdcfc8..d54142f1cff4 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -451,8 +451,7 @@ static bool oa_buffer_check_unlocked(struct drm_i915_private *dev_priv)
 {
 	int report_size = dev_priv->perf.oa.oa_buffer.format_size;
 	unsigned long flags;
-	unsigned int aged_idx;
-	u32 head, hw_tail, aged_tail, aging_tail;
+	u32 hw_tail;
 	u64 now;
 
 	/* We have to consider the (unlikely) possibility that read() errors
@@ -461,16 +460,6 @@ static bool oa_buffer_check_unlocked(struct drm_i915_private *dev_priv)
 	 */
 	spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
 
-	/* NB: The head we observe here might effectively be a little out of
-	 * date (between head and tails[aged_idx].offset if there is currently
-	 * a read() in progress.
-	 */
-	head = dev_priv->perf.oa.oa_buffer.head;
-
-	aged_idx = dev_priv->perf.oa.oa_buffer.aged_tail_idx;
-	aged_tail = dev_priv->perf.oa.oa_buffer.tails[aged_idx].offset;
-	aging_tail = dev_priv->perf.oa.oa_buffer.tails[!aged_idx].offset;
-
 	hw_tail = dev_priv->perf.oa.ops.oa_hw_tail_read(dev_priv);
 
 	/* The tail pointer increases in 64 byte increments,
@@ -480,63 +469,76 @@ static bool oa_buffer_check_unlocked(struct drm_i915_private *dev_priv)
 
 	now = ktime_get_mono_fast_ns();
 
-	/* Update the aged tail
-	 *
-	 * Flip the tail pointer available for read()s once the aging tail is
-	 * old enough to trust that the corresponding data will be visible to
-	 * the CPU...
-	 *
-	 * Do this before updating the aging pointer in case we may be able to
-	 * immediately start aging a new pointer too (if new data has become
-	 * available) without needing to wait for a later hrtimer callback.
-	 */
-	if (aging_tail != INVALID_TAIL_PTR &&
-	    ((now - dev_priv->perf.oa.oa_buffer.aging_timestamp) >
-	     OA_TAIL_MARGIN_NSEC)) {
-
-		aged_idx ^= 1;
-		dev_priv->perf.oa.oa_buffer.aged_tail_idx = aged_idx;
+	if (hw_tail == dev_priv->perf.oa.oa_buffer.aging_tail) {
+		/* If the HW tail hasn't move since the last check and the HW
+		 * tail has been aging for long enough, declare it the new
+		 * tail.
+		 */
+		if ((now - dev_priv->perf.oa.oa_buffer.aging_timestamp) >
+		    OA_TAIL_MARGIN_NSEC) {
+			dev_priv->perf.oa.oa_buffer.tail =
+				dev_priv->perf.oa.oa_buffer.aging_tail;
+		}
+	} else {
+		u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma);
+		u32 head, tail, landed_report_heads;
 
-		aged_tail = aging_tail;
+		/* NB: The head we observe here might effectively be a little out of
+		 * date (between head and tails[aged_idx].offset if there is currently
+		 * a read() in progress.
+		 */
+		head = dev_priv->perf.oa.oa_buffer.head - gtt_offset;
 
-		/* Mark that we need a new pointer to start aging... */
-		dev_priv->perf.oa.oa_buffer.tails[!aged_idx].offset = INVALID_TAIL_PTR;
-		aging_tail = INVALID_TAIL_PTR;
-	}
+		hw_tail -= gtt_offset;
+		tail = hw_tail;
 
-	/* Update the aging tail
-	 *
-	 * We throttle aging tail updates until we have a new tail that
-	 * represents >= one report more data than is already available for
-	 * reading. This ensures there will be enough data for a successful
-	 * read once this new pointer has aged and ensures we will give the new
-	 * pointer time to age.
-	 */
-	if (aging_tail == INVALID_TAIL_PTR &&
-	    (aged_tail == INVALID_TAIL_PTR ||
-	     OA_TAKEN(hw_tail, aged_tail) >= report_size)) {
-		struct i915_vma *vma = dev_priv->perf.oa.oa_buffer.vma;
-		u32 gtt_offset = i915_ggtt_offset(vma);
-
-		/* Be paranoid and do a bounds check on the pointer read back
-		 * from hardware, just in case some spurious hardware condition
-		 * could put the tail out of bounds...
+		/* Walk the stream backward until we find at least 2 reports
+		 * with dword 0 & 1 not at 0. Since the circular buffer
+		 * pointers progress by increments of 64 bytes and that
+		 * reports can be up to 256 bytes long, we can't tell whether
+		 * a report has fully landed in memory before the first 2
+		 * dwords of the following report have effectively landed.
+		 *
+		 * This is assuming that chunks of memory the OA unit writes
+		 * to land in the order they were written to.
+		 * If not : (╯°□°)╯︵ ┻━┻
 		 */
-		if (hw_tail >= gtt_offset &&
-		    hw_tail < (gtt_offset + OA_BUFFER_SIZE)) {
-			dev_priv->perf.oa.oa_buffer.tails[!aged_idx].offset =
-				aging_tail = hw_tail;
-			dev_priv->perf.oa.oa_buffer.aging_timestamp = now;
-		} else {
-			DRM_ERROR("Ignoring spurious out of range OA buffer tail pointer = %u\n",
-				  hw_tail);
+		landed_report_heads = 0;
+		while (OA_TAKEN(tail, head) >= report_size) {
+			u32 previous_tail = (tail - report_size) & (OA_BUFFER_SIZE - 1);
+			u8 *report = dev_priv->perf.oa.oa_buffer.vaddr + previous_tail;
+			u32 *report32 = (void *) report;
+
+			/* Head of the report indicated by the HW tail register has
+			 * indeed landed into memory.
+			 */
+			if (report32[0] != 0 || report[1] != 0) {
+				landed_report_heads++;
+
+				if (landed_report_heads >= 2)
+					break;
+			}
+
+			tail = previous_tail;
 		}
+
+		if (abs(tail - hw_tail) >= (2 * report_size)) {
+			if (__ratelimit(&dev_priv->perf.oa.tail_pointer_race)) {
+				DRM_NOTE("unlanded report(s) head=0x%x "
+					 "tail=0x%x hw_tail=0x%x\n",
+					 head, tail, hw_tail);
+			}
+		}
+
+		dev_priv->perf.oa.oa_buffer.tail = gtt_offset + tail;
+		dev_priv->perf.oa.oa_buffer.aging_tail = gtt_offset + hw_tail;
+		dev_priv->perf.oa.oa_buffer.aging_timestamp = now;
 	}
 
 	spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
 
-	return aged_tail == INVALID_TAIL_PTR ?
-		false : OA_TAKEN(aged_tail, head) >= report_size;
+	return OA_TAKEN(dev_priv->perf.oa.oa_buffer.tail,
+			dev_priv->perf.oa.oa_buffer.head) >= report_size;
 }
 
 /**
@@ -655,7 +657,6 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
 	u32 mask = (OA_BUFFER_SIZE - 1);
 	size_t start_offset = *offset;
 	unsigned long flags;
-	unsigned int aged_tail_idx;
 	u32 head, tail;
 	u32 taken;
 	int ret = 0;
@@ -666,8 +667,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
 	spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
 
 	head = dev_priv->perf.oa.oa_buffer.head;
-	aged_tail_idx = dev_priv->perf.oa.oa_buffer.aged_tail_idx;
-	tail = dev_priv->perf.oa.oa_buffer.tails[aged_tail_idx].offset;
+	tail = dev_priv->perf.oa.oa_buffer.tail;
 
 	spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
 
@@ -698,7 +698,6 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
 		      head, tail))
 		return -EIO;
 
-
 	for (/* none */;
 	     (taken = OA_TAKEN(tail, head));
 	     head = (head + report_size) & mask) {
@@ -806,13 +805,10 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
 		}
 
 		/*
-		 * The above reason field sanity check is based on
-		 * the assumption that the OA buffer is initially
-		 * zeroed and we reset the field after copying so the
-		 * check is still meaningful once old reports start
-		 * being overwritten.
+		 * Clear out the first 2 dword as a mean to detect unlanded
+		 * reports.
 		 */
-		report32[0] = 0;
+		report32[0] = report32[1] = 0;
 	}
 
 	if (start_offset != *offset) {
@@ -944,7 +940,6 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
 	u32 mask = (OA_BUFFER_SIZE - 1);
 	size_t start_offset = *offset;
 	unsigned long flags;
-	unsigned int aged_tail_idx;
 	u32 head, tail;
 	u32 taken;
 	int ret = 0;
@@ -955,8 +950,7 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
 	spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
 
 	head = dev_priv->perf.oa.oa_buffer.head;
-	aged_tail_idx = dev_priv->perf.oa.oa_buffer.aged_tail_idx;
-	tail = dev_priv->perf.oa.oa_buffer.tails[aged_tail_idx].offset;
+	tail = dev_priv->perf.oa.oa_buffer.tail;
 
 	spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
 
@@ -1020,13 +1014,10 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
 		if (ret)
 			break;
 
-		/* The above report-id field sanity check is based on
-		 * the assumption that the OA buffer is initially
-		 * zeroed and we reset the field after copying so the
-		 * check is still meaningful once old reports start
-		 * being overwritten.
+		/* Clear out the first 2 dwords as a mean to detect unlanded
+		 * reports.
 		 */
-		report32[0] = 0;
+		report32[0] = report32[1] = 0;
 	}
 
 	if (start_offset != *offset) {
@@ -1397,8 +1388,8 @@ static void gen7_init_oa_buffer(struct drm_i915_private *dev_priv)
 	I915_WRITE(GEN7_OASTATUS1, gtt_offset | OABUFFER_SIZE_16M); /* tail */
 
 	/* Mark that we need updated tail pointers to read from... */
-	dev_priv->perf.oa.oa_buffer.tails[0].offset = INVALID_TAIL_PTR;
-	dev_priv->perf.oa.oa_buffer.tails[1].offset = INVALID_TAIL_PTR;
+	dev_priv->perf.oa.oa_buffer.aging_tail =
+		dev_priv->perf.oa.oa_buffer.tail = gtt_offset;
 
 	spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
 
@@ -1453,8 +1444,8 @@ static void gen8_init_oa_buffer(struct drm_i915_private *dev_priv)
 	I915_WRITE(GEN8_OATAILPTR, gtt_offset & GEN8_OATAILPTR_MASK);
 
 	/* Mark that we need updated tail pointers to read from... */
-	dev_priv->perf.oa.oa_buffer.tails[0].offset = INVALID_TAIL_PTR;
-	dev_priv->perf.oa.oa_buffer.tails[1].offset = INVALID_TAIL_PTR;
+	dev_priv->perf.oa.oa_buffer.aging_tail =
+		dev_priv->perf.oa.oa_buffer.tail = gtt_offset;
 
 	/*
 	 * Reset state used to recognise context switches, affecting which
@@ -2043,6 +2034,11 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
 	ratelimit_set_flags(&dev_priv->perf.oa.spurious_report_rs,
 			    RATELIMIT_MSG_ON_RELEASE);
 
+	ratelimit_state_init(&dev_priv->perf.oa.tail_pointer_race,
+			     5 * HZ, 10);
+	ratelimit_set_flags(&dev_priv->perf.oa.tail_pointer_race,
+			    RATELIMIT_MSG_ON_RELEASE);
+
 	stream->sample_size = sizeof(struct drm_i915_perf_record_header);
 
 	format_size = dev_priv->perf.oa.oa_formats[props->oa_format].size;
-- 
2.20.1



More information about the Intel-gfx mailing list