[Intel-gfx] [PATCH] RFC/RFT drm/i915/oa: Drop aging-tail

Chris Wilson chris at chris-wilson.co.uk
Tue Feb 19 10:28:49 UTC 2019


Switch to using coherent reads that are serialised with the register
read, avoiding the memory latency problem without resorting to an
arbitrary delay. The only zeroes seen during testing on HSW+ have been
from configuration changes that do not update (therefore they were truly
zero entries and should be skipped).

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
Cc: Matthew Auld <matthew.auld at intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h  |  59 ---
 drivers/gpu/drm/i915/i915_perf.c | 625 +++++--------------------------
 2 files changed, 87 insertions(+), 597 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 5c8d0489a1cd..773c205e1bbe 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1881,12 +1881,6 @@ struct drm_i915_private {
 			wait_queue_head_t poll_wq;
 			bool pollin;
 
-			/**
-			 * For rate limiting any notifications of spurious
-			 * invalid OA reports
-			 */
-			struct ratelimit_state spurious_report_rs;
-
 			bool periodic;
 			int period_exponent;
 
@@ -1899,59 +1893,6 @@ struct drm_i915_private {
 				int format;
 				int format_size;
 
-				/**
-				 * Locks reads and writes to all head/tail state
-				 *
-				 * Consider: the head and tail pointer state
-				 * needs to be read consistently from a hrtimer
-				 * callback (atomic context) and read() fop
-				 * (user context) with tail pointer updates
-				 * happening in atomic context and head updates
-				 * in user context and the (unlikely)
-				 * possibility of read() errors needing to
-				 * reset all head/tail state.
-				 *
-				 * Note: Contention or performance aren't
-				 * currently a significant concern here
-				 * considering the relatively low frequency of
-				 * hrtimer callbacks (5ms period) and that
-				 * reads typically only happen in response to a
-				 * hrtimer event and likely complete before the
-				 * next callback.
-				 *
-				 * Note: This lock is not held *while* reading
-				 * and copying data to userspace so the value
-				 * of head observed in htrimer callbacks won't
-				 * represent any partial consumption of data.
-				 */
-				spinlock_t ptr_lock;
-
-				/**
-				 * One 'aging' tail pointer and one 'aged'
-				 * tail pointer ready to used for reading.
-				 *
-				 * Initial values of 0xffffffff are invalid
-				 * and imply that an update is required
-				 * (and should be ignored by an attempted
-				 * read)
-				 */
-				struct {
-					u32 offset;
-				} tails[2];
-
-				/**
-				 * Index for the aged tail ready to read()
-				 * data up to.
-				 */
-				unsigned int aged_tail_idx;
-
-				/**
-				 * A monotonic timestamp for when the current
-				 * aging tail pointer was read; used to
-				 * determine when it is old enough to trust.
-				 */
-				u64 aging_timestamp;
-
 				/**
 				 * Although we can always read back the head
 				 * pointer register, we prefer to avoid
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 9ebf99f3d8d3..0569f0159d16 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -217,53 +217,8 @@
  * that the overflow cases are unlikely in normal operation.
  */
 #define OA_BUFFER_SIZE		SZ_16M
-
 #define OA_TAKEN(tail, head)	((tail - head) & (OA_BUFFER_SIZE - 1))
 
-/**
- * DOC: OA Tail Pointer Race
- *
- * There's a HW race condition between OA unit tail pointer register updates and
- * writes to memory whereby the tail pointer can sometimes get ahead of what's
- * been written out to the OA buffer so far (in terms of what's visible to the
- * CPU).
- *
- * Although this can be observed explicitly while copying reports to userspace
- * by checking for a zeroed report-id field in tail reports, we want to account
- * for this earlier, as part of the oa_buffer_check to avoid lots of redundant
- * read() attempts.
- *
- * In effect we define a tail pointer for reading that lags the real tail
- * pointer by at least %OA_TAIL_MARGIN_NSEC nanoseconds, which gives enough
- * time for the corresponding reports to become visible to the CPU.
- *
- * To manage this we actually track two tail pointers:
- *  1) An 'aging' tail with an associated timestamp that is tracked until we
- *     can trust the corresponding data is visible to the CPU; at which point
- *     it is considered 'aged'.
- *  2) An 'aged' tail that can be used for read()ing.
- *
- * The two separate pointers let us decouple read()s from tail pointer aging.
- *
- * The tail pointers are checked and updated at a limited rate within a hrtimer
- * callback (the same callback that is used for delivering EPOLLIN events)
- *
- * Initially the tails are marked invalid with %INVALID_TAIL_PTR which
- * indicates that an updated tail pointer is needed.
- *
- * Most of the implementation details for this workaround are in
- * oa_buffer_check_unlocked() and _append_oa_reports()
- *
- * Note for posterity: previously the driver used to define an effective tail
- * pointer that lagged the real pointer by a 'tail margin' measured in bytes
- * derived from %OA_TAIL_MARGIN_NSEC and the configured sampling frequency.
- * This was flawed considering that the OA unit may also automatically generate
- * non-periodic reports (such as on context switch) or the OA unit may be
- * enabled without any periodic sampling.
- */
-#define OA_TAIL_MARGIN_NSEC	100000ULL
-#define INVALID_TAIL_PTR	0xffffffff
-
 /* frequency for checking whether the OA unit has written new reports to the
  * circular OA buffer...
  */
@@ -413,199 +368,59 @@ static int get_oa_config(struct drm_i915_private *dev_priv,
 
 static u32 gen8_oa_hw_tail_read(struct drm_i915_private *dev_priv)
 {
-	return I915_READ(GEN8_OATAILPTR) & GEN8_OATAILPTR_MASK;
+	return I915_READ_FW(GEN8_OATAILPTR) & GEN8_OATAILPTR_MASK;
 }
 
 static u32 gen7_oa_hw_tail_read(struct drm_i915_private *dev_priv)
 {
-	u32 oastatus1 = I915_READ(GEN7_OASTATUS1);
-
-	return oastatus1 & GEN7_OASTATUS1_TAIL_MASK;
+	return I915_READ_FW(GEN7_OASTATUS1) & GEN7_OASTATUS1_TAIL_MASK;
 }
 
-/**
- * oa_buffer_check_unlocked - check for data and update tail ptr state
- * @dev_priv: i915 device instance
- *
- * This is either called via fops (for blocking reads in user ctx) or the poll
- * check hrtimer (atomic ctx) to check the OA buffer tail pointer and check
- * if there is data available for userspace to read.
- *
- * This function is central to providing a workaround for the OA unit tail
- * pointer having a race with respect to what data is visible to the CPU.
- * It is responsible for reading tail pointers from the hardware and giving
- * the pointers time to 'age' before they are made available for reading.
- * (See description of OA_TAIL_MARGIN_NSEC above for further details.)
- *
- * Besides returning true when there is data available to read() this function
- * also has the side effect of updating the oa_buffer.tails[], .aging_timestamp
- * and .aged_tail_idx state used for reading.
- *
- * Note: It's safe to read OA config state here unlocked, assuming that this is
- * only called while the stream is enabled, while the global OA configuration
- * can't be modified.
- *
- * Returns: %true if the OA buffer contains data, else %false
- */
 static bool oa_buffer_check_unlocked(struct drm_i915_private *dev_priv)
 {
 	int report_size = dev_priv->perf.oa.oa_buffer.format_size;
-	unsigned long flags;
-	unsigned int aged_idx;
-	u32 head, hw_tail, aged_tail, aging_tail;
-	u64 now;
-
-	/* We have to consider the (unlikely) possibility that read() errors
-	 * could result in an OA buffer reset which might reset the head,
-	 * tails[] and aged_tail state.
-	 */
-	spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
-
-	/* NB: The head we observe here might effectively be a little out of
-	 * date (between head and tails[aged_idx].offset if there is currently
-	 * a read() in progress.
-	 */
-	head = dev_priv->perf.oa.oa_buffer.head;
-
-	aged_idx = dev_priv->perf.oa.oa_buffer.aged_tail_idx;
-	aged_tail = dev_priv->perf.oa.oa_buffer.tails[aged_idx].offset;
-	aging_tail = dev_priv->perf.oa.oa_buffer.tails[!aged_idx].offset;
-
-	hw_tail = dev_priv->perf.oa.ops.oa_hw_tail_read(dev_priv);
-
-	/* The tail pointer increases in 64 byte increments,
-	 * not in report_size steps...
-	 */
-	hw_tail &= ~(report_size - 1);
-
-	now = ktime_get_mono_fast_ns();
-
-	/* Update the aged tail
-	 *
-	 * Flip the tail pointer available for read()s once the aging tail is
-	 * old enough to trust that the corresponding data will be visible to
-	 * the CPU...
-	 *
-	 * Do this before updating the aging pointer in case we may be able to
-	 * immediately start aging a new pointer too (if new data has become
-	 * available) without needing to wait for a later hrtimer callback.
-	 */
-	if (aging_tail != INVALID_TAIL_PTR &&
-	    ((now - dev_priv->perf.oa.oa_buffer.aging_timestamp) >
-	     OA_TAIL_MARGIN_NSEC)) {
-
-		aged_idx ^= 1;
-		dev_priv->perf.oa.oa_buffer.aged_tail_idx = aged_idx;
-
-		aged_tail = aging_tail;
-
-		/* Mark that we need a new pointer to start aging... */
-		dev_priv->perf.oa.oa_buffer.tails[!aged_idx].offset = INVALID_TAIL_PTR;
-		aging_tail = INVALID_TAIL_PTR;
-	}
-
-	/* Update the aging tail
-	 *
-	 * We throttle aging tail updates until we have a new tail that
-	 * represents >= one report more data than is already available for
-	 * reading. This ensures there will be enough data for a successful
-	 * read once this new pointer has aged and ensures we will give the new
-	 * pointer time to age.
-	 */
-	if (aging_tail == INVALID_TAIL_PTR &&
-	    (aged_tail == INVALID_TAIL_PTR ||
-	     OA_TAKEN(hw_tail, aged_tail) >= report_size)) {
-		struct i915_vma *vma = dev_priv->perf.oa.oa_buffer.vma;
-		u32 gtt_offset = i915_ggtt_offset(vma);
-
-		/* Be paranoid and do a bounds check on the pointer read back
-		 * from hardware, just in case some spurious hardware condition
-		 * could put the tail out of bounds...
-		 */
-		if (hw_tail >= gtt_offset &&
-		    hw_tail < (gtt_offset + OA_BUFFER_SIZE)) {
-			dev_priv->perf.oa.oa_buffer.tails[!aged_idx].offset =
-				aging_tail = hw_tail;
-			dev_priv->perf.oa.oa_buffer.aging_timestamp = now;
-		} else {
-			DRM_ERROR("Ignoring spurious out of range OA buffer tail pointer = %u\n",
-				  hw_tail);
-		}
-	}
+	u32 head, tail;
 
-	spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
+	head = READ_ONCE(dev_priv->perf.oa.oa_buffer.head);
+	tail = dev_priv->perf.oa.ops.oa_hw_tail_read(dev_priv);
 
-	return aged_tail == INVALID_TAIL_PTR ?
-		false : OA_TAKEN(aged_tail, head) >= report_size;
+	return OA_TAKEN(tail, head) >= report_size;
 }
 
-/**
- * append_oa_status - Appends a status record to a userspace read() buffer.
- * @stream: An i915-perf stream opened for OA metrics
- * @buf: destination buffer given by userspace
- * @count: the number of bytes userspace wants to read
- * @offset: (inout): the current position for writing into @buf
- * @type: The kind of status to report to userspace
- *
- * Writes a status record (such as `DRM_I915_PERF_RECORD_OA_REPORT_LOST`)
- * into the userspace read() buffer.
- *
- * The @buf @offset will only be updated on success.
- *
- * Returns: 0 on success, negative error code on failure.
- */
 static int append_oa_status(struct i915_perf_stream *stream,
 			    char __user *buf,
 			    size_t count,
 			    size_t *offset,
 			    enum drm_i915_perf_record_type type)
 {
-	struct drm_i915_perf_record_header header = { type, 0, sizeof(header) };
+	struct drm_i915_perf_record_header header = {
+		.type = type,
+		.size = sizeof(header),
+	};
 
-	if ((count - *offset) < header.size)
+	if (count - *offset < header.size)
 		return -ENOSPC;
 
 	if (copy_to_user(buf + *offset, &header, sizeof(header)))
 		return -EFAULT;
 
-	(*offset) += header.size;
-
+	*offset += header.size;
 	return 0;
 }
 
-/**
- * append_oa_sample - Copies single OA report into userspace read() buffer.
- * @stream: An i915-perf stream opened for OA metrics
- * @buf: destination buffer given by userspace
- * @count: the number of bytes userspace wants to read
- * @offset: (inout): the current position for writing into @buf
- * @report: A single OA report to (optionally) include as part of the sample
- *
- * The contents of a sample are configured through `DRM_I915_PERF_PROP_SAMPLE_*`
- * properties when opening a stream, tracked as `stream->sample_flags`. This
- * function copies the requested components of a single sample to the given
- * read() @buf.
- *
- * The @buf @offset will only be updated on success.
- *
- * Returns: 0 on success, negative error code on failure.
- */
 static int append_oa_sample(struct i915_perf_stream *stream,
 			    char __user *buf,
 			    size_t count,
 			    size_t *offset,
-			    const u8 *report)
+			    const void *report,
+			    int report_size)
 {
-	struct drm_i915_private *dev_priv = stream->dev_priv;
-	int report_size = dev_priv->perf.oa.oa_buffer.format_size;
-	struct drm_i915_perf_record_header header;
-	u32 sample_flags = stream->sample_flags;
-
-	header.type = DRM_I915_PERF_RECORD_SAMPLE;
-	header.pad = 0;
-	header.size = stream->sample_size;
+	struct drm_i915_perf_record_header header = {
+		.type = DRM_I915_PERF_RECORD_SAMPLE,
+		.size = stream->sample_size,
+	};
 
-	if ((count - *offset) < header.size)
+	if (count - *offset < header.size)
 		return -ENOSPC;
 
 	buf += *offset;
@@ -613,113 +428,40 @@ static int append_oa_sample(struct i915_perf_stream *stream,
 		return -EFAULT;
 	buf += sizeof(header);
 
-	if (sample_flags & SAMPLE_OA_REPORT) {
-		if (copy_to_user(buf, report, report_size))
-			return -EFAULT;
-	}
-
-	(*offset) += header.size;
+	if (stream->sample_flags & SAMPLE_OA_REPORT &&
+	    copy_to_user(buf, report, report_size))
+		return -EFAULT;
 
+	*offset += header.size;
 	return 0;
 }
 
-/**
- * Copies all buffered OA reports into userspace read() buffer.
- * @stream: An i915-perf stream opened for OA metrics
- * @buf: destination buffer given by userspace
- * @count: the number of bytes userspace wants to read
- * @offset: (inout): the current position for writing into @buf
- *
- * Notably any error condition resulting in a short read (-%ENOSPC or
- * -%EFAULT) will be returned even though one or more records may
- * have been successfully copied. In this case it's up to the caller
- * to decide if the error should be squashed before returning to
- * userspace.
- *
- * Note: reports are consumed from the head, and appended to the
- * tail, so the tail chases the head?... If you think that's mad
- * and back-to-front you're not alone, but this follows the
- * Gen PRM naming convention.
- *
- * Returns: 0 on success, negative error code on failure.
- */
 static int gen8_append_oa_reports(struct i915_perf_stream *stream,
 				  char __user *buf,
 				  size_t count,
 				  size_t *offset)
 {
 	struct drm_i915_private *dev_priv = stream->dev_priv;
+	const void *oa_buf_base = dev_priv->perf.oa.oa_buffer.vaddr;
 	int report_size = dev_priv->perf.oa.oa_buffer.format_size;
-	u8 *oa_buf_base = dev_priv->perf.oa.oa_buffer.vaddr;
 	u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma);
-	u32 mask = (OA_BUFFER_SIZE - 1);
-	size_t start_offset = *offset;
-	unsigned long flags;
-	unsigned int aged_tail_idx;
+	u32 report[64] __aligned(16);
 	u32 head, tail;
-	u32 taken;
 	int ret = 0;
 
-	if (WARN_ON(!stream->enabled))
-		return -EIO;
-
-	spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
-
-	head = dev_priv->perf.oa.oa_buffer.head;
-	aged_tail_idx = dev_priv->perf.oa.oa_buffer.aged_tail_idx;
-	tail = dev_priv->perf.oa.oa_buffer.tails[aged_tail_idx].offset;
-
-	spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
-
-	/*
-	 * An invalid tail pointer here means we're still waiting for the poll
-	 * hrtimer callback to give us a pointer
-	 */
-	if (tail == INVALID_TAIL_PTR)
-		return -EAGAIN;
-
-	/*
-	 * NB: oa_buffer.head/tail include the gtt_offset which we don't want
-	 * while indexing relative to oa_buf_base.
-	 */
-	head -= gtt_offset;
-	tail -= gtt_offset;
+	GEM_BUG_ON(report_size > sizeof(report));
 
-	/*
-	 * An out of bounds or misaligned head or tail pointer implies a driver
-	 * bug since we validate + align the tail pointers we read from the
-	 * hardware and we are in full control of the head pointer which should
-	 * only be incremented by multiples of the report size (notably also
-	 * all a power of two).
-	 */
-	if (WARN_ONCE(head > OA_BUFFER_SIZE || head % report_size ||
-		      tail > OA_BUFFER_SIZE || tail % report_size,
-		      "Inconsistent OA buffer pointers: head = %u, tail = %u\n",
-		      head, tail))
+	if (WARN_ON(!stream->enabled))
 		return -EIO;
 
-
-	for (/* none */;
-	     (taken = OA_TAKEN(tail, head));
-	     head = (head + report_size) & mask) {
-		u8 *report = oa_buf_base + head;
-		u32 *report32 = (void *)report;
+	for (head = READ_ONCE(dev_priv->perf.oa.oa_buffer.head) - gtt_offset,
+	     tail = gen8_oa_hw_tail_read(dev_priv) - gtt_offset;
+	     OA_TAKEN(tail, head) >= report_size;
+	     head = (head + report_size) & (OA_BUFFER_SIZE - 1)) {
 		u32 ctx_id;
 		u32 reason;
 
-		/*
-		 * All the report sizes factor neatly into the buffer
-		 * size so we never expect to see a report split
-		 * between the beginning and end of the buffer.
-		 *
-		 * Given the initial alignment check a misalignment
-		 * here would imply a driver bug that would result
-		 * in an overrun.
-		 */
-		if (WARN_ON((OA_BUFFER_SIZE - head) < report_size)) {
-			DRM_ERROR("Spurious OA head ptr: non-integral report offset\n");
-			break;
-		}
+		i915_memcpy_from_wc(report, oa_buf_base + head, report_size);
 
 		/*
 		 * The reason field includes flags identifying what
@@ -730,15 +472,12 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
 		 * check that the report isn't invalid before copying
 		 * it to userspace...
 		 */
-		reason = ((report32[0] >> OAREPORT_REASON_SHIFT) &
+		reason = ((report[0] >> OAREPORT_REASON_SHIFT) &
 			  OAREPORT_REASON_MASK);
-		if (reason == 0) {
-			if (__ratelimit(&dev_priv->perf.oa.spurious_report_rs))
-				DRM_NOTE("Skipping spurious, invalid OA report\n");
+		if (reason == 0)
 			continue;
-		}
 
-		ctx_id = report32[2] & dev_priv->perf.oa.specific_ctx_id_mask;
+		ctx_id = report[2] & dev_priv->perf.oa.specific_ctx_id_mask;
 
 		/*
 		 * Squash whatever is in the CTX_ID field if it's marked as
@@ -748,8 +487,8 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
 		 * Note: that we don't clear the valid_ctx_bit so userspace can
 		 * understand that the ID has been squashed by the kernel.
 		 */
-		if (!(report32[0] & dev_priv->perf.oa.gen8_valid_ctx_bit))
-			ctx_id = report32[2] = INVALID_CTX_ID;
+		if (!(report[0] & dev_priv->perf.oa.gen8_valid_ctx_bit))
+			ctx_id = report[2] = INVALID_CTX_ID;
 
 		/*
 		 * NB: For Gen 8 the OA unit no longer supports clock gating
@@ -793,42 +532,21 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
 			 * leaking the IDs of other contexts.
 			 */
 			if (dev_priv->perf.oa.exclusive_stream->ctx &&
-			    dev_priv->perf.oa.specific_ctx_id != ctx_id) {
-				report32[2] = INVALID_CTX_ID;
-			}
+			    dev_priv->perf.oa.specific_ctx_id != ctx_id)
+				report[2] = INVALID_CTX_ID;
 
 			ret = append_oa_sample(stream, buf, count, offset,
-					       report);
+					       report, report_size);
 			if (ret)
 				break;
 
 			dev_priv->perf.oa.oa_buffer.last_ctx_id = ctx_id;
 		}
-
-		/*
-		 * The above reason field sanity check is based on
-		 * the assumption that the OA buffer is initially
-		 * zeroed and we reset the field after copying so the
-		 * check is still meaningful once old reports start
-		 * being overwritten.
-		 */
-		report32[0] = 0;
 	}
 
-	if (start_offset != *offset) {
-		spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
-
-		/*
-		 * We removed the gtt_offset for the copy loop above, indexing
-		 * relative to oa_buf_base so put back here...
-		 */
-		head += gtt_offset;
-
-		I915_WRITE(GEN8_OAHEADPTR, head & GEN8_OAHEADPTR_MASK);
-		dev_priv->perf.oa.oa_buffer.head = head;
-
-		spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
-	}
+	head += gtt_offset;
+	I915_WRITE_FW(GEN8_OAHEADPTR, head & GEN8_OAHEADPTR_MASK);
+	WRITE_ONCE(dev_priv->perf.oa.oa_buffer.head, head);
 
 	return ret;
 }
@@ -865,7 +583,7 @@ static int gen8_oa_read(struct i915_perf_stream *stream,
 	if (WARN_ON(!dev_priv->perf.oa.oa_buffer.vaddr))
 		return -EIO;
 
-	oastatus = I915_READ(GEN8_OASTATUS);
+	oastatus = I915_READ_FW(GEN8_OASTATUS);
 
 	/*
 	 * We treat OABUFFER_OVERFLOW as a significant error:
@@ -897,7 +615,7 @@ static int gen8_oa_read(struct i915_perf_stream *stream,
 		 * Note: .oa_enable() is expected to re-init the oabuffer and
 		 * reset GEN8_OASTATUS for us
 		 */
-		oastatus = I915_READ(GEN8_OASTATUS);
+		oastatus = I915_READ_FW(GEN8_OASTATUS);
 	}
 
 	if (oastatus & GEN8_OASTATUS_REPORT_LOST) {
@@ -905,145 +623,58 @@ static int gen8_oa_read(struct i915_perf_stream *stream,
 				       DRM_I915_PERF_RECORD_OA_REPORT_LOST);
 		if (ret)
 			return ret;
-		I915_WRITE(GEN8_OASTATUS,
-			   oastatus & ~GEN8_OASTATUS_REPORT_LOST);
+		I915_WRITE_FW(GEN8_OASTATUS,
+			      oastatus & ~GEN8_OASTATUS_REPORT_LOST);
 	}
 
 	return gen8_append_oa_reports(stream, buf, count, offset);
 }
 
-/**
- * Copies all buffered OA reports into userspace read() buffer.
- * @stream: An i915-perf stream opened for OA metrics
- * @buf: destination buffer given by userspace
- * @count: the number of bytes userspace wants to read
- * @offset: (inout): the current position for writing into @buf
- *
- * Notably any error condition resulting in a short read (-%ENOSPC or
- * -%EFAULT) will be returned even though one or more records may
- * have been successfully copied. In this case it's up to the caller
- * to decide if the error should be squashed before returning to
- * userspace.
- *
- * Note: reports are consumed from the head, and appended to the
- * tail, so the tail chases the head?... If you think that's mad
- * and back-to-front you're not alone, but this follows the
- * Gen PRM naming convention.
- *
- * Returns: 0 on success, negative error code on failure.
- */
 static int gen7_append_oa_reports(struct i915_perf_stream *stream,
 				  char __user *buf,
 				  size_t count,
 				  size_t *offset)
 {
 	struct drm_i915_private *dev_priv = stream->dev_priv;
+	const void *oa_buf_base = dev_priv->perf.oa.oa_buffer.vaddr;
 	int report_size = dev_priv->perf.oa.oa_buffer.format_size;
-	u8 *oa_buf_base = dev_priv->perf.oa.oa_buffer.vaddr;
 	u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma);
-	u32 mask = (OA_BUFFER_SIZE - 1);
-	size_t start_offset = *offset;
-	unsigned long flags;
-	unsigned int aged_tail_idx;
+	u32 report[64] __aligned(16);
 	u32 head, tail;
-	u32 taken;
 	int ret = 0;
 
-	if (WARN_ON(!stream->enabled))
-		return -EIO;
-
-	spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
-
-	head = dev_priv->perf.oa.oa_buffer.head;
-	aged_tail_idx = dev_priv->perf.oa.oa_buffer.aged_tail_idx;
-	tail = dev_priv->perf.oa.oa_buffer.tails[aged_tail_idx].offset;
-
-	spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
-
-	/* An invalid tail pointer here means we're still waiting for the poll
-	 * hrtimer callback to give us a pointer
-	 */
-	if (tail == INVALID_TAIL_PTR)
-		return -EAGAIN;
+	GEM_BUG_ON(report_size > sizeof(report));
 
-	/* NB: oa_buffer.head/tail include the gtt_offset which we don't want
-	 * while indexing relative to oa_buf_base.
-	 */
-	head -= gtt_offset;
-	tail -= gtt_offset;
-
-	/* An out of bounds or misaligned head or tail pointer implies a driver
-	 * bug since we validate + align the tail pointers we read from the
-	 * hardware and we are in full control of the head pointer which should
-	 * only be incremented by multiples of the report size (notably also
-	 * all a power of two).
-	 */
-	if (WARN_ONCE(head > OA_BUFFER_SIZE || head % report_size ||
-		      tail > OA_BUFFER_SIZE || tail % report_size,
-		      "Inconsistent OA buffer pointers: head = %u, tail = %u\n",
-		      head, tail))
+	if (WARN_ON(!stream->enabled))
 		return -EIO;
 
+	for (head = READ_ONCE(dev_priv->perf.oa.oa_buffer.head) - gtt_offset,
+	     tail = gen7_oa_hw_tail_read(dev_priv) - gtt_offset;
+	     OA_TAKEN(tail, head) >= report_size;
+	     head = (head + report_size) & (OA_BUFFER_SIZE - 1)) {
+		i915_memcpy_from_wc(report, oa_buf_base + head, report_size);
 
-	for (/* none */;
-	     (taken = OA_TAKEN(tail, head));
-	     head = (head + report_size) & mask) {
-		u8 *report = oa_buf_base + head;
-		u32 *report32 = (void *)report;
-
-		/* All the report sizes factor neatly into the buffer
-		 * size so we never expect to see a report split
-		 * between the beginning and end of the buffer.
-		 *
-		 * Given the initial alignment check a misalignment
-		 * here would imply a driver bug that would result
-		 * in an overrun.
-		 */
-		if (WARN_ON((OA_BUFFER_SIZE - head) < report_size)) {
-			DRM_ERROR("Spurious OA head ptr: non-integral report offset\n");
-			break;
-		}
-
-		/* The report-ID field for periodic samples includes
+		/*
+		 * The report-ID field for periodic samples includes
 		 * some undocumented flags related to what triggered
 		 * the report and is never expected to be zero so we
 		 * can check that the report isn't invalid before
 		 * copying it to userspace...
 		 */
-		if (report32[0] == 0) {
-			if (__ratelimit(&dev_priv->perf.oa.spurious_report_rs))
-				DRM_NOTE("Skipping spurious, invalid OA report\n");
+		if (report[0] == 0)
 			continue;
-		}
 
-		ret = append_oa_sample(stream, buf, count, offset, report);
+		ret = append_oa_sample(stream, buf, count, offset,
+				       report, report_size);
 		if (ret)
 			break;
-
-		/* The above report-id field sanity check is based on
-		 * the assumption that the OA buffer is initially
-		 * zeroed and we reset the field after copying so the
-		 * check is still meaningful once old reports start
-		 * being overwritten.
-		 */
-		report32[0] = 0;
 	}
 
-	if (start_offset != *offset) {
-		spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
-
-		/* We removed the gtt_offset for the copy loop above, indexing
-		 * relative to oa_buf_base so put back here...
-		 */
-		head += gtt_offset;
-
-		I915_WRITE(GEN7_OASTATUS2,
-			   ((head & GEN7_OASTATUS2_HEAD_MASK) |
-			    GEN7_OASTATUS2_MEM_SELECT_GGTT));
-		dev_priv->perf.oa.oa_buffer.head = head;
-
-		spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
-	}
+	head += gtt_offset;
+	I915_WRITE_FW(GEN7_OASTATUS2,
+		      (head & GEN7_OASTATUS2_HEAD_MASK) |
+		      GEN7_OASTATUS2_MEM_SELECT_GGTT);
+	WRITE_ONCE(dev_priv->perf.oa.oa_buffer.head, head);
 
 	return ret;
 }
@@ -1076,7 +707,7 @@ static int gen7_oa_read(struct i915_perf_stream *stream,
 	if (WARN_ON(!dev_priv->perf.oa.oa_buffer.vaddr))
 		return -EIO;
 
-	oastatus1 = I915_READ(GEN7_OASTATUS1);
+	oastatus1 = I915_READ_FW(GEN7_OASTATUS1);
 
 	/* XXX: On Haswell we don't have a safe way to clear oastatus1
 	 * bits while the OA unit is enabled (while the tail pointer
@@ -1117,7 +748,7 @@ static int gen7_oa_read(struct i915_perf_stream *stream,
 		dev_priv->perf.oa.ops.oa_disable(stream);
 		dev_priv->perf.oa.ops.oa_enable(stream);
 
-		oastatus1 = I915_READ(GEN7_OASTATUS1);
+		oastatus1 = I915_READ_FW(GEN7_OASTATUS1);
 	}
 
 	if (unlikely(oastatus1 & GEN7_OASTATUS1_REPORT_LOST)) {
@@ -1226,16 +857,6 @@ static struct intel_context *oa_pin_context(struct drm_i915_private *i915,
 	return ce;
 }
 
-/**
- * oa_get_render_ctx_id - determine and hold ctx hw id
- * @stream: An i915-perf stream opened for OA metrics
- *
- * Determine the render context hw id, and ensure it remains fixed for the
- * lifetime of the stream. This ensures that we don't have to worry about
- * updating the context ID in OACONTROL on the fly.
- *
- * Returns: zero on success or a negative error code
- */
 static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
 {
 	struct drm_i915_private *i915 = stream->dev_priv;
@@ -1311,13 +932,6 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
 	return 0;
 }
 
-/**
- * oa_put_render_ctx_id - counterpart to oa_get_render_ctx_id releases hold
- * @stream: An i915-perf stream opened for OA metrics
- *
- * In case anything needed doing to ensure the context HW ID would remain valid
- * for the lifetime of the stream, then that can be undone here.
- */
 static void oa_put_render_ctx_id(struct i915_perf_stream *stream)
 {
 	struct drm_i915_private *dev_priv = stream->dev_priv;
@@ -1339,12 +953,10 @@ free_oa_buffer(struct drm_i915_private *i915)
 {
 	mutex_lock(&i915->drm.struct_mutex);
 
-	i915_vma_unpin_and_release(&i915->perf.oa.oa_buffer.vma,
-				   I915_VMA_RELEASE_MAP);
+	i915_vma_unpin_iomap(i915->perf.oa.oa_buffer.vma);
+	i915_vma_unpin_and_release(&i915->perf.oa.oa_buffer.vma, 0);
 
 	mutex_unlock(&i915->drm.struct_mutex);
-
-	i915->perf.oa.oa_buffer.vaddr = NULL;
 }
 
 static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
@@ -1371,19 +983,11 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
 		oa_put_render_ctx_id(stream);
 
 	put_oa_config(dev_priv, stream->oa_config);
-
-	if (dev_priv->perf.oa.spurious_report_rs.missed) {
-		DRM_NOTE("%d spurious OA report notices suppressed due to ratelimiting\n",
-			 dev_priv->perf.oa.spurious_report_rs.missed);
-	}
 }
 
 static void gen7_init_oa_buffer(struct drm_i915_private *dev_priv)
 {
 	u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma);
-	unsigned long flags;
-
-	spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
 
 	/* Pre-DevBDW: OABUFFER must be set with counters off,
 	 * before OASTATUS1, but after OASTATUS2
@@ -1396,31 +1000,12 @@ static void gen7_init_oa_buffer(struct drm_i915_private *dev_priv)
 
 	I915_WRITE(GEN7_OASTATUS1, gtt_offset | OABUFFER_SIZE_16M); /* tail */
 
-	/* Mark that we need updated tail pointers to read from... */
-	dev_priv->perf.oa.oa_buffer.tails[0].offset = INVALID_TAIL_PTR;
-	dev_priv->perf.oa.oa_buffer.tails[1].offset = INVALID_TAIL_PTR;
-
-	spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
-
 	/* On Haswell we have to track which OASTATUS1 flags we've
 	 * already seen since they can't be cleared while periodic
 	 * sampling is enabled.
 	 */
 	dev_priv->perf.oa.gen7_latched_oastatus1 = 0;
 
-	/* NB: although the OA buffer will initially be allocated
-	 * zeroed via shmfs (and so this memset is redundant when
-	 * first allocating), we may re-init the OA buffer, either
-	 * when re-enabling a stream or in error/reset paths.
-	 *
-	 * The reason we clear the buffer for each re-init is for the
-	 * sanity check in gen7_append_oa_reports() that looks at the
-	 * report-id field to make sure it's non-zero which relies on
-	 * the assumption that new reports are being written to zeroed
-	 * memory...
-	 */
-	memset(dev_priv->perf.oa.oa_buffer.vaddr, 0, OA_BUFFER_SIZE);
-
 	/* Maybe make ->pollin per-stream state if we support multiple
 	 * concurrent streams in the future.
 	 */
@@ -1430,9 +1015,6 @@ static void gen7_init_oa_buffer(struct drm_i915_private *dev_priv)
 static void gen8_init_oa_buffer(struct drm_i915_private *dev_priv)
 {
 	u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma);
-	unsigned long flags;
-
-	spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
 
 	I915_WRITE(GEN8_OASTATUS, 0);
 	I915_WRITE(GEN8_OAHEADPTR, gtt_offset);
@@ -1452,10 +1034,6 @@ static void gen8_init_oa_buffer(struct drm_i915_private *dev_priv)
 		   OABUFFER_SIZE_16M | GEN8_OABUFFER_MEM_SELECT_GGTT);
 	I915_WRITE(GEN8_OATAILPTR, gtt_offset & GEN8_OATAILPTR_MASK);
 
-	/* Mark that we need updated tail pointers to read from... */
-	dev_priv->perf.oa.oa_buffer.tails[0].offset = INVALID_TAIL_PTR;
-	dev_priv->perf.oa.oa_buffer.tails[1].offset = INVALID_TAIL_PTR;
-
 	/*
 	 * Reset state used to recognise context switches, affecting which
 	 * reports we will forward to userspace while filtering for a single
@@ -1463,22 +1041,6 @@ static void gen8_init_oa_buffer(struct drm_i915_private *dev_priv)
 	 */
 	dev_priv->perf.oa.oa_buffer.last_ctx_id = INVALID_CTX_ID;
 
-	spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
-
-	/*
-	 * NB: although the OA buffer will initially be allocated
-	 * zeroed via shmfs (and so this memset is redundant when
-	 * first allocating), we may re-init the OA buffer, either
-	 * when re-enabling a stream or in error/reset paths.
-	 *
-	 * The reason we clear the buffer for each re-init is for the
-	 * sanity check in gen8_append_oa_reports() that looks at the
-	 * reason field to make sure it's non-zero which relies on
-	 * the assumption that new reports are being written to zeroed
-	 * memory...
-	 */
-	memset(dev_priv->perf.oa.oa_buffer.vaddr, 0, OA_BUFFER_SIZE);
-
 	/*
 	 * Maybe make ->pollin per-stream state if we support multiple
 	 * concurrent streams in the future.
@@ -1513,16 +1075,19 @@ static int alloc_oa_buffer(struct drm_i915_private *dev_priv)
 	if (ret)
 		goto err_unref;
 
+	ret = i915_gem_object_set_to_gtt_domain(bo, true);
+	if (ret)
+		goto err_unref;
+
 	/* PreHSW required 512K alignment, HSW requires 16M */
-	vma = i915_gem_object_ggtt_pin(bo, NULL, 0, SZ_16M, 0);
+	vma = i915_gem_object_ggtt_pin(bo, NULL, 0, SZ_16M, PIN_MAPPABLE);
 	if (IS_ERR(vma)) {
 		ret = PTR_ERR(vma);
 		goto err_unref;
 	}
 	dev_priv->perf.oa.oa_buffer.vma = vma;
 
-	dev_priv->perf.oa.oa_buffer.vaddr =
-		i915_gem_object_pin_map(bo, I915_MAP_WB);
+	dev_priv->perf.oa.oa_buffer.vaddr = i915_vma_pin_iomap(vma);
 	if (IS_ERR(dev_priv->perf.oa.oa_buffer.vaddr)) {
 		ret = PTR_ERR(dev_priv->perf.oa.oa_buffer.vaddr);
 		goto err_unpin;
@@ -1540,7 +1105,6 @@ static int alloc_oa_buffer(struct drm_i915_private *dev_priv)
 err_unref:
 	i915_gem_object_put(bo);
 
-	dev_priv->perf.oa.oa_buffer.vaddr = NULL;
 	dev_priv->perf.oa.oa_buffer.vma = NULL;
 
 unlock:
@@ -1554,11 +1118,15 @@ static void config_oa_regs(struct drm_i915_private *dev_priv,
 {
 	u32 i;
 
+	if (!n_regs)
+		return;
+
 	for (i = 0; i < n_regs; i++) {
 		const struct i915_oa_reg *reg = regs + i;
 
 		I915_WRITE(reg->addr, reg->value);
 	}
+	POSTING_READ(regs[n_regs - 1].addr);
 }
 
 static int hsw_enable_metric_set(struct i915_perf_stream *stream)
@@ -1865,8 +1433,7 @@ static void gen7_oa_enable(struct i915_perf_stream *stream)
 
 	I915_WRITE(GEN7_OACONTROL,
 		   (ctx_id & GEN7_OACONTROL_CTX_MASK) |
-		   (period_exponent <<
-		    GEN7_OACONTROL_TIMER_PERIOD_SHIFT) |
+		   (period_exponent << GEN7_OACONTROL_TIMER_PERIOD_SHIFT) |
 		   (periodic ? GEN7_OACONTROL_TIMER_ENABLE : 0) |
 		   (report_format << GEN7_OACONTROL_FORMAT_SHIFT) |
 		   (ctx ? GEN7_OACONTROL_PER_CTX_ENABLE : 0) |
@@ -1894,9 +1461,9 @@ static void gen8_oa_enable(struct i915_perf_stream *stream)
 	 * filtering and instead filter on the cpu based on the context-id
 	 * field of reports
 	 */
-	I915_WRITE(GEN8_OACONTROL, (report_format <<
-				    GEN8_OA_REPORT_FORMAT_SHIFT) |
-				   GEN8_OA_COUNTER_ENABLE);
+	I915_WRITE(GEN8_OACONTROL,
+		   (report_format << GEN8_OA_REPORT_FORMAT_SHIFT) |
+		   GEN8_OA_COUNTER_ENABLE);
 }
 
 /**
@@ -2028,26 +1595,6 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
 		return -EINVAL;
 	}
 
-	/* We set up some ratelimit state to potentially throttle any _NOTES
-	 * about spurious, invalid OA reports which we don't forward to
-	 * userspace.
-	 *
-	 * The initialization is associated with opening the stream (not driver
-	 * init) considering we print a _NOTE about any throttling when closing
-	 * the stream instead of waiting until driver _fini which no one would
-	 * ever see.
-	 *
-	 * Using the same limiting factors as printk_ratelimit()
-	 */
-	ratelimit_state_init(&dev_priv->perf.oa.spurious_report_rs,
-			     5 * HZ, 10);
-	/* Since we use a DRM_NOTE for spurious reports it would be
-	 * inconsistent to let __ratelimit() automatically print a warning for
-	 * throttling.
-	 */
-	ratelimit_set_flags(&dev_priv->perf.oa.spurious_report_rs,
-			    RATELIMIT_MSG_ON_RELEASE);
-
 	stream->sample_size = sizeof(struct drm_i915_perf_record_header);
 
 	format_size = dev_priv->perf.oa.oa_formats[props->oa_format].size;
@@ -3391,6 +2938,9 @@ static struct ctl_table dev_root[] = {
  */
 void i915_perf_init(struct drm_i915_private *dev_priv)
 {
+	if (!i915_has_memcpy_from_wc())
+		return;
+
 	if (IS_HASWELL(dev_priv)) {
 		dev_priv->perf.oa.ops.is_valid_b_counter_reg =
 			gen7_is_valid_b_counter_addr;
@@ -3473,7 +3023,6 @@ void i915_perf_init(struct drm_i915_private *dev_priv)
 
 		INIT_LIST_HEAD(&dev_priv->perf.streams);
 		mutex_init(&dev_priv->perf.lock);
-		spin_lock_init(&dev_priv->perf.oa.oa_buffer.ptr_lock);
 
 		oa_sample_rate_hard_limit = 1000 *
 			(RUNTIME_INFO(dev_priv)->cs_timestamp_frequency_khz / 2);
-- 
2.20.1



More information about the Intel-gfx mailing list