[PATCH] drm/i915/oa: rmb ftw
Chris Wilson
chris at chris-wilson.co.uk
Mon Feb 18 22:44:59 UTC 2019
---
drivers/gpu/drm/i915/i915_drv.h | 53 ------
drivers/gpu/drm/i915/i915_perf.c | 269 +++----------------------------
2 files changed, 25 insertions(+), 297 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 5c8d0489a1cd..7415e64e1fd3 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1899,59 +1899,6 @@ struct drm_i915_private {
int format;
int format_size;
- /**
- * Locks reads and writes to all head/tail state
- *
- * Consider: the head and tail pointer state
- * needs to be read consistently from a hrtimer
- * callback (atomic context) and read() fop
- * (user context) with tail pointer updates
- * happening in atomic context and head updates
- * in user context and the (unlikely)
- * possibility of read() errors needing to
- * reset all head/tail state.
- *
- * Note: Contention or performance aren't
- * currently a significant concern here
- * considering the relatively low frequency of
- * hrtimer callbacks (5ms period) and that
- * reads typically only happen in response to a
- * hrtimer event and likely complete before the
- * next callback.
- *
- * Note: This lock is not held *while* reading
- * and copying data to userspace so the value
- * of head observed in htrimer callbacks won't
- * represent any partial consumption of data.
- */
- spinlock_t ptr_lock;
-
- /**
- * One 'aging' tail pointer and one 'aged'
- * tail pointer ready to used for reading.
- *
- * Initial values of 0xffffffff are invalid
- * and imply that an update is required
- * (and should be ignored by an attempted
- * read)
- */
- struct {
- u32 offset;
- } tails[2];
-
- /**
- * Index for the aged tail ready to read()
- * data up to.
- */
- unsigned int aged_tail_idx;
-
- /**
- * A monotonic timestamp for when the current
- * aging tail pointer was read; used to
- * determine when it is old enough to trust.
- */
- u64 aging_timestamp;
-
/**
* Although we can always read back the head
* pointer register, we prefer to avoid
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 9ebf99f3d8d3..66ba61cbc747 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -217,53 +217,8 @@
* that the overflow cases are unlikely in normal operation.
*/
#define OA_BUFFER_SIZE SZ_16M
-
#define OA_TAKEN(tail, head) ((tail - head) & (OA_BUFFER_SIZE - 1))
-/**
- * DOC: OA Tail Pointer Race
- *
- * There's a HW race condition between OA unit tail pointer register updates and
- * writes to memory whereby the tail pointer can sometimes get ahead of what's
- * been written out to the OA buffer so far (in terms of what's visible to the
- * CPU).
- *
- * Although this can be observed explicitly while copying reports to userspace
- * by checking for a zeroed report-id field in tail reports, we want to account
- * for this earlier, as part of the oa_buffer_check to avoid lots of redundant
- * read() attempts.
- *
- * In effect we define a tail pointer for reading that lags the real tail
- * pointer by at least %OA_TAIL_MARGIN_NSEC nanoseconds, which gives enough
- * time for the corresponding reports to become visible to the CPU.
- *
- * To manage this we actually track two tail pointers:
- * 1) An 'aging' tail with an associated timestamp that is tracked until we
- * can trust the corresponding data is visible to the CPU; at which point
- * it is considered 'aged'.
- * 2) An 'aged' tail that can be used for read()ing.
- *
- * The two separate pointers let us decouple read()s from tail pointer aging.
- *
- * The tail pointers are checked and updated at a limited rate within a hrtimer
- * callback (the same callback that is used for delivering EPOLLIN events)
- *
- * Initially the tails are marked invalid with %INVALID_TAIL_PTR which
- * indicates that an updated tail pointer is needed.
- *
- * Most of the implementation details for this workaround are in
- * oa_buffer_check_unlocked() and _append_oa_reports()
- *
- * Note for posterity: previously the driver used to define an effective tail
- * pointer that lagged the real pointer by a 'tail margin' measured in bytes
- * derived from %OA_TAIL_MARGIN_NSEC and the configured sampling frequency.
- * This was flawed considering that the OA unit may also automatically generate
- * non-periodic reports (such as on context switch) or the OA unit may be
- * enabled without any periodic sampling.
- */
-#define OA_TAIL_MARGIN_NSEC 100000ULL
-#define INVALID_TAIL_PTR 0xffffffff
-
/* frequency for checking whether the OA unit has written new reports to the
* circular OA buffer...
*/
@@ -418,9 +373,7 @@ static u32 gen8_oa_hw_tail_read(struct drm_i915_private *dev_priv)
static u32 gen7_oa_hw_tail_read(struct drm_i915_private *dev_priv)
{
- u32 oastatus1 = I915_READ(GEN7_OASTATUS1);
-
- return oastatus1 & GEN7_OASTATUS1_TAIL_MASK;
+ return I915_READ(GEN7_OASTATUS1) & GEN7_OASTATUS1_TAIL_MASK;
}
/**
@@ -431,112 +384,17 @@ static u32 gen7_oa_hw_tail_read(struct drm_i915_private *dev_priv)
* check hrtimer (atomic ctx) to check the OA buffer tail pointer and check
* if there is data available for userspace to read.
*
- * This function is central to providing a workaround for the OA unit tail
- * pointer having a race with respect to what data is visible to the CPU.
- * It is responsible for reading tail pointers from the hardware and giving
- * the pointers time to 'age' before they are made available for reading.
- * (See description of OA_TAIL_MARGIN_NSEC above for further details.)
- *
- * Besides returning true when there is data available to read() this function
- * also has the side effect of updating the oa_buffer.tails[], .aging_timestamp
- * and .aged_tail_idx state used for reading.
- *
- * Note: It's safe to read OA config state here unlocked, assuming that this is
- * only called while the stream is enabled, while the global OA configuration
- * can't be modified.
- *
* Returns: %true if the OA buffer contains data, else %false
*/
static bool oa_buffer_check_unlocked(struct drm_i915_private *dev_priv)
{
int report_size = dev_priv->perf.oa.oa_buffer.format_size;
- unsigned long flags;
- unsigned int aged_idx;
- u32 head, hw_tail, aged_tail, aging_tail;
- u64 now;
-
- /* We have to consider the (unlikely) possibility that read() errors
- * could result in an OA buffer reset which might reset the head,
- * tails[] and aged_tail state.
- */
- spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
-
- /* NB: The head we observe here might effectively be a little out of
- * date (between head and tails[aged_idx].offset if there is currently
- * a read() in progress.
- */
- head = dev_priv->perf.oa.oa_buffer.head;
-
- aged_idx = dev_priv->perf.oa.oa_buffer.aged_tail_idx;
- aged_tail = dev_priv->perf.oa.oa_buffer.tails[aged_idx].offset;
- aging_tail = dev_priv->perf.oa.oa_buffer.tails[!aged_idx].offset;
-
- hw_tail = dev_priv->perf.oa.ops.oa_hw_tail_read(dev_priv);
-
- /* The tail pointer increases in 64 byte increments,
- * not in report_size steps...
- */
- hw_tail &= ~(report_size - 1);
-
- now = ktime_get_mono_fast_ns();
-
- /* Update the aged tail
- *
- * Flip the tail pointer available for read()s once the aging tail is
- * old enough to trust that the corresponding data will be visible to
- * the CPU...
- *
- * Do this before updating the aging pointer in case we may be able to
- * immediately start aging a new pointer too (if new data has become
- * available) without needing to wait for a later hrtimer callback.
- */
- if (aging_tail != INVALID_TAIL_PTR &&
- ((now - dev_priv->perf.oa.oa_buffer.aging_timestamp) >
- OA_TAIL_MARGIN_NSEC)) {
-
- aged_idx ^= 1;
- dev_priv->perf.oa.oa_buffer.aged_tail_idx = aged_idx;
-
- aged_tail = aging_tail;
-
- /* Mark that we need a new pointer to start aging... */
- dev_priv->perf.oa.oa_buffer.tails[!aged_idx].offset = INVALID_TAIL_PTR;
- aging_tail = INVALID_TAIL_PTR;
- }
-
- /* Update the aging tail
- *
- * We throttle aging tail updates until we have a new tail that
- * represents >= one report more data than is already available for
- * reading. This ensures there will be enough data for a successful
- * read once this new pointer has aged and ensures we will give the new
- * pointer time to age.
- */
- if (aging_tail == INVALID_TAIL_PTR &&
- (aged_tail == INVALID_TAIL_PTR ||
- OA_TAKEN(hw_tail, aged_tail) >= report_size)) {
- struct i915_vma *vma = dev_priv->perf.oa.oa_buffer.vma;
- u32 gtt_offset = i915_ggtt_offset(vma);
-
- /* Be paranoid and do a bounds check on the pointer read back
- * from hardware, just in case some spurious hardware condition
- * could put the tail out of bounds...
- */
- if (hw_tail >= gtt_offset &&
- hw_tail < (gtt_offset + OA_BUFFER_SIZE)) {
- dev_priv->perf.oa.oa_buffer.tails[!aged_idx].offset =
- aging_tail = hw_tail;
- dev_priv->perf.oa.oa_buffer.aging_timestamp = now;
- } else {
- DRM_ERROR("Ignoring spurious out of range OA buffer tail pointer = %u\n",
- hw_tail);
- }
- }
+ u32 head, tail;
- spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
+ head = READ_ONCE(dev_priv->perf.oa.oa_buffer.head);
+ tail = dev_priv->perf.oa.ops.oa_hw_tail_read(dev_priv);
- return aged_tail == INVALID_TAIL_PTR ?
- false : OA_TAKEN(aged_tail, head) >= report_size;
+ return OA_TAKEN(tail, head) >= report_size;
}
/**
@@ -654,8 +512,6 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma);
u32 mask = (OA_BUFFER_SIZE - 1);
size_t start_offset = *offset;
- unsigned long flags;
- unsigned int aged_tail_idx;
u32 head, tail;
u32 taken;
int ret = 0;
@@ -663,20 +519,9 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
if (WARN_ON(!stream->enabled))
return -EIO;
- spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
-
- head = dev_priv->perf.oa.oa_buffer.head;
- aged_tail_idx = dev_priv->perf.oa.oa_buffer.aged_tail_idx;
- tail = dev_priv->perf.oa.oa_buffer.tails[aged_tail_idx].offset;
-
- spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
-
- /*
- * An invalid tail pointer here means we're still waiting for the poll
- * hrtimer callback to give us a pointer
- */
- if (tail == INVALID_TAIL_PTR)
- return -EAGAIN;
+ head = READ_ONCE(dev_priv->perf.oa.oa_buffer.head);
+ tail = gen8_oa_hw_tail_read(dev_priv);
+ rmb();
/*
* NB: oa_buffer.head/tail include the gtt_offset which we don't want
@@ -685,22 +530,8 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
head -= gtt_offset;
tail -= gtt_offset;
- /*
- * An out of bounds or misaligned head or tail pointer implies a driver
- * bug since we validate + align the tail pointers we read from the
- * hardware and we are in full control of the head pointer which should
- * only be incremented by multiples of the report size (notably also
- * all a power of two).
- */
- if (WARN_ONCE(head > OA_BUFFER_SIZE || head % report_size ||
- tail > OA_BUFFER_SIZE || tail % report_size,
- "Inconsistent OA buffer pointers: head = %u, tail = %u\n",
- head, tail))
- return -EIO;
-
-
for (/* none */;
- (taken = OA_TAKEN(tail, head));
+ (taken = OA_TAKEN(tail, head)) >= report_size;
head = (head + report_size) & mask) {
u8 *report = oa_buf_base + head;
u32 *report32 = (void *)report;
@@ -816,8 +647,6 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
}
if (start_offset != *offset) {
- spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
-
/*
* We removed the gtt_offset for the copy loop above, indexing
* relative to oa_buf_base so put back here...
@@ -825,9 +654,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
head += gtt_offset;
I915_WRITE(GEN8_OAHEADPTR, head & GEN8_OAHEADPTR_MASK);
- dev_priv->perf.oa.oa_buffer.head = head;
-
- spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
+ WRITE_ONCE(dev_priv->perf.oa.oa_buffer.head, head);
}
return ret;
@@ -943,8 +770,6 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma);
u32 mask = (OA_BUFFER_SIZE - 1);
size_t start_offset = *offset;
- unsigned long flags;
- unsigned int aged_tail_idx;
u32 head, tail;
u32 taken;
int ret = 0;
@@ -952,41 +777,19 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
if (WARN_ON(!stream->enabled))
return -EIO;
- spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
-
- head = dev_priv->perf.oa.oa_buffer.head;
- aged_tail_idx = dev_priv->perf.oa.oa_buffer.aged_tail_idx;
- tail = dev_priv->perf.oa.oa_buffer.tails[aged_tail_idx].offset;
+ head = READ_ONCE(dev_priv->perf.oa.oa_buffer.head);
+ tail = gen7_oa_hw_tail_read(dev_priv);
+ rmb();
- spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
-
- /* An invalid tail pointer here means we're still waiting for the poll
- * hrtimer callback to give us a pointer
- */
- if (tail == INVALID_TAIL_PTR)
- return -EAGAIN;
-
- /* NB: oa_buffer.head/tail include the gtt_offset which we don't want
+ /*
+ * NB: oa_buffer.head/tail include the gtt_offset which we don't want
* while indexing relative to oa_buf_base.
*/
head -= gtt_offset;
tail -= gtt_offset;
- /* An out of bounds or misaligned head or tail pointer implies a driver
- * bug since we validate + align the tail pointers we read from the
- * hardware and we are in full control of the head pointer which should
- * only be incremented by multiples of the report size (notably also
- * all a power of two).
- */
- if (WARN_ONCE(head > OA_BUFFER_SIZE || head % report_size ||
- tail > OA_BUFFER_SIZE || tail % report_size,
- "Inconsistent OA buffer pointers: head = %u, tail = %u\n",
- head, tail))
- return -EIO;
-
-
for (/* none */;
- (taken = OA_TAKEN(tail, head));
+ (taken = OA_TAKEN(tail, head)) >= report_size;
head = (head + report_size) & mask) {
u8 *report = oa_buf_base + head;
u32 *report32 = (void *)report;
@@ -1030,19 +833,15 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
}
if (start_offset != *offset) {
- spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
-
/* We removed the gtt_offset for the copy loop above, indexing
* relative to oa_buf_base so put back here...
*/
head += gtt_offset;
I915_WRITE(GEN7_OASTATUS2,
- ((head & GEN7_OASTATUS2_HEAD_MASK) |
- GEN7_OASTATUS2_MEM_SELECT_GGTT));
- dev_priv->perf.oa.oa_buffer.head = head;
-
- spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
+ (head & GEN7_OASTATUS2_HEAD_MASK) |
+ GEN7_OASTATUS2_MEM_SELECT_GGTT);
+ WRITE_ONCE(dev_priv->perf.oa.oa_buffer.head, head);
}
return ret;
@@ -1381,9 +1180,6 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
static void gen7_init_oa_buffer(struct drm_i915_private *dev_priv)
{
u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma);
- unsigned long flags;
-
- spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
/* Pre-DevBDW: OABUFFER must be set with counters off,
* before OASTATUS1, but after OASTATUS2
@@ -1396,12 +1192,6 @@ static void gen7_init_oa_buffer(struct drm_i915_private *dev_priv)
I915_WRITE(GEN7_OASTATUS1, gtt_offset | OABUFFER_SIZE_16M); /* tail */
- /* Mark that we need updated tail pointers to read from... */
- dev_priv->perf.oa.oa_buffer.tails[0].offset = INVALID_TAIL_PTR;
- dev_priv->perf.oa.oa_buffer.tails[1].offset = INVALID_TAIL_PTR;
-
- spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
-
/* On Haswell we have to track which OASTATUS1 flags we've
* already seen since they can't be cleared while periodic
* sampling is enabled.
@@ -1430,9 +1220,6 @@ static void gen7_init_oa_buffer(struct drm_i915_private *dev_priv)
static void gen8_init_oa_buffer(struct drm_i915_private *dev_priv)
{
u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma);
- unsigned long flags;
-
- spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
I915_WRITE(GEN8_OASTATUS, 0);
I915_WRITE(GEN8_OAHEADPTR, gtt_offset);
@@ -1452,10 +1239,6 @@ static void gen8_init_oa_buffer(struct drm_i915_private *dev_priv)
OABUFFER_SIZE_16M | GEN8_OABUFFER_MEM_SELECT_GGTT);
I915_WRITE(GEN8_OATAILPTR, gtt_offset & GEN8_OATAILPTR_MASK);
- /* Mark that we need updated tail pointers to read from... */
- dev_priv->perf.oa.oa_buffer.tails[0].offset = INVALID_TAIL_PTR;
- dev_priv->perf.oa.oa_buffer.tails[1].offset = INVALID_TAIL_PTR;
-
/*
* Reset state used to recognise context switches, affecting which
* reports we will forward to userspace while filtering for a single
@@ -1463,8 +1246,6 @@ static void gen8_init_oa_buffer(struct drm_i915_private *dev_priv)
*/
dev_priv->perf.oa.oa_buffer.last_ctx_id = INVALID_CTX_ID;
- spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
-
/*
* NB: although the OA buffer will initially be allocated
* zeroed via shmfs (and so this memset is redundant when
@@ -1865,8 +1646,7 @@ static void gen7_oa_enable(struct i915_perf_stream *stream)
I915_WRITE(GEN7_OACONTROL,
(ctx_id & GEN7_OACONTROL_CTX_MASK) |
- (period_exponent <<
- GEN7_OACONTROL_TIMER_PERIOD_SHIFT) |
+ (period_exponent << GEN7_OACONTROL_TIMER_PERIOD_SHIFT) |
(periodic ? GEN7_OACONTROL_TIMER_ENABLE : 0) |
(report_format << GEN7_OACONTROL_FORMAT_SHIFT) |
(ctx ? GEN7_OACONTROL_PER_CTX_ENABLE : 0) |
@@ -1894,9 +1674,9 @@ static void gen8_oa_enable(struct i915_perf_stream *stream)
* filtering and instead filter on the cpu based on the context-id
* field of reports
*/
- I915_WRITE(GEN8_OACONTROL, (report_format <<
- GEN8_OA_REPORT_FORMAT_SHIFT) |
- GEN8_OA_COUNTER_ENABLE);
+ I915_WRITE(GEN8_OACONTROL,
+ (report_format << GEN8_OA_REPORT_FORMAT_SHIFT) |
+ GEN8_OA_COUNTER_ENABLE);
}
/**
@@ -1929,6 +1709,7 @@ static void gen7_oa_disable(struct i915_perf_stream *stream)
GEN7_OACONTROL, GEN7_OACONTROL_ENABLE, 0,
50))
DRM_ERROR("wait for OA to be disabled timed out\n");
+ rmb();
}
static void gen8_oa_disable(struct i915_perf_stream *stream)
@@ -1940,6 +1721,7 @@ static void gen8_oa_disable(struct i915_perf_stream *stream)
GEN8_OACONTROL, GEN8_OA_COUNTER_ENABLE, 0,
50))
DRM_ERROR("wait for OA to be disabled timed out\n");
+ rmb();
}
/**
@@ -3473,7 +3255,6 @@ void i915_perf_init(struct drm_i915_private *dev_priv)
INIT_LIST_HEAD(&dev_priv->perf.streams);
mutex_init(&dev_priv->perf.lock);
- spin_lock_init(&dev_priv->perf.oa.oa_buffer.ptr_lock);
oa_sample_rate_hard_limit = 1000 *
(RUNTIME_INFO(dev_priv)->cs_timestamp_frequency_khz / 2);
--
2.20.1
More information about the Intel-gfx-trybot
mailing list