[PATCH] drm/i915/oa: rmb ftw
Chris Wilson
chris at chris-wilson.co.uk
Mon Feb 18 21:03:43 UTC 2019
---
drivers/gpu/drm/i915/i915_drv.h | 53 ---------
drivers/gpu/drm/i915/i915_perf.c | 183 +++----------------------------
2 files changed, 14 insertions(+), 222 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 5c8d0489a1cd..7415e64e1fd3 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1899,59 +1899,6 @@ struct drm_i915_private {
int format;
int format_size;
- /**
- * Locks reads and writes to all head/tail state
- *
- * Consider: the head and tail pointer state
- * needs to be read consistently from a hrtimer
- * callback (atomic context) and read() fop
- * (user context) with tail pointer updates
- * happening in atomic context and head updates
- * in user context and the (unlikely)
- * possibility of read() errors needing to
- * reset all head/tail state.
- *
- * Note: Contention or performance aren't
- * currently a significant concern here
- * considering the relatively low frequency of
- * hrtimer callbacks (5ms period) and that
- * reads typically only happen in response to a
- * hrtimer event and likely complete before the
- * next callback.
- *
- * Note: This lock is not held *while* reading
- * and copying data to userspace so the value
- * of head observed in htrimer callbacks won't
- * represent any partial consumption of data.
- */
- spinlock_t ptr_lock;
-
- /**
- * One 'aging' tail pointer and one 'aged'
- * tail pointer ready to used for reading.
- *
- * Initial values of 0xffffffff are invalid
- * and imply that an update is required
- * (and should be ignored by an attempted
- * read)
- */
- struct {
- u32 offset;
- } tails[2];
-
- /**
- * Index for the aged tail ready to read()
- * data up to.
- */
- unsigned int aged_tail_idx;
-
- /**
- * A monotonic timestamp for when the current
- * aging tail pointer was read; used to
- * determine when it is old enough to trust.
- */
- u64 aging_timestamp;
-
/**
* Although we can always read back the head
* pointer register, we prefer to avoid
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 9ebf99f3d8d3..e6e3c56dab4e 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -450,93 +450,16 @@ static u32 gen7_oa_hw_tail_read(struct drm_i915_private *dev_priv)
static bool oa_buffer_check_unlocked(struct drm_i915_private *dev_priv)
{
int report_size = dev_priv->perf.oa.oa_buffer.format_size;
- unsigned long flags;
- unsigned int aged_idx;
- u32 head, hw_tail, aged_tail, aging_tail;
- u64 now;
-
- /* We have to consider the (unlikely) possibility that read() errors
- * could result in an OA buffer reset which might reset the head,
- * tails[] and aged_tail state.
- */
- spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
+ u32 head, tail;
/* NB: The head we observe here might effectively be a little out of
* date (between head and tails[aged_idx].offset if there is currently
* a read() in progress.
*/
- head = dev_priv->perf.oa.oa_buffer.head;
-
- aged_idx = dev_priv->perf.oa.oa_buffer.aged_tail_idx;
- aged_tail = dev_priv->perf.oa.oa_buffer.tails[aged_idx].offset;
- aging_tail = dev_priv->perf.oa.oa_buffer.tails[!aged_idx].offset;
-
- hw_tail = dev_priv->perf.oa.ops.oa_hw_tail_read(dev_priv);
-
- /* The tail pointer increases in 64 byte increments,
- * not in report_size steps...
- */
- hw_tail &= ~(report_size - 1);
-
- now = ktime_get_mono_fast_ns();
-
- /* Update the aged tail
- *
- * Flip the tail pointer available for read()s once the aging tail is
- * old enough to trust that the corresponding data will be visible to
- * the CPU...
- *
- * Do this before updating the aging pointer in case we may be able to
- * immediately start aging a new pointer too (if new data has become
- * available) without needing to wait for a later hrtimer callback.
- */
- if (aging_tail != INVALID_TAIL_PTR &&
- ((now - dev_priv->perf.oa.oa_buffer.aging_timestamp) >
- OA_TAIL_MARGIN_NSEC)) {
-
- aged_idx ^= 1;
- dev_priv->perf.oa.oa_buffer.aged_tail_idx = aged_idx;
-
- aged_tail = aging_tail;
+ head = READ_ONCE(dev_priv->perf.oa.oa_buffer.head);
+ tail = dev_priv->perf.oa.ops.oa_hw_tail_read(dev_priv);
- /* Mark that we need a new pointer to start aging... */
- dev_priv->perf.oa.oa_buffer.tails[!aged_idx].offset = INVALID_TAIL_PTR;
- aging_tail = INVALID_TAIL_PTR;
- }
-
- /* Update the aging tail
- *
- * We throttle aging tail updates until we have a new tail that
- * represents >= one report more data than is already available for
- * reading. This ensures there will be enough data for a successful
- * read once this new pointer has aged and ensures we will give the new
- * pointer time to age.
- */
- if (aging_tail == INVALID_TAIL_PTR &&
- (aged_tail == INVALID_TAIL_PTR ||
- OA_TAKEN(hw_tail, aged_tail) >= report_size)) {
- struct i915_vma *vma = dev_priv->perf.oa.oa_buffer.vma;
- u32 gtt_offset = i915_ggtt_offset(vma);
-
- /* Be paranoid and do a bounds check on the pointer read back
- * from hardware, just in case some spurious hardware condition
- * could put the tail out of bounds...
- */
- if (hw_tail >= gtt_offset &&
- hw_tail < (gtt_offset + OA_BUFFER_SIZE)) {
- dev_priv->perf.oa.oa_buffer.tails[!aged_idx].offset =
- aging_tail = hw_tail;
- dev_priv->perf.oa.oa_buffer.aging_timestamp = now;
- } else {
- DRM_ERROR("Ignoring spurious out of range OA buffer tail pointer = %u\n",
- hw_tail);
- }
- }
-
- spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
-
- return aged_tail == INVALID_TAIL_PTR ?
- false : OA_TAKEN(aged_tail, head) >= report_size;
+ return OA_TAKEN(tail, head) >= report_size;
}
/**
@@ -654,8 +577,6 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma);
u32 mask = (OA_BUFFER_SIZE - 1);
size_t start_offset = *offset;
- unsigned long flags;
- unsigned int aged_tail_idx;
u32 head, tail;
u32 taken;
int ret = 0;
@@ -663,20 +584,9 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
if (WARN_ON(!stream->enabled))
return -EIO;
- spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
-
head = dev_priv->perf.oa.oa_buffer.head;
- aged_tail_idx = dev_priv->perf.oa.oa_buffer.aged_tail_idx;
- tail = dev_priv->perf.oa.oa_buffer.tails[aged_tail_idx].offset;
-
- spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
-
- /*
- * An invalid tail pointer here means we're still waiting for the poll
- * hrtimer callback to give us a pointer
- */
- if (tail == INVALID_TAIL_PTR)
- return -EAGAIN;
+ tail = dev_priv->perf.oa.ops.oa_hw_tail_read(dev_priv);
+ rmb();
/*
* NB: oa_buffer.head/tail include the gtt_offset which we don't want
@@ -685,20 +595,6 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
head -= gtt_offset;
tail -= gtt_offset;
- /*
- * An out of bounds or misaligned head or tail pointer implies a driver
- * bug since we validate + align the tail pointers we read from the
- * hardware and we are in full control of the head pointer which should
- * only be incremented by multiples of the report size (notably also
- * all a power of two).
- */
- if (WARN_ONCE(head > OA_BUFFER_SIZE || head % report_size ||
- tail > OA_BUFFER_SIZE || tail % report_size,
- "Inconsistent OA buffer pointers: head = %u, tail = %u\n",
- head, tail))
- return -EIO;
-
-
for (/* none */;
(taken = OA_TAKEN(tail, head));
head = (head + report_size) & mask) {
@@ -816,8 +712,6 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
}
if (start_offset != *offset) {
- spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
-
/*
* We removed the gtt_offset for the copy loop above, indexing
* relative to oa_buf_base so put back here...
@@ -825,9 +719,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
head += gtt_offset;
I915_WRITE(GEN8_OAHEADPTR, head & GEN8_OAHEADPTR_MASK);
- dev_priv->perf.oa.oa_buffer.head = head;
-
- spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
+ WRITE_ONCE(dev_priv->perf.oa.oa_buffer.head, head);
}
return ret;
@@ -943,8 +835,6 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma);
u32 mask = (OA_BUFFER_SIZE - 1);
size_t start_offset = *offset;
- unsigned long flags;
- unsigned int aged_tail_idx;
u32 head, tail;
u32 taken;
int ret = 0;
@@ -952,39 +842,17 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
if (WARN_ON(!stream->enabled))
return -EIO;
- spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
-
head = dev_priv->perf.oa.oa_buffer.head;
- aged_tail_idx = dev_priv->perf.oa.oa_buffer.aged_tail_idx;
- tail = dev_priv->perf.oa.oa_buffer.tails[aged_tail_idx].offset;
-
- spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
+ tail = dev_priv->perf.oa.ops.oa_hw_tail_read(dev_priv);
+ rmb();
- /* An invalid tail pointer here means we're still waiting for the poll
- * hrtimer callback to give us a pointer
- */
- if (tail == INVALID_TAIL_PTR)
- return -EAGAIN;
-
- /* NB: oa_buffer.head/tail include the gtt_offset which we don't want
+ /*
+ * NB: oa_buffer.head/tail include the gtt_offset which we don't want
* while indexing relative to oa_buf_base.
*/
head -= gtt_offset;
tail -= gtt_offset;
- /* An out of bounds or misaligned head or tail pointer implies a driver
- * bug since we validate + align the tail pointers we read from the
- * hardware and we are in full control of the head pointer which should
- * only be incremented by multiples of the report size (notably also
- * all a power of two).
- */
- if (WARN_ONCE(head > OA_BUFFER_SIZE || head % report_size ||
- tail > OA_BUFFER_SIZE || tail % report_size,
- "Inconsistent OA buffer pointers: head = %u, tail = %u\n",
- head, tail))
- return -EIO;
-
-
for (/* none */;
(taken = OA_TAKEN(tail, head));
head = (head + report_size) & mask) {
@@ -1030,19 +898,15 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
}
if (start_offset != *offset) {
- spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
-
/* We removed the gtt_offset for the copy loop above, indexing
* relative to oa_buf_base so put back here...
*/
head += gtt_offset;
I915_WRITE(GEN7_OASTATUS2,
- ((head & GEN7_OASTATUS2_HEAD_MASK) |
- GEN7_OASTATUS2_MEM_SELECT_GGTT));
- dev_priv->perf.oa.oa_buffer.head = head;
-
- spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
+ (head & GEN7_OASTATUS2_HEAD_MASK) |
+ GEN7_OASTATUS2_MEM_SELECT_GGTT);
+ WRITE_ONCE(dev_priv->perf.oa.oa_buffer.head, head);
}
return ret;
@@ -1381,9 +1245,6 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
static void gen7_init_oa_buffer(struct drm_i915_private *dev_priv)
{
u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma);
- unsigned long flags;
-
- spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
/* Pre-DevBDW: OABUFFER must be set with counters off,
* before OASTATUS1, but after OASTATUS2
@@ -1396,12 +1257,6 @@ static void gen7_init_oa_buffer(struct drm_i915_private *dev_priv)
I915_WRITE(GEN7_OASTATUS1, gtt_offset | OABUFFER_SIZE_16M); /* tail */
- /* Mark that we need updated tail pointers to read from... */
- dev_priv->perf.oa.oa_buffer.tails[0].offset = INVALID_TAIL_PTR;
- dev_priv->perf.oa.oa_buffer.tails[1].offset = INVALID_TAIL_PTR;
-
- spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
-
/* On Haswell we have to track which OASTATUS1 flags we've
* already seen since they can't be cleared while periodic
* sampling is enabled.
@@ -1430,9 +1285,6 @@ static void gen7_init_oa_buffer(struct drm_i915_private *dev_priv)
static void gen8_init_oa_buffer(struct drm_i915_private *dev_priv)
{
u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma);
- unsigned long flags;
-
- spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
I915_WRITE(GEN8_OASTATUS, 0);
I915_WRITE(GEN8_OAHEADPTR, gtt_offset);
@@ -1452,10 +1304,6 @@ static void gen8_init_oa_buffer(struct drm_i915_private *dev_priv)
OABUFFER_SIZE_16M | GEN8_OABUFFER_MEM_SELECT_GGTT);
I915_WRITE(GEN8_OATAILPTR, gtt_offset & GEN8_OATAILPTR_MASK);
- /* Mark that we need updated tail pointers to read from... */
- dev_priv->perf.oa.oa_buffer.tails[0].offset = INVALID_TAIL_PTR;
- dev_priv->perf.oa.oa_buffer.tails[1].offset = INVALID_TAIL_PTR;
-
/*
* Reset state used to recognise context switches, affecting which
* reports we will forward to userspace while filtering for a single
@@ -1463,8 +1311,6 @@ static void gen8_init_oa_buffer(struct drm_i915_private *dev_priv)
*/
dev_priv->perf.oa.oa_buffer.last_ctx_id = INVALID_CTX_ID;
- spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
-
/*
* NB: although the OA buffer will initially be allocated
* zeroed via shmfs (and so this memset is redundant when
@@ -3473,7 +3319,6 @@ void i915_perf_init(struct drm_i915_private *dev_priv)
INIT_LIST_HEAD(&dev_priv->perf.streams);
mutex_init(&dev_priv->perf.lock);
- spin_lock_init(&dev_priv->perf.oa.oa_buffer.ptr_lock);
oa_sample_rate_hard_limit = 1000 *
(RUNTIME_INFO(dev_priv)->cs_timestamp_frequency_khz / 2);
--
2.20.1
More information about the Intel-gfx-trybot
mailing list