[PATCH] drm/i915/oa: rmb ftw
Chris Wilson
chris at chris-wilson.co.uk
Tue Feb 19 01:22:42 UTC 2019
---
drivers/gpu/drm/i915/i915_drv.h | 59 ----
drivers/gpu/drm/i915/i915_perf.c | 493 ++++---------------------------
2 files changed, 64 insertions(+), 488 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 5c8d0489a1cd..773c205e1bbe 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1881,12 +1881,6 @@ struct drm_i915_private {
wait_queue_head_t poll_wq;
bool pollin;
- /**
- * For rate limiting any notifications of spurious
- * invalid OA reports
- */
- struct ratelimit_state spurious_report_rs;
-
bool periodic;
int period_exponent;
@@ -1899,59 +1893,6 @@ struct drm_i915_private {
int format;
int format_size;
- /**
- * Locks reads and writes to all head/tail state
- *
- * Consider: the head and tail pointer state
- * needs to be read consistently from a hrtimer
- * callback (atomic context) and read() fop
- * (user context) with tail pointer updates
- * happening in atomic context and head updates
- * in user context and the (unlikely)
- * possibility of read() errors needing to
- * reset all head/tail state.
- *
- * Note: Contention or performance aren't
- * currently a significant concern here
- * considering the relatively low frequency of
- * hrtimer callbacks (5ms period) and that
- * reads typically only happen in response to a
- * hrtimer event and likely complete before the
- * next callback.
- *
- * Note: This lock is not held *while* reading
- * and copying data to userspace so the value
- * of head observed in htrimer callbacks won't
- * represent any partial consumption of data.
- */
- spinlock_t ptr_lock;
-
- /**
- * One 'aging' tail pointer and one 'aged'
- * tail pointer ready to used for reading.
- *
- * Initial values of 0xffffffff are invalid
- * and imply that an update is required
- * (and should be ignored by an attempted
- * read)
- */
- struct {
- u32 offset;
- } tails[2];
-
- /**
- * Index for the aged tail ready to read()
- * data up to.
- */
- unsigned int aged_tail_idx;
-
- /**
- * A monotonic timestamp for when the current
- * aging tail pointer was read; used to
- * determine when it is old enough to trust.
- */
- u64 aging_timestamp;
-
/**
* Although we can always read back the head
* pointer register, we prefer to avoid
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 9ebf99f3d8d3..fe2b9dc83955 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -217,53 +217,8 @@
* that the overflow cases are unlikely in normal operation.
*/
#define OA_BUFFER_SIZE SZ_16M
-
#define OA_TAKEN(tail, head) ((tail - head) & (OA_BUFFER_SIZE - 1))
-/**
- * DOC: OA Tail Pointer Race
- *
- * There's a HW race condition between OA unit tail pointer register updates and
- * writes to memory whereby the tail pointer can sometimes get ahead of what's
- * been written out to the OA buffer so far (in terms of what's visible to the
- * CPU).
- *
- * Although this can be observed explicitly while copying reports to userspace
- * by checking for a zeroed report-id field in tail reports, we want to account
- * for this earlier, as part of the oa_buffer_check to avoid lots of redundant
- * read() attempts.
- *
- * In effect we define a tail pointer for reading that lags the real tail
- * pointer by at least %OA_TAIL_MARGIN_NSEC nanoseconds, which gives enough
- * time for the corresponding reports to become visible to the CPU.
- *
- * To manage this we actually track two tail pointers:
- * 1) An 'aging' tail with an associated timestamp that is tracked until we
- * can trust the corresponding data is visible to the CPU; at which point
- * it is considered 'aged'.
- * 2) An 'aged' tail that can be used for read()ing.
- *
- * The two separate pointers let us decouple read()s from tail pointer aging.
- *
- * The tail pointers are checked and updated at a limited rate within a hrtimer
- * callback (the same callback that is used for delivering EPOLLIN events)
- *
- * Initially the tails are marked invalid with %INVALID_TAIL_PTR which
- * indicates that an updated tail pointer is needed.
- *
- * Most of the implementation details for this workaround are in
- * oa_buffer_check_unlocked() and _append_oa_reports()
- *
- * Note for posterity: previously the driver used to define an effective tail
- * pointer that lagged the real pointer by a 'tail margin' measured in bytes
- * derived from %OA_TAIL_MARGIN_NSEC and the configured sampling frequency.
- * This was flawed considering that the OA unit may also automatically generate
- * non-periodic reports (such as on context switch) or the OA unit may be
- * enabled without any periodic sampling.
- */
-#define OA_TAIL_MARGIN_NSEC 100000ULL
-#define INVALID_TAIL_PTR 0xffffffff
-
/* frequency for checking whether the OA unit has written new reports to the
* circular OA buffer...
*/
@@ -413,14 +368,12 @@ static int get_oa_config(struct drm_i915_private *dev_priv,
static u32 gen8_oa_hw_tail_read(struct drm_i915_private *dev_priv)
{
- return I915_READ(GEN8_OATAILPTR) & GEN8_OATAILPTR_MASK;
+ return I915_READ_FW(GEN8_OATAILPTR) & GEN8_OATAILPTR_MASK;
}
static u32 gen7_oa_hw_tail_read(struct drm_i915_private *dev_priv)
{
- u32 oastatus1 = I915_READ(GEN7_OASTATUS1);
-
- return oastatus1 & GEN7_OASTATUS1_TAIL_MASK;
+ return I915_READ_FW(GEN7_OASTATUS1) & GEN7_OASTATUS1_TAIL_MASK;
}
/**
@@ -431,112 +384,17 @@ static u32 gen7_oa_hw_tail_read(struct drm_i915_private *dev_priv)
* check hrtimer (atomic ctx) to check the OA buffer tail pointer and check
* if there is data available for userspace to read.
*
- * This function is central to providing a workaround for the OA unit tail
- * pointer having a race with respect to what data is visible to the CPU.
- * It is responsible for reading tail pointers from the hardware and giving
- * the pointers time to 'age' before they are made available for reading.
- * (See description of OA_TAIL_MARGIN_NSEC above for further details.)
- *
- * Besides returning true when there is data available to read() this function
- * also has the side effect of updating the oa_buffer.tails[], .aging_timestamp
- * and .aged_tail_idx state used for reading.
- *
- * Note: It's safe to read OA config state here unlocked, assuming that this is
- * only called while the stream is enabled, while the global OA configuration
- * can't be modified.
- *
* Returns: %true if the OA buffer contains data, else %false
*/
static bool oa_buffer_check_unlocked(struct drm_i915_private *dev_priv)
{
int report_size = dev_priv->perf.oa.oa_buffer.format_size;
- unsigned long flags;
- unsigned int aged_idx;
- u32 head, hw_tail, aged_tail, aging_tail;
- u64 now;
-
- /* We have to consider the (unlikely) possibility that read() errors
- * could result in an OA buffer reset which might reset the head,
- * tails[] and aged_tail state.
- */
- spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
-
- /* NB: The head we observe here might effectively be a little out of
- * date (between head and tails[aged_idx].offset if there is currently
- * a read() in progress.
- */
- head = dev_priv->perf.oa.oa_buffer.head;
-
- aged_idx = dev_priv->perf.oa.oa_buffer.aged_tail_idx;
- aged_tail = dev_priv->perf.oa.oa_buffer.tails[aged_idx].offset;
- aging_tail = dev_priv->perf.oa.oa_buffer.tails[!aged_idx].offset;
-
- hw_tail = dev_priv->perf.oa.ops.oa_hw_tail_read(dev_priv);
-
- /* The tail pointer increases in 64 byte increments,
- * not in report_size steps...
- */
- hw_tail &= ~(report_size - 1);
-
- now = ktime_get_mono_fast_ns();
-
- /* Update the aged tail
- *
- * Flip the tail pointer available for read()s once the aging tail is
- * old enough to trust that the corresponding data will be visible to
- * the CPU...
- *
- * Do this before updating the aging pointer in case we may be able to
- * immediately start aging a new pointer too (if new data has become
- * available) without needing to wait for a later hrtimer callback.
- */
- if (aging_tail != INVALID_TAIL_PTR &&
- ((now - dev_priv->perf.oa.oa_buffer.aging_timestamp) >
- OA_TAIL_MARGIN_NSEC)) {
-
- aged_idx ^= 1;
- dev_priv->perf.oa.oa_buffer.aged_tail_idx = aged_idx;
-
- aged_tail = aging_tail;
-
- /* Mark that we need a new pointer to start aging... */
- dev_priv->perf.oa.oa_buffer.tails[!aged_idx].offset = INVALID_TAIL_PTR;
- aging_tail = INVALID_TAIL_PTR;
- }
-
- /* Update the aging tail
- *
- * We throttle aging tail updates until we have a new tail that
- * represents >= one report more data than is already available for
- * reading. This ensures there will be enough data for a successful
- * read once this new pointer has aged and ensures we will give the new
- * pointer time to age.
- */
- if (aging_tail == INVALID_TAIL_PTR &&
- (aged_tail == INVALID_TAIL_PTR ||
- OA_TAKEN(hw_tail, aged_tail) >= report_size)) {
- struct i915_vma *vma = dev_priv->perf.oa.oa_buffer.vma;
- u32 gtt_offset = i915_ggtt_offset(vma);
-
- /* Be paranoid and do a bounds check on the pointer read back
- * from hardware, just in case some spurious hardware condition
- * could put the tail out of bounds...
- */
- if (hw_tail >= gtt_offset &&
- hw_tail < (gtt_offset + OA_BUFFER_SIZE)) {
- dev_priv->perf.oa.oa_buffer.tails[!aged_idx].offset =
- aging_tail = hw_tail;
- dev_priv->perf.oa.oa_buffer.aging_timestamp = now;
- } else {
- DRM_ERROR("Ignoring spurious out of range OA buffer tail pointer = %u\n",
- hw_tail);
- }
- }
+ u32 head, tail;
- spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
+ head = READ_ONCE(dev_priv->perf.oa.oa_buffer.head);
+ tail = dev_priv->perf.oa.ops.oa_hw_tail_read(dev_priv);
- return aged_tail == INVALID_TAIL_PTR ?
- false : OA_TAKEN(aged_tail, head) >= report_size;
+ return OA_TAKEN(tail, head) >= report_size;
}
/**
@@ -594,18 +452,15 @@ static int append_oa_sample(struct i915_perf_stream *stream,
char __user *buf,
size_t count,
size_t *offset,
- const u8 *report)
+ const void *report,
+ int report_size)
{
- struct drm_i915_private *dev_priv = stream->dev_priv;
- int report_size = dev_priv->perf.oa.oa_buffer.format_size;
- struct drm_i915_perf_record_header header;
- u32 sample_flags = stream->sample_flags;
-
- header.type = DRM_I915_PERF_RECORD_SAMPLE;
- header.pad = 0;
- header.size = stream->sample_size;
+ struct drm_i915_perf_record_header header = {
+ .type = DRM_I915_PERF_RECORD_SAMPLE,
+ .size = stream->sample_size,
+ };
- if ((count - *offset) < header.size)
+ if (count - *offset < header.size)
return -ENOSPC;
buf += *offset;
@@ -613,13 +468,11 @@ static int append_oa_sample(struct i915_perf_stream *stream,
return -EFAULT;
buf += sizeof(header);
- if (sample_flags & SAMPLE_OA_REPORT) {
- if (copy_to_user(buf, report, report_size))
- return -EFAULT;
- }
-
- (*offset) += header.size;
+ if (stream->sample_flags & SAMPLE_OA_REPORT &&
+ copy_to_user(buf, report, report_size))
+ return -EFAULT;
+ *offset += header.size;
return 0;
}
@@ -649,77 +502,24 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
size_t *offset)
{
struct drm_i915_private *dev_priv = stream->dev_priv;
+ const void *oa_buf_base = dev_priv->perf.oa.oa_buffer.vaddr;
int report_size = dev_priv->perf.oa.oa_buffer.format_size;
- u8 *oa_buf_base = dev_priv->perf.oa.oa_buffer.vaddr;
u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma);
- u32 mask = (OA_BUFFER_SIZE - 1);
- size_t start_offset = *offset;
- unsigned long flags;
- unsigned int aged_tail_idx;
+ u32 report[64] __attribute__((aligned(16)));
u32 head, tail;
- u32 taken;
int ret = 0;
if (WARN_ON(!stream->enabled))
return -EIO;
- spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
-
- head = dev_priv->perf.oa.oa_buffer.head;
- aged_tail_idx = dev_priv->perf.oa.oa_buffer.aged_tail_idx;
- tail = dev_priv->perf.oa.oa_buffer.tails[aged_tail_idx].offset;
-
- spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
-
- /*
- * An invalid tail pointer here means we're still waiting for the poll
- * hrtimer callback to give us a pointer
- */
- if (tail == INVALID_TAIL_PTR)
- return -EAGAIN;
-
- /*
- * NB: oa_buffer.head/tail include the gtt_offset which we don't want
- * while indexing relative to oa_buf_base.
- */
- head -= gtt_offset;
- tail -= gtt_offset;
-
- /*
- * An out of bounds or misaligned head or tail pointer implies a driver
- * bug since we validate + align the tail pointers we read from the
- * hardware and we are in full control of the head pointer which should
- * only be incremented by multiples of the report size (notably also
- * all a power of two).
- */
- if (WARN_ONCE(head > OA_BUFFER_SIZE || head % report_size ||
- tail > OA_BUFFER_SIZE || tail % report_size,
- "Inconsistent OA buffer pointers: head = %u, tail = %u\n",
- head, tail))
- return -EIO;
-
-
- for (/* none */;
- (taken = OA_TAKEN(tail, head));
- head = (head + report_size) & mask) {
- u8 *report = oa_buf_base + head;
- u32 *report32 = (void *)report;
+ for (head = READ_ONCE(dev_priv->perf.oa.oa_buffer.head) - gtt_offset,
+ tail = gen8_oa_hw_tail_read(dev_priv) - gtt_offset;
+ OA_TAKEN(tail, head) >= report_size;
+ head = (head + report_size) & (OA_BUFFER_SIZE - 1)) {
u32 ctx_id;
u32 reason;
- /*
- * All the report sizes factor neatly into the buffer
- * size so we never expect to see a report split
- * between the beginning and end of the buffer.
- *
- * Given the initial alignment check a misalignment
- * here would imply a driver bug that would result
- * in an overrun.
- */
- if (WARN_ON((OA_BUFFER_SIZE - head) < report_size)) {
- DRM_ERROR("Spurious OA head ptr: non-integral report offset\n");
- break;
- }
+ i915_memcpy_from_wc(report, oa_buf_base + head, report_size);
/*
* The reason field includes flags identifying what
@@ -730,15 +530,12 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
* check that the report isn't invalid before copying
* it to userspace...
*/
- reason = ((report32[0] >> OAREPORT_REASON_SHIFT) &
+ reason = ((report[0] >> OAREPORT_REASON_SHIFT) &
OAREPORT_REASON_MASK);
- if (reason == 0) {
- if (__ratelimit(&dev_priv->perf.oa.spurious_report_rs))
- DRM_NOTE("Skipping spurious, invalid OA report\n");
+ if (reason == 0)
continue;
- }
- ctx_id = report32[2] & dev_priv->perf.oa.specific_ctx_id_mask;
+ ctx_id = report[2] & dev_priv->perf.oa.specific_ctx_id_mask;
/*
* Squash whatever is in the CTX_ID field if it's marked as
@@ -748,8 +545,8 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
* Note: that we don't clear the valid_ctx_bit so userspace can
* understand that the ID has been squashed by the kernel.
*/
- if (!(report32[0] & dev_priv->perf.oa.gen8_valid_ctx_bit))
- ctx_id = report32[2] = INVALID_CTX_ID;
+ if (!(report[0] & dev_priv->perf.oa.gen8_valid_ctx_bit))
+ ctx_id = report[2] = INVALID_CTX_ID;
/*
* NB: For Gen 8 the OA unit no longer supports clock gating
@@ -793,42 +590,21 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
* leaking the IDs of other contexts.
*/
if (dev_priv->perf.oa.exclusive_stream->ctx &&
- dev_priv->perf.oa.specific_ctx_id != ctx_id) {
- report32[2] = INVALID_CTX_ID;
- }
+ dev_priv->perf.oa.specific_ctx_id != ctx_id)
+ report[2] = INVALID_CTX_ID;
ret = append_oa_sample(stream, buf, count, offset,
- report);
+ report, report_size);
if (ret)
break;
dev_priv->perf.oa.oa_buffer.last_ctx_id = ctx_id;
}
-
- /*
- * The above reason field sanity check is based on
- * the assumption that the OA buffer is initially
- * zeroed and we reset the field after copying so the
- * check is still meaningful once old reports start
- * being overwritten.
- */
- report32[0] = 0;
}
- if (start_offset != *offset) {
- spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
-
- /*
- * We removed the gtt_offset for the copy loop above, indexing
- * relative to oa_buf_base so put back here...
- */
- head += gtt_offset;
-
- I915_WRITE(GEN8_OAHEADPTR, head & GEN8_OAHEADPTR_MASK);
- dev_priv->perf.oa.oa_buffer.head = head;
-
- spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
- }
+ head += gtt_offset;
+ I915_WRITE_FW(GEN8_OAHEADPTR, head & GEN8_OAHEADPTR_MASK);
+ WRITE_ONCE(dev_priv->perf.oa.oa_buffer.head, head);
return ret;
}
@@ -938,112 +714,43 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
size_t *offset)
{
struct drm_i915_private *dev_priv = stream->dev_priv;
+ const void *oa_buf_base = dev_priv->perf.oa.oa_buffer.vaddr;
int report_size = dev_priv->perf.oa.oa_buffer.format_size;
- u8 *oa_buf_base = dev_priv->perf.oa.oa_buffer.vaddr;
u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma);
- u32 mask = (OA_BUFFER_SIZE - 1);
- size_t start_offset = *offset;
- unsigned long flags;
- unsigned int aged_tail_idx;
+ u32 report[64] __attribute__((aligned(16)));
u32 head, tail;
- u32 taken;
int ret = 0;
if (WARN_ON(!stream->enabled))
return -EIO;
- spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
+ for (head = READ_ONCE(dev_priv->perf.oa.oa_buffer.head) - gtt_offset,
+ tail = gen7_oa_hw_tail_read(dev_priv) - gtt_offset;
+ OA_TAKEN(tail, head) >= report_size;
+ head = (head + report_size) & (OA_BUFFER_SIZE - 1)) {
+ i915_memcpy_from_wc(report, oa_buf_base + head, report_size);
- head = dev_priv->perf.oa.oa_buffer.head;
- aged_tail_idx = dev_priv->perf.oa.oa_buffer.aged_tail_idx;
- tail = dev_priv->perf.oa.oa_buffer.tails[aged_tail_idx].offset;
-
- spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
-
- /* An invalid tail pointer here means we're still waiting for the poll
- * hrtimer callback to give us a pointer
- */
- if (tail == INVALID_TAIL_PTR)
- return -EAGAIN;
-
- /* NB: oa_buffer.head/tail include the gtt_offset which we don't want
- * while indexing relative to oa_buf_base.
- */
- head -= gtt_offset;
- tail -= gtt_offset;
-
- /* An out of bounds or misaligned head or tail pointer implies a driver
- * bug since we validate + align the tail pointers we read from the
- * hardware and we are in full control of the head pointer which should
- * only be incremented by multiples of the report size (notably also
- * all a power of two).
- */
- if (WARN_ONCE(head > OA_BUFFER_SIZE || head % report_size ||
- tail > OA_BUFFER_SIZE || tail % report_size,
- "Inconsistent OA buffer pointers: head = %u, tail = %u\n",
- head, tail))
- return -EIO;
-
-
- for (/* none */;
- (taken = OA_TAKEN(tail, head));
- head = (head + report_size) & mask) {
- u8 *report = oa_buf_base + head;
- u32 *report32 = (void *)report;
-
- /* All the report sizes factor neatly into the buffer
- * size so we never expect to see a report split
- * between the beginning and end of the buffer.
- *
- * Given the initial alignment check a misalignment
- * here would imply a driver bug that would result
- * in an overrun.
- */
- if (WARN_ON((OA_BUFFER_SIZE - head) < report_size)) {
- DRM_ERROR("Spurious OA head ptr: non-integral report offset\n");
- break;
- }
-
- /* The report-ID field for periodic samples includes
+ /*
+ * The report-ID field for periodic samples includes
* some undocumented flags related to what triggered
* the report and is never expected to be zero so we
* can check that the report isn't invalid before
* copying it to userspace...
*/
- if (report32[0] == 0) {
- if (__ratelimit(&dev_priv->perf.oa.spurious_report_rs))
- DRM_NOTE("Skipping spurious, invalid OA report\n");
+ if (report[0] == 0)
continue;
- }
- ret = append_oa_sample(stream, buf, count, offset, report);
+ ret = append_oa_sample(stream, buf, count, offset,
+ report, report_size);
if (ret)
break;
-
- /* The above report-id field sanity check is based on
- * the assumption that the OA buffer is initially
- * zeroed and we reset the field after copying so the
- * check is still meaningful once old reports start
- * being overwritten.
- */
- report32[0] = 0;
}
- if (start_offset != *offset) {
- spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
-
- /* We removed the gtt_offset for the copy loop above, indexing
- * relative to oa_buf_base so put back here...
- */
- head += gtt_offset;
-
- I915_WRITE(GEN7_OASTATUS2,
- ((head & GEN7_OASTATUS2_HEAD_MASK) |
- GEN7_OASTATUS2_MEM_SELECT_GGTT));
- dev_priv->perf.oa.oa_buffer.head = head;
-
- spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
- }
+ head += gtt_offset;
+ I915_WRITE_FW(GEN7_OASTATUS2,
+ (head & GEN7_OASTATUS2_HEAD_MASK) |
+ GEN7_OASTATUS2_MEM_SELECT_GGTT);
+ WRITE_ONCE(dev_priv->perf.oa.oa_buffer.head, head);
return ret;
}
@@ -1339,12 +1046,10 @@ free_oa_buffer(struct drm_i915_private *i915)
{
mutex_lock(&i915->drm.struct_mutex);
- i915_vma_unpin_and_release(&i915->perf.oa.oa_buffer.vma,
- I915_VMA_RELEASE_MAP);
+ i915_vma_unpin_iomap(i915->perf.oa.oa_buffer.vma);
+ i915_vma_unpin_and_release(&i915->perf.oa.oa_buffer.vma, 0);
mutex_unlock(&i915->drm.struct_mutex);
-
- i915->perf.oa.oa_buffer.vaddr = NULL;
}
static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
@@ -1371,19 +1076,11 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
oa_put_render_ctx_id(stream);
put_oa_config(dev_priv, stream->oa_config);
-
- if (dev_priv->perf.oa.spurious_report_rs.missed) {
- DRM_NOTE("%d spurious OA report notices suppressed due to ratelimiting\n",
- dev_priv->perf.oa.spurious_report_rs.missed);
- }
}
static void gen7_init_oa_buffer(struct drm_i915_private *dev_priv)
{
u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma);
- unsigned long flags;
-
- spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
/* Pre-DevBDW: OABUFFER must be set with counters off,
* before OASTATUS1, but after OASTATUS2
@@ -1396,31 +1093,12 @@ static void gen7_init_oa_buffer(struct drm_i915_private *dev_priv)
I915_WRITE(GEN7_OASTATUS1, gtt_offset | OABUFFER_SIZE_16M); /* tail */
- /* Mark that we need updated tail pointers to read from... */
- dev_priv->perf.oa.oa_buffer.tails[0].offset = INVALID_TAIL_PTR;
- dev_priv->perf.oa.oa_buffer.tails[1].offset = INVALID_TAIL_PTR;
-
- spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
-
/* On Haswell we have to track which OASTATUS1 flags we've
* already seen since they can't be cleared while periodic
* sampling is enabled.
*/
dev_priv->perf.oa.gen7_latched_oastatus1 = 0;
- /* NB: although the OA buffer will initially be allocated
- * zeroed via shmfs (and so this memset is redundant when
- * first allocating), we may re-init the OA buffer, either
- * when re-enabling a stream or in error/reset paths.
- *
- * The reason we clear the buffer for each re-init is for the
- * sanity check in gen7_append_oa_reports() that looks at the
- * report-id field to make sure it's non-zero which relies on
- * the assumption that new reports are being written to zeroed
- * memory...
- */
- memset(dev_priv->perf.oa.oa_buffer.vaddr, 0, OA_BUFFER_SIZE);
-
/* Maybe make ->pollin per-stream state if we support multiple
* concurrent streams in the future.
*/
@@ -1430,9 +1108,6 @@ static void gen7_init_oa_buffer(struct drm_i915_private *dev_priv)
static void gen8_init_oa_buffer(struct drm_i915_private *dev_priv)
{
u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma);
- unsigned long flags;
-
- spin_lock_irqsave(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
I915_WRITE(GEN8_OASTATUS, 0);
I915_WRITE(GEN8_OAHEADPTR, gtt_offset);
@@ -1452,10 +1127,6 @@ static void gen8_init_oa_buffer(struct drm_i915_private *dev_priv)
OABUFFER_SIZE_16M | GEN8_OABUFFER_MEM_SELECT_GGTT);
I915_WRITE(GEN8_OATAILPTR, gtt_offset & GEN8_OATAILPTR_MASK);
- /* Mark that we need updated tail pointers to read from... */
- dev_priv->perf.oa.oa_buffer.tails[0].offset = INVALID_TAIL_PTR;
- dev_priv->perf.oa.oa_buffer.tails[1].offset = INVALID_TAIL_PTR;
-
/*
* Reset state used to recognise context switches, affecting which
* reports we will forward to userspace while filtering for a single
@@ -1463,22 +1134,6 @@ static void gen8_init_oa_buffer(struct drm_i915_private *dev_priv)
*/
dev_priv->perf.oa.oa_buffer.last_ctx_id = INVALID_CTX_ID;
- spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags);
-
- /*
- * NB: although the OA buffer will initially be allocated
- * zeroed via shmfs (and so this memset is redundant when
- * first allocating), we may re-init the OA buffer, either
- * when re-enabling a stream or in error/reset paths.
- *
- * The reason we clear the buffer for each re-init is for the
- * sanity check in gen8_append_oa_reports() that looks at the
- * reason field to make sure it's non-zero which relies on
- * the assumption that new reports are being written to zeroed
- * memory...
- */
- memset(dev_priv->perf.oa.oa_buffer.vaddr, 0, OA_BUFFER_SIZE);
-
/*
* Maybe make ->pollin per-stream state if we support multiple
* concurrent streams in the future.
@@ -1513,16 +1168,19 @@ static int alloc_oa_buffer(struct drm_i915_private *dev_priv)
if (ret)
goto err_unref;
+ ret = i915_gem_object_set_to_gtt_domain(bo, true);
+ if (ret)
+ goto err_unref;
+
/* PreHSW required 512K alignment, HSW requires 16M */
- vma = i915_gem_object_ggtt_pin(bo, NULL, 0, SZ_16M, 0);
+ vma = i915_gem_object_ggtt_pin(bo, NULL, 0, SZ_16M, PIN_MAPPABLE);
if (IS_ERR(vma)) {
ret = PTR_ERR(vma);
goto err_unref;
}
dev_priv->perf.oa.oa_buffer.vma = vma;
- dev_priv->perf.oa.oa_buffer.vaddr =
- i915_gem_object_pin_map(bo, I915_MAP_WB);
+ dev_priv->perf.oa.oa_buffer.vaddr = i915_vma_pin_iomap(vma);
if (IS_ERR(dev_priv->perf.oa.oa_buffer.vaddr)) {
ret = PTR_ERR(dev_priv->perf.oa.oa_buffer.vaddr);
goto err_unpin;
@@ -1540,7 +1198,6 @@ static int alloc_oa_buffer(struct drm_i915_private *dev_priv)
err_unref:
i915_gem_object_put(bo);
- dev_priv->perf.oa.oa_buffer.vaddr = NULL;
dev_priv->perf.oa.oa_buffer.vma = NULL;
unlock:
@@ -1865,8 +1522,7 @@ static void gen7_oa_enable(struct i915_perf_stream *stream)
I915_WRITE(GEN7_OACONTROL,
(ctx_id & GEN7_OACONTROL_CTX_MASK) |
- (period_exponent <<
- GEN7_OACONTROL_TIMER_PERIOD_SHIFT) |
+ (period_exponent << GEN7_OACONTROL_TIMER_PERIOD_SHIFT) |
(periodic ? GEN7_OACONTROL_TIMER_ENABLE : 0) |
(report_format << GEN7_OACONTROL_FORMAT_SHIFT) |
(ctx ? GEN7_OACONTROL_PER_CTX_ENABLE : 0) |
@@ -1894,9 +1550,9 @@ static void gen8_oa_enable(struct i915_perf_stream *stream)
* filtering and instead filter on the cpu based on the context-id
* field of reports
*/
- I915_WRITE(GEN8_OACONTROL, (report_format <<
- GEN8_OA_REPORT_FORMAT_SHIFT) |
- GEN8_OA_COUNTER_ENABLE);
+ I915_WRITE(GEN8_OACONTROL,
+ (report_format << GEN8_OA_REPORT_FORMAT_SHIFT) |
+ GEN8_OA_COUNTER_ENABLE);
}
/**
@@ -2028,26 +1684,6 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
return -EINVAL;
}
- /* We set up some ratelimit state to potentially throttle any _NOTES
- * about spurious, invalid OA reports which we don't forward to
- * userspace.
- *
- * The initialization is associated with opening the stream (not driver
- * init) considering we print a _NOTE about any throttling when closing
- * the stream instead of waiting until driver _fini which no one would
- * ever see.
- *
- * Using the same limiting factors as printk_ratelimit()
- */
- ratelimit_state_init(&dev_priv->perf.oa.spurious_report_rs,
- 5 * HZ, 10);
- /* Since we use a DRM_NOTE for spurious reports it would be
- * inconsistent to let __ratelimit() automatically print a warning for
- * throttling.
- */
- ratelimit_set_flags(&dev_priv->perf.oa.spurious_report_rs,
- RATELIMIT_MSG_ON_RELEASE);
-
stream->sample_size = sizeof(struct drm_i915_perf_record_header);
format_size = dev_priv->perf.oa.oa_formats[props->oa_format].size;
@@ -3473,7 +3109,6 @@ void i915_perf_init(struct drm_i915_private *dev_priv)
INIT_LIST_HEAD(&dev_priv->perf.streams);
mutex_init(&dev_priv->perf.lock);
- spin_lock_init(&dev_priv->perf.oa.oa_buffer.ptr_lock);
oa_sample_rate_hard_limit = 1000 *
(RUNTIME_INFO(dev_priv)->cs_timestamp_frequency_khz / 2);
--
2.20.1
More information about the Intel-gfx-trybot
mailing list