[Intel-gfx] [PATCH 1/4] drm/i915/perf: rework aging tail workaround
Lionel Landwerlin
lionel.g.landwerlin at intel.com
Fri Mar 13 09:54:54 UTC 2020
On 13/03/2020 01:04, Umesh Nerlige Ramappa wrote:
> From: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
>
> We're about to introduce an options to open the perf stream, giving
> the user ability to configure how often it wants the kernel to poll
> the OA registers for available data.
>
> Right now the workaround against the OA tail pointer race condition
> requires at least twice the internal kernel polling timer to make any
> data available.
>
> This changes introduce checks on the OA data written into the circular
> buffer to make as much data as possible available on the first
> iteration of the polling timer.
>
> v2: Use OA_TAKEN macro without the gtt_offset (Lionel)
> v3: (Umesh)
> - Rebase
> - Change report to report32 from below review
> https://patchwork.freedesktop.org/patch/330704/?series=66697&rev=1
>
> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
> Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa at intel.com>
> ---
> drivers/gpu/drm/i915/i915_perf.c | 207 +++++++++++--------------
> drivers/gpu/drm/i915/i915_perf_types.h | 29 ++--
> 2 files changed, 106 insertions(+), 130 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> index 1b074bb4a7fe..92c5c75e2a2b 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -241,23 +241,14 @@
> * for this earlier, as part of the oa_buffer_check to avoid lots of redundant
> * read() attempts.
> *
> - * In effect we define a tail pointer for reading that lags the real tail
> - * pointer by at least %OA_TAIL_MARGIN_NSEC nanoseconds, which gives enough
> - * time for the corresponding reports to become visible to the CPU.
> - *
> - * To manage this we actually track two tail pointers:
> - * 1) An 'aging' tail with an associated timestamp that is tracked until we
> - * can trust the corresponding data is visible to the CPU; at which point
> - * it is considered 'aged'.
> - * 2) An 'aged' tail that can be used for read()ing.
> - *
> - * The two separate pointers let us decouple read()s from tail pointer aging.
> - *
> - * The tail pointers are checked and updated at a limited rate within a hrtimer
> - * callback (the same callback that is used for delivering EPOLLIN events)
> - *
> - * Initially the tails are marked invalid with %INVALID_TAIL_PTR which
> - * indicates that an updated tail pointer is needed.
> + * We workaround this issue in oa_buffer_check() by reading the reports in the
s/oa_buffer_check/oa_buffer_check_unlocked/
> + * OA buffer, starting from the tail reported by the HW until we find 2
> + * consecutive reports with their first 2 dwords of not at 0. Those dwords are
> + * also set to 0 once read and the whole buffer is cleared upon OA buffer
> + * initialization. The first dword is the reason for this report while the
> + * second is the timestamp, making the chances of having those 2 fields at 0
> + * fairly unlikely. A more detailed explanation is available in
> + * oa_buffer_check().
> *
> * Most of the implementation details for this workaround are in
> * oa_buffer_check_unlocked() and _append_oa_reports()
> @@ -270,7 +261,6 @@
> * enabled without any periodic sampling.
> */
> #define OA_TAIL_MARGIN_NSEC 100000ULL
> -#define INVALID_TAIL_PTR 0xffffffff
>
> /* frequency for checking whether the OA unit has written new reports to the
> * circular OA buffer...
> @@ -476,10 +466,10 @@ static u32 gen7_oa_hw_tail_read(struct i915_perf_stream *stream)
> */
> static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream)
> {
> + u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
> int report_size = stream->oa_buffer.format_size;
> unsigned long flags;
> - unsigned int aged_idx;
> - u32 head, hw_tail, aged_tail, aging_tail;
> + u32 hw_tail;
> u64 now;
>
> /* We have to consider the (unlikely) possibility that read() errors
> @@ -488,16 +478,6 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream)
> */
> spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
>
> - /* NB: The head we observe here might effectively be a little out of
> - * date (between head and tails[aged_idx].offset if there is currently
> - * a read() in progress.
> - */
> - head = stream->oa_buffer.head;
> -
> - aged_idx = stream->oa_buffer.aged_tail_idx;
> - aged_tail = stream->oa_buffer.tails[aged_idx].offset;
> - aging_tail = stream->oa_buffer.tails[!aged_idx].offset;
> -
> hw_tail = stream->perf->ops.oa_hw_tail_read(stream);
>
> /* The tail pointer increases in 64 byte increments,
> @@ -507,64 +487,76 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream)
>
> now = ktime_get_mono_fast_ns();
>
> - /* Update the aged tail
> - *
> - * Flip the tail pointer available for read()s once the aging tail is
> - * old enough to trust that the corresponding data will be visible to
> - * the CPU...
> - *
> - * Do this before updating the aging pointer in case we may be able to
> - * immediately start aging a new pointer too (if new data has become
> - * available) without needing to wait for a later hrtimer callback.
> - */
> - if (aging_tail != INVALID_TAIL_PTR &&
> - ((now - stream->oa_buffer.aging_timestamp) >
> - OA_TAIL_MARGIN_NSEC)) {
> -
> - aged_idx ^= 1;
> - stream->oa_buffer.aged_tail_idx = aged_idx;
> + if (hw_tail == stream->oa_buffer.aging_tail) {
> + /* If the HW tail hasn't move since the last check and the HW
> + * tail has been aging for long enough, declare it the new
> + * tail.
> + */
> + if ((now - stream->oa_buffer.aging_timestamp) >
> + OA_TAIL_MARGIN_NSEC) {
> + stream->oa_buffer.tail =
> + stream->oa_buffer.aging_tail;
> + }
> + } else {
> + u32 head, tail, landed_report_heads;
>
> - aged_tail = aging_tail;
> + /* NB: The head we observe here might effectively be a little out of
> + * date (between head and tails[aged_idx].offset if there is currently
> + * a read() in progress.
> + */
> + head = stream->oa_buffer.head - gtt_offset;
>
> - /* Mark that we need a new pointer to start aging... */
> - stream->oa_buffer.tails[!aged_idx].offset = INVALID_TAIL_PTR;
> - aging_tail = INVALID_TAIL_PTR;
> - }
> + hw_tail -= gtt_offset;
> + tail = hw_tail;
>
> - /* Update the aging tail
> - *
> - * We throttle aging tail updates until we have a new tail that
> - * represents >= one report more data than is already available for
> - * reading. This ensures there will be enough data for a successful
> - * read once this new pointer has aged and ensures we will give the new
> - * pointer time to age.
> - */
> - if (aging_tail == INVALID_TAIL_PTR &&
> - (aged_tail == INVALID_TAIL_PTR ||
> - OA_TAKEN(hw_tail, aged_tail) >= report_size)) {
> - struct i915_vma *vma = stream->oa_buffer.vma;
> - u32 gtt_offset = i915_ggtt_offset(vma);
> -
> - /* Be paranoid and do a bounds check on the pointer read back
> - * from hardware, just in case some spurious hardware condition
> - * could put the tail out of bounds...
> + /* Walk the stream backward until we find at least 2 reports
> + * with dword 0 & 1 not at 0. Since the circular buffer
> + * pointers progress by increments of 64 bytes and that
> + * reports can be up to 256 bytes long, we can't tell whether
> + * a report has fully landed in memory before the first 2
> + * dwords of the following report have effectively landed.
> + *
> + * This is assuming that the writes of the OA unit land in
> + * memory in the order they were written to.
> + * If not : (╯°□°)╯︵ ┻━┻
> */
> - if (hw_tail >= gtt_offset &&
> - hw_tail < (gtt_offset + OA_BUFFER_SIZE)) {
> - stream->oa_buffer.tails[!aged_idx].offset =
> - aging_tail = hw_tail;
> - stream->oa_buffer.aging_timestamp = now;
> - } else {
> - drm_err(&stream->perf->i915->drm,
> - "Ignoring spurious out of range OA buffer tail pointer = %x\n",
> - hw_tail);
> + landed_report_heads = 0;
> + while (OA_TAKEN(tail, head) >= report_size) {
> + u32 previous_tail = (tail - report_size) & (OA_BUFFER_SIZE - 1);
> + u8 *report = stream->oa_buffer.vaddr + previous_tail;
> + u32 *report32 = (void *) report;
> +
> + /* Head of the report indicated by the HW tail register has
> + * indeed landed into memory.
> + */
> + if (report32[0] != 0 || report32[1] != 0) {
> + landed_report_heads++;
> +
> + if (landed_report_heads >= 2)
> + break;
> + }
> +
> + tail = previous_tail;
> }
> +
> + if (abs(tail - hw_tail) >= (2 * report_size)) {
> + if (__ratelimit(&stream->perf->tail_pointer_race)) {
> + DRM_NOTE("unlanded report(s) head=0x%x "
> + "tail=0x%x hw_tail=0x%x\n",
> + head, tail, hw_tail);
> + }
> + }
> +
> + stream->oa_buffer.tail = gtt_offset + tail;
> + stream->oa_buffer.aging_tail = gtt_offset + hw_tail;
> + stream->oa_buffer.aging_timestamp = now;
> }
>
> spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
>
> - return aged_tail == INVALID_TAIL_PTR ?
> - false : OA_TAKEN(aged_tail, head) >= report_size;
> + return OA_TAKEN(stream->oa_buffer.tail - gtt_offset,
> + stream->oa_buffer.head - gtt_offset) >= report_size;
> +
> }
>
> /**
> @@ -682,7 +674,6 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
> u32 mask = (OA_BUFFER_SIZE - 1);
> size_t start_offset = *offset;
> unsigned long flags;
> - unsigned int aged_tail_idx;
> u32 head, tail;
> u32 taken;
> int ret = 0;
> @@ -693,18 +684,10 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
> spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
>
> head = stream->oa_buffer.head;
> - aged_tail_idx = stream->oa_buffer.aged_tail_idx;
> - tail = stream->oa_buffer.tails[aged_tail_idx].offset;
> + tail = stream->oa_buffer.tail;
>
> spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
>
> - /*
> - * An invalid tail pointer here means we're still waiting for the poll
> - * hrtimer callback to give us a pointer
> - */
> - if (tail == INVALID_TAIL_PTR)
> - return -EAGAIN;
> -
> /*
> * NB: oa_buffer.head/tail include the gtt_offset which we don't want
> * while indexing relative to oa_buf_base.
> @@ -838,13 +821,10 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
> }
>
> /*
> - * The above reason field sanity check is based on
> - * the assumption that the OA buffer is initially
> - * zeroed and we reset the field after copying so the
> - * check is still meaningful once old reports start
> - * being overwritten.
> + * Clear out the first 2 dword as a mean to detect unlanded
> + * reports.
> */
> - report32[0] = 0;
> + report32[0] = report32[1] = 0;
Looks like the scripts are complaining about this. You'll have to split
it in 2 lines.
> }
>
> if (start_offset != *offset) {
> @@ -985,7 +965,6 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
> u32 mask = (OA_BUFFER_SIZE - 1);
> size_t start_offset = *offset;
> unsigned long flags;
> - unsigned int aged_tail_idx;
> u32 head, tail;
> u32 taken;
> int ret = 0;
> @@ -996,17 +975,10 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
> spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
>
> head = stream->oa_buffer.head;
> - aged_tail_idx = stream->oa_buffer.aged_tail_idx;
> - tail = stream->oa_buffer.tails[aged_tail_idx].offset;
> + tail = stream->oa_buffer.tail;
>
> spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
>
> - /* An invalid tail pointer here means we're still waiting for the poll
> - * hrtimer callback to give us a pointer
> - */
> - if (tail == INVALID_TAIL_PTR)
> - return -EAGAIN;
> -
> /* NB: oa_buffer.head/tail include the gtt_offset which we don't want
> * while indexing relative to oa_buf_base.
> */
> @@ -1064,13 +1036,10 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
> if (ret)
> break;
>
> - /* The above report-id field sanity check is based on
> - * the assumption that the OA buffer is initially
> - * zeroed and we reset the field after copying so the
> - * check is still meaningful once old reports start
> - * being overwritten.
> + /* Clear out the first 2 dwords as a mean to detect unlanded
> + * reports.
> */
> - report32[0] = 0;
> + report32[0] = report32[1] = 0;
Same here.
> }
>
> if (start_offset != *offset) {
> @@ -1449,8 +1418,8 @@ static void gen7_init_oa_buffer(struct i915_perf_stream *stream)
> gtt_offset | OABUFFER_SIZE_16M);
>
> /* Mark that we need updated tail pointers to read from... */
> - stream->oa_buffer.tails[0].offset = INVALID_TAIL_PTR;
> - stream->oa_buffer.tails[1].offset = INVALID_TAIL_PTR;
> + stream->oa_buffer.aging_tail =
> + stream->oa_buffer.tail = gtt_offset;
>
> spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
>
> @@ -1503,8 +1472,8 @@ static void gen8_init_oa_buffer(struct i915_perf_stream *stream)
> intel_uncore_write(uncore, GEN8_OATAILPTR, gtt_offset & GEN8_OATAILPTR_MASK);
>
> /* Mark that we need updated tail pointers to read from... */
> - stream->oa_buffer.tails[0].offset = INVALID_TAIL_PTR;
> - stream->oa_buffer.tails[1].offset = INVALID_TAIL_PTR;
> + stream->oa_buffer.aging_tail =
> + stream->oa_buffer.tail = gtt_offset;
>
> /*
> * Reset state used to recognise context switches, affecting which
> @@ -1559,8 +1528,8 @@ static void gen12_init_oa_buffer(struct i915_perf_stream *stream)
> gtt_offset & GEN12_OAG_OATAILPTR_MASK);
>
> /* Mark that we need updated tail pointers to read from... */
> - stream->oa_buffer.tails[0].offset = INVALID_TAIL_PTR;
> - stream->oa_buffer.tails[1].offset = INVALID_TAIL_PTR;
> + stream->oa_buffer.aging_tail =
> + stream->oa_buffer.tail = gtt_offset;
>
> /*
> * Reset state used to recognise context switches, affecting which
> @@ -4408,6 +4377,12 @@ void i915_perf_init(struct drm_i915_private *i915)
> ratelimit_set_flags(&perf->spurious_report_rs,
> RATELIMIT_MSG_ON_RELEASE);
>
> + ratelimit_state_init(&perf->tail_pointer_race,
> + 5 * HZ, 10);
> + ratelimit_set_flags(&perf->tail_pointer_race,
> + RATELIMIT_MSG_ON_RELEASE);
> +
> +
> atomic64_set(&perf->noa_programming_delay,
> 500 * 1000 /* 500us */);
>
> diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h
> index a0e22f00f6cf..9ee7c58e70d5 100644
> --- a/drivers/gpu/drm/i915/i915_perf_types.h
> +++ b/drivers/gpu/drm/i915/i915_perf_types.h
> @@ -272,21 +272,10 @@ struct i915_perf_stream {
> spinlock_t ptr_lock;
>
> /**
> - * @tails: One 'aging' tail pointer and one 'aged' tail pointer ready to
> - * used for reading.
> - *
> - * Initial values of 0xffffffff are invalid and imply that an
> - * update is required (and should be ignored by an attempted
> - * read)
> + * @aging_tail: The last HW tail reported by HW. The data
> + * might not have made it to memory yet though.
> */
> - struct {
> - u32 offset;
> - } tails[2];
> -
> - /**
> - * @aged_tail_idx: Index for the aged tail ready to read() data up to.
> - */
> - unsigned int aged_tail_idx;
> + u32 aging_tail;
>
> /**
> * @aging_timestamp: A monotonic timestamp for when the current aging tail pointer
> @@ -302,6 +291,12 @@ struct i915_perf_stream {
> * OA buffer data to userspace.
> */
> u32 head;
> +
> + /**
> + * @tail: The last tail verified tail that can be read by
> + * userspace.
> + */
> + u32 tail;
> } oa_buffer;
>
> /**
> @@ -413,6 +408,12 @@ struct i915_perf {
> */
> struct ratelimit_state spurious_report_rs;
>
> + /**
> + * For rate limiting any notifications of tail pointer
> + * race.
> + */
> + struct ratelimit_state tail_pointer_race;
> +
> struct i915_oa_config test_config;
>
> u32 gen7_latched_oastatus1;
More information about the Intel-gfx
mailing list