[Intel-gfx] [PATCH 1/3] drm/i915/perf: rework aging tail workaround

Lionel Landwerlin lionel.g.landwerlin at intel.com
Sun Sep 15 11:15:09 UTC 2019


On 14/09/2019 02:06, Umesh Nerlige Ramappa wrote:
> From: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
>
> Right now the workaround against the OA tail pointer race condition
> requires at least twice the internal kernel polling timer to make any
> data available.
>
> This changes introduce checks on the OA data written into the circular
> buffer to make as much data as possible available on the first
> iteration of the polling timer.
>
> v2: Use OA_TAKEN macro without the gtt_offset (Lionel)
>
> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
> ---
>   drivers/gpu/drm/i915/i915_drv.h  |  30 ++---
>   drivers/gpu/drm/i915/i915_perf.c | 200 ++++++++++++++-----------------
>   2 files changed, 103 insertions(+), 127 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index bf600888b3f1..876aeaf3568e 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1180,21 +1180,11 @@ struct i915_perf_stream {
>   		spinlock_t ptr_lock;
>   
>   		/**
> -		 * One 'aging' tail pointer and one 'aged' tail pointer ready to
> -		 * used for reading.
> -		 *
> -		 * Initial values of 0xffffffff are invalid and imply that an
> -		 * update is required (and should be ignored by an attempted
> -		 * read)
> -		 */
> -		struct {
> -			u32 offset;
> -		} tails[2];
> -
> -		/**
> -		 * Index for the aged tail ready to read() data up to.
> +		 * The last HW tail reported by HW. The data
> +		 * might not have made it to memory yet
> +		 * though.
>   		 */
> -		unsigned int aged_tail_idx;
> +		u32 aging_tail;
>   
>   		/**
>   		 * A monotonic timestamp for when the current aging tail pointer
> @@ -1210,6 +1200,12 @@ struct i915_perf_stream {
>   		 * OA buffer data to userspace.
>   		 */
>   		u32 head;
> +
> +		/**
> +		 * The last verified tail that can be read
> +		 * by user space
> +		 */
> +		u32 tail;
>   	} oa_buffer;
>   };
>   
> @@ -1693,6 +1689,12 @@ struct drm_i915_private {
>   		 */
>   		struct ratelimit_state spurious_report_rs;
>   
> +		/**
> +		 * For rate limiting any notifications of tail pointer
> +		 * race.
> +		 */
> +		struct ratelimit_state tail_pointer_race;
> +
>   		struct i915_oa_config test_config;
>   
>   		u32 gen7_latched_oastatus1;
> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> index c1b764233761..50b6d154fd46 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -237,23 +237,14 @@
>    * for this earlier, as part of the oa_buffer_check to avoid lots of redundant
>    * read() attempts.
>    *
> - * In effect we define a tail pointer for reading that lags the real tail
> - * pointer by at least %OA_TAIL_MARGIN_NSEC nanoseconds, which gives enough
> - * time for the corresponding reports to become visible to the CPU.
> - *
> - * To manage this we actually track two tail pointers:
> - *  1) An 'aging' tail with an associated timestamp that is tracked until we
> - *     can trust the corresponding data is visible to the CPU; at which point
> - *     it is considered 'aged'.
> - *  2) An 'aged' tail that can be used for read()ing.
> - *
> - * The two separate pointers let us decouple read()s from tail pointer aging.
> - *
> - * The tail pointers are checked and updated at a limited rate within a hrtimer
> - * callback (the same callback that is used for delivering EPOLLIN events)
> - *
> - * Initially the tails are marked invalid with %INVALID_TAIL_PTR which
> - * indicates that an updated tail pointer is needed.
> + * We workaround this issue in oa_buffer_check() by reading the reports in the
> + * OA buffer, starting from the tail reported by the HW until we find 2
> + * consecutive reports with their first 2 dwords of not at 0. Those dwords are
> + * also set to 0 once read and the whole buffer is cleared upon OA buffer
> + * initialization. The first dword is the reason for this report while the
> + * second is the timestamp, making the chances of having those 2 fields at 0
> + * fairly unlikely. A more detailed explanation is available in
> + * oa_buffer_check().
>    *
>    * Most of the implementation details for this workaround are in
>    * oa_buffer_check_unlocked() and _append_oa_reports()
> @@ -266,7 +257,6 @@
>    * enabled without any periodic sampling.
>    */
>   #define OA_TAIL_MARGIN_NSEC	100000ULL
> -#define INVALID_TAIL_PTR	0xffffffff
>   
>   /* frequency for checking whether the OA unit has written new reports to the
>    * circular OA buffer...
> @@ -457,10 +447,10 @@ static u32 gen7_oa_hw_tail_read(struct i915_perf_stream *stream)
>   static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream)
>   {
>   	struct drm_i915_private *dev_priv = stream->dev_priv;
> +	u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
>   	int report_size = stream->oa_buffer.format_size;
>   	unsigned long flags;
> -	unsigned int aged_idx;
> -	u32 head, hw_tail, aged_tail, aging_tail;
> +	u32 hw_tail;
>   	u64 now;
>   
>   	/* We have to consider the (unlikely) possibility that read() errors
> @@ -469,16 +459,6 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream)
>   	 */
>   	spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
>   
> -	/* NB: The head we observe here might effectively be a little out of
> -	 * date (between head and tails[aged_idx].offset if there is currently
> -	 * a read() in progress.
> -	 */
> -	head = stream->oa_buffer.head;
> -
> -	aged_idx = stream->oa_buffer.aged_tail_idx;
> -	aged_tail = stream->oa_buffer.tails[aged_idx].offset;
> -	aging_tail = stream->oa_buffer.tails[!aged_idx].offset;
> -
>   	hw_tail = dev_priv->perf.ops.oa_hw_tail_read(stream);
>   
>   	/* The tail pointer increases in 64 byte increments,
> @@ -488,63 +468,75 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream)
>   
>   	now = ktime_get_mono_fast_ns();
>   
> -	/* Update the aged tail
> -	 *
> -	 * Flip the tail pointer available for read()s once the aging tail is
> -	 * old enough to trust that the corresponding data will be visible to
> -	 * the CPU...
> -	 *
> -	 * Do this before updating the aging pointer in case we may be able to
> -	 * immediately start aging a new pointer too (if new data has become
> -	 * available) without needing to wait for a later hrtimer callback.
> -	 */
> -	if (aging_tail != INVALID_TAIL_PTR &&
> -	    ((now - stream->oa_buffer.aging_timestamp) >
> -	     OA_TAIL_MARGIN_NSEC)) {
> -
> -		aged_idx ^= 1;
> -		stream->oa_buffer.aged_tail_idx = aged_idx;
> +	if (hw_tail == stream->oa_buffer.aging_tail) {
> +		/* If the HW tail hasn't move since the last check and the HW
> +		 * tail has been aging for long enough, declare it the new
> +		 * tail.
> +		 */
> +		if ((now - stream->oa_buffer.aging_timestamp) >
> +		    OA_TAIL_MARGIN_NSEC) {
> +			stream->oa_buffer.tail =
> +				stream->oa_buffer.aging_tail;
> +		}
> +	} else {
> +		u32 head, tail, landed_report_heads;
>   
> -		aged_tail = aging_tail;
> +		/* NB: The head we observe here might effectively be a little out of
> +		 * date (between head and tails[aged_idx].offset if there is currently
> +		 * a read() in progress.
> +		 */
> +		head = stream->oa_buffer.head - gtt_offset;
>   
> -		/* Mark that we need a new pointer to start aging... */
> -		stream->oa_buffer.tails[!aged_idx].offset = INVALID_TAIL_PTR;
> -		aging_tail = INVALID_TAIL_PTR;
> -	}
> +		hw_tail -= gtt_offset;
> +		tail = hw_tail;
>   
> -	/* Update the aging tail
> -	 *
> -	 * We throttle aging tail updates until we have a new tail that
> -	 * represents >= one report more data than is already available for
> -	 * reading. This ensures there will be enough data for a successful
> -	 * read once this new pointer has aged and ensures we will give the new
> -	 * pointer time to age.
> -	 */
> -	if (aging_tail == INVALID_TAIL_PTR &&
> -	    (aged_tail == INVALID_TAIL_PTR ||
> -	     OA_TAKEN(hw_tail, aged_tail) >= report_size)) {
> -		struct i915_vma *vma = stream->oa_buffer.vma;
> -		u32 gtt_offset = i915_ggtt_offset(vma);
> -
> -		/* Be paranoid and do a bounds check on the pointer read back
> -		 * from hardware, just in case some spurious hardware condition
> -		 * could put the tail out of bounds...
> +		/* Walk the stream backward until we find at least 2 reports
> +		 * with dword 0 & 1 not at 0. Since the circular buffer
> +		 * pointers progress by increments of 64 bytes and that
> +		 * reports can be up to 256 bytes long, we can't tell whether
> +		 * a report has fully landed in memory before the first 2
> +		 * dwords of the following report have effectively landed.
> +		 *
> +		 * This is assuming that the writes of the OA unit land in
> +		 * memory in the order they were written to.
> +		 * If not : (╯°□°)╯︵ ┻━┻
>   		 */
> -		if (hw_tail >= gtt_offset &&
> -		    hw_tail < (gtt_offset + OA_BUFFER_SIZE)) {
> -			stream->oa_buffer.tails[!aged_idx].offset =
> -				aging_tail = hw_tail;
> -			stream->oa_buffer.aging_timestamp = now;
> -		} else {
> -			DRM_ERROR("Ignoring spurious out of range OA buffer tail pointer = %u\n",
> -				  hw_tail);
> +		landed_report_heads = 0;
> +		while (OA_TAKEN(tail, head) >= report_size) {
> +			u32 previous_tail = (tail - report_size) & (OA_BUFFER_SIZE - 1);
> +			u8 *report = stream->oa_buffer.vaddr + previous_tail;
> +			u32 *report32 = (void *) report;
> +
> +			/* Head of the report indicated by the HW tail register has
> +			 * indeed landed into memory.
> +			 */
> +			if (report32[0] != 0 || report[1] != 0) {


Just noticed this is wrong, it should be : if (report32[0] != || 
report32[1] != 0) {

report is not a pointer to a dword.


-Lionel


> +				landed_report_heads++;
> +
> +				if (landed_report_heads >= 2)
> +					break;
> +			}
> +
> +			tail = previous_tail;
> +		}
> +
> +		if (abs(tail - hw_tail) >= (2 * report_size)) {
> +			if (__ratelimit(&dev_priv->perf.tail_pointer_race)) {
> +				DRM_NOTE("unlanded report(s) head=0x%x "
> +					 "tail=0x%x hw_tail=0x%x\n",
> +					 head, tail, hw_tail);
> +			}
>   		}
> +
> +		stream->oa_buffer.tail = gtt_offset + tail;
> +		stream->oa_buffer.aging_tail = gtt_offset + hw_tail;
> +		stream->oa_buffer.aging_timestamp = now;
>   	}
>   
>   	spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
>   
> -	return aged_tail == INVALID_TAIL_PTR ?
> -		false : OA_TAKEN(aged_tail, head) >= report_size;
> +	return OA_TAKEN(stream->oa_buffer.tail - gtt_offset,
> +			stream->oa_buffer.head - gtt_offset) >= report_size;
>   }
>   
>   /**
> @@ -662,7 +654,6 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
>   	u32 mask = (OA_BUFFER_SIZE - 1);
>   	size_t start_offset = *offset;
>   	unsigned long flags;
> -	unsigned int aged_tail_idx;
>   	u32 head, tail;
>   	u32 taken;
>   	int ret = 0;
> @@ -673,18 +664,10 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
>   	spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
>   
>   	head = stream->oa_buffer.head;
> -	aged_tail_idx = stream->oa_buffer.aged_tail_idx;
> -	tail = stream->oa_buffer.tails[aged_tail_idx].offset;
> +	tail = stream->oa_buffer.tail;
>   
>   	spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
>   
> -	/*
> -	 * An invalid tail pointer here means we're still waiting for the poll
> -	 * hrtimer callback to give us a pointer
> -	 */
> -	if (tail == INVALID_TAIL_PTR)
> -		return -EAGAIN;
> -
>   	/*
>   	 * NB: oa_buffer.head/tail include the gtt_offset which we don't want
>   	 * while indexing relative to oa_buf_base.
> @@ -812,13 +795,10 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
>   		}
>   
>   		/*
> -		 * The above reason field sanity check is based on
> -		 * the assumption that the OA buffer is initially
> -		 * zeroed and we reset the field after copying so the
> -		 * check is still meaningful once old reports start
> -		 * being overwritten.
> +		 * Clear out the first 2 dword as a mean to detect unlanded
> +		 * reports.
>   		 */
> -		report32[0] = 0;
> +		report32[0] = report32[1] = 0;
>   	}
>   
>   	if (start_offset != *offset) {
> @@ -950,7 +930,6 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
>   	u32 mask = (OA_BUFFER_SIZE - 1);
>   	size_t start_offset = *offset;
>   	unsigned long flags;
> -	unsigned int aged_tail_idx;
>   	u32 head, tail;
>   	u32 taken;
>   	int ret = 0;
> @@ -961,17 +940,10 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
>   	spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
>   
>   	head = stream->oa_buffer.head;
> -	aged_tail_idx = stream->oa_buffer.aged_tail_idx;
> -	tail = stream->oa_buffer.tails[aged_tail_idx].offset;
> +	tail = stream->oa_buffer.tail;
>   
>   	spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
>   
> -	/* An invalid tail pointer here means we're still waiting for the poll
> -	 * hrtimer callback to give us a pointer
> -	 */
> -	if (tail == INVALID_TAIL_PTR)
> -		return -EAGAIN;
> -
>   	/* NB: oa_buffer.head/tail include the gtt_offset which we don't want
>   	 * while indexing relative to oa_buf_base.
>   	 */
> @@ -1026,13 +998,10 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
>   		if (ret)
>   			break;
>   
> -		/* The above report-id field sanity check is based on
> -		 * the assumption that the OA buffer is initially
> -		 * zeroed and we reset the field after copying so the
> -		 * check is still meaningful once old reports start
> -		 * being overwritten.
> +		/* Clear out the first 2 dwords as a mean to detect unlanded
> +		 * reports.
>   		 */
> -		report32[0] = 0;
> +		report32[0] = report32[1] = 0;
>   	}
>   
>   	if (start_offset != *offset) {
> @@ -1411,8 +1380,8 @@ static void gen7_init_oa_buffer(struct i915_perf_stream *stream)
>   	I915_WRITE(GEN7_OASTATUS1, gtt_offset | OABUFFER_SIZE_16M); /* tail */
>   
>   	/* Mark that we need updated tail pointers to read from... */
> -	stream->oa_buffer.tails[0].offset = INVALID_TAIL_PTR;
> -	stream->oa_buffer.tails[1].offset = INVALID_TAIL_PTR;
> +	stream->oa_buffer.aging_tail =
> +		stream->oa_buffer.tail = gtt_offset;
>   
>   	spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
>   
> @@ -1468,8 +1437,8 @@ static void gen8_init_oa_buffer(struct i915_perf_stream *stream)
>   	I915_WRITE(GEN8_OATAILPTR, gtt_offset & GEN8_OATAILPTR_MASK);
>   
>   	/* Mark that we need updated tail pointers to read from... */
> -	stream->oa_buffer.tails[0].offset = INVALID_TAIL_PTR;
> -	stream->oa_buffer.tails[1].offset = INVALID_TAIL_PTR;
> +	stream->oa_buffer.aging_tail =
> +		stream->oa_buffer.tail = gtt_offset;
>   
>   	/*
>   	 * Reset state used to recognise context switches, affecting which
> @@ -3672,6 +3641,11 @@ void i915_perf_init(struct drm_i915_private *dev_priv)
>   		ratelimit_set_flags(&dev_priv->perf.spurious_report_rs,
>   				    RATELIMIT_MSG_ON_RELEASE);
>   
> +		ratelimit_state_init(&dev_priv->perf.tail_pointer_race,
> +				     5 * HZ, 10);
> +		ratelimit_set_flags(&dev_priv->perf.tail_pointer_race,
> +				    RATELIMIT_MSG_ON_RELEASE);
> +
>   		dev_priv->perf.initialized = true;
>   	}
>   }




More information about the Intel-gfx mailing list