[Intel-gfx] [PATCH 06/19] drm/i915/perf: Use helpers to process reports w.r.t. OA buffer size

Dixit, Ashutosh ashutosh.dixit at intel.com
Wed Sep 14 16:04:10 UTC 2022


On Tue, 23 Aug 2022 13:41:42 -0700, Umesh Nerlige Ramappa wrote:
>

Hi Umesh,

> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> index 6fc4f0d8fc5a..bbf1c574f393 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -385,6 +385,21 @@ static struct ctl_table_header *sysctl_header;
>
>  static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer);
>
> +static u32 _oa_taken(struct i915_perf_stream * stream, u32 tail, u32 head)

nit: no space between * and stream.

> +{
> +	u32 size = stream->oa_buffer.vma->size;
> +
> +	return tail >= head ? tail - head : size - (head - tail);
> +}

If we are doing this we should probably eliminate references to OA_TAKEN
which serves an identical purpose (I think there is one remaining
reference) and also delete OA_TAKEN #define.

> +
> +static u32 _rewind_tail(struct i915_perf_stream * stream, u32 relative_hw_tail,
> +			u32 rewind_delta)
> +{
> +	return rewind_delta > relative_hw_tail ?
> +	       stream->oa_buffer.vma->size - (rewind_delta - relative_hw_tail) :
> +	       relative_hw_tail - rewind_delta;
> +}

Also are we really saying here that we are supporting non-power-of-2 OA
buffer sizes? Because if we stayed with power-of-2 sizes the expression
above are nice and elegant and actually closer to the previous code being
changed in this patch. For example:

#include <linux/circ_buf.h>

static u32 _oa_taken(struct i915_perf_stream *stream, u32 tail, u32 head)
{
	return CIRC_CNT(tail, head, stream->oa_buffer.vma->size);
}

static u32 _rewind_tail(struct i915_perf_stream *stream, u32 relative_hw_tail,
       			u32 rewind_delta)
{
	return CIRC_CNT(relative_hw_tail, rewind_delta, stream->oa_buffer.vma->size);
}

Note that for power-of-2 sizes the two functions above are identical but we
should keep them separate for clarity (as is done in the patch) since they
are serving two different functions in the OA code.

Also another assumption in the code seems to be:

	stream->oa_buffer.vma->size == OA_BUFFER_SIZE

which I am pretty sure will not hold for arbitrary non-power-of-2 OA buffer
sizes? So we might as well stick with power-of-2 sizes and change later in
a separate patch only if needed?

Thanks.
--
Ashutosh

> +
>  void i915_oa_config_release(struct kref *ref)
>  {
>	struct i915_oa_config *oa_config =
> @@ -487,12 +502,14 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream)
>	 * sizes need not be integral multiples or 64 or powers of 2.
>	 * Compute potentially partially landed report in the OA buffer
>	 */
> -	partial_report_size = OA_TAKEN(hw_tail, stream->oa_buffer.tail);
> +	partial_report_size =
> +		_oa_taken(stream, hw_tail, stream->oa_buffer.tail);
>	partial_report_size %= report_size;
>
>	/* Subtract partial amount off the tail */
> -	hw_tail = gtt_offset + ((hw_tail - partial_report_size) &
> -				(stream->oa_buffer.vma->size - 1));
> +	hw_tail = gtt_offset + _rewind_tail(stream,
> +					    hw_tail - gtt_offset,
> +					    partial_report_size);
>
>	now = ktime_get_mono_fast_ns();
>
> @@ -527,16 +544,16 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream)
>		 * memory in the order they were written to.
>		 * If not : (╯°□°)╯︵ ┻━┻
>		 */
> -		while (OA_TAKEN(tail, aged_tail) >= report_size) {
> +		while (_oa_taken(stream, tail, aged_tail) >= report_size) {
>			u32 *report32 = (void *)(stream->oa_buffer.vaddr + tail);
>
>			if (report32[0] != 0 || report32[1] != 0)
>				break;
>
> -			tail = (tail - report_size) & (OA_BUFFER_SIZE - 1);
> +			tail = _rewind_tail(stream, tail, report_size);
>		}
>
> -		if (OA_TAKEN(hw_tail, tail) > report_size &&
> +		if (_oa_taken(stream, hw_tail, tail) > report_size &&
>		    __ratelimit(&stream->perf->tail_pointer_race))
>			DRM_NOTE("unlanded report(s) head=0x%x "
>				 "tail=0x%x hw_tail=0x%x\n",
> @@ -547,8 +564,9 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream)
>		stream->oa_buffer.aging_timestamp = now;
>	}
>
> -	pollin = OA_TAKEN(stream->oa_buffer.tail - gtt_offset,
> -			  stream->oa_buffer.head - gtt_offset) >= report_size;
> +	pollin = _oa_taken(stream,
> +			   stream->oa_buffer.tail,
> +			   stream->oa_buffer.head) >= report_size;
>
>	spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
>
> @@ -679,11 +697,9 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
>	int report_size = stream->oa_buffer.format_size;
>	u8 *oa_buf_base = stream->oa_buffer.vaddr;
>	u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
> -	u32 mask = (OA_BUFFER_SIZE - 1);
>	size_t start_offset = *offset;
>	unsigned long flags;
> -	u32 head, tail;
> -	u32 taken;
> +	u32 head, tail, size;
>	int ret = 0;
>
>	if (drm_WARN_ON(&uncore->i915->drm, !stream->enabled))
> @@ -693,6 +709,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
>
>	head = stream->oa_buffer.head;
>	tail = stream->oa_buffer.tail;
> +	size = stream->oa_buffer.vma->size;
>
>	spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
>
> @@ -711,16 +728,15 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
>	 * all a power of two).
>	 */
>	if (drm_WARN_ONCE(&uncore->i915->drm,
> -			  head > stream->oa_buffer.vma->size ||
> -			  tail > stream->oa_buffer.vma->size,
> +			  head > size || tail > size,
>			  "Inconsistent OA buffer pointers: head = %u, tail = %u\n",
>			  head, tail))
>		return -EIO;
>
>
>	for (/* none */;
> -	     (taken = OA_TAKEN(tail, head));
> -	     head = (head + report_size) & mask) {
> +	     _oa_taken(stream, tail, head);
> +	     head = (head + report_size) % size) {
>		u8 *report = oa_buf_base + head;
>		u32 *report32 = (void *)report;
>		u32 ctx_id;
> --
> 2.25.1
>


More information about the Intel-gfx mailing list