[Intel-gfx] [PATCH 2/3] drm/i915/perf: Add support for report sizes that are not power of 2
Umesh Nerlige Ramappa
umesh.nerlige.ramappa at intel.com
Mon Sep 16 19:17:54 UTC 2019
On Sun, Sep 15, 2019 at 02:24:41PM +0300, Lionel Landwerlin wrote:
>On 14/09/2019 02:06, Umesh Nerlige Ramappa wrote:
>>OA perf unit supports non-power of 2 report sizes. Enable support for
>>these sizes in the driver.
>>
>>Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa at intel.com>
>>---
>> drivers/gpu/drm/i915/i915_perf.c | 59 ++++++++++++--------------------
>> 1 file changed, 21 insertions(+), 38 deletions(-)
>>
>>diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
>>index 50b6d154fd46..482fca3da7de 100644
>>--- a/drivers/gpu/drm/i915/i915_perf.c
>>+++ b/drivers/gpu/drm/i915/i915_perf.c
>>@@ -450,7 +450,7 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream)
>> u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
>> int report_size = stream->oa_buffer.format_size;
>> unsigned long flags;
>>- u32 hw_tail;
>>+ u32 hw_tail, aging_tail;
>> u64 now;
>> /* We have to consider the (unlikely) possibility that read() errors
>>@@ -459,16 +459,17 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream)
>> */
>> spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
>>- hw_tail = dev_priv->perf.ops.oa_hw_tail_read(stream);
>>+ hw_tail = dev_priv->perf.ops.oa_hw_tail_read(stream) - gtt_offset;
>>+ aging_tail = stream->oa_buffer.aging_tail - gtt_offset;
>> /* The tail pointer increases in 64 byte increments,
>> * not in report_size steps...
>> */
>>- hw_tail &= ~(report_size - 1);
>>+ hw_tail = OA_TAKEN(hw_tail, (OA_TAKEN(hw_tail, aging_tail) % report_size));
>
>
>I'm struggling to parse this line above and I'm not 100% sure it's correct.
>
>Could add a comment to explain what is going on?
The aging tail always points to a report boundary, whereas hw_tail
advances in 64-byte increments.
The inner OA_TAKEN returns the number of bytes between the aging_tail
and the hw_tail. Taking that value modulo report_size gives the size of
the trailing partial report (if any).
The outer OA_TAKEN then subtracts the size of that partial report from
the hw_tail, yielding a hw_tail that points to the boundary of the last
full report.
For the power-of-2 report sizes supported so far, the resulting hw_tail
is the same as the value computed by the deleted line just above it
(hw_tail &= ~(report_size - 1)).
Thanks,
Umesh
>
>
>Thanks,
>
>
>-Lionel
>
>
>> now = ktime_get_mono_fast_ns();
>>- if (hw_tail == stream->oa_buffer.aging_tail) {
>>+ if (hw_tail == aging_tail) {
>> /* If the HW tail hasn't move since the last check and the HW
>> * tail has been aging for long enough, declare it the new
>> * tail.
>>@@ -486,8 +487,6 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream)
>> * a read() in progress.
>> */
>> head = stream->oa_buffer.head - gtt_offset;
>>-
>>- hw_tail -= gtt_offset;
>> tail = hw_tail;
>> /* Walk the stream backward until we find at least 2 reports
>>@@ -613,7 +612,18 @@ static int append_oa_sample(struct i915_perf_stream *stream,
>> buf += sizeof(header);
>> if (sample_flags & SAMPLE_OA_REPORT) {
>>- if (copy_to_user(buf, report, report_size))
>>+ u8 *oa_buf_end = stream->oa_buffer.vaddr + OA_BUFFER_SIZE;
>>+ int report_size_partial = oa_buf_end - report;
>>+
>>+ if (report_size_partial < report_size) {
>>+ if (copy_to_user(buf, report, report_size_partial))
>>+ return -EFAULT;
>>+ buf += report_size_partial;
>>+
>>+ if (copy_to_user(buf, stream->oa_buffer.vaddr,
>>+ report_size - report_size_partial))
>>+ return -EFAULT;
>>+ } else if (copy_to_user(buf, report, report_size))
>> return -EFAULT;
>> }
>>@@ -682,8 +692,8 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
>> * only be incremented by multiples of the report size (notably also
>> * all a power of two).
>> */
>>- if (WARN_ONCE(head > OA_BUFFER_SIZE || head % report_size ||
>>- tail > OA_BUFFER_SIZE || tail % report_size,
>>+ if (WARN_ONCE(head > OA_BUFFER_SIZE ||
>>+ tail > OA_BUFFER_SIZE,
>> "Inconsistent OA buffer pointers: head = %u, tail = %u\n",
>> head, tail))
>> return -EIO;
>>@@ -697,20 +707,6 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
>> u32 ctx_id;
>> u32 reason;
>>- /*
>>- * All the report sizes factor neatly into the buffer
>>- * size so we never expect to see a report split
>>- * between the beginning and end of the buffer.
>>- *
>>- * Given the initial alignment check a misalignment
>>- * here would imply a driver bug that would result
>>- * in an overrun.
>>- */
>>- if (WARN_ON((OA_BUFFER_SIZE - head) < report_size)) {
>>- DRM_ERROR("Spurious OA head ptr: non-integral report offset\n");
>>- break;
>>- }
>>-
>> /*
>> * The reason field includes flags identifying what
>> * triggered this specific report (mostly timer
>>@@ -956,8 +952,8 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
>> * only be incremented by multiples of the report size (notably also
>> * all a power of two).
>> */
>>- if (WARN_ONCE(head > OA_BUFFER_SIZE || head % report_size ||
>>- tail > OA_BUFFER_SIZE || tail % report_size,
>>+ if (WARN_ONCE(head > OA_BUFFER_SIZE ||
>>+ tail > OA_BUFFER_SIZE,
>> "Inconsistent OA buffer pointers: head = %u, tail = %u\n",
>> head, tail))
>> return -EIO;
>>@@ -969,19 +965,6 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream,
>> u8 *report = oa_buf_base + head;
>> u32 *report32 = (void *)report;
>>- /* All the report sizes factor neatly into the buffer
>>- * size so we never expect to see a report split
>>- * between the beginning and end of the buffer.
>>- *
>>- * Given the initial alignment check a misalignment
>>- * here would imply a driver bug that would result
>>- * in an overrun.
>>- */
>>- if (WARN_ON((OA_BUFFER_SIZE - head) < report_size)) {
>>- DRM_ERROR("Spurious OA head ptr: non-integral report offset\n");
>>- break;
>>- }
>>-
>> /* The report-ID field for periodic samples includes
>> * some undocumented flags related to what triggered
>> * the report and is never expected to be zero so we
>
>
More information about the Intel-gfx
mailing list