[Intel-gfx] [PATCH 2/3] drm/i915/perf: Add support for report sizes that are not power of 2

Umesh Nerlige Ramappa umesh.nerlige.ramappa at intel.com
Wed Sep 18 16:59:19 UTC 2019


On Wed, Sep 18, 2019 at 11:21:01AM +0300, Lionel Landwerlin wrote:
>On 16/09/2019 22:17, Umesh Nerlige Ramappa wrote:
>>On Sun, Sep 15, 2019 at 02:24:41PM +0300, Lionel Landwerlin wrote:
>>>On 14/09/2019 02:06, Umesh Nerlige Ramappa wrote:
>>>>OA perf unit supports non-power of 2 report sizes. Enable support for
>>>>these sizes in the driver.
>>>>
>>>>Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa at intel.com>
>>>>---
>>>> drivers/gpu/drm/i915/i915_perf.c | 59 ++++++++++++--------------------
>>>> 1 file changed, 21 insertions(+), 38 deletions(-)
>>>>
>>>>diff --git a/drivers/gpu/drm/i915/i915_perf.c 
>>>>b/drivers/gpu/drm/i915/i915_perf.c
>>>>index 50b6d154fd46..482fca3da7de 100644
>>>>--- a/drivers/gpu/drm/i915/i915_perf.c
>>>>+++ b/drivers/gpu/drm/i915/i915_perf.c
>>>>@@ -450,7 +450,7 @@ static bool oa_buffer_check_unlocked(struct 
>>>>i915_perf_stream *stream)
>>>>     u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
>>>>     int report_size = stream->oa_buffer.format_size;
>>>>     unsigned long flags;
>>>>-    u32 hw_tail;
>>>>+    u32 hw_tail, aging_tail;
>>>>     u64 now;
>>>>     /* We have to consider the (unlikely) possibility that 
>>>>read() errors
>>>>@@ -459,16 +459,17 @@ static bool 
>>>>oa_buffer_check_unlocked(struct i915_perf_stream *stream)
>>>>      */
>>>>     spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
>>>>-    hw_tail = dev_priv->perf.ops.oa_hw_tail_read(stream);
>>>>+    hw_tail = dev_priv->perf.ops.oa_hw_tail_read(stream) - gtt_offset;
>>>>+    aging_tail = stream->oa_buffer.aging_tail - gtt_offset;
>>>>     /* The tail pointer increases in 64 byte increments,
>>>>      * not in report_size steps...
>>>>      */
>>>>-    hw_tail &= ~(report_size - 1);
>>>>+    hw_tail = OA_TAKEN(hw_tail, (OA_TAKEN(hw_tail, aging_tail) 
>>>>% report_size));
>>>
>>>
>>>I'm struggling to parse this line above and I'm not 100% sure it's 
>>>correct.
>>>
>>>Could you add a comment to explain what is going on?
>>
>>The aging tail is always pointing to the boundary of a report whereas
>>the hw_tail is advancing in 64 byte increments.
>>
>>The innermost OA_TAKEN is returning the number of bytes between the
>>hw_tail and the aging_tail. The modulo is getting the size of the
>>partial report (if any).
>>
>>The outermost OA_TAKEN is subtracting the size of partial report from
>>the hw_tail to get a hw_tail that points to the boundary of the last
>>full report.
>>
>>The value of hw_tail would be the same as from the deleted line of code
>>above this line.
>>
>>Thanks,
>>Umesh
>
>
>Thanks, I ran a few tests locally to convince myself it's correct :)
>
>
>It's still a bit difficult to parse, probably because OA_TAKEN() 
>wasn't meant for this.
>
>Could you create a helper function that does this computation, something
>like this:
>
>
>static inline u32 align_hw_tail_to_report_boundary(u32 hw_tail, u32 
>last_aligned_tail)
>
>{
>
>    /* Compute potentially partially landed report in the OA buffer */
>
>    u32 partial_report_size = OA_TAKEN(hw_tail, last_aligned_tail) % 
>report_size;
>
>    /* Subtract that partial amount off the tail. */
>
>    return (hw_tail - partial_report_size) % OA_BUFFER_SIZE;
>
>}

Sure, I can add a helper function to make this more readable.

Thanks,
Umesh

>
>
>Cheers,
>
>
>-Lionel
>
>
>>
>>>
>>>
>>>Thanks,
>>>
>>>
>>>-Lionel
>>>
>>>
>>>>     now = ktime_get_mono_fast_ns();
>>>>-    if (hw_tail == stream->oa_buffer.aging_tail) {
>>>>+    if (hw_tail == aging_tail) {
>>>>         /* If the HW tail hasn't move since the last check and the HW
>>>>          * tail has been aging for long enough, declare it the new
>>>>          * tail.
>>>>@@ -486,8 +487,6 @@ static bool oa_buffer_check_unlocked(struct 
>>>>i915_perf_stream *stream)
>>>>          * a read() in progress.
>>>>          */
>>>>         head = stream->oa_buffer.head - gtt_offset;
>>>>-
>>>>-        hw_tail -= gtt_offset;
>>>>         tail = hw_tail;
>>>>         /* Walk the stream backward until we find at least 2 reports
>>>>@@ -613,7 +612,18 @@ static int append_oa_sample(struct 
>>>>i915_perf_stream *stream,
>>>>     buf += sizeof(header);
>>>>     if (sample_flags & SAMPLE_OA_REPORT) {
>>>>-        if (copy_to_user(buf, report, report_size))
>>>>+        u8 *oa_buf_end = stream->oa_buffer.vaddr + OA_BUFFER_SIZE;
>>>>+        int report_size_partial = oa_buf_end - report;
>>>>+
>>>>+        if (report_size_partial < report_size) {
>>>>+            if (copy_to_user(buf, report, report_size_partial))
>>>>+                return -EFAULT;
>>>>+            buf += report_size_partial;
>>>>+
>>>>+            if (copy_to_user(buf, stream->oa_buffer.vaddr,
>>>>+                    report_size - report_size_partial))
>>>>+                return -EFAULT;
>>>>+        } else if (copy_to_user(buf, report, report_size))
>>>>             return -EFAULT;
>>>>     }
>>>>@@ -682,8 +692,8 @@ static int gen8_append_oa_reports(struct 
>>>>i915_perf_stream *stream,
>>>>      * only be incremented by multiples of the report size 
>>>>(notably also
>>>>      * all a power of two).
>>>>      */
>>>>-    if (WARN_ONCE(head > OA_BUFFER_SIZE || head % report_size ||
>>>>-              tail > OA_BUFFER_SIZE || tail % report_size,
>>>>+    if (WARN_ONCE(head > OA_BUFFER_SIZE ||
>>>>+              tail > OA_BUFFER_SIZE,
>>>>               "Inconsistent OA buffer pointers: head = %u, tail 
>>>>= %u\n",
>>>>               head, tail))
>>>>         return -EIO;
>>>>@@ -697,20 +707,6 @@ static int gen8_append_oa_reports(struct 
>>>>i915_perf_stream *stream,
>>>>         u32 ctx_id;
>>>>         u32 reason;
>>>>-        /*
>>>>-         * All the report sizes factor neatly into the buffer
>>>>-         * size so we never expect to see a report split
>>>>-         * between the beginning and end of the buffer.
>>>>-         *
>>>>-         * Given the initial alignment check a misalignment
>>>>-         * here would imply a driver bug that would result
>>>>-         * in an overrun.
>>>>-         */
>>>>-        if (WARN_ON((OA_BUFFER_SIZE - head) < report_size)) {
>>>>-            DRM_ERROR("Spurious OA head ptr: non-integral 
>>>>report offset\n");
>>>>-            break;
>>>>-        }
>>>>-
>>>>         /*
>>>>          * The reason field includes flags identifying what
>>>>          * triggered this specific report (mostly timer
>>>>@@ -956,8 +952,8 @@ static int gen7_append_oa_reports(struct 
>>>>i915_perf_stream *stream,
>>>>      * only be incremented by multiples of the report size 
>>>>(notably also
>>>>      * all a power of two).
>>>>      */
>>>>-    if (WARN_ONCE(head > OA_BUFFER_SIZE || head % report_size ||
>>>>-              tail > OA_BUFFER_SIZE || tail % report_size,
>>>>+    if (WARN_ONCE(head > OA_BUFFER_SIZE ||
>>>>+              tail > OA_BUFFER_SIZE,
>>>>               "Inconsistent OA buffer pointers: head = %u, tail 
>>>>= %u\n",
>>>>               head, tail))
>>>>         return -EIO;
>>>>@@ -969,19 +965,6 @@ static int gen7_append_oa_reports(struct 
>>>>i915_perf_stream *stream,
>>>>         u8 *report = oa_buf_base + head;
>>>>         u32 *report32 = (void *)report;
>>>>-        /* All the report sizes factor neatly into the buffer
>>>>-         * size so we never expect to see a report split
>>>>-         * between the beginning and end of the buffer.
>>>>-         *
>>>>-         * Given the initial alignment check a misalignment
>>>>-         * here would imply a driver bug that would result
>>>>-         * in an overrun.
>>>>-         */
>>>>-        if (WARN_ON((OA_BUFFER_SIZE - head) < report_size)) {
>>>>-            DRM_ERROR("Spurious OA head ptr: non-integral 
>>>>report offset\n");
>>>>-            break;
>>>>-        }
>>>>-
>>>>         /* The report-ID field for periodic samples includes
>>>>          * some undocumented flags related to what triggered
>>>>          * the report and is never expected to be zero so we
>>>
>>>
>>
>


More information about the Intel-gfx mailing list