[Intel-gfx] [PATCH 2/3] drm/i915/perf: Add support for report sizes that are not power of 2

Lionel Landwerlin lionel.g.landwerlin at intel.com
Wed Sep 18 08:21:01 UTC 2019


On 16/09/2019 22:17, Umesh Nerlige Ramappa wrote:
> On Sun, Sep 15, 2019 at 02:24:41PM +0300, Lionel Landwerlin wrote:
>> On 14/09/2019 02:06, Umesh Nerlige Ramappa wrote:
>>> OA perf unit supports non-power of 2 report sizes. Enable support for
>>> these sizes in the driver.
>>>
>>> Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa at intel.com>
>>> ---
>>>  drivers/gpu/drm/i915/i915_perf.c | 59 ++++++++++++--------------------
>>>  1 file changed, 21 insertions(+), 38 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_perf.c 
>>> b/drivers/gpu/drm/i915/i915_perf.c
>>> index 50b6d154fd46..482fca3da7de 100644
>>> --- a/drivers/gpu/drm/i915/i915_perf.c
>>> +++ b/drivers/gpu/drm/i915/i915_perf.c
>>> @@ -450,7 +450,7 @@ static bool oa_buffer_check_unlocked(struct 
>>> i915_perf_stream *stream)
>>>      u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
>>>      int report_size = stream->oa_buffer.format_size;
>>>      unsigned long flags;
>>> -    u32 hw_tail;
>>> +    u32 hw_tail, aging_tail;
>>>      u64 now;
>>>      /* We have to consider the (unlikely) possibility that read() 
>>> errors
>>> @@ -459,16 +459,17 @@ static bool oa_buffer_check_unlocked(struct 
>>> i915_perf_stream *stream)
>>>       */
>>>      spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
>>> -    hw_tail = dev_priv->perf.ops.oa_hw_tail_read(stream);
>>> +    hw_tail = dev_priv->perf.ops.oa_hw_tail_read(stream) - gtt_offset;
>>> +    aging_tail = stream->oa_buffer.aging_tail - gtt_offset;
>>>      /* The tail pointer increases in 64 byte increments,
>>>       * not in report_size steps...
>>>       */
>>> -    hw_tail &= ~(report_size - 1);
>>> +    hw_tail = OA_TAKEN(hw_tail, (OA_TAKEN(hw_tail, aging_tail) % 
>>> report_size));
>>
>>
>> I'm struggling to parse this line above and I'm not 100% sure it's 
>> correct.
>>
>> Could add a comment to explain what is going on?
>
> The aging tail is always pointing to the boundary of a report whereas
> the hw_tail is advancing in 64 byte increments.
>
> The innermost OA_TAKEN is returning the number of bytes between the
> hw_tail and the aging_tail. The modulo is getting the size of the
> partial report (if any).
>
> The outermost OA_TAKEN is subtracting the size of partial report from
> the hw_tail to get a hw_tail that points to the boundary of the last
> full report.
>
> The value of hw_tail would be the same as from the deleted line of code
> above this line.
>
> Thanks,
> Umesh


Thanks, I ran a few tests locally to convince myself it's correct :)


It's still a bit difficult to parse, probably because OA_TAKEN() wasn't 
meant for this.

Could create a helper function that does this computation, something 
like this :


static inline u32 align_hw_tail_to_report_boundary(u32 hw_tail, u32 
last_aligned_tail)

{

     /* Compute potentially partially landed report in the OA buffer */

     u32 partial_report_size = OA_TAKEN(hw_tail, last_aligned_tail) % 
report_size;

     /* Substract that partial amount off the tail. */

     return (hw_tail - partial_report_size) % OA_BUFFER_SIZE;

}


Cheers,


-Lionel


>
>>
>>
>> Thanks,
>>
>>
>> -Lionel
>>
>>
>>>      now = ktime_get_mono_fast_ns();
>>> -    if (hw_tail == stream->oa_buffer.aging_tail) {
>>> +    if (hw_tail == aging_tail) {
>>>          /* If the HW tail hasn't move since the last check and the HW
>>>           * tail has been aging for long enough, declare it the new
>>>           * tail.
>>> @@ -486,8 +487,6 @@ static bool oa_buffer_check_unlocked(struct 
>>> i915_perf_stream *stream)
>>>           * a read() in progress.
>>>           */
>>>          head = stream->oa_buffer.head - gtt_offset;
>>> -
>>> -        hw_tail -= gtt_offset;
>>>          tail = hw_tail;
>>>          /* Walk the stream backward until we find at least 2 reports
>>> @@ -613,7 +612,18 @@ static int append_oa_sample(struct 
>>> i915_perf_stream *stream,
>>>      buf += sizeof(header);
>>>      if (sample_flags & SAMPLE_OA_REPORT) {
>>> -        if (copy_to_user(buf, report, report_size))
>>> +        u8 *oa_buf_end = stream->oa_buffer.vaddr + OA_BUFFER_SIZE;
>>> +        int report_size_partial = oa_buf_end - report;
>>> +
>>> +        if (report_size_partial < report_size) {
>>> +            if (copy_to_user(buf, report, report_size_partial))
>>> +                return -EFAULT;
>>> +            buf += report_size_partial;
>>> +
>>> +            if (copy_to_user(buf, stream->oa_buffer.vaddr,
>>> +                    report_size - report_size_partial))
>>> +                return -EFAULT;
>>> +        } else if (copy_to_user(buf, report, report_size))
>>>              return -EFAULT;
>>>      }
>>> @@ -682,8 +692,8 @@ static int gen8_append_oa_reports(struct 
>>> i915_perf_stream *stream,
>>>       * only be incremented by multiples of the report size (notably 
>>> also
>>>       * all a power of two).
>>>       */
>>> -    if (WARN_ONCE(head > OA_BUFFER_SIZE || head % report_size ||
>>> -              tail > OA_BUFFER_SIZE || tail % report_size,
>>> +    if (WARN_ONCE(head > OA_BUFFER_SIZE ||
>>> +              tail > OA_BUFFER_SIZE,
>>>                "Inconsistent OA buffer pointers: head = %u, tail = 
>>> %u\n",
>>>                head, tail))
>>>          return -EIO;
>>> @@ -697,20 +707,6 @@ static int gen8_append_oa_reports(struct 
>>> i915_perf_stream *stream,
>>>          u32 ctx_id;
>>>          u32 reason;
>>> -        /*
>>> -         * All the report sizes factor neatly into the buffer
>>> -         * size so we never expect to see a report split
>>> -         * between the beginning and end of the buffer.
>>> -         *
>>> -         * Given the initial alignment check a misalignment
>>> -         * here would imply a driver bug that would result
>>> -         * in an overrun.
>>> -         */
>>> -        if (WARN_ON((OA_BUFFER_SIZE - head) < report_size)) {
>>> -            DRM_ERROR("Spurious OA head ptr: non-integral report 
>>> offset\n");
>>> -            break;
>>> -        }
>>> -
>>>          /*
>>>           * The reason field includes flags identifying what
>>>           * triggered this specific report (mostly timer
>>> @@ -956,8 +952,8 @@ static int gen7_append_oa_reports(struct 
>>> i915_perf_stream *stream,
>>>       * only be incremented by multiples of the report size (notably 
>>> also
>>>       * all a power of two).
>>>       */
>>> -    if (WARN_ONCE(head > OA_BUFFER_SIZE || head % report_size ||
>>> -              tail > OA_BUFFER_SIZE || tail % report_size,
>>> +    if (WARN_ONCE(head > OA_BUFFER_SIZE ||
>>> +              tail > OA_BUFFER_SIZE,
>>>                "Inconsistent OA buffer pointers: head = %u, tail = 
>>> %u\n",
>>>                head, tail))
>>>          return -EIO;
>>> @@ -969,19 +965,6 @@ static int gen7_append_oa_reports(struct 
>>> i915_perf_stream *stream,
>>>          u8 *report = oa_buf_base + head;
>>>          u32 *report32 = (void *)report;
>>> -        /* All the report sizes factor neatly into the buffer
>>> -         * size so we never expect to see a report split
>>> -         * between the beginning and end of the buffer.
>>> -         *
>>> -         * Given the initial alignment check a misalignment
>>> -         * here would imply a driver bug that would result
>>> -         * in an overrun.
>>> -         */
>>> -        if (WARN_ON((OA_BUFFER_SIZE - head) < report_size)) {
>>> -            DRM_ERROR("Spurious OA head ptr: non-integral report 
>>> offset\n");
>>> -            break;
>>> -        }
>>> -
>>>          /* The report-ID field for periodic samples includes
>>>           * some undocumented flags related to what triggered
>>>           * the report and is never expected to be zero so we
>>
>>
>



More information about the Intel-gfx mailing list