[Intel-gfx] [PATCH 2/3] drm/i915/perf: Add support for report sizes that are not power of 2
Lionel Landwerlin
lionel.g.landwerlin at intel.com
Wed Sep 18 08:21:01 UTC 2019
On 16/09/2019 22:17, Umesh Nerlige Ramappa wrote:
> On Sun, Sep 15, 2019 at 02:24:41PM +0300, Lionel Landwerlin wrote:
>> On 14/09/2019 02:06, Umesh Nerlige Ramappa wrote:
>>> OA perf unit supports non-power of 2 report sizes. Enable support for
>>> these sizes in the driver.
>>>
>>> Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa at intel.com>
>>> ---
>>> drivers/gpu/drm/i915/i915_perf.c | 59 ++++++++++++--------------------
>>> 1 file changed, 21 insertions(+), 38 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_perf.c
>>> b/drivers/gpu/drm/i915/i915_perf.c
>>> index 50b6d154fd46..482fca3da7de 100644
>>> --- a/drivers/gpu/drm/i915/i915_perf.c
>>> +++ b/drivers/gpu/drm/i915/i915_perf.c
>>> @@ -450,7 +450,7 @@ static bool oa_buffer_check_unlocked(struct
>>> i915_perf_stream *stream)
>>> u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
>>> int report_size = stream->oa_buffer.format_size;
>>> unsigned long flags;
>>> - u32 hw_tail;
>>> + u32 hw_tail, aging_tail;
>>> u64 now;
>>> /* We have to consider the (unlikely) possibility that read()
>>> errors
>>> @@ -459,16 +459,17 @@ static bool oa_buffer_check_unlocked(struct
>>> i915_perf_stream *stream)
>>> */
>>> spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
>>> - hw_tail = dev_priv->perf.ops.oa_hw_tail_read(stream);
>>> + hw_tail = dev_priv->perf.ops.oa_hw_tail_read(stream) - gtt_offset;
>>> + aging_tail = stream->oa_buffer.aging_tail - gtt_offset;
>>> /* The tail pointer increases in 64 byte increments,
>>> * not in report_size steps...
>>> */
>>> - hw_tail &= ~(report_size - 1);
>>> + hw_tail = OA_TAKEN(hw_tail, (OA_TAKEN(hw_tail, aging_tail) %
>>> report_size));
>>
>>
>> I'm struggling to parse this line above and I'm not 100% sure it's
>> correct.
>>
>> Could add a comment to explain what is going on?
>
> The aging tail is always pointing to the boundary of a report whereas
> the hw_tail is advancing in 64 byte increments.
>
> The innermost OA_TAKEN is returning the number of bytes between the
> hw_tail and the aging_tail. The modulo is getting the size of the
> partial report (if any).
>
> The outermost OA_TAKEN is subtracting the size of partial report from
> the hw_tail to get a hw_tail that points to the boundary of the last
> full report.
>
> The value of hw_tail would be the same as from the deleted line of code
> above this line.
>
> Thanks,
> Umesh
Thanks, I ran a few tests locally to convince myself it's correct :)
It's still a bit difficult to parse, probably because OA_TAKEN() wasn't
meant for this.
Could create a helper function that does this computation, something
like this :
static inline u32 align_hw_tail_to_report_boundary(u32 hw_tail, u32
last_aligned_tail)
{
/* Compute potentially partially landed report in the OA buffer */
u32 partial_report_size = OA_TAKEN(hw_tail, last_aligned_tail) %
report_size;
/* Substract that partial amount off the tail. */
return (hw_tail - partial_report_size) % OA_BUFFER_SIZE;
}
Cheers,
-Lionel
>
>>
>>
>> Thanks,
>>
>>
>> -Lionel
>>
>>
>>> now = ktime_get_mono_fast_ns();
>>> - if (hw_tail == stream->oa_buffer.aging_tail) {
>>> + if (hw_tail == aging_tail) {
>>> /* If the HW tail hasn't move since the last check and the HW
>>> * tail has been aging for long enough, declare it the new
>>> * tail.
>>> @@ -486,8 +487,6 @@ static bool oa_buffer_check_unlocked(struct
>>> i915_perf_stream *stream)
>>> * a read() in progress.
>>> */
>>> head = stream->oa_buffer.head - gtt_offset;
>>> -
>>> - hw_tail -= gtt_offset;
>>> tail = hw_tail;
>>> /* Walk the stream backward until we find at least 2 reports
>>> @@ -613,7 +612,18 @@ static int append_oa_sample(struct
>>> i915_perf_stream *stream,
>>> buf += sizeof(header);
>>> if (sample_flags & SAMPLE_OA_REPORT) {
>>> - if (copy_to_user(buf, report, report_size))
>>> + u8 *oa_buf_end = stream->oa_buffer.vaddr + OA_BUFFER_SIZE;
>>> + int report_size_partial = oa_buf_end - report;
>>> +
>>> + if (report_size_partial < report_size) {
>>> + if (copy_to_user(buf, report, report_size_partial))
>>> + return -EFAULT;
>>> + buf += report_size_partial;
>>> +
>>> + if (copy_to_user(buf, stream->oa_buffer.vaddr,
>>> + report_size - report_size_partial))
>>> + return -EFAULT;
>>> + } else if (copy_to_user(buf, report, report_size))
>>> return -EFAULT;
>>> }
>>> @@ -682,8 +692,8 @@ static int gen8_append_oa_reports(struct
>>> i915_perf_stream *stream,
>>> * only be incremented by multiples of the report size (notably
>>> also
>>> * all a power of two).
>>> */
>>> - if (WARN_ONCE(head > OA_BUFFER_SIZE || head % report_size ||
>>> - tail > OA_BUFFER_SIZE || tail % report_size,
>>> + if (WARN_ONCE(head > OA_BUFFER_SIZE ||
>>> + tail > OA_BUFFER_SIZE,
>>> "Inconsistent OA buffer pointers: head = %u, tail =
>>> %u\n",
>>> head, tail))
>>> return -EIO;
>>> @@ -697,20 +707,6 @@ static int gen8_append_oa_reports(struct
>>> i915_perf_stream *stream,
>>> u32 ctx_id;
>>> u32 reason;
>>> - /*
>>> - * All the report sizes factor neatly into the buffer
>>> - * size so we never expect to see a report split
>>> - * between the beginning and end of the buffer.
>>> - *
>>> - * Given the initial alignment check a misalignment
>>> - * here would imply a driver bug that would result
>>> - * in an overrun.
>>> - */
>>> - if (WARN_ON((OA_BUFFER_SIZE - head) < report_size)) {
>>> - DRM_ERROR("Spurious OA head ptr: non-integral report
>>> offset\n");
>>> - break;
>>> - }
>>> -
>>> /*
>>> * The reason field includes flags identifying what
>>> * triggered this specific report (mostly timer
>>> @@ -956,8 +952,8 @@ static int gen7_append_oa_reports(struct
>>> i915_perf_stream *stream,
>>> * only be incremented by multiples of the report size (notably
>>> also
>>> * all a power of two).
>>> */
>>> - if (WARN_ONCE(head > OA_BUFFER_SIZE || head % report_size ||
>>> - tail > OA_BUFFER_SIZE || tail % report_size,
>>> + if (WARN_ONCE(head > OA_BUFFER_SIZE ||
>>> + tail > OA_BUFFER_SIZE,
>>> "Inconsistent OA buffer pointers: head = %u, tail =
>>> %u\n",
>>> head, tail))
>>> return -EIO;
>>> @@ -969,19 +965,6 @@ static int gen7_append_oa_reports(struct
>>> i915_perf_stream *stream,
>>> u8 *report = oa_buf_base + head;
>>> u32 *report32 = (void *)report;
>>> - /* All the report sizes factor neatly into the buffer
>>> - * size so we never expect to see a report split
>>> - * between the beginning and end of the buffer.
>>> - *
>>> - * Given the initial alignment check a misalignment
>>> - * here would imply a driver bug that would result
>>> - * in an overrun.
>>> - */
>>> - if (WARN_ON((OA_BUFFER_SIZE - head) < report_size)) {
>>> - DRM_ERROR("Spurious OA head ptr: non-integral report
>>> offset\n");
>>> - break;
>>> - }
>>> -
>>> /* The report-ID field for periodic samples includes
>>> * some undocumented flags related to what triggered
>>> * the report and is never expected to be zero so we
>>
>>
>
More information about the Intel-gfx
mailing list