[Mesa-dev] [PATCH 06/10] i965: perf: keep on reading reports until delimiting timestamp
Lionel Landwerlin
lionel.g.landwerlin at intel.com
Thu Jun 15 14:45:38 UTC 2017
On 15/06/17 08:51, Kenneth Graunke wrote:
> On Wednesday, June 14, 2017 5:38:31 PM PDT Lionel Landwerlin wrote:
>> Due to an underlying hardware race condition, we have no guarantee
>> that all the reports coming from the OA buffer related to the workload
>> we're trying to measure have landed to memory by the time all the work
>> submitted has completed. That means we need to keep on reading the OA
>> stream until we read a report with a timestamp older than the
> timestamp newer / larger / more recent, right? i.e. keep going until
> you find a report in the stream beyond the expected end? (Unless we're
> reading backwards...)
More recent indeed :)
I meant older in the sense that the number is larger (like I'm older
because my age is a bigger number...), but it's all inverted here :)
Fixed, thanks a lot!
>
>> timestamp recored by the MI_REPORT_PERF_COUNT at the end of the
>> performance query.
>>
>> v2: fix uninitialized offset variable to 0 (Lionel)
>>
>> v3: rework the reading to avoid blocking the user of the API unless
>> requested (Rob)
>>
>> v4: fix a bug that makes the i965 driver reading the perf stream when
>> not necessary, leading to very long counter accumulation times
>> (Lionel)
>>
>> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
>> Cc: Robert Bragg <robert at sixbynine.org>
>> ---
>> src/mesa/drivers/dri/i965/brw_performance_query.c | 133 ++++++++++++++++++----
>> 1 file changed, 113 insertions(+), 20 deletions(-)
>>
>> diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c b/src/mesa/drivers/dri/i965/brw_performance_query.c
>> index d10141bf07a..d11784c0352 100644
>> --- a/src/mesa/drivers/dri/i965/brw_performance_query.c
>> +++ b/src/mesa/drivers/dri/i965/brw_performance_query.c
>> @@ -219,6 +219,7 @@ struct brw_oa_sample_buf {
>> int refcount;
>> int len;
>> uint8_t buf[I915_PERF_OA_SAMPLE_SIZE * 10];
>> + uint32_t last_timestamp;
>> };
>>
>> /**
>> @@ -244,6 +245,11 @@ struct brw_perf_query_object
>> struct brw_bo *bo;
>>
>> /**
>> + * Address of mapped of @bo
>> + */
>> + void *map;
>> +
>> + /**
>> * The MI_REPORT_PERF_COUNT command lets us specify a unique
>> * ID that will be reflected in the resulting OA report
>> * that's written by the GPU. This is the ID we're expecting
>> @@ -712,11 +718,26 @@ discard_all_queries(struct brw_context *brw)
>> }
>> }
>>
>> -static bool
>> -read_oa_samples(struct brw_context *brw)
>> +enum OaReadStatus {
>> + OA_READ_STATUS_ERROR,
>> + OA_READ_STATUS_UNFINISHED,
>> + OA_READ_STATUS_FINISHED,
>> +};
>> +
>> +static enum OaReadStatus
>> +read_oa_samples_until(struct brw_context *brw,
>> + uint32_t start_timestamp,
>> + uint32_t end_timestamp)
>> {
>> + struct exec_node *tail_node =
>> + exec_list_get_tail(&brw->perfquery.sample_buffers);
>> + struct brw_oa_sample_buf *tail_buf =
>> + exec_node_data(struct brw_oa_sample_buf, tail_node, link);
>> + uint32_t last_timestamp = tail_buf->last_timestamp;
>> +
>> while (1) {
>> struct brw_oa_sample_buf *buf = get_free_sample_buf(brw);
>> + uint32_t offset;
>> int len;
>>
>> while ((len = read(brw->perfquery.oa_stream_fd, buf->buf,
>> @@ -728,28 +749,94 @@ read_oa_samples(struct brw_context *brw)
>>
>> if (len < 0) {
>> if (errno == EAGAIN)
>> - return true;
>> + return ((last_timestamp - start_timestamp) >=
>> + (end_timestamp - start_timestamp)) ?
>> + OA_READ_STATUS_FINISHED :
>> + OA_READ_STATUS_UNFINISHED;
>> else {
>> DBG("Error reading i915 perf samples: %m\n");
>> - return false;
>> }
>> - } else {
>> + } else
>> DBG("Spurious EOF reading i915 perf samples\n");
>> - return false;
>> - }
>> +
>> + return OA_READ_STATUS_ERROR;
>> }
>>
>> buf->len = len;
>> exec_list_push_tail(&brw->perfquery.sample_buffers, &buf->link);
>> +
>> + /* Go through the reports and update the last timestamp. */
>> + offset = 0;
>> + while (offset < buf->len) {
>> + const struct drm_i915_perf_record_header *header =
>> + (const struct drm_i915_perf_record_header *) &buf->buf[offset];
>> + uint32_t *report = (uint32_t *) (header + 1);
>> +
>> + if (header->type == DRM_I915_PERF_RECORD_SAMPLE)
>> + last_timestamp = report[1];
>> +
>> + offset += header->size;
>> + }
>> +
>> + buf->last_timestamp = last_timestamp;
>> }
>>
>> unreachable("not reached");
>> + return OA_READ_STATUS_ERROR;
>> +}
>> +
>> +/**
>> + * Try to read all the reports until either the delimiting timestamp
>> + * or an error arises.
>> + */
>> +static bool
>> +read_oa_samples_for_query(struct brw_context *brw,
>> + struct brw_perf_query_object *obj)
>> +{
>> + uint32_t *start;
>> + uint32_t *last;
>> + uint32_t *end;
>> +
>> + /* We need the MI_REPORT_PERF_COUNT to land before we can start
>> + * accumulate. */
> */ goes on its own line (here and elsewhere).
>
> This all looks right to me.
>
> Reviewed-by: Kenneth Graunke <kenneth at whitecape.org>
More information about the mesa-dev
mailing list