[Mesa-dev] [PATCH 06/10] i965: perf: keep on reading reports until delimiting timestamp
Kenneth Graunke
kenneth at whitecape.org
Thu Jun 15 07:51:32 UTC 2017
On Wednesday, June 14, 2017 5:38:31 PM PDT Lionel Landwerlin wrote:
> Due to an underlying hardware race condition, we have no guarantee
> that all the reports coming from the OA buffer related to the workload
> we're trying to measure have landed to memory by the time all the work
> submitted has completed. That means we need to keep on reading the OA
> stream until we read a report with a timestamp older than the
timestamp newer / larger / more recent, right? i.e. keep going until
you find a report in the stream beyond the expected end? (Unless we're
reading backwards...)
> timestamp recored by the MI_REPORT_PERF_COUNT at the end of the
> performance query.
>
> v2: fix uninitialized offset variable to 0 (Lionel)
>
> v3: rework the reading to avoid blocking the user of the API unless
> requested (Rob)
>
> v4: fix a bug that makes the i965 driver reading the perf stream when
> not necessary, leading to very long counter accumulation times
> (Lionel)
>
> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
> Cc: Robert Bragg <robert at sixbynine.org>
> ---
> src/mesa/drivers/dri/i965/brw_performance_query.c | 133 ++++++++++++++++++----
> 1 file changed, 113 insertions(+), 20 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c b/src/mesa/drivers/dri/i965/brw_performance_query.c
> index d10141bf07a..d11784c0352 100644
> --- a/src/mesa/drivers/dri/i965/brw_performance_query.c
> +++ b/src/mesa/drivers/dri/i965/brw_performance_query.c
> @@ -219,6 +219,7 @@ struct brw_oa_sample_buf {
> int refcount;
> int len;
> uint8_t buf[I915_PERF_OA_SAMPLE_SIZE * 10];
> + uint32_t last_timestamp;
> };
>
> /**
> @@ -244,6 +245,11 @@ struct brw_perf_query_object
> struct brw_bo *bo;
>
> /**
> + * Address of mapped of @bo
> + */
> + void *map;
> +
> + /**
> * The MI_REPORT_PERF_COUNT command lets us specify a unique
> * ID that will be reflected in the resulting OA report
> * that's written by the GPU. This is the ID we're expecting
> @@ -712,11 +718,26 @@ discard_all_queries(struct brw_context *brw)
> }
> }
>
> -static bool
> -read_oa_samples(struct brw_context *brw)
> +enum OaReadStatus {
> + OA_READ_STATUS_ERROR,
> + OA_READ_STATUS_UNFINISHED,
> + OA_READ_STATUS_FINISHED,
> +};
> +
> +static enum OaReadStatus
> +read_oa_samples_until(struct brw_context *brw,
> + uint32_t start_timestamp,
> + uint32_t end_timestamp)
> {
> + struct exec_node *tail_node =
> + exec_list_get_tail(&brw->perfquery.sample_buffers);
> + struct brw_oa_sample_buf *tail_buf =
> + exec_node_data(struct brw_oa_sample_buf, tail_node, link);
> + uint32_t last_timestamp = tail_buf->last_timestamp;
> +
> while (1) {
> struct brw_oa_sample_buf *buf = get_free_sample_buf(brw);
> + uint32_t offset;
> int len;
>
> while ((len = read(brw->perfquery.oa_stream_fd, buf->buf,
> @@ -728,28 +749,94 @@ read_oa_samples(struct brw_context *brw)
>
> if (len < 0) {
> if (errno == EAGAIN)
> - return true;
> + return ((last_timestamp - start_timestamp) >=
> + (end_timestamp - start_timestamp)) ?
> + OA_READ_STATUS_FINISHED :
> + OA_READ_STATUS_UNFINISHED;
> else {
> DBG("Error reading i915 perf samples: %m\n");
> - return false;
> }
> - } else {
> + } else
> DBG("Spurious EOF reading i915 perf samples\n");
> - return false;
> - }
> +
> + return OA_READ_STATUS_ERROR;
> }
>
> buf->len = len;
> exec_list_push_tail(&brw->perfquery.sample_buffers, &buf->link);
> +
> + /* Go through the reports and update the last timestamp. */
> + offset = 0;
> + while (offset < buf->len) {
> + const struct drm_i915_perf_record_header *header =
> + (const struct drm_i915_perf_record_header *) &buf->buf[offset];
> + uint32_t *report = (uint32_t *) (header + 1);
> +
> + if (header->type == DRM_I915_PERF_RECORD_SAMPLE)
> + last_timestamp = report[1];
> +
> + offset += header->size;
> + }
> +
> + buf->last_timestamp = last_timestamp;
> }
>
> unreachable("not reached");
> + return OA_READ_STATUS_ERROR;
> +}
> +
> +/**
> + * Try to read all the reports until either the delimiting timestamp
> + * or an error arises.
> + */
> +static bool
> +read_oa_samples_for_query(struct brw_context *brw,
> + struct brw_perf_query_object *obj)
> +{
> + uint32_t *start;
> + uint32_t *last;
> + uint32_t *end;
> +
> + /* We need the MI_REPORT_PERF_COUNT to land before we can start
> + * accumulate. */
*/ goes on its own line (here and elsewhere).
This all looks right to me.
Reviewed-by: Kenneth Graunke <kenneth at whitecape.org>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 833 bytes
Desc: This is a digitally signed message part.
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20170615/7762ce15/attachment.sig>
More information about the mesa-dev
mailing list