[Mesa-dev] [PATCH 06/10] i965: perf: keep on reading reports until delimiting timestamp

Lionel Landwerlin lionel.g.landwerlin at intel.com
Thu Jun 15 14:45:38 UTC 2017


On 15/06/17 08:51, Kenneth Graunke wrote:
> On Wednesday, June 14, 2017 5:38:31 PM PDT Lionel Landwerlin wrote:
>> Due to an underlying hardware race condition, we have no guarantee
>> that all the reports coming from the OA buffer related to the workload
>> we're trying to measure have landed to memory by the time all the work
>> submitted has completed. That means we need to keep on reading the OA
>> stream until we read a report with a timestamp older than the
> timestamp newer / larger / more recent, right?  i.e. keep going until
> you find a report in the stream beyond the expected end?  (Unless we're
> reading backwards...)

More recent indeed :)
I meant older in the sense that the number is larger (like I'm older 
because my age is a bigger number...), but it's all inverted here :)

Fixed, thanks a lot!

>
>> timestamp recored by the MI_REPORT_PERF_COUNT at the end of the
>> performance query.
>>
>> v2: fix uninitialized offset variable to 0 (Lionel)
>>
>> v3: rework the reading to avoid blocking the user of the API unless
>>      requested (Rob)
>>
>> v4: fix a bug that makes the i965 driver reading the perf stream when
>>      not necessary, leading to very long counter accumulation times
>>      (Lionel)
>>
>> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
>> Cc: Robert Bragg <robert at sixbynine.org>
>> ---
>>   src/mesa/drivers/dri/i965/brw_performance_query.c | 133 ++++++++++++++++++----
>>   1 file changed, 113 insertions(+), 20 deletions(-)
>>
>> diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c b/src/mesa/drivers/dri/i965/brw_performance_query.c
>> index d10141bf07a..d11784c0352 100644
>> --- a/src/mesa/drivers/dri/i965/brw_performance_query.c
>> +++ b/src/mesa/drivers/dri/i965/brw_performance_query.c
>> @@ -219,6 +219,7 @@ struct brw_oa_sample_buf {
>>      int refcount;
>>      int len;
>>      uint8_t buf[I915_PERF_OA_SAMPLE_SIZE * 10];
>> +   uint32_t last_timestamp;
>>   };
>>   
>>   /**
>> @@ -244,6 +245,11 @@ struct brw_perf_query_object
>>            struct brw_bo *bo;
>>   
>>            /**
>> +          * Address of mapped of @bo
>> +          */
>> +         void *map;
>> +
>> +         /**
>>             * The MI_REPORT_PERF_COUNT command lets us specify a unique
>>             * ID that will be reflected in the resulting OA report
>>             * that's written by the GPU. This is the ID we're expecting
>> @@ -712,11 +718,26 @@ discard_all_queries(struct brw_context *brw)
>>      }
>>   }
>>   
>> -static bool
>> -read_oa_samples(struct brw_context *brw)
>> +enum OaReadStatus {
>> +   OA_READ_STATUS_ERROR,
>> +   OA_READ_STATUS_UNFINISHED,
>> +   OA_READ_STATUS_FINISHED,
>> +};
>> +
>> +static enum OaReadStatus
>> +read_oa_samples_until(struct brw_context *brw,
>> +                      uint32_t start_timestamp,
>> +                      uint32_t end_timestamp)
>>   {
>> +   struct exec_node *tail_node =
>> +      exec_list_get_tail(&brw->perfquery.sample_buffers);
>> +   struct brw_oa_sample_buf *tail_buf =
>> +      exec_node_data(struct brw_oa_sample_buf, tail_node, link);
>> +   uint32_t last_timestamp = tail_buf->last_timestamp;
>> +
>>      while (1) {
>>         struct brw_oa_sample_buf *buf = get_free_sample_buf(brw);
>> +      uint32_t offset;
>>         int len;
>>   
>>         while ((len = read(brw->perfquery.oa_stream_fd, buf->buf,
>> @@ -728,28 +749,94 @@ read_oa_samples(struct brw_context *brw)
>>   
>>            if (len < 0) {
>>               if (errno == EAGAIN)
>> -               return true;
>> +               return ((last_timestamp - start_timestamp) >=
>> +                       (end_timestamp - start_timestamp)) ?
>> +                      OA_READ_STATUS_FINISHED :
>> +                      OA_READ_STATUS_UNFINISHED;
>>               else {
>>                  DBG("Error reading i915 perf samples: %m\n");
>> -               return false;
>>               }
>> -         } else {
>> +         } else
>>               DBG("Spurious EOF reading i915 perf samples\n");
>> -            return false;
>> -         }
>> +
>> +         return OA_READ_STATUS_ERROR;
>>         }
>>   
>>         buf->len = len;
>>         exec_list_push_tail(&brw->perfquery.sample_buffers, &buf->link);
>> +
>> +      /* Go through the reports and update the last timestamp. */
>> +      offset = 0;
>> +      while (offset < buf->len) {
>> +         const struct drm_i915_perf_record_header *header =
>> +            (const struct drm_i915_perf_record_header *) &buf->buf[offset];
>> +         uint32_t *report = (uint32_t *) (header + 1);
>> +
>> +         if (header->type == DRM_I915_PERF_RECORD_SAMPLE)
>> +            last_timestamp = report[1];
>> +
>> +         offset += header->size;
>> +      }
>> +
>> +      buf->last_timestamp = last_timestamp;
>>      }
>>   
>>      unreachable("not reached");
>> +   return OA_READ_STATUS_ERROR;
>> +}
>> +
>> +/**
>> + * Try to read all the reports until either the delimiting timestamp
>> + * or an error arises.
>> + */
>> +static bool
>> +read_oa_samples_for_query(struct brw_context *brw,
>> +                          struct brw_perf_query_object *obj)
>> +{
>> +   uint32_t *start;
>> +   uint32_t *last;
>> +   uint32_t *end;
>> +
>> +   /* We need the MI_REPORT_PERF_COUNT to land before we can start
>> +    * accumulate. */
>     */ goes on its own line (here and elsewhere).
>
> This all looks right to me.
>
> Reviewed-by: Kenneth Graunke <kenneth at whitecape.org>




More information about the mesa-dev mailing list