[igt-dev] [PATCH 3/5] lib/i915/perf: Add new record for mmapped OA buffer

Umesh Nerlige Ramappa umesh.nerlige.ramappa at intel.com
Mon Aug 30 19:33:35 UTC 2021


The DRM_I915_PERF_RECORD_SAMPLE header is added by i915 when the user
issues a read() to fetch counter reports from the OA buffer. When the
user mmaps the OA buffer instead, the user sees the raw reports without
this header.

Introduce INTEL_PERF_RECORD_TYPE_MULTIPLE_SAMPLE in the perf library to
track reports from an mmapped OA buffer.

While each DRM_I915_PERF_RECORD_SAMPLE record corresponds to a single OA
report, INTEL_PERF_RECORD_TYPE_MULTIPLE_SAMPLE corresponds to multiple
OA reports.

By design, these two record types cannot be mixed. i915-perf-recorder
uses the mmapped OA buffer when the -M option is given. Once -M is
chosen, all samples are INTEL_PERF_RECORD_TYPE_MULTIPLE_SAMPLE.

The way timeline events are created and displayed in GPUvis remains
the same; the only change is that these events are now sourced from
INTEL_PERF_RECORD_TYPE_MULTIPLE_SAMPLE records.

Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa at intel.com>
---
 lib/i915/perf.c                    |  7 ++-
 lib/i915/perf.h                    |  4 +-
 lib/i915/perf_data.h               |  3 +
 lib/i915/perf_data_reader.c        | 95 +++++++++++++++++++++++++++++-
 lib/i915/perf_data_reader.h        |  2 +
 tools/i915-perf/i915_perf_reader.c |  3 +-
 6 files changed, 106 insertions(+), 8 deletions(-)

diff --git a/lib/i915/perf.c b/lib/i915/perf.c
index 9cfa3bca..3ace687c 100644
--- a/lib/i915/perf.c
+++ b/lib/i915/perf.c
@@ -626,10 +626,11 @@ accumulate_uint40(int a_index,
 void intel_perf_accumulate_reports(struct intel_perf_accumulator *acc,
 				   int oa_format,
 				   const struct drm_i915_perf_record_header *record0,
-				   const struct drm_i915_perf_record_header *record1)
+				   const struct drm_i915_perf_record_header *record1,
+				   uint32_t offset0, uint32_t offset1)
 {
-	const uint32_t *start = (const uint32_t *)(record0 + 1);
-	const uint32_t *end = (const uint32_t *)(record1 + 1);
+	const uint32_t *start = (const uint32_t *)(record0 + 1) + (offset0 / 4);
+	const uint32_t *end = (const uint32_t *)(record1 + 1) + (offset1 / 4);
 	uint64_t *deltas = acc->deltas;
 	int idx = 0;
 	int i;
diff --git a/lib/i915/perf.h b/lib/i915/perf.h
index d2429c47..7706eb43 100644
--- a/lib/i915/perf.h
+++ b/lib/i915/perf.h
@@ -238,7 +238,9 @@ void intel_perf_load_perf_configs(struct intel_perf *perf, int drm_fd);
 void intel_perf_accumulate_reports(struct intel_perf_accumulator *acc,
 				   int oa_format,
 				   const struct drm_i915_perf_record_header *record0,
-				   const struct drm_i915_perf_record_header *record1);
+				   const struct drm_i915_perf_record_header *record1,
+				   uint32_t report_start_offset,
+				   uint32_t report_end_offset);
 
 #ifdef __cplusplus
 };
diff --git a/lib/i915/perf_data.h b/lib/i915/perf_data.h
index fb3556f6..a730a0b4 100644
--- a/lib/i915/perf_data.h
+++ b/lib/i915/perf_data.h
@@ -52,6 +52,9 @@ enum intel_perf_record_type {
 
 	/* intel_perf_record_timestamp_correlation */
 	INTEL_PERF_RECORD_TYPE_TIMESTAMP_CORRELATION,
+
+	/* Raw OA reports from an mmapped OA buffer (multiple reports per record) */
+	INTEL_PERF_RECORD_TYPE_MULTIPLE_SAMPLE,
 };
 
 /* This structure cannot ever change. */
diff --git a/lib/i915/perf_data_reader.c b/lib/i915/perf_data_reader.c
index e69189ac..ad0b2daf 100644
--- a/lib/i915/perf_data_reader.c
+++ b/lib/i915/perf_data_reader.c
@@ -131,6 +131,7 @@ parse_data(struct intel_perf_data_reader *reader)
 
 		switch (header->type) {
 		case DRM_I915_PERF_RECORD_SAMPLE:
+		case INTEL_PERF_RECORD_TYPE_MULTIPLE_SAMPLE:
 			append_record(reader, header);
 			break;
 
@@ -254,6 +255,7 @@ static void
 append_timeline_event(struct intel_perf_data_reader *reader,
 		      uint64_t ts_start, uint64_t ts_end,
 		      uint32_t record_start, uint32_t record_end,
+		      uint32_t start_offset, uint32_t end_offset,
 		      uint32_t hw_id)
 {
 	if (reader->n_timelines >= reader->n_allocated_timelines) {
@@ -274,12 +276,81 @@ append_timeline_event(struct intel_perf_data_reader *reader,
 		correlate_gpu_timestamp(reader, ts_end);
 	reader->timelines[reader->n_timelines].record_start = record_start;
 	reader->timelines[reader->n_timelines].record_end = record_end;
+	reader->timelines[reader->n_timelines].report_start_offset = start_offset;
+	reader->timelines[reader->n_timelines].report_end_offset = end_offset;
 	reader->timelines[reader->n_timelines].hw_id = hw_id;
 	reader->n_timelines++;
 }
 
+struct perf_record_report {
+	uint32_t record_idx;
+	uint32_t report_offset;
+	uint32_t ctx_id;
+	uint64_t gpu_ts;
+};
+
 static void
-generate_cpu_events(struct intel_perf_data_reader *reader)
+__init_perf_record_report(struct intel_perf_data_reader *reader,
+			  struct perf_record_report *prr)
+{
+	const struct drm_i915_perf_record_header *record;
+	const uint8_t *report;
+
+	record = reader->records[prr->record_idx];
+	report = (const uint8_t *)(record + 1) + prr->report_offset;
+	prr->ctx_id = oa_report_ctx_id(&reader->devinfo, report);
+	prr->gpu_ts = oa_report_timestamp(report);
+}
+
+static bool
+__context_switched(struct intel_perf_data_reader *reader,
+		   struct perf_record_report *prev,
+		   struct perf_record_report *curr)
+{
+	__init_perf_record_report(reader, prev);
+	__init_perf_record_report(reader, curr);
+
+	return (prev->ctx_id != curr->ctx_id);
+}
+
+static void
+append_timeline(struct intel_perf_data_reader *reader,
+		struct perf_record_report *prev,
+		struct perf_record_report *curr)
+{
+	append_timeline_event(reader, prev->gpu_ts, curr->gpu_ts,
+			prev->record_idx, curr->record_idx,
+			prev->report_offset, curr->report_offset,
+			prev->ctx_id);
+}
+
+static void
+generate_cpu_events_multi_sample(struct intel_perf_data_reader *reader)
+{
+	uint32_t report_size = reader->metric_set->perf_raw_size;
+	struct perf_record_report prev = {0}, curr = {0};
+	int i;
+
+	for (i = 0; i < reader->n_records; i++) {
+		uint32_t length = reader->records[i]->size -
+				  sizeof(*(reader->records[i]));
+
+		curr.record_idx = i;
+		for (curr.report_offset = 0;
+		     curr.report_offset < length;
+		     curr.report_offset += report_size)
+			if (__context_switched(reader, &prev, &curr)) {
+				append_timeline(reader, &prev, &curr);
+				prev = curr;
+			}
+	}
+
+	if (!memcmp(&prev, &curr, sizeof(prev)))
+		append_timeline(reader, &prev, &curr);
+}
+
+static void
+generate_cpu_events_oa_sample(struct intel_perf_data_reader *reader)
 {
 	uint32_t last_header_idx = 0;
 	const struct drm_i915_perf_record_header *last_header = reader->records[0],
@@ -303,14 +374,32 @@ generate_cpu_events(struct intel_perf_data_reader *reader)
 		if (last_ctx_id == current_ctx_id)
 			continue;
 
-		append_timeline_event(reader, gpu_ts_start, gpu_ts_end, last_header_idx, i, last_ctx_id);
+		append_timeline_event(reader, gpu_ts_start, gpu_ts_end,
+				      last_header_idx, i,
+				      0, 0,
+				      last_ctx_id);
 
 		last_header = current_header;
 		last_header_idx = i;
 	}
 
 	if (last_header != current_header)
-		append_timeline_event(reader, gpu_ts_start, gpu_ts_end, last_header_idx, reader->n_records - 1, last_ctx_id);
+		append_timeline_event(reader, gpu_ts_start, gpu_ts_end,
+				      last_header_idx, reader->n_records - 1,
+				      0, 0,
+				      last_ctx_id);
+}
+
+static void
+generate_cpu_events(struct intel_perf_data_reader *reader)
+{
+	const struct drm_i915_perf_record_header *hdr = reader->records[0];
+
+	if (hdr->type == DRM_I915_PERF_RECORD_SAMPLE)
+		generate_cpu_events_oa_sample(reader);
+
+	if (hdr->type == INTEL_PERF_RECORD_TYPE_MULTIPLE_SAMPLE)
+		generate_cpu_events_multi_sample(reader);
 }
 
 static void
diff --git a/lib/i915/perf_data_reader.h b/lib/i915/perf_data_reader.h
index f625f12d..a9f14a1d 100644
--- a/lib/i915/perf_data_reader.h
+++ b/lib/i915/perf_data_reader.h
@@ -44,6 +44,8 @@ struct intel_perf_timeline_item {
 	/* Offsets into intel_perf_data_reader.records */
 	uint32_t record_start;
 	uint32_t record_end;
+	uint32_t report_start_offset;
+	uint32_t report_end_offset;
 
 	uint32_t hw_id;
 
diff --git a/tools/i915-perf/i915_perf_reader.c b/tools/i915-perf/i915_perf_reader.c
index e51f5a5d..12638685 100644
--- a/tools/i915-perf/i915_perf_reader.c
+++ b/tools/i915-perf/i915_perf_reader.c
@@ -252,7 +252,8 @@ main(int argc, char *argv[])
 			item->hw_id, item->hw_id == 0xffffffff ? "(idle)" : "");
 
 		intel_perf_accumulate_reports(&accu, reader.metric_set->perf_oa_format,
-					      i915_report0, i915_report1);
+					      i915_report0, i915_report1,
+					      item->report_start_offset, item->report_end_offset);
 
 		for (uint32_t c = 0; c < n_counters; c++) {
 			struct intel_perf_logical_counter *counter = counters[c];
-- 
2.20.1



More information about the igt-dev mailing list