[Intel-gfx] [PATCH i-g-t v4] igt/scripts: trace.pl to parse the i915 tracepoints
Tvrtko Ursulin
tursulin at ursulin.net
Mon Apr 24 14:42:48 UTC 2017
From: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Given a log file created via perf with some interesting trace
events enabled, this tool can generate the timeline graph of
requests getting queued, their dependencies resolved, sent to
the GPU for executing and finally completed.
This can be useful when analyzing certain classes of performance
issues. More help is available in the tool itself.
The tool will also calculate some overall per engine statistics,
like total time engine was idle and similar.
v2:
* Address missing git add.
* Make html output optional (--html switch) and by default
just output aggregated per engine stats to stdout.
v3:
* Added --trace option which invokes perf with the correct
options automatically.
* Added --avg-delay-stats which prints averages for things
like waiting on ready, waiting on GPU and context save
duration.
* Fix warnings when no waits on an engine.
* Correct help text.
v4:
* Add --squash-ctx-id to substract engine id from ctx id
when parsing to make it easier to identify which context
is which with new i915 ctx id allocation scheme.
* Reconstruct request_out events where they are missing.
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Cc: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Harri Syrja <harri.syrja at intel.com>
Cc: Krzysztof E Olinski <krzysztof.e.olinski at intel.com>
---
scripts/Makefile.am | 2 +-
scripts/trace.pl | 990 ++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 991 insertions(+), 1 deletion(-)
create mode 100755 scripts/trace.pl
diff --git a/scripts/Makefile.am b/scripts/Makefile.am
index 85d4a5cf4e5c..641715294936 100644
--- a/scripts/Makefile.am
+++ b/scripts/Makefile.am
@@ -1,2 +1,2 @@
-dist_noinst_SCRIPTS = intel-gfx-trybot who.sh run-tests.sh
+dist_noinst_SCRIPTS = intel-gfx-trybot who.sh run-tests.sh trace.pl
noinst_PYTHON = throttle.py
diff --git a/scripts/trace.pl b/scripts/trace.pl
new file mode 100755
index 000000000000..1f524aaa0f89
--- /dev/null
+++ b/scripts/trace.pl
@@ -0,0 +1,990 @@
+#! /usr/bin/perl
+#
+# Copyright © 2017 Intel Corporation
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+# IN THE SOFTWARE.
+#
+
+use strict;
+use warnings;
+use 5.010;
+
+my $gid = 0;
+my (%db, %queue, %submit, %notify, %rings, %ctxdb, %ringmap, %reqwait);
+my @freqs;
+
+my $max_items = 3000;
+my $width_us = 32000;
+my $correct_durations = 0;
+my %ignore_ring;
+my %skip_box;
+my $html = 0;
+my $trace = 0;
+my $avg_delay_stats = 0;
+my $squash_context_id = 0;
+
+my @args;
+
+sub arg_help
+{
+ return unless scalar(@_);
+
+ if ($_[0] eq '--help' or $_[0] eq '-h') {
+ shift @_;
+print <<ENDHELP;
+Notes:
+
+ The tool parse the output generated by the 'perf script' command after the
+ correct set of i915 tracepoints have been collected via perf record.
+
+ To collect the data:
+
+ ./trace.pl --trace [command-to-be-profiled]
+
+ The above will invoke perf record, or alternatively it can be done directly:
+
+ perf record -a -c 1 -e i915:intel_gpu_freq_change, \
+ i915:i915_gem_request_add, \
+ i915:i915_gem_request_submit, \
+ i915:i915_gem_request_in, \
+ i915:i915_gem_request_out, \
+ i915:intel_engine_notify, \
+ i915:i915_gem_request_wait_begin, \
+ i915:i915_gem_request_wait_end \
+ [command-to-be-profiled]
+
+ Then create the log file with:
+
+ perf script >trace.log
+
+ This file in turn should be piped into this tool which will generate some
+ statistics out of it, or if --html was given HTML output.
+
+ HTML can be viewed from a directory containing the 'vis' JavaScript module.
+ On Ubuntu this can be installed like this:
+
+ apt-get install npm
+ npm install vis
+
+Usage:
+ trace.pl <options> <input-file >output-file
+
+ --help / -h This help text
+ --max-items=num / -m num Maximum number of boxes to put on the
+ timeline. More boxes means more work for
+ the JavaScript engine in the browser.
+ --zoom-width-ms=ms / -z ms Width of the initial timeline zoom
+ --split-requests / -s Try to split out request which were
+ submitted together due coalescing in the
+ driver. May not be 100% accurate and may
+ influence the per-engine statistics so
+ use with care.
+ --ignore-ring=id / -i id Ignore ring with the numerical id when
+ parsing the log (enum intel_engine_id).
+ Can be given multiple times.
+ --skip-box=name / -x name Do not put a certain type of a box on
+ the timeline. One of: queue, ready,
+ execute and ctxsave.
+ Can be given multiple times.
+ --html Generate HTML output.
+ --trace cmd Trace the following command.
+ --avg-delay-stats Print average delay stats.
+ --squash-ctx-id Squash context id by substracting engine
+ id from ctx id.
+ENDHELP
+
+ exit 0;
+ }
+
+ return @_;
+}
+
+sub arg_html
+{
+ return unless scalar(@_);
+
+ if ($_[0] eq '--html') {
+ shift @_;
+ $html = 1;
+ }
+
+ return @_;
+}
+
+sub arg_avg_delay_stats
+{
+ return unless scalar(@_);
+
+ if ($_[0] eq '--avg-delay-stats') {
+ shift @_;
+ $avg_delay_stats = 1;
+ }
+
+ return @_;
+}
+
+sub arg_squash_ctx_id
+{
+ return unless scalar(@_);
+
+ if ($_[0] eq '--squash-ctx-id') {
+ shift @_;
+ $squash_context_id = 1;
+ }
+
+ return @_;
+}
+
+sub arg_trace
+{
+ my @events = ( 'i915:intel_gpu_freq_change',
+ 'i915:i915_gem_request_add',
+ 'i915:i915_gem_request_submit',
+ 'i915:i915_gem_request_in',
+ 'i915:i915_gem_request_out',
+ 'i915:intel_engine_notify',
+ 'i915:i915_gem_request_wait_begin',
+ 'i915:i915_gem_request_wait_end' );
+
+ return unless scalar(@_);
+
+ if ($_[0] eq '--trace') {
+ shift @_;
+
+ unshift @_, join(',', @events);
+ unshift @_, ('perf', 'record', '-a', '-c', '1', '-e');
+
+ exec @_;
+ }
+
+ return @_;
+}
+
+sub arg_max_items
+{
+ my $val;
+
+ return unless scalar(@_);
+
+ if ($_[0] eq '--max-items' or $_[0] eq '-m') {
+ shift @_;
+ $val = shift @_;
+ } elsif ($_[0] =~ /--max-items=(\d+)/) {
+ shift @_;
+ $val = $1;
+ }
+
+ $max_items = int($val) if defined $val;
+
+ return @_;
+}
+
+sub arg_zoom_width
+{
+ my $val;
+
+ return unless scalar(@_);
+
+ if ($_[0] eq '--zoom-width-ms' or $_[0] eq '-z') {
+ shift @_;
+ $val = shift @_;
+ } elsif ($_[0] =~ /--zoom-width-ms=(\d+)/) {
+ shift @_;
+ $val = $1;
+ }
+
+ $width_us = int($val) * 1000 if defined $val;
+
+ return @_;
+}
+
+sub arg_split_requests
+{
+ return unless scalar(@_);
+
+ if ($_[0] eq '--split-requests' or $_[0] eq '-s') {
+ shift @_;
+ $correct_durations = 1;
+ }
+
+ return @_;
+}
+
+sub arg_ignore_ring
+{
+ my $val;
+
+ return unless scalar(@_);
+
+ if ($_[0] eq '--ignore-ring' or $_[0] eq '-i') {
+ shift @_;
+ $val = shift @_;
+ } elsif ($_[0] =~ /--ignore-ring=(\d+)/) {
+ shift @_;
+ $val = $1;
+ }
+
+ $ignore_ring{$val} = 1 if defined $val;
+
+ return @_;
+}
+
+sub arg_skip_box
+{
+ my $val;
+
+ return unless scalar(@_);
+
+ if ($_[0] eq '--skip-box' or $_[0] eq '-x') {
+ shift @_;
+ $val = shift @_;
+ } elsif ($_[0] =~ /--skip-box=(\d+)/) {
+ shift @_;
+ $val = $1;
+ }
+
+ $skip_box{$val} = 1 if defined $val;
+
+ return @_;
+}
+
+ at args = @ARGV;
+while (@args) {
+ my $left = scalar(@args);
+
+ @args = arg_help(@args);
+ @args = arg_html(@args);
+ @args = arg_avg_delay_stats(@args);
+ @args = arg_squash_ctx_id(@args);
+ @args = arg_trace(@args);
+ @args = arg_max_items(@args);
+ @args = arg_zoom_width(@args);
+ @args = arg_split_requests(@args);
+ @args = arg_ignore_ring(@args);
+ @args = arg_skip_box(@args);
+
+ last if $left == scalar(@args);
+}
+
+die if scalar(@args);
+
+ at ARGV = @args;
+
+sub parse_req
+{
+ my ($line, $tp) = @_;
+ state %cache;
+
+ $cache{$tp} = qr/(\d+)\.(\d+):.*$tp.*ring=(\d+), ctx=(\d+), seqno=(\d+), global(?:_seqno)?=(\d+)/ unless exists $cache{$tp};
+
+ if ($line =~ $cache{$tp}) {
+ return ($1, $2, $3, $4, $5, $6);
+ } else {
+ return undef;
+ }
+}
+
+sub parse_req_hw
+{
+ my ($line, $tp) = @_;
+ state %cache;
+
+ $cache{$tp} = qr/(\d+)\.(\d+):.*$tp.*ring=(\d+), ctx=(\d+), seqno=(\d+), global(?:_seqno)?=(\d+), port=(\d+)/ unless exists $cache{$tp};
+
+ if ($line =~ $cache{$tp}) {
+ return ($1, $2, $3, $4, $5, $6, $7);
+ } else {
+ return undef;
+ }
+}
+
+sub parse_req_wait_begin
+{
+ my ($line, $tp) = @_;
+
+ if ($line =~ /(\d+)\.(\d+):.*i915_gem_request_wait_begin.*ring=(\d+), ctx=(\d+), seqno=(\d+)/) {
+ return ($1, $2, $3, $4, $5);
+ } else {
+ return undef;
+ }
+}
+
+sub parse_notify
+{
+ my ($line) = @_;
+
+ if ($line =~ /(\d+)\.(\d+):.*intel_engine_notify.*ring=(\d+), seqno=(\d+)/) {
+ return ($1, $2, $3, $4);
+ } else {
+ return undef;
+ }
+}
+
+sub parse_freq
+{
+ my ($line) = @_;
+
+ if ($line =~ /(\d+)\.(\d+):.*intel_gpu_freq_change.*new_freq=(\d+)/) {
+ return ($1, $2, $3);
+ } else {
+ return undef;
+ }
+}
+
+sub us
+{
+ my ($s, $us) = @_;
+
+ return $s * 1000000 + $us;
+}
+
+sub db_key
+{
+ my ($ring, $ctx, $seqno) = @_;
+
+ return $ring . '/' . $ctx . '/' . $seqno;
+}
+
+sub global_key
+{
+ my ($ring, $seqno) = @_;
+
+ return $ring . '/' . $seqno;
+}
+
+sub sanitize_ctx
+{
+ my ($ctx, $ring) = @_;
+
+ $ctx = $ctx - $ring if $squash_context_id;
+
+ if (exists $ctxdb{$ctx}) {
+ return $ctx . '.' . $ctxdb{$ctx};
+ } else {
+ return $ctx;
+ }
+}
+
+sub ts
+{
+ my ($us) = @_;
+ my ($h, $m, $s);
+
+ $s = int($us / 1000000);
+ $us = $us % 1000000;
+
+ $m = int($s / 60);
+ $s = $s % 60;
+
+ $h = int($m / 60);
+ $m = $m % 60;
+
+ return sprintf('2017-01-01 %02u:%02u:%02u.%06u', int($h), int($m), int($s), int($us));
+}
+
+# Main input loop - parse lines and build the internal representation of the
+# trace using a hash of requests and some auxilliary data structures.
+my $prev_freq = 0;
+my $prev_freq_ts = 0;
+my $oldkernelwa = 0;
+my ($no_queue, $no_in);
+while (<>) {
+ my ($s, $us, $ring, $ctx, $seqno, $global_seqno, $port);
+ my $freq;
+ my $key;
+
+ chomp;
+
+ ($s, $us, $ring, $ctx, $seqno) = parse_req_wait_begin($_);
+ if (defined $s) {
+ my %rw;
+
+ next if exists $ignore_ring{$ring};
+
+ $ctx = sanitize_ctx($ctx, $ring);
+ $key = db_key($ring, $ctx, $seqno);
+
+ next if exists $reqwait{$key};
+
+ $rw{'key'} = $key;
+ $rw{'ring'} = $ring;
+ $rw{'seqno'} = $seqno;
+ $rw{'ctx'} = $ctx;
+ $rw{'start'} = us($s, $us);
+ $reqwait{$key} = \%rw;
+ next;
+ }
+
+ ($s, $us, $ring, $ctx, $seqno, $global_seqno) = parse_req($_, 'i915:i915_gem_request_wait_end');
+ if (defined $s) {
+ next if exists $ignore_ring{$ring};
+
+ $ctx = sanitize_ctx($ctx, $ring);
+ $key = db_key($ring, $ctx, $seqno);
+
+ next unless exists $reqwait{$key};
+
+ $reqwait{$key}->{'end'} = us($s, $us);
+ next;
+ }
+
+ ($s, $us, $ring, $ctx, $seqno, $global_seqno) = parse_req($_, 'i915:i915_gem_request_add');
+ if (defined $s) {
+ my $orig_ctx = $ctx;
+
+ next if exists $ignore_ring{$ring};
+
+ $ctx = sanitize_ctx($ctx, $ring);
+ $key = db_key($ring, $ctx, $seqno);
+
+ if (exists $queue{$key}) {
+ $ctxdb{$orig_ctx}++;
+ $ctx = sanitize_ctx($orig_ctx, $ring);
+ $key = db_key($ring, $ctx, $seqno);
+ }
+
+ $queue{$key} = us($s, $us);
+ next;
+ }
+
+ ($s, $us, $ring, $ctx, $seqno, $global_seqno) = parse_req($_, 'i915:i915_gem_request_submit');
+ if (defined $s) {
+ next if exists $ignore_ring{$ring};
+
+ $ctx = sanitize_ctx($ctx, $ring);
+ $key = db_key($ring, $ctx, $seqno);
+
+ die if exists $submit{$key};
+ die unless exists $queue{$key};
+
+ $submit{$key} = us($s, $us);
+ next;
+ }
+
+ ($s, $us, $ring, $ctx, $seqno, $global_seqno, $port) = parse_req_hw($_, 'i915:i915_gem_request_in');
+ if (defined $s) {
+ my %req;
+
+ next if exists $ignore_ring{$ring};
+
+ $ctx = sanitize_ctx($ctx, $ring);
+ $key = db_key($ring, $ctx, $seqno);
+
+ die if exists $db{$key};
+ if (not exists $queue{$key} and $oldkernelwa) {
+ $no_queue++;
+ next;
+ }
+ die unless exists $queue{$key};
+ die unless exists $submit{$key};
+
+ $req{'start'} = us($s, $us);
+ $req{'ring'} = $ring;
+ $req{'seqno'} = $seqno;
+ $req{'ctx'} = $ctx;
+ $req{'name'} = $ctx . '/' . $seqno;
+ $req{'global'} = $global_seqno;
+ $req{'port'} = $port;
+ $req{'queue'} = $queue{$key};
+ $req{'submit-delay'} = $submit{$key} - $queue{$key};
+ $req{'execute-delay'} = $req{'start'} - $submit{$key};
+ $rings{$ring} = $gid++ unless exists $rings{$ring};
+ $ringmap{$rings{$ring}} = $ring;
+ $db{$key} = \%req;
+ next;
+ }
+
+ ($s, $us, $ring, $ctx, $seqno, $global_seqno, $port) = parse_req($_, 'i915:i915_gem_request_out');
+ if (defined $s) {
+ my $gkey = global_key($ring, $global_seqno);
+
+ next if exists $ignore_ring{$ring};
+
+ $ctx = sanitize_ctx($ctx, $ring);
+ $key = db_key($ring, $ctx, $seqno);
+
+ if (not exists $db{$key} and $oldkernelwa) {
+ $no_in++;
+ next;
+ }
+ die unless exists $db{$key};
+ die unless exists $db{$key}->{'start'};
+ die if exists $db{$key}->{'end'};
+
+ $db{$key}->{'end'} = us($s, $us);
+ if (exists $notify{$gkey}) {
+ $db{$key}->{'notify'} = $notify{$gkey};
+ } else {
+ # No notify so far. Maybe it will arrive later which
+ # will be handled in the sanitation loop below.
+ $db{$key}->{'notify'} = $db{$key}->{'end'};
+ $db{$key}->{'no-notify'} = 1;
+ }
+ $db{$key}->{'duration'} = $db{$key}->{'notify'} - $db{$key}->{'start'};
+ $db{$key}->{'context-complete-delay'} = $db{$key}->{'end'} - $db{$key}->{'notify'};
+ next;
+ }
+
+ ($s, $us, $ring, $seqno) = parse_notify($_);
+ if (defined $s) {
+ next if exists $ignore_ring{$ring};
+ $notify{global_key($ring, $seqno)} = us($s, $us);
+ next;
+ }
+
+ ($s, $us, $freq) = parse_freq($_);
+ if (defined $s) {
+ my $cur = us($s, $us);
+
+ push @freqs, [$prev_freq_ts, $cur, $prev_freq] if $prev_freq;
+ $prev_freq_ts = $cur;
+ $prev_freq = $freq;
+ next;
+ }
+}
+
+# Sanitation pass to fixup up out of order notify and context complete, and to
+# fine the largest seqno to be used for timeline sorting purposes.
+my $max_seqno = 0;
+foreach my $key (keys %db) {
+ my $gkey = global_key($db{$key}->{'ring'}, $db{$key}->{'global'});
+
+ die unless exists $db{$key}->{'start'};
+
+ $max_seqno = $db{$key}->{'seqno'} if $db{$key}->{'seqno'} > $max_seqno;
+
+ unless (exists $db{$key}->{'end'}) {
+ # Context complete not received.
+ if (exists $notify{$gkey}) {
+ # No context complete due req merging - use notify.
+ $db{$key}->{'notify'} = $notify{$gkey};
+ $db{$key}->{'end'} = $db{$key}->{'notify'};
+ $db{$key}->{'no-end'} = 1;
+ } else {
+ # No notify and no context complete - mark it.
+ $db{$key}->{'no-end'} = 1;
+ $db{$key}->{'end'} = $db{$key}->{'start'} + 999;
+ $db{$key}->{'notify'} = $db{$key}->{'end'};
+ $db{$key}->{'incomplete'} = 1;
+ }
+
+ $db{$key}->{'duration'} = $db{$key}->{'notify'} - $db{$key}->{'start'};
+ $db{$key}->{'context-complete-delay'} = $db{$key}->{'end'} - $db{$key}->{'notify'};
+ } else {
+ # Notify arrived after context complete.
+ if (exists $db{$key}->{'no-notify'} and exists $notify{$gkey}) {
+ delete $db{$key}->{'no-notify'};
+ $db{$key}->{'notify'} = $notify{$gkey};
+ $db{$key}->{'duration'} = $db{$key}->{'notify'} - $db{$key}->{'start'};
+ $db{$key}->{'context-complete-delay'} = $db{$key}->{'end'} - $db{$key}->{'notify'};
+ }
+ }
+}
+
+# Fix up incompletes
+foreach my $key (keys %db) {
+ next unless exists $db{$key}->{'incomplete'};
+
+ # End the incomplete batch at the time next one starts
+ my $ring = $db{$key}->{'ring'};
+ my $ctx = $db{$key}->{'ctx'};
+ my $seqno = $db{$key}->{'seqno'};
+ my $next_key;
+ my $i = 1;
+
+ do {
+ $next_key = db_key($ring, $ctx, $seqno + $i);
+ $i++;
+ } until ((exists $db{$next_key} and not exists $db{$next_key}->{'incomplete'})
+ or $i > scalar(keys(%db))); # ugly stop hack
+
+ if (exists $db{$next_key}) {
+ $db{$key}->{'notify'} = $db{$next_key}->{'end'};
+ $db{$key}->{'end'} = $db{$key}->{'notify'};
+ $db{$key}->{'duration'} = $db{$key}->{'notify'} - $db{$key}->{'start'};
+ $db{$key}->{'context-complete-delay'} = $db{$key}->{'end'} - $db{$key}->{'notify'};
+ }
+}
+
+# GPU time accounting
+my (%running, %runnable, %queued, %batch_avg, %batch_total_avg, %batch_count);
+my (%submit_avg, %execute_avg, %ctxsave_avg);
+my $last_ts = 0;
+my $first_ts;
+
+foreach my $key (sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db) {
+ my $ring = $db{$key}->{'ring'};
+ my $end = $db{$key}->{'end'};
+
+ $first_ts = $db{$key}->{'queue'} if not defined $first_ts or $db{$key}->{'queue'} < $first_ts;
+ $last_ts = $end if $end > $last_ts;
+
+ # Adjust batch start with legacy execlists.
+ # Port == 2 mean batch was merged udring queuing and hasn't actually
+ # been submitted to the gpu until the batch with port < 2 is found.
+ if ($correct_durations and $oldkernelwa and $db{$key}->{'port'} == 2) {
+ my $ctx = $db{$key}->{'ctx'};
+ my $seqno = $db{$key}->{'seqno'};
+ my $next_key;
+ my $i = 1;
+
+ do {
+ $next_key = db_key($ring, $ctx, $seqno + $i);
+ $i++;
+ } until ((exists $db{$next_key} and $db{$next_key}->{'port'} < 2) or $i > scalar(keys(%db))); # ugly stop hack
+
+ if (exists $db{$next_key}) {
+ $db{$key}->{'start'} = $db{$next_key}->{'start'};
+ $db{$key}->{'end'} = $db{$next_key}->{'end'};
+ die if $db{$key}->{'start'} > $db{$key}->{'end'};
+ }
+ }
+
+ $running{$ring} += $end - $db{$key}->{'start'} unless exists $db{$key}->{'no-end'};
+ $runnable{$ring} += $db{$key}->{'execute-delay'};
+ $queued{$ring} += $db{$key}->{'start'} - $db{$key}->{'execute-delay'} - $db{$key}->{'queue'};
+
+ $batch_count{$ring}++;
+
+ # correct duration of merged batches
+ if ($correct_durations and exists $db{$key}->{'no-end'}) {
+ my $start = $db{$key}->{'start'};
+ my $ctx = $db{$key}->{'ctx'};
+ my $seqno = $db{$key}->{'seqno'};
+ my $next_key;
+ my $i = 1;
+
+ do {
+ $next_key = db_key($ring, $ctx, $seqno + $i);
+ $i++;
+ } until (exists $db{$next_key} or $i > scalar(keys(%db))); # ugly stop hack
+
+ # 20us tolerance
+ if (exists $db{$next_key} and $db{$next_key}->{'start'} < $start + 20) {
+ $db{$next_key}->{'start'} = $start + $db{$key}->{'duration'};
+ $db{$next_key}->{'start'} = $db{$next_key}->{'end'} if $db{$next_key}->{'start'} > $db{$next_key}->{'end'};
+ $db{$next_key}->{'duration'} = $db{$next_key}->{'notify'} - $db{$next_key}->{'start'};
+ $end = $db{$key}->{'notify'};
+ die if $db{$next_key}->{'start'} > $db{$next_key}->{'end'};
+ }
+ die if $db{$key}->{'start'} > $db{$key}->{'end'};
+ }
+ $batch_avg{$ring} += $db{$key}->{'duration'};
+ $batch_total_avg{$ring} += $end - $db{$key}->{'start'};
+
+ $submit_avg{$ring} += $db{$key}->{'submit-delay'};
+ $execute_avg{$ring} += $db{$key}->{'execute-delay'};
+ $ctxsave_avg{$ring} += $db{$key}->{'end'} - $db{$key}->{'notify'};
+}
+
+foreach my $ring (keys %batch_avg) {
+ $batch_avg{$ring} /= $batch_count{$ring};
+ $batch_total_avg{$ring} /= $batch_count{$ring};
+ $submit_avg{$ring} /= $batch_count{$ring};
+ $execute_avg{$ring} /= $batch_count{$ring};
+ $ctxsave_avg{$ring} /= $batch_count{$ring};
+}
+
+# Calculate engine idle time
+my %flat_busy;
+foreach my $gid (sort keys %rings) {
+ my $ring = $ringmap{$rings{$gid}};
+ my (@s_, @e_);
+
+ # Extract all GPU busy intervals and sort them.
+ foreach my $key (sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db) {
+ next unless $db{$key}->{'ring'} == $ring;
+ push @s_, $db{$key}->{'start'};
+ push @e_, $db{$key}->{'end'};
+ die if $db{$key}->{'start'} > $db{$key}->{'end'};
+ }
+
+ die unless $#s_ == $#e_;
+
+ # Flatten the intervals.
+ for my $i (1..$#s_) {
+ last if $i >= @s_; # End of array.
+ die if $e_[$i] < $s_[$i];
+ if ($s_[$i] <= $e_[$i - 1]) {
+ # Current entry overlaps with the previous one. We need
+ # to merge end of the previous interval from the list
+ # with the start of the current one.
+ splice @e_, $i - 1, 1;
+ splice @s_, $i, 1;
+ # Continue with the same element when list got squashed.
+ redo;
+ }
+ }
+
+ # Add up all busy times.
+ my $total = 0;
+ for my $i (0..$#s_) {
+ die if $e_[$i] < $s_[$i];
+
+ $total = $total + ($e_[$i] - $s_[$i]);
+ }
+
+ $flat_busy{$ring} = $total;
+}
+
+my %reqw;
+foreach my $key (keys %reqwait) {
+ $reqw{$reqwait{$key}->{'ring'}} += $reqwait{$key}->{'end'} - $reqwait{$key}->{'start'};
+}
+
+print <<ENDHTML if $html;
+<!DOCTYPE HTML>
+<html>
+<head>
+ <title>i915 GT timeline</title>
+
+ <style type="text/css">
+ body, html {
+ font-family: sans-serif;
+ }
+ </style>
+
+ <script src="node_modules/vis/dist/vis.js"></script>
+ <link href="node_modules/vis//dist/vis.css" rel="stylesheet" type="text/css" />
+</head>
+<body>
+
+<button onclick="toggleStackSubgroups()">Toggle stacking</button>
+
+<p>
+pink = requests executing on the GPU<br>
+grey = runnable requests waiting for a slot on GPU<br>
+blue = requests waiting on fences and dependencies before they are runnable<br>
+</p>
+<p>
+Boxes are in format 'ctx-id/seqno'.
+</p>
+<p>
+Use Ctrl+scroll-action to zoom-in/out and scroll-action or dragging to move around the timeline.
+</p>
+
+<div id="visualization"></div>
+
+<script type="text/javascript">
+ var container = document.getElementById('visualization');
+
+ var groups = new vis.DataSet([
+ENDHTML
+
+# var groups = new vis.DataSet([
+# {id: 1, content: 'g0'},
+# {id: 2, content: 'g1'}
+# ]);
+
+sub html_stats
+{
+ my ($stats, $group, $id) = @_;
+ my $name;
+
+ $name = 'Ring' . $group;
+ $name .= '<br><small><br>';
+ $name .= sprintf('%2.2f', $stats->{'idle'}) . '% idle<br><br>';
+ $name .= sprintf('%2.2f', $stats->{'busy'}) . '% busy<br>';
+ $name .= sprintf('%2.2f', $stats->{'runnable'}) . '% runnable<br>';
+ $name .= sprintf('%2.2f', $stats->{'queued'}) . '% queued<br><br>';
+ $name .= sprintf('%2.2f', $stats->{'wait'}) . '% wait<br><br>';
+ $name .= $stats->{'count'} . ' batches<br>';
+ $name .= sprintf('%2.2f', $stats->{'avg'}) . 'us avg batch<br>';
+ $name .= sprintf('%2.2f', $stats->{'total-avg'}) . 'us avg engine batch<br>';
+ $name .= '</small>';
+
+ print "\t{id: $id, content: '$name'},\n";
+}
+
+sub stdio_stats
+{
+ my ($stats, $group, $id) = @_;
+ my $str;
+
+ $str = 'Ring' . $group . ': ';
+ $str .= $stats->{'count'} . ' batches, ';
+ $str .= sprintf('%2.2f (%2.2f) avg batch us, ', $stats->{'avg'}, $stats->{'total-avg'});
+ $str .= sprintf('%2.2f', $stats->{'idle'}) . '% idle, ';
+ $str .= sprintf('%2.2f', $stats->{'busy'}) . '% busy, ';
+ $str .= sprintf('%2.2f', $stats->{'runnable'}) . '% runnable, ';
+ $str .= sprintf('%2.2f', $stats->{'queued'}) . '% queued, ';
+ $str .= sprintf('%2.2f', $stats->{'wait'}) . '% wait';
+ if ($avg_delay_stats) {
+ $str .= ', submit/execute/save-avg=(';
+ $str .= sprintf('%2.2f/%2.2f/%2.2f', $stats->{'submit'}, $stats->{'execute'}, $stats->{'save'});
+ }
+ $str .= ')';
+
+ say $str;
+}
+
+print "\t{id: 0, content: 'Freq'},\n" if $html;
+foreach my $group (sort keys %rings) {
+ my $name;
+ my $ring = $ringmap{$rings{$group}};
+ my $id = 1 + $rings{$group};
+ my $elapsed = $last_ts - $first_ts;
+ my %stats;
+
+ $stats{'idle'} = (1.0 - $flat_busy{$ring} / $elapsed) * 100.0;
+ $stats{'busy'} = $running{$ring} / $elapsed * 100.0;
+ $stats{'runnable'} = $runnable{$ring} / $elapsed * 100.0;
+ $stats{'queued'} = $queued{$ring} / $elapsed * 100.0;
+ $reqw{$ring} = 0 unless exists $reqw{$ring};
+ $stats{'wait'} = $reqw{$ring} / $elapsed * 100.0;
+ $stats{'count'} = $batch_count{$ring};
+ $stats{'avg'} = $batch_avg{$ring};
+ $stats{'total-avg'} = $batch_total_avg{$ring};
+ $stats{'submit'} = $submit_avg{$ring};
+ $stats{'execute'} = $execute_avg{$ring};
+ $stats{'save'} = $ctxsave_avg{$ring};
+
+ if ($html) {
+ html_stats(\%stats, $group, $id);
+ } else {
+ stdio_stats(\%stats, $group, $id);
+ }
+}
+
+exit 0 unless $html;
+
+print <<ENDHTML;
+ ]);
+
+ var items = new vis.DataSet([
+ENDHTML
+
+my $i = 0;
+foreach my $key (sort {$db{$a}->{'queue'} <=> $db{$b}->{'queue'}} keys %db) {
+ my ($name, $ctx, $seqno) = ($db{$key}->{'name'}, $db{$key}->{'ctx'}, $db{$key}->{'seqno'});
+ my ($queue, $start, $notify, $end) = ($db{$key}->{'queue'}, $db{$key}->{'start'}, $db{$key}->{'notify'}, $db{$key}->{'end'});
+ my $submit = $queue + $db{$key}->{'submit-delay'};
+ my ($content, $style);
+ my $group = 1 + $rings{$db{$key}->{'ring'}};
+ my $type = ' type: \'range\',';
+ my $startend;
+ my $skey;
+
+ # submit to execute
+ unless (exists $skip_box{'queue'}) {
+ $skey = 2 * $max_seqno * $ctx + 2 * $seqno;
+ $style = 'color: black; background-color: lightblue;';
+ $content = "$name<br>$db{$key}->{'submit-delay'}us <small>($db{$key}->{'execute-delay'}us)</small>";
+ $startend = 'start: \'' . ts($queue) . '\', end: \'' . ts($submit) . '\'';
+ print "\t{id: $i, key: $skey, $type group: $group, subgroup: 1, subgroupOrder: 1, content: '$content', $startend, style: \'$style\'},\n";
+ $i++;
+ }
+
+ # execute to start
+ unless (exists $skip_box{'ready'}) {
+ $skey = 2 * $max_seqno * $ctx + 2 * $seqno + 1;
+ $style = 'color: black; background-color: lightgrey;';
+ $content = "<small>$name<br>$db{$key}->{'execute-delay'}us</small>";
+ $startend = 'start: \'' . ts($submit) . '\', end: \'' . ts($start) . '\'';
+ print "\t{id: $i, key: $skey, $type group: $group, subgroup: 1, subgroupOrder: 2, content: '$content', $startend, style: \'$style\'},\n";
+ $i++;
+ }
+
+ # start to user interrupt
+ unless (exists $skip_box{'execute'}) {
+ $skey = -2 * $max_seqno * $ctx - 2 * $seqno - 1;
+ if (exists $db{$key}->{'incomplete'}) {
+ $style = 'color: white; background-color: red;';
+ } else {
+ $style = 'color: black; background-color: pink;';
+ }
+ $content = "$name <small>$db{$key}->{'port'}</small>";
+ $content .= ' <small><i>???</i></small> ' if exists $db{$key}->{'incomplete'};
+ $content .= ' <small><i>++</i></small> ' if exists $db{$key}->{'no-end'};
+ $content .= ' <small><i>+</i></small> ' if exists $db{$key}->{'no-notify'};
+ $content .= "<br>$db{$key}->{'duration'}us <small>($db{$key}->{'context-complete-delay'}us)</small>";
+ $startend = 'start: \'' . ts($start) . '\', end: \'' . ts($notify) . '\'';
+ print "\t{id: $i, key: $skey, $type group: $group, subgroup: 2, subgroupOrder: 3, content: '$content', $startend, style: \'$style\'},\n";
+ $i++;
+ }
+
+ # user interrupt to context complete
+ unless (exists $skip_box{'ctxsave'}) {
+ $skey = -2 * $max_seqno * $ctx - 2 * $seqno;
+ $style = 'color: black; background-color: orange;';
+ my $ctxsave = $db{$key}->{'end'} - $db{$key}->{'notify'};
+ $content = "<small>$name<br>${ctxsave}us</small>";
+ $content .= ' <small><i>???</i></small> ' if exists $db{$key}->{'incomplete'};
+ $content .= ' <small><i>++</i></small> ' if exists $db{$key}->{'no-end'};
+ $content .= ' <small><i>+</i></small> ' if exists $db{$key}->{'no-notify'};
+ $startend = 'start: \'' . ts($notify) . '\', end: \'' . ts($end) . '\'';
+ print "\t{id: $i, key: $skey, $type group: $group, subgroup: 2, subgroupOrder: 4, content: '$content', $startend, style: \'$style\'},\n";
+ $i++;
+ }
+
+ $last_ts = $end;
+
+ last if $i > $max_items;
+}
+
+foreach my $item (@freqs) {
+ my ($start, $end, $freq) = @$item;
+ my $startend;
+
+ next if $start > $last_ts;
+
+ $start = $first_ts if $start < $first_ts;
+ $end = $last_ts if $end > $last_ts;
+ $startend = 'start: \'' . ts($start) . '\', end: \'' . ts($end) . '\'';
+ print "\t{id: $i, type: 'range', group: 0, content: '$freq', $startend},\n";
+ $i++;
+}
+
+my $end_ts = ts($first_ts + $width_us);
+$first_ts = ts($first_ts);
+
+print <<ENDHTML;
+ ]);
+
+ function customOrder (a, b) {
+ // order by id
+ return a.subgroupOrder - b.subgroupOrder;
+ }
+
+ // Configuration for the Timeline
+ var options = { groupOrder: 'content',
+ horizontalScroll: true,
+ stack: true,
+ stackSubgroups: false,
+ zoomKey: 'ctrlKey',
+ orientation: 'top',
+ order: customOrder,
+ start: '$first_ts',
+ end: '$end_ts'};
+
+ // Create a Timeline
+ var timeline = new vis.Timeline(container, items, groups, options);
+
+ function toggleStackSubgroups() {
+ options.stackSubgroups = !options.stackSubgroups;
+ timeline.setOptions(options);
+ }
+ENDHTML
+
+print <<ENDHTML;
+</script>
+</body>
+</html>
+ENDHTML
--
2.9.3
More information about the Intel-gfx
mailing list