[Intel-gfx] [PATCH i-g-t] scripts/trace.pl: Optimize event parsing and processing

John Harrison John.C.Harrison at Intel.com
Tue Dec 19 16:23:23 UTC 2017


On 12/19/2017 3:15 AM, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
>
> A couple of small optimizations which altogether bring around 30%
> improvement in my testing.
>
> 1. Do less string processing on tracepoints names and push more of the
>     check into the if-ladder.
>
> 2. Pull out common db key and ctx processing and cache common values in
>     local vars.
>
> 3. Key value pair parsing is faster with a regexp.
>
> 4. Avoid sorting the db hash multiple times if possible.
>
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> Cc: John Harrison <John.C.Harrison at intel.com>
> ---
>   scripts/trace.pl | 114 ++++++++++++++++++++++++-------------------------------
>   1 file changed, 49 insertions(+), 65 deletions(-)
> ---
> John please check if this helps with your monster traces before we decide
> if it is worth putting this in. Well, there isn't much of a downside
> except touching something which works and risking breaking it.
>
> diff --git a/scripts/trace.pl b/scripts/trace.pl
> index 98e4a9843a43..5463e1fd1954 100755
> --- a/scripts/trace.pl
> +++ b/scripts/trace.pl
> @@ -356,110 +356,91 @@ my $prev_freq = 0;
>   my $prev_freq_ts = 0;
>   while (<>) {
>   	my @fields;
> -	my @tmp;
>   	my $tp_name;
> -	my $time;
>   	my %tp;
> -	my $key;
> +	my ($time, $ctx, $ring, $seqno, $orig_ctx, $key);
>   
>   	chomp;
>   	@fields = split ' ';
>   
> +	chop $fields[3];
> +	$time = int($fields[3] * 1000000.0);
> +
>   	$tp_name = $fields[4];
> -	@tmp = split ':', $tp_name, 2;
> -	next unless $tmp[0] eq 'i915';
> -	$tp_name = $tmp[1];
> -	chop $tp_name;
>   
> -	chop $fields[3];
> -	$time = $fields[3] * 1000000.0;
>   	splice @fields, 0, 5;
>   
>   	foreach my $f (@fields) {
> -		my @kv = split '=|,', $f;
> -
> -		$kv[0] = 'global' if $kv[0] eq 'global_seqno';
> +		my ($k, $v);
>   
> -		$tp{$kv[0]} = $kv[1];
> +		($k, $v) = ($f =~ /^(\S+)=(\d+)/);
I recall something about using () selection is slower than using 
pre-defined values. E.g.
   $f =~ m/=/; ($k, $v) = ($`, $');

Or do you specifically need the filtering of only matching numeric values?

> +		next unless defined $k;
> +		$k = 'global' if $k eq 'global_seqno';
> +		$tp{$k} = $v;
>   	}
>   
>   	next if exists $tp{'ring'} and exists $ignore_ring{$tp{'ring'}};
>   
> -	if ($tp_name eq 'i915_gem_request_wait_begin') {
> -		my %rw;
> +	if (exists $tp{'ring'} and exists $tp{'ctx'} and exists $tp{'seqno'}) {
> +		$ring = $tp{'ring'};
> +		$ctx = $tp{'ctx'};
> +		$seqno = $tp{'seqno'};
> +
> +		$orig_ctx = $ctx;
> +		$ctx = sanitize_ctx($ctx, $ring);
> +		$key = db_key($ring, $ctx, $seqno);
> +	}
>   
> -		$tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
> -		$key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
> +	if ($tp_name eq 'i915:i915_gem_request_wait_begin:') {
> +		my %rw;
>   
>   		next if exists $reqwait{$key};
>   
>   		$rw{'key'} = $key;
> -		$rw{'ring'} = $tp{'ring'};
> -		$rw{'seqno'} = $tp{'seqno'};
> -		$rw{'ctx'} = $tp{'ctx'};
> +		$rw{'ring'} = $ring;
> +		$rw{'seqno'} = $seqno;
> +		$rw{'ctx'} = $ctx;
>   		$rw{'start'} = $time;
>   		$reqwait{$key} = \%rw;
> -		next;
> -	} elsif ($tp_name eq 'i915_gem_request_wait_end') {
> -		$tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
> -		$key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
> -
> +	} elsif ($tp_name eq 'i915:i915_gem_request_wait_end:') {
>   		next unless exists $reqwait{$key};
>   
>   		$reqwait{$key}->{'end'} = $time;
> -		next;
> -	} elsif ($tp_name eq 'i915_gem_request_add') {
> -		my $orig_ctx = $tp{'ctx'};
> -
> -		$tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
> -		$key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
> -
> +	} elsif ($tp_name eq 'i915:i915_gem_request_add:') {
>   		if (exists $queue{$key}) {
>   			$ctxdb{$orig_ctx}++;
> -			$tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
> -			$key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
> +			$ctx = sanitize_ctx($ctx, $ring);
This bit originally worked on $orig_ctx not $ctx (i.e., before your 
previous refactoring patch). Which should it be? Will a double 
sanitisation step work as intended? I don't actually hit this line in 
any of the runs I've done.


> +			$key = db_key($ring, $ctx, $seqno);
>   		}
>   
>   		$queue{$key} = $time;
> -		next;
> -	} elsif ($tp_name eq 'i915_gem_request_submit') {
> -		$tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
> -		$key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
> -
> +	} elsif ($tp_name eq 'i915:i915_gem_request_submit:') {
>   		die if exists $submit{$key};
>   		die unless exists $queue{$key};
>   
>   		$submit{$key} = $time;
> -		next;
> -	} elsif ($tp_name eq 'i915_gem_request_in') {
> +	} elsif ($tp_name eq 'i915:i915_gem_request_in:') {
>   		my %req;
>   
> -		$tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
> -		$key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
> -
>   		die if exists $db{$key};
>   		die unless exists $queue{$key};
>   		die unless exists $submit{$key};
>   
>   		$req{'start'} = $time;
> -		$req{'ring'} = $tp{'ring'};
> -		$req{'seqno'} = $tp{'seqno'};
> -		$req{'ctx'} = $tp{'ctx'};
> -		$req{'name'} = $tp{'ctx'} . '/' . $tp{'seqno'};
> +		$req{'ring'} = $ring;
> +		$req{'seqno'} = $seqno;
> +		$req{'ctx'} = $ctx;
> +		$req{'name'} = $ctx . '/' . $seqno;
>   		$req{'global'} = $tp{'global'};
>   		$req{'port'} = $tp{'port'};
>   		$req{'queue'} = $queue{$key};
>   		$req{'submit-delay'} = $submit{$key} - $queue{$key};
>   		$req{'execute-delay'} = $req{'start'} - $submit{$key};
> -		$rings{$tp{'ring'}} = $gid++ unless exists $rings{$tp{'ring'}};
> -		$ringmap{$rings{$tp{'ring'}}} = $tp{'ring'};
> +		$rings{$ring} = $gid++ unless exists $rings{$ring};
> +		$ringmap{$rings{$ring}} = $ring;
>   		$db{$key} = \%req;
> -		next;
> -	} elsif ($tp_name eq 'i915_gem_request_out') {
> -		my $gkey = global_key($tp{'ring'}, $tp{'global'});
> -
> -		$tp{'ctx'} = sanitize_ctx($tp{'ctx'}, $tp{'ring'});
> -		$key = db_key($tp{'ring'}, $tp{'ctx'}, $tp{'seqno'});
> +	} elsif ($tp_name eq 'i915:i915_gem_request_out:') {
> +		my $gkey = global_key($ring, $tp{'global'});
>   
>   		die unless exists $db{$key};
>   		die unless exists $db{$key}->{'start'};
> @@ -476,15 +457,12 @@ while (<>) {
>   		}
>   		$db{$key}->{'duration'} = $db{$key}->{'notify'} - $db{$key}->{'start'};
>   		$db{$key}->{'context-complete-delay'} = $db{$key}->{'end'} - $db{$key}->{'notify'};
> -		next;
> -	} elsif ($tp_name eq 'intel_engine_notify') {
> -		$notify{global_key($tp{'ring'}, $tp{'seqno'})} = $time;
> -		next;
> -	} elsif ($tp_name eq 'intel_gpu_freq_change') {
> +	} elsif ($tp_name eq 'intel_engine_notify:') {
> +		$notify{global_key($ring, $seqno)} = $time;
> +	} elsif ($tp_name eq 'i915:intel_gpu_freq_change:') {
>   		push @freqs, [$prev_freq_ts, $time, $prev_freq] if $prev_freq;
>   		$prev_freq_ts = $time;
>   		$prev_freq = $tp{'new_freq'};
> -		next;
>   	}
>   }
>   
> @@ -557,7 +535,10 @@ my (%submit_avg, %execute_avg, %ctxsave_avg);
>   my $last_ts = 0;
>   my $first_ts;
>   
> -foreach my $key (sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db) {
> +my @sorted_keys = sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db;
> +my $re_sort = 0;
> +
> +foreach my $key (@sorted_keys) {
>   	my $ring = $db{$key}->{'ring'};
>   	my $end = $db{$key}->{'end'};
>   
> @@ -585,6 +566,7 @@ foreach my $key (sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db) {
>   
>   		# 20us tolerance
>   		if (exists $db{$next_key} and $db{$next_key}->{'start'} < $start + 20) {
> +			$re_sort = 1;
>   			$db{$next_key}->{'start'} = $start + $db{$key}->{'duration'};
>   			$db{$next_key}->{'start'} = $db{$next_key}->{'end'} if $db{$next_key}->{'start'} > $db{$next_key}->{'end'};
>   			$db{$next_key}->{'duration'} = $db{$next_key}->{'notify'} - $db{$next_key}->{'start'};
> @@ -601,6 +583,8 @@ foreach my $key (sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db) {
>   	$ctxsave_avg{$ring} += $db{$key}->{'end'} - $db{$key}->{'notify'};
>   }
>   
> + at sorted_keys = sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db if $re_sort;
> +
>   foreach my $ring (keys %batch_avg) {
>   	$batch_avg{$ring} /= $batch_count{$ring};
>   	$batch_total_avg{$ring} /= $batch_count{$ring};
> @@ -616,7 +600,7 @@ foreach my $gid (sort keys %rings) {
>   	my (@s_, @e_);
>   
>   	# Extract all GPU busy intervals and sort them.
> -	foreach my $key (sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db) {
> +	foreach my $key (@sorted_keys) {
>   		next unless $db{$key}->{'ring'} == $ring;
>   		push @s_, $db{$key}->{'start'};
>   		push @e_, $db{$key}->{'end'};
> @@ -660,7 +644,7 @@ my @gpu_intervals;
>   my (@s_, @e_);
>   
>   # Extract all GPU busy intervals and sort them.
> -foreach my $key (sort {$db{$a}->{'start'} <=> $db{$b}->{'start'}} keys %db) {
> +foreach my $key (@sorted_keys) {
>   	push @s_, $db{$key}->{'start'};
>   	push @e_, $db{$key}->{'end'};
>   	die if $db{$key}->{'start'} > $db{$key}->{'end'};



More information about the Intel-gfx mailing list