Skip to content

Commit 6272bbc

Browse files
marshallward authored and adcroft committed
Testing: Correct perf parsing of C++ output
This patch fixes errors in the parser of perf output. Previously, each record was assumed to be separated by whitespace, but this failed for more general records (usually from C++) which included signatures (such as `f(a, b)`) or templates (such as `f<a, b>`). Nested constructs were also possible. This is fixed by introducing a simple tokenizer which extracts `<`, `>`, `(`, `)`, and whitespace from the output, then rebuilds the records by merging any whitespace which appears inside of delimiters into the enclosing record. This patch should hopefully resolve the CI errors in GitHub Actions.
1 parent e253883 commit 6272bbc

File tree

1 file changed

+55
-5
lines changed

1 file changed

+55
-5
lines changed

.testing/tools/parse_perf.py

+55-5
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,20 @@
33
import collections
44
import json
55
import os
6+
import re
67
import shlex
78
import subprocess
89
import sys
910

11+
perf_scanner = re.Scanner([
12+
(r'<', lambda scanner, token: token),
13+
(r'>', lambda scanner, token: token),
14+
(r'\(', lambda scanner, token: token),
15+
(r'\)', lambda scanner, token: token),
16+
(r'[ \t]+', lambda scanner, token: token),
17+
(r'[^<>() \t]+', lambda scanner, token: token),
18+
])
19+
1020

1121
def main():
1222
desc = 'Parse perf.data and return in JSON format.'
@@ -58,15 +68,55 @@ def parse_perf_report(perf_data_path):
5868

5969
# get per-symbol count
6070
else:
71+
tokens, remainder = perf_scanner.scan(line)
72+
if remainder:
73+
print('Line could not be tokenized', file=sys.stderr)
74+
print(' line:', repr(line), file=sys.stderr)
75+
print(' tokens:', tokens, file=sys.stderr)
76+
print(' remainder:', remainder, file=sys.stderr)
77+
sys.exit(os.EX_DATAERR)
78+
79+
# Construct record from tokens
80+
# (NOTE: Not a proper grammar, just dumb bracket counting)
81+
record = []
82+
bracks = 0
83+
parens = 0
84+
85+
for tok in tokens:
86+
if tok == '<':
87+
bracks += 1
88+
89+
if tok == '(':
90+
parens += 1
91+
92+
rec = record[-1] if record else None
93+
94+
inside_bracket = rec and (bracks > 0 or parens > 0)
95+
lead_rec = tok in '<(' and rec and not rec.isspace()
96+
tail_rec = not tok.isspace() and rec and rec[-1] in '>)'
97+
98+
if inside_bracket or lead_rec or tail_rec:
99+
record[-1] += tok
100+
else:
101+
record.append(tok)
102+
103+
if tok == '>':
104+
bracks -= 1
105+
if tok == '(':
106+
parens -= 1
107+
108+
# Strip any whitespace tokens
109+
record = [rec for rec in record if not rec.isspace()]
110+
61111
try:
62-
tokens = line.split()
63-
symbol = tokens[2]
64-
period = int(tokens[3])
65-
except ValueError:
112+
symbol = record[2]
113+
period = int(record[3])
114+
except:
66115
print("parse_perf.py: Error extracting symbol count",
67-
file=sys.stderr)
116+
file=sys.stderr)
68117
print("line:", repr(line), file=sys.stderr)
69118
print("tokens:", tokens, file=sys.stderr)
119+
print("record:", record, file=sys.stderr)
70120
raise
71121

72122
profile[event_name]['symbol'][symbol] = period

0 commit comments

Comments
 (0)