|
3 | 3 | import collections
|
4 | 4 | import json
|
5 | 5 | import os
|
| 6 | +import re |
6 | 7 | import shlex
|
7 | 8 | import subprocess
|
8 | 9 | import sys
|
9 | 10 |
|
| 11 | +perf_scanner = re.Scanner([ |
| 12 | + (r'<', lambda scanner, token: token), |
| 13 | + (r'>', lambda scanner, token: token), |
| 14 | + (r'\(', lambda scanner, token: token), |
| 15 | + (r'\)', lambda scanner, token: token), |
| 16 | + (r'[ \t]+', lambda scanner, token: token), |
| 17 | + (r'[^<>() \t]+', lambda scanner, token: token), |
| 18 | +]) |
| 19 | + |
10 | 20 |
|
11 | 21 | def main():
|
12 | 22 | desc = 'Parse perf.data and return in JSON format.'
|
@@ -58,15 +68,55 @@ def parse_perf_report(perf_data_path):
|
58 | 68 |
|
59 | 69 | # get per-symbol count
|
60 | 70 | else:
|
| 71 | + tokens, remainder = perf_scanner.scan(line) |
| 72 | + if remainder: |
| 73 | + print('Line could not be tokenized', file=sys.stderr) |
| 74 | + print(' line:', repr(line), file=sys.stderr) |
| 75 | + print(' tokens:', tokens, file=sys.stderr) |
| 76 | + print(' remainder:', remainder, file=sys.stderr) |
| 77 | + sys.exit(os.EX_DATAERR) |
| 78 | + |
| 79 | + # Construct record from tokens |
| 80 | + # (NOTE: Not a proper grammar, just dumb bracket counting) |
| 81 | + record = [] |
| 82 | + bracks = 0 |
| 83 | + parens = 0 |
| 84 | + |
| 85 | + for tok in tokens: |
| 86 | + if tok == '<': |
| 87 | + bracks += 1 |
| 88 | + |
| 89 | + if tok == '(': |
| 90 | + parens += 1 |
| 91 | + |
| 92 | + rec = record[-1] if record else None |
| 93 | + |
| 94 | + inside_bracket = rec and (bracks > 0 or parens > 0) |
| 95 | + lead_rec = tok in '<(' and rec and not rec.isspace() |
| 96 | + tail_rec = not tok.isspace() and rec and rec[-1] in '>)' |
| 97 | + |
| 98 | + if inside_bracket or lead_rec or tail_rec: |
| 99 | + record[-1] += tok |
| 100 | + else: |
| 101 | + record.append(tok) |
| 102 | + |
| 103 | + if tok == '>': |
| 104 | + bracks -= 1 |
| 105 | + if tok == '(': |
| 106 | + parens -= 1 |
| 107 | + |
| 108 | + # Strip any whitespace tokens |
| 109 | + record = [rec for rec in record if not rec.isspace()] |
| 110 | + |
61 | 111 | try:
|
62 |
| - tokens = line.split() |
63 |
| - symbol = tokens[2] |
64 |
| - period = int(tokens[3]) |
65 |
| - except ValueError: |
| 112 | + symbol = record[2] |
| 113 | + period = int(record[3]) |
| 114 | + except: |
66 | 115 | print("parse_perf.py: Error extracting symbol count",
|
67 |
| - file=sys.stderr) |
| 116 | + file=sys.stderr) |
68 | 117 | print("line:", repr(line), file=sys.stderr)
|
69 | 118 | print("tokens:", tokens, file=sys.stderr)
|
| 119 | + print("record:", record, file=sys.stderr) |
70 | 120 | raise
|
71 | 121 |
|
72 | 122 | profile[event_name]['symbol'][symbol] = period
|
|
0 commit comments