Skip to content

Commit e117366

Browse files
authored
Merge pull request #3 from dalager/Refactor
Refactoring, cleanup and test improvements
2 parents 947b774 + d10f1a9 commit e117366

6 files changed

+313
-295
lines changed

.flake8

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
[flake8]
2+
max-line-length = 100

README.md

+19-18
Original file line numberDiff line numberDiff line change
@@ -79,24 +79,25 @@ In [graph_visualizer.py](graph_visualizer.py) you will find three configuration
7979
The following is the output of running the script on the provided `Book1.xlsx` file.
8080

8181
```bash
82-
=== Dependency Graph Summary ===
83-
Cell/Node count 50
84-
Dependency count 60
85-
86-
=== Nodes with the highest degree ===
87-
Range!F1 10
88-
Base!B5 4
89-
Base!B12 3
90-
Base!B17 3
91-
Base!I21 3
92-
Base!G22 3
93-
Base!B22 3
94-
Base!B28 3
95-
Range!B2 3
96-
Range!B3 3
97-
98-
=== Formula functions by count ===
99-
SUM 3
82+
=== Dependency Graph Summary ===
83+
Cell/Node count 70
84+
Dependency count 100
85+
86+
87+
=== Most connected nodes ===
88+
Range Madness!A2:A11 22
89+
Range Madness!B2:B11 11
90+
Range Madness!F1 10
91+
Main Sheet!B5 4
92+
Main Sheet!B22 4
93+
Detached !A2:A4 4
94+
Range Madness!B2 4
95+
Range Madness!B3 4
96+
Range Madness!B4 4
97+
Range Madness!B5 4
98+
99+
=== Most used functions ===
100+
SUM 4
100101
POWER 1
101102

102103
Visualizing the graph of dependencies.

excel_parser.py

+29-17
Original file line numberDiff line numberDiff line change
@@ -1,63 +1,75 @@
11
from openpyxl.utils import get_column_letter, range_boundaries
22
import re
3-
3+
from typing import List, Tuple, Dict
44

55
# Regex to detect cell references like A1, B2, or ranges like A1:B2
66
# The pattern has two top-level alternatives:
#   group 1: a sheet-qualified reference - an optionally quoted sheet name, "!", then a cell
#            or a cell range (the optional ":<cell>" part is group 2)
#   group 3: an unqualified cell or cell range (the optional ":<cell>" part is group 4)
# With re.findall each match is a 4-tuple; index 0 or index 2 holds the full reference.
CELL_REF_REGEX = r"('?[A-Za-z0-9_\-\[\] ]+'?![A-Z]{1,3}[0-9]+(:[A-Z]{1,3}[0-9]+)?)|([A-Z]{1,3}[0-9]+(:[A-Z]{1,3}[0-9]+)?)" # noqa
77

88

9-
def expand_range(range_reference: str) -> List[str]:
    """
    Expand a range reference (e.g., 'A1:A3') into a list of individual cell references.

    Cells are produced row by row, and left to right within each row. If the reference
    carries a sheet name (e.g., 'Sheet1!A1:A3'), every expanded cell keeps that prefix.

    Args:
        range_reference (str): The range reference to expand, optionally sheet-qualified.

    Returns:
        List[str]: A list of individual cell references.
    """
    # Split on the LAST "!" only: a plain split("!") unpacked into two names raises
    # ValueError as soon as the input contains more than one "!".
    sheet_name, _, cell_range = range_reference.rpartition("!")
    # No "!" (or an empty sheet name) means the cells are emitted without a prefix.
    prefix = f"{sheet_name}!" if sheet_name else ""

    min_col, min_row, max_col, max_row = range_boundaries(cell_range)

    return [
        f"{prefix}{get_column_letter(col)}{row}"
        for row in range(min_row, max_row + 1)
        for col in range(min_col, max_col + 1)
    ]
3338

3439

35-
def extract_references(formula: str) -> Tuple[List[str], List[str], Dict[str, str]]:
    """
    Extract all referenced cells and ranges from a formula using regular expressions.

    Absolute-reference markers ('$') are removed before matching, so '$A$1' is reported
    as 'A1'. Duplicate references are kept, in order of appearance.

    Args:
        formula (str): The formula to extract references from.

    Returns:
        Tuple[List[str], List[str], Dict[str, str]]: A tuple containing
            - the direct (single-cell) references,
            - the range references (those containing ':'),
            - a dictionary mapping each cell of an expanded range to the range reference
              it came from; if a cell belongs to several ranges, the last range wins.
    """
    formula = formula.replace("$", "")

    direct_references: List[str] = []
    range_references: List[str] = []
    dependencies: Dict[str, str] = {}

    for match in re.findall(CELL_REF_REGEX, formula):
        # Index 0 is the sheet-qualified alternative of the pattern, index 2 the bare one;
        # exactly one of them is non-empty for any match.
        reference = (match[0] or match[2]).strip()

        if ":" not in reference:  # single cell
            direct_references.append(reference)
            continue

        # It's a range like A1:A3: record it and store the range-to-cells relationship.
        range_references.append(reference)
        for cell in expand_range(reference):
            dependencies[cell] = reference

    return direct_references, range_references, dependencies

graph_summarizer.py

+46
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
from collections import Counter
2+
3+
4+
def print_summary(graph, functionsdict):
    """
    Print a summary of a dependency graph and of the functions used in its formulas.

    Three sections are printed in order: the node and edge counts, the most connected
    nodes, and the most used functions.

    Args:
        graph: The dependency graph; it must provide number_of_nodes(), number_of_edges()
            and degree() (e.g. a networkx DiGraph).
        functionsdict: Mapping whose keys are printed as function names and whose values
            are used as their counts.
    """
    # Column widths shared by all sections so the labels and numbers line up.
    strpadsize = 28
    numpadsize = 5

    print_basic_info(graph, strpadsize, numpadsize)
    print_highest_degree_nodes(graph, strpadsize, numpadsize)
    print_most_used_functions(functionsdict, strpadsize, numpadsize)
14+
15+
16+
def print_basic_info(graph, strpadsize, numpadsize):
    """
    Print the summary header followed by the node count and the edge count of the graph.

    Args:
        graph: Object providing number_of_nodes() and number_of_edges().
        strpadsize: Width the row labels are left-justified to.
        numpadsize: Width the counts are right-justified to.
    """
    print("=== Dependency Graph Summary ===")

    rows = (
        ("Cell/Node count", graph.number_of_nodes()),
        ("Dependency count", graph.number_of_edges()),
    )
    for label, count in rows:
        print(label.ljust(strpadsize, " ") + str(count).rjust(numpadsize, " "))

    # Blank line that closes the section.
    print()
27+
28+
29+
def print_highest_degree_nodes(graph, strpadsize, numpadsize, top_n=10):
    """
    Print the nodes with the highest degree, most connected first.

    Args:
        graph: Object whose degree() yields (node, degree) pairs.
        strpadsize: Width the node names are left-justified to.
        numpadsize: Width the degrees are right-justified to.
        top_n: Maximum number of nodes to list. Defaults to 10.
    """
    print("\n=== Most connected nodes ===")
    degree_counts = Counter(dict(graph.degree()))

    for node, degree in degree_counts.most_common(top_n):
        # str(node): nodes are not guaranteed to be strings, and only str has ljust().
        # The trailing space is kept so the output stays identical to earlier versions.
        print(f"{str(node).ljust(strpadsize)}{str(degree).rjust(numpadsize, ' ')} ")
37+
38+
39+
def print_most_used_functions(functionsdict, strpadsize, numpadsize):
    """
    Print every function with its count, highest count first.

    Functions with equal counts keep the order they have in functionsdict.

    Args:
        functionsdict: Mapping of function name (str) to its count.
        strpadsize: Width the function names are left-justified to.
        numpadsize: Width the counts are right-justified to.
    """
    print("\n=== Most used functions ===")

    # Iterate the sorted pairs directly; rebuilding a dict first adds nothing.
    by_count_desc = sorted(functionsdict.items(), key=lambda item: item[1], reverse=True)
    for function, count in by_count_desc:
        print(f"{function.ljust(strpadsize, ' ')}{str(count).rjust(numpadsize, ' ')}")

0 commit comments

Comments
 (0)