Skip to content

Commit e117366

Browse files
authoredSep 29, 2024··
Merge pull request #3 from dalager/Refactor
Refactoring, cleanup and test improvements
2 parents 947b774 + d10f1a9 commit e117366

6 files changed

+313
-295
lines changed
 

‎.flake8

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
[flake8]
2+
max-line-length = 100

‎README.md

+19-18
Original file line numberDiff line numberDiff line change
@@ -79,24 +79,25 @@ In [graph_visualizer.py](graph_visualizer.py) you will find three configuration
7979
The following is the output of running the script on the provided `Book1.xlsx` file.
8080

8181
```bash
82-
=== Dependency Graph Summary ===
83-
Cell/Node count 50
84-
Dependency count 60
85-
86-
=== Nodes with the highest degree ===
87-
Range!F1 10
88-
Base!B5 4
89-
Base!B12 3
90-
Base!B17 3
91-
Base!I21 3
92-
Base!G22 3
93-
Base!B22 3
94-
Base!B28 3
95-
Range!B2 3
96-
Range!B3 3
97-
98-
=== Formula functions by count ===
99-
SUM 3
82+
=== Dependency Graph Summary ===
83+
Cell/Node count 70
84+
Dependency count 100
85+
86+
87+
=== Most connected nodes ===
88+
Range Madness!A2:A11 22
89+
Range Madness!B2:B11 11
90+
Range Madness!F1 10
91+
Main Sheet!B5 4
92+
Main Sheet!B22 4
93+
Detached !A2:A4 4
94+
Range Madness!B2 4
95+
Range Madness!B3 4
96+
Range Madness!B4 4
97+
Range Madness!B5 4
98+
99+
=== Most used functions ===
100+
SUM 4
100101
POWER 1
101102

102103
Visualizing the graph of dependencies.

‎excel_parser.py

+29-17
Original file line numberDiff line numberDiff line change
@@ -1,63 +1,75 @@
11
from openpyxl.utils import get_column_letter, range_boundaries
22
import re
3-
3+
from typing import List, Tuple, Dict
44

55
# Regex to detect cell references like A1, B2, or ranges like A1:B2
66
CELL_REF_REGEX = r"('?[A-Za-z0-9_\-\[\] ]+'?![A-Z]{1,3}[0-9]+(:[A-Z]{1,3}[0-9]+)?)|([A-Z]{1,3}[0-9]+(:[A-Z]{1,3}[0-9]+)?)" # noqa
77

88

9-
def expand_range(range_ref):
9+
def expand_range(range_reference: str) -> List[str]:
1010
"""
1111
Expand a range reference (e.g., 'A1:A3') into a list of individual cell references.
12-
"""
1312
13+
Args:
14+
range_reference (str): The range reference to expand.
15+
16+
Returns:
17+
List[str]: A list of individual cell references.
18+
"""
1419
# if there is a sheet name in the range reference, put it away for now
15-
if "!" in range_ref:
16-
sheet_name, range_ref = range_ref.split("!")
20+
if "!" in range_reference:
21+
sheet_name, range_reference = range_reference.split("!")
1722
else:
1823
sheet_name = None
1924

20-
min_col, min_row, max_col, max_row = range_boundaries(range_ref)
25+
min_col, min_row, max_col, max_row = range_boundaries(range_reference)
2126
expanded_cells = []
2227

2328
# Loop over rows and columns in the range
2429
for row in range(min_row, max_row + 1):
2530
for col in range(min_col, max_col + 1):
26-
# if sheetname is set
31+
cell_ref = f"{get_column_letter(col)}{row}"
2732
if sheet_name:
28-
expanded_cells.append(f"{sheet_name}!{get_column_letter(col)}{row}")
33+
expanded_cells.append(f"{sheet_name}!{cell_ref}")
2934
else:
30-
expanded_cells.append(f"{get_column_letter(col)}{row}")
35+
expanded_cells.append(cell_ref)
3136

3237
return expanded_cells
3338

3439

35-
def extract_references(formula):
40+
def extract_references(formula: str) -> Tuple[List[str], List[str], Dict[str, str]]:
3641
"""
3742
Extract all referenced cells and ranges from a formula using regular expressions.
3843
This returns the direct cell references, the range references, and a cell-to-range mapping.
44+
45+
Args:
46+
formula (str): The formula to extract references from.
47+
48+
Returns:
49+
Tuple[List[str], List[str], Dict[str, str]]: A tuple containing lists of direct references,
50+
range references, and a dictionary of dependencies.
3951
"""
4052
formula = formula.replace("$", "")
4153
matches = re.findall(CELL_REF_REGEX, formula)
4254
references = [match[0] if match[0] else match[2] for match in matches]
4355

44-
# trim the extracted references
56+
# Trim the extracted references
4557
references = [ref.strip() for ref in references]
4658

4759
expanded_references = []
4860
dependencies = {}
4961
direct_references = []
5062
range_references = []
5163

52-
for ref in references:
53-
if ":" in ref: # it's a range like A1:A3
54-
expanded_cells = expand_range(ref)
64+
for reference in references:
65+
if ":" in reference: # it's a range like A1:A3
66+
expanded_cells = expand_range(reference)
5567
expanded_references.extend(expanded_cells)
56-
range_references.append(ref)
68+
range_references.append(reference)
5769
# Store the range-to-cells relationship
5870
for cell in expanded_cells:
59-
dependencies[cell] = ref
71+
dependencies[cell] = reference
6072
else: # single cell
61-
direct_references.append(ref)
73+
direct_references.append(reference)
6274

6375
return direct_references, range_references, dependencies

‎graph_summarizer.py

+46
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
from collections import Counter
2+
3+
4+
def print_summary(graph, functionsdict):
5+
"""
6+
Summarize a networkx DiGraph representing a dependency graph and print the most used functions in the formulas.
7+
"""
8+
strpadsize = 28
9+
numpadsize = 5
10+
11+
print_basic_info(graph, strpadsize, numpadsize)
12+
print_highest_degree_nodes(graph, strpadsize, numpadsize)
13+
print_most_used_functions(functionsdict, strpadsize, numpadsize)
14+
15+
16+
def print_basic_info(graph, strpadsize, numpadsize):
17+
print("=== Dependency Graph Summary ===")
18+
print(
19+
"Cell/Node count".ljust(strpadsize, " ")
20+
+ str(graph.number_of_nodes()).rjust(numpadsize, " ")
21+
)
22+
print(
23+
"Dependency count".ljust(strpadsize, " ")
24+
+ str(graph.number_of_edges()).rjust(numpadsize, " ")
25+
)
26+
print()
27+
28+
29+
def print_highest_degree_nodes(graph, strpadsize, numpadsize):
30+
print("\n=== Most connected nodes ===")
31+
degree_view = graph.degree()
32+
degree_counts = Counter(dict(degree_view))
33+
max_degree_node = degree_counts.most_common(10)
34+
35+
for node, degree in max_degree_node:
36+
print(f"{node.ljust(strpadsize)}{str(degree).rjust(numpadsize, ' ')} ")
37+
38+
39+
def print_most_used_functions(functionsdict, strpadsize, numpadsize):
40+
print("\n=== Most used functions ===")
41+
sorted_functions = dict(
42+
sorted(functionsdict.items(), key=lambda item: item[1], reverse=True)
43+
)
44+
45+
for function, count in sorted_functions.items():
46+
print(f"{function.ljust(strpadsize, ' ')}{str(count).rjust(numpadsize, ' ')}")

‎graphbuilder.py

+102-130
Original file line numberDiff line numberDiff line change
@@ -2,66 +2,59 @@
22
This script extracts formulas from an Excel file and builds a dependency graph.
33
"""
44

5+
from typing import List, Dict
56
from openpyxl import load_workbook
6-
from collections import Counter
77
import networkx as nx
88
import re
99
import sys
1010
from graph_visualizer import visualize_dependency_graph
11+
from graph_summarizer import print_summary
1112
from excel_parser import extract_references
1213

13-
# dictionary that stores the uniqe functions used in the formulas
14-
# the key will be the funciton name and the value will be the number of times it was used
15-
functions_dict = {}
14+
# Dictionary that stores the unique functions used in the formulas
15+
# The key will be the function name and the value will be the number of times it was used
16+
functions_dict: Dict[str, int] = {}
1617

1718

18-
def log(msg):
19+
def log(msg: str) -> None:
1920
"""
2021
Log a message to the console if verbosity is enabled using the --verbose flag.
2122
"""
22-
# if verbosity is enabled
23-
2423
if "--verbose" in sys.argv:
2524
print(msg)
2625

2726

28-
def sanitize_sheetname(sheetname):
27+
def sanitize_sheetname(sheetname: str) -> str:
2928
"""
3029
Remove any special characters from the sheet name.
3130
"""
3231
return sheetname.replace("'", "")
3332

3433

35-
def sanitize_range(rangestring):
34+
def sanitize_range(rangestring: str) -> str:
3635
"""
3736
Remove any special characters from the range.
3837
"""
3938
if "!" in rangestring:
40-
sheet = rangestring.split("!")[0].replace("'", "")
41-
range = rangestring.split("!")[1]
42-
43-
return f"{sheet}!{range}"
39+
sheet, range_ = rangestring.split("!")
40+
sheet = sheet.replace("'", "")
41+
return f"{sheet}!{range_}"
42+
return rangestring
4443

4544

46-
def stat_functions(cellvalue):
45+
def stat_functions(cellvalue: str) -> None:
4746
"""
4847
Extract the functions used in the formula and store them in a dictionary.
4948
This will be used to print the most used functions in the formulas.
5049
"""
51-
52-
# functions used in the formula
5350
cellfuncs = re.findall(r"[A-Z]+\(", cellvalue)
5451
log(f" Functions used: {functions_dict}")
5552
for function in cellfuncs:
56-
# remove the "(" from the function name
57-
function = function[:-1]
58-
if function in functions_dict:
59-
functions_dict[function] += 1
60-
else:
61-
functions_dict[function] = 1
53+
function = function[:-1] # Remove the "(" from the function name
54+
functions_dict[function] = functions_dict.get(function, 0) + 1
6255

6356

64-
def add_node(graph, node, sheet):
57+
def add_node(graph: nx.DiGraph, node: str, sheet: str) -> None:
6558
"""
6659
Add a node to the graph with the specified sheet name.
6760
"""
@@ -70,135 +63,114 @@ def add_node(graph, node, sheet):
7063
graph.add_node(node, sheet=sheet)
7164

7265

73-
def extract_formulas_and_build_dependencies(file_path):
66+
def extract_formulas_and_build_dependencies(file_path: str) -> nx.DiGraph:
7467
"""
7568
Extract formulas from an Excel file and build a dependency graph.
7669
"""
70+
try:
71+
wb = load_workbook(file_path, data_only=False)
72+
except Exception as e:
73+
log(f"Error loading workbook: {e}")
74+
sys.exit(1)
7775

78-
# Load the workbook
79-
wb = load_workbook(file_path, data_only=False)
80-
81-
# Create a directed graph for dependencies
8276
graph = nx.DiGraph()
8377

84-
# Iterate over all sheets
8578
for sheet_name in wb.sheetnames:
8679
ws = wb[sheet_name]
8780
log(f"========== Analyzing sheet: {sheet_name} ==========")
88-
sheet_name = sanitize_sheetname(sheet_name)
89-
# Iterate over all cells in the sheet and extract formulas
90-
for row in ws.iter_rows():
91-
for cell in row:
92-
# only interested in cells with formulas
93-
if isinstance(cell.value, str) and cell.value.startswith("="):
94-
# collect functions usage statistics
95-
stat_functions(cell.value)
96-
97-
current_cell = f"{sheet_name}!{cell.coordinate}"
98-
log(f"Formula in {current_cell}: {cell.value}")
99-
100-
add_node(graph, current_cell, sheet_name)
101-
102-
# Extract all referenced cells and ranges from the formula
103-
direct_references, range_references, range_dependencies = (
104-
extract_references(cell.value)
105-
)
106-
107-
# all the referenced cells and cells from expanded ranges
108-
# is added to the graph as nodes and edges
109-
for ref_cell in direct_references:
110-
if "!" not in ref_cell:
111-
# No sheet specified, assume current sheet
112-
refc = f"{sheet_name}!{ref_cell}"
113-
else:
114-
# remove ' from sheet name
115-
ref_cell = ref_cell.replace("'", "")
116-
refc = ref_cell
117-
118-
log(f" Cell: {refc}")
119-
# add the node
120-
add_node(graph, refc, sheet_name)
121-
# add the edge
122-
graph.add_edge(current_cell, refc)
123-
124-
# If a range like A1:B3 is referenced, add the range definition as a node
125-
for rng in range_references:
126-
log(f" Range: {rng}")
127-
128-
if "!" not in rng:
129-
rng = f"{sheet_name}!{rng}"
130-
range_sheet = sheet_name
131-
else:
132-
rng = sanitize_range(rng)
133-
range_sheet = rng.split("!")[0]
134-
135-
add_node(graph, rng, range_sheet)
136-
graph.add_edge(current_cell, rng)
137-
138-
# If a range like A1:B3 is referenced, add the
139-
# edge between the cells within that range and
140-
# the range istself
141-
for single_cell, range_ref in range_dependencies.items():
142-
if "!" not in range_ref:
143-
range_ref = f"{sheet_name}!{range_ref}"
144-
range_sheet = sheet_name
145-
else:
146-
range_ref = sanitize_range(range_ref)
147-
range_sheet = range_ref.split("!")[0]
148-
149-
if "!" not in single_cell:
150-
single_cell = f"{sheet_name}!{single_cell}"
151-
cell_sheet = sheet_name
152-
else:
153-
single_cell = single_cell
154-
cell_sheet = single_cell.split("!")[0]
155-
156-
# this is the single cell that points to the range it belongs to
157-
add_node(graph, single_cell, cell_sheet)
158-
add_node(graph, range_ref, range_sheet)
159-
160-
# Then add the edge between the single cell and the range
161-
graph.add_edge(range_ref, single_cell)
81+
sanitized_sheet_name = sanitize_sheetname(sheet_name)
82+
process_sheet(ws, sanitized_sheet_name, graph)
83+
16284
return graph
16385

16486

165-
def print_summary(graph, functionsdict):
87+
def process_sheet(ws, sheet_name: str, graph: nx.DiGraph) -> None:
16688
"""
167-
Summarize a networkx DiGraph representing a dependency graph. And print the most used functions in the formulas
89+
Process a sheet and add references to the graph.
16890
"""
91+
for row in ws.iter_rows():
92+
for cell in row:
93+
if isinstance(cell.value, str) and cell.value.startswith("="):
94+
process_formula_cell(cell, sheet_name, graph)
16995

170-
strpadsize = 28
171-
numpadsize = 5
172-
# 1. Print basic information about the graph
17396

174-
print("=== Dependency Graph Summary ===")
175-
print(
176-
"Cell/Node count".ljust(strpadsize, " ")
177-
+ str(graph.number_of_nodes()).rjust(numpadsize, " ")
178-
)
179-
print(
180-
"Dependency count".ljust(strpadsize, " ")
181-
+ str(graph.number_of_edges()).rjust(numpadsize, " ")
97+
def process_formula_cell(cell, sheet_name: str, graph: nx.DiGraph) -> None:
98+
"""
99+
Process a cell containing a formula.
100+
"""
101+
stat_functions(cell.value)
102+
cell_reference = f"{sheet_name}!{cell.coordinate}"
103+
log(f"Formula in {cell_reference}: {cell.value}")
104+
add_node(graph, cell_reference, sheet_name)
105+
106+
direct_references, range_references, range_dependencies = extract_references(
107+
cell.value
182108
)
183-
print()
109+
add_references_to_graph(direct_references, cell_reference, sheet_name, graph)
110+
add_ranges_to_graph(range_references, cell_reference, sheet_name, graph)
111+
add_range_dependencies_to_graph(range_dependencies, sheet_name, graph)
112+
113+
114+
def add_references_to_graph(
115+
references: List[str], current_cell: str, sheet_name: str, graph: nx.DiGraph
116+
) -> None:
117+
"""
118+
Add direct cell references to the graph.
119+
"""
120+
for cell_reference in references:
121+
cell_reference = format_reference(cell_reference, sheet_name)
122+
log(f" Cell: {cell_reference}")
123+
add_node(graph, cell_reference, sheet_name)
124+
graph.add_edge(current_cell, cell_reference)
184125

185-
# 2. Print the nodes with the highest degree
186-
degree_view = graph.degree()
187126

188-
degree_counts = Counter(dict(degree_view))
189-
max_degree_node = degree_counts.most_common(10)
190-
print("=== Nodes with the highest degree ===")
191-
for node, degree in max_degree_node:
192-
print(f"{node.ljust(strpadsize)}{str(degree).rjust(numpadsize, ' ')} ")
127+
def add_ranges_to_graph(
128+
ranges: List[str], current_cell: str, sheet_name: str, graph: nx.DiGraph
129+
) -> None:
130+
"""
131+
Add range references to the graph.
132+
"""
133+
for range_reference in ranges:
134+
range_sheet_name = get_range_sheet_name(range_reference, sheet_name)
135+
range_reference = format_reference(range_reference, sheet_name)
136+
log(f" Range: {range_reference}")
137+
add_node(graph, range_reference, range_sheet_name)
138+
graph.add_edge(current_cell, range_reference)
139+
140+
141+
def add_range_dependencies_to_graph(
142+
range_dependencies: Dict[str, str], sheet_name: str, graph: nx.DiGraph
143+
) -> None:
144+
"""
145+
Add dependencies between ranges and cells.
146+
"""
147+
for cell_reference, range_reference in range_dependencies.items():
148+
range_reference = format_reference(range_reference, sheet_name)
149+
cell_reference = format_reference(cell_reference, sheet_name)
150+
range_sheet_name = range_reference.split("!")[0]
151+
cell_sheet_name = cell_reference.split("!")[0]
152+
153+
add_node(graph, cell_reference, cell_sheet_name)
154+
add_node(graph, range_reference, range_sheet_name)
155+
graph.add_edge(range_reference, cell_reference)
193156

194-
# 3. Print the most used functions
195-
print("\n=== Formula functions by count ===")
196-
sorted_functions = dict(
197-
sorted(functionsdict.items(), key=lambda item: item[1], reverse=True)
157+
158+
def format_reference(reference: str, sheet_name: str) -> str:
159+
"""
160+
Format a cell or range reference to include the sheet name if not already present.
161+
"""
162+
return (
163+
f"{sheet_name}!{reference}"
164+
if "!" not in reference
165+
else reference.replace("'", "")
198166
)
199167

200-
for function, count in sorted_functions.items():
201-
print(f"{function.ljust(strpadsize, ' ')}{str(count).rjust(numpadsize, ' ')}")
168+
169+
def get_range_sheet_name(range_reference: str, sheet_name: str) -> str:
170+
"""
171+
Get the sheet name for a range reference.
172+
"""
173+
return sheet_name if "!" not in range_reference else range_reference.split("!")[0]
202174

203175

204176
if __name__ == "__main__":

‎test_cell_reference_extraction.py

+115-130
Original file line numberDiff line numberDiff line change
@@ -1,161 +1,146 @@
1-
from excel_parser import (
2-
extract_references,
3-
expand_range,
4-
) # Ensure you replace this with the actual name of your module
5-
6-
7-
# def test_expand_range():
8-
# formula = "=Sum(A1:A3)"
9-
# expected_references = ["A1", "A2", "A3"]
10-
# direct_references, deps = extract_references(formula)
11-
# assert (
12-
# direct_references == expected_references
13-
# ), f"Expected {expected_references}, but got {direct_references}"
14-
15-
16-
# def test_expand_range_other_sheet():
17-
# formula = "=Sum(Other!A1:A3)"
18-
# expected_references = ["Other!A1", "Other!A2", "Other!A3"]
19-
# direct_references, deps = extract_references(formula)
20-
# assert (
21-
# direct_references == expected_references
22-
# ), f"Expected {expected_references}, but got {direct_references}"
23-
24-
25-
# # Test for simple references like B4, A5
26-
def test_simple_references():
27-
formula = "=B4+A5"
28-
expected_references = ["B4", "A5"]
29-
direct_references, range_references, deps = extract_references(formula)
30-
assert (
31-
direct_references == expected_references
32-
), f"Expected {expected_references}, but got {direct_references}"
33-
34-
35-
# # Test for local range references like A2:A11
36-
def test_local_range_references():
37-
formula = "=SUM(A2:A4)"
38-
expected_references = ["A2:A4"]
39-
direct_references, range_refs, deps = extract_references(formula)
40-
assert (
41-
range_refs == expected_references
42-
), f"Expected {expected_references}, but got {direct_references}"
1+
import pytest
2+
from excel_parser import extract_references
433

444

45-
# Test for simple absolute references like $A$1, $B$2
46-
def test_absolute_references():
47-
formula = "=$A$1+$B$2"
48-
expected_references = ["A1", "B2"]
49-
direct_references, range_refs, deps = extract_references(formula)
5+
# Helper function to assert references
6+
def assert_references(formula, expected_direct, expected_range, expected_deps):
7+
direct_references, range_references, deps = extract_references(formula)
508
assert (
51-
direct_references == expected_references
52-
), f"Expected {expected_references}, but got {direct_references}"
53-
54-
55-
# Test for sheet qualified absolute references like Sheet2!$A$1, Sheet2!$B$2
56-
def test_sheet_qualified_absolute_references():
57-
formula = "=Sheet2!$A$1+Sheet2!$B$2"
58-
expected_references = ["Sheet2!A1", "Sheet2!B2"]
59-
direct_references, range_refs, deps = extract_references(formula)
9+
direct_references == expected_direct
10+
), f"Expected {expected_direct}, but got {direct_references}"
6011
assert (
61-
direct_references == expected_references
62-
), f"Expected {expected_references}, but got {direct_references}"
12+
range_references == expected_range
13+
), f"Expected {expected_range}, but got {range_references}"
14+
assert deps == expected_deps, f"Expected {expected_deps}, but got {deps}"
15+
16+
17+
@pytest.mark.parametrize(
18+
"formula, expected_direct, expected_range, expected_deps",
19+
[
20+
# Test for simple references like B4, A5
21+
("=B4+A5", ["B4", "A5"], [], {}),
22+
# Test for local range references like A2:A11
23+
("=SUM(A2:A4)", [], ["A2:A4"], {"A2": "A2:A4", "A3": "A2:A4", "A4": "A2:A4"}),
24+
# Test for simple absolute references like $A$1, $B$2
25+
("=$A$1+$B$2", ["A1", "B2"], [], {}),
26+
# Test for sheet qualified absolute references like Sheet2!$A$1, Sheet2!$B$2
27+
("=Sheet2!$A$1+Sheet2!$B$2", ["Sheet2!A1", "Sheet2!B2"], [], {}),
28+
],
29+
)
30+
def test_references(formula, expected_direct, expected_range, expected_deps):
31+
"""
32+
Test various cell and range references.
33+
"""
34+
assert_references(formula, expected_direct, expected_range, expected_deps)
6335

6436

6537
# Test for sheet-qualified absolute range references like Sheet2!$A$2:$A$5
6638
def test_sheet_qualified_absolute_range_references():
39+
"""
40+
Test for sheet-qualified absolute range references like Sheet2!$A$2:$A$5.
41+
"""
6742
formula = "=SUM(Sheet2!$A$2:$A$5)"
68-
expected_references = [
69-
"Sheet2!A2:A5",
70-
]
71-
direct_references, range_refs, deps = extract_references(formula)
72-
assert (
73-
range_refs == expected_references
74-
), f"Expected {expected_references}, but got {direct_references}"
43+
expected_direct = []
44+
expected_range = ["Sheet2!A2:A5"]
45+
expected_deps = {
46+
"Sheet2!A2": "Sheet2!A2:A5",
47+
"Sheet2!A3": "Sheet2!A2:A5",
48+
"Sheet2!A4": "Sheet2!A2:A5",
49+
"Sheet2!A5": "Sheet2!A2:A5",
50+
}
51+
assert_references(formula, expected_direct, expected_range, expected_deps)
7552

7653

7754
# Test for sheet-qualified cell like Sheet2!C5
7855
def test_sheet_qualified_reference():
56+
"""
57+
Test for sheet-qualified cell like Sheet2!C5.
58+
"""
7959
formula = "=Sheet2!C5"
80-
expected_references = ["Sheet2!C5"]
81-
direct_references, rr, deps = extract_references(formula)
82-
assert (
83-
direct_references == expected_references
84-
), f"Expected {expected_references}, but got {direct_references}"
60+
expected_direct = ["Sheet2!C5"]
61+
expected_range = []
62+
expected_deps = {}
63+
assert_references(formula, expected_direct, expected_range, expected_deps)
8564

8665

66+
# Test for expanded range in dependencies
8767
def test_expanded_range_in_dependencies():
68+
"""
69+
Test for expanded range in dependencies like A1:A3.
70+
"""
8871
formula = "=SUM(A1:A3)"
89-
expected_references = ["A1", "A2", "A3"]
90-
direct_references, range_refs, deps = extract_references(formula)
91-
assert deps == {
92-
"A1": "A1:A3",
93-
"A2": "A1:A3",
94-
"A3": "A1:A3",
95-
}, f"Expected {expected_references}, but got {deps}"
72+
expected_direct = []
73+
expected_range = ["A1:A3"]
74+
expected_deps = {"A1": "A1:A3", "A2": "A1:A3", "A3": "A1:A3"}
75+
assert_references(formula, expected_direct, expected_range, expected_deps)
9676

9777

78+
# Test for no direct but only range references
9879
def test_no_direct_but_only_range_references():
80+
"""
81+
Test for no direct references but only range references like A1:A3.
82+
"""
9983
formula = "=SUM(A1:A3)"
100-
direct_references, range_refs, deps = extract_references(formula)
101-
assert (
102-
direct_references == []
103-
), f"Expected no direct references, but got {direct_references}"
84+
expected_direct = []
85+
expected_range = ["A1:A3"]
86+
expected_deps = {"A1": "A1:A3", "A2": "A1:A3", "A3": "A1:A3"}
87+
assert_references(formula, expected_direct, expected_range, expected_deps)
10488

10589

90+
# Test for two ranges
10691
def test_two_ranges():
92+
"""
93+
Test for two ranges like A1:A3 and B1:B3.
94+
"""
10795
formula = "=SUM(A1:A3) + SUM(B1:B3)"
108-
direct_references, range_refs, deps = extract_references(formula)
109-
assert range_refs == [
110-
"A1:A3",
111-
"B1:B3",
112-
], f"Expected ['A1:A3', 'B1:B3'], but got {range_refs}"
96+
expected_direct = []
97+
expected_range = ["A1:A3", "B1:B3"]
98+
expected_deps = {
99+
"A1": "A1:A3",
100+
"A2": "A1:A3",
101+
"A3": "A1:A3",
102+
"B1": "B1:B3",
103+
"B2": "B1:B3",
104+
"B3": "B1:B3",
105+
}
106+
assert_references(formula, expected_direct, expected_range, expected_deps)
113107

114108

115109
# Test for sheet-qualified range like Sheet2!A1:B3
116110
def test_sheet_qualified_range():
111+
"""
112+
Test for sheet-qualified range like Sheet2!A1:B3.
113+
"""
117114
formula = "=SUM(Sheet2!A1:B3)"
118-
expected_references = [
119-
"Sheet2!A1:B3",
120-
]
121-
direct_references, rr, deps = extract_references(formula)
122-
assert (
123-
rr == expected_references
124-
), f"Expected {expected_references}, but got {direct_references}"
125-
126-
127-
# # Test for mixed references with both local and sheet-qualified cells
128-
# def test_mixed_references():
129-
# formula = "=SUM(Sheet2!A1:B3, A5) + Sheet2!C5 + B6"
130-
# expected_references = [
131-
# "Sheet2!A1",
132-
# "Sheet2!B1",
133-
# "Sheet2!A2",
134-
# "Sheet2!B2",
135-
# "Sheet2!A3",
136-
# "Sheet2!B3",
137-
# "A5",
138-
# "Sheet2!C5",
139-
# "B6",
140-
# ]
141-
# direct_references, deps = extract_references(formula)
142-
# assert (
143-
# direct_references == expected_references
144-
# ), f"Expected {expected_references}, but got {direct_references}"
145-
146-
147-
# # Test for sheet-qualified ranges and mix of cell references
148-
# def test_mixed_ranges_and_cells():
149-
# formula = "=A1+Sheet1!B1+B10+SUM(Sheet2!A1:A3)"
150-
# expected_references = [
151-
# "A1",
152-
# "Sheet1!B1",
153-
# "B10",
154-
# "Sheet2!A1",
155-
# "Sheet2!A2",
156-
# "Sheet2!A3",
157-
# ]
158-
# direct_references, deps = extract_references(formula)
159-
# assert (
160-
# direct_references == expected_references
161-
# ), f"Expected {expected_references}, but got {direct_references}"
115+
expected_direct = []
116+
expected_range = ["Sheet2!A1:B3"]
117+
118+
expected_deps = {
119+
"Sheet2!A1": "Sheet2!A1:B3",
120+
"Sheet2!A2": "Sheet2!A1:B3",
121+
"Sheet2!A3": "Sheet2!A1:B3",
122+
"Sheet2!B1": "Sheet2!A1:B3",
123+
"Sheet2!B2": "Sheet2!A1:B3",
124+
"Sheet2!B3": "Sheet2!A1:B3",
125+
}
126+
127+
assert_references(formula, expected_direct, expected_range, expected_deps)
128+
129+
130+
# Test for mixed references with both local and sheet-qualified cells
131+
def test_mixed_references():
132+
"""
133+
Test for mixed references with both local and sheet-qualified cells.
134+
"""
135+
formula = "=SUM(Sheet2!A1:B3, A5) + Sheet2!C5 + B6"
136+
expected_direct = ["A5", "Sheet2!C5", "B6"]
137+
expected_range = ["Sheet2!A1:B3"]
138+
expected_deps = {
139+
"Sheet2!A1": "Sheet2!A1:B3",
140+
"Sheet2!A2": "Sheet2!A1:B3",
141+
"Sheet2!A3": "Sheet2!A1:B3",
142+
"Sheet2!B1": "Sheet2!A1:B3",
143+
"Sheet2!B2": "Sheet2!A1:B3",
144+
"Sheet2!B3": "Sheet2!A1:B3",
145+
}
146+
assert_references(formula, expected_direct, expected_range, expected_deps)

0 commit comments

Comments
 (0)
Please sign in to comment.