|
1 | 1 | from openpyxl.utils import get_column_letter, range_boundaries
|
2 | 2 | import re
|
3 |
| - |
| 3 | +from typing import List, Tuple, Dict |
4 | 4 |
|
# Regex to detect cell references like A1, B2, or ranges like A1:B2, each
# optionally prefixed with a (possibly quoted) sheet name, e.g. Sheet1!A1 or
# 'My Sheet'!A1:B2. Capture group 1 holds sheet-qualified references and
# capture group 3 holds unqualified ones; groups 2 and 4 only hold the
# optional ":<end cell>" tail of a range.
CELL_REF_REGEX = r"('?[A-Za-z0-9_\-\[\] ]+'?![A-Z]{1,3}[0-9]+(:[A-Z]{1,3}[0-9]+)?)|([A-Z]{1,3}[0-9]+(:[A-Z]{1,3}[0-9]+)?)"  # noqa
|
7 | 7 |
|
8 | 8 |
|
9 |
def expand_range(range_reference: str) -> List[str]:
    """
    Expand a range reference (e.g., 'A1:A3') into a list of individual cell references.

    Cells are emitted row by row, left to right within each row. When the
    reference carries a sheet name (e.g., 'Sheet1!A1:B2'), every expanded
    cell is prefixed with that same sheet name and '!'.

    Args:
        range_reference (str): The range reference to expand, optionally
            prefixed with a sheet name followed by '!'.

    Returns:
        List[str]: A list of individual cell references.
    """
    # Split on the LAST "!" only: a sheet name may itself contain "!", and a
    # plain split("!") would then yield more than two parts and fail to unpack.
    # With no "!" present, rpartition leaves sheet_name empty.
    sheet_name, _, cell_range = range_reference.rpartition("!")
    prefix = f"{sheet_name}!" if sheet_name else ""

    min_col, min_row, max_col, max_row = range_boundaries(cell_range)

    # Boundaries are inclusive, hence the +1 on both upper limits.
    return [
        f"{prefix}{get_column_letter(col)}{row}"
        for row in range(min_row, max_row + 1)
        for col in range(min_col, max_col + 1)
    ]
|
33 | 38 |
|
34 | 39 |
|
35 |
def extract_references(formula: str) -> Tuple[List[str], List[str], Dict[str, str]]:
    """
    Extract all referenced cells and ranges from a formula using regular expressions.

    Absolute-reference markers ('$') are stripped before matching, so '$A$1'
    is reported as 'A1'. Each range is additionally expanded so that every
    cell it covers can be traced back to the range it came from.

    Args:
        formula (str): The formula to extract references from.

    Returns:
        Tuple[List[str], List[str], Dict[str, str]]: A tuple of
            - the single-cell references, in order of appearance,
            - the range references, in order of appearance,
            - a dictionary mapping each cell covered by a range to that
              range reference (a later range overwrites an earlier one for
              a cell they both cover).
    """
    formula = formula.replace("$", "")

    # findall yields one tuple of capture groups per match. Index 0 is the
    # sheet-qualified alternative and index 2 the unqualified one; only one
    # of them is non-empty for any given match.
    references = [
        (match[0] or match[2]).strip()
        for match in re.findall(CELL_REF_REGEX, formula)
    ]

    direct_references: List[str] = []
    range_references: List[str] = []
    dependencies: Dict[str, str] = {}

    for reference in references:
        if ":" in reference:  # it's a range like A1:A3
            range_references.append(reference)
            # Store the range-to-cells relationship
            for cell in expand_range(reference):
                dependencies[cell] = reference
        else:  # single cell
            direct_references.append(reference)

    return direct_references, range_references, dependencies
|
0 commit comments