-
Notifications
You must be signed in to change notification settings - Fork 115
/
Copy pathlite_parser.py
executable file
·295 lines (240 loc) · 10.6 KB
/
lite_parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
"""`kedro_viz.integrations.kedro.lite_parser` defines a Kedro parser using AST."""
import ast
import importlib.util
import logging
from pathlib import Path
from typing import Dict, List, Set, Union
from unittest.mock import MagicMock
from kedro_viz.utils import Spinner, is_file_ignored, load_gitignore_patterns
# Module-level logger, named after this module via ``__name__``; LiteParser
# reports import-resolution and file-parsing problems through it.
logger = logging.getLogger(__name__)
class LiteParser:
    """Represents a Kedro Parser which uses AST to find unresolved imports.

    Project files are read and parsed with ``ast``; every imported module
    that cannot be resolved in the current environment is reported so that
    it can be replaced with a mock.

    Args:
        package_name (Union[str, None]): The name of the current package
    """

    def __init__(self, package_name: Union[str, None] = None) -> None:
        self._package_name = package_name

    @staticmethod
    def _is_module_importable(module_name: str) -> bool:
        """Checks if a module is importable

        Args:
            module_name (str): The name of the module to check
                    importability
        Returns:
            Whether the module can be imported
        """
        try:
            # In case of submodule (contains a dot, e.g: sklearn.linear_model),
            # find_spec imports the parent module, which is why import-time
            # errors have to be handled here as well.
            return importlib.util.find_spec(module_name) is not None
        except ModuleNotFoundError as mnf_exc:
            logger.debug(
                "ModuleNotFoundError in resolving %s : %s", module_name, mnf_exc
            )
            return False
        except ImportError as imp_exc:
            logger.debug("ImportError in resolving %s : %s", module_name, imp_exc)
            return False
        except ValueError as val_exc:
            logger.debug("ValueError in resolving %s : %s", module_name, val_exc)
            return False
        except Exception as exc:  # noqa: BLE001 # pragma: no cover
            # Importing a parent package can run arbitrary code; anything it
            # raises is treated as "not importable" rather than a crash.
            logger.debug(
                "An exception occurred while resolving %s : %s", module_name, exc
            )
            return False

    @staticmethod
    def _get_module_parts(module_name: str) -> List[str]:
        """Creates a list of module parts to check for importability

        Args:
            module_name (str): The module name to split

        Returns:
            A list of module parts, from the top-level package down to
            the full dotted name

        Example:
            >>> LiteParser._get_module_parts("kedro.framework.project")
            ['kedro', 'kedro.framework', 'kedro.framework.project']
        """
        module_split = module_name.split(".")
        return [
            ".".join(module_split[:end]) for end in range(1, len(module_split) + 1)
        ]

    def _is_relative_import(
        self, module_name: str, project_file_paths: Set[Path]
    ) -> bool:
        """Checks if a module is a relative import. This is needed
        in dev or standalone mode when the package_name is None and
        internal package files have unresolved external dependencies

        Args:
            module_name (str): The name of the module to check
                    importability
            project_file_paths (Set[Path]): A set of project file paths

        Returns:
            Whether the module is a relative import starting
                    from the root package dir

        Example:
            >>> lite_parser_obj = LiteParser()
            >>> module_name = "kedro_project_package.pipelines.reporting.nodes"
            >>> project_file_paths = {
            ...     Path("/src/kedro_project_package/pipelines/reporting/nodes.py")
            ... }
            >>> lite_parser_obj._is_relative_import(module_name, project_file_paths)
            True
        """
        # "a.b.c" -> "a/b/c" (using the platform path separator)
        relative_module_path = str(Path(*module_name.split(".")))

        # NOTE(review): this is a plain substring test on the path string, so a
        # module name that merely appears inside a longer directory or file
        # name also counts as a match - confirm whether that is intended.
        return any(
            relative_module_path in str(project_file_path)
            for project_file_path in project_file_paths
        )

    def _is_project_module(
        self, module_name: str, project_file_paths: Union[Set[Path], None]
    ) -> bool:
        """Checks if a ``from`` import targets the project itself

        Args:
            module_name (str): The dotted module name of the import
            project_file_paths (Union[Set[Path], None]): A set of project
                    file paths, only used when no package name is set

        Returns:
            Whether the import should be ignored because it points to
            project code rather than an external dependency
        """
        if self._package_name:
            # Compare on dotted-name boundaries so that e.g. the package
            # "demo" matches "demo.pipelines" and "src.demo.pipelines" but
            # not an unrelated external module such as "demo_utils".
            return f".{self._package_name}." in f".{module_name}."

        # dev or standalone mode
        return bool(project_file_paths) and self._is_relative_import(
            module_name, project_file_paths
        )

    def _populate_missing_dependencies(
        self, module_name: str, missing_dependencies: Set[str]
    ) -> None:
        """Helper to populate missing dependencies

        Args:
            module_name (str): The module name to check if it is importable
            missing_dependencies (Set[str]): A set of missing dependencies,
                    updated in place
        """
        for module_name_part in self._get_module_parts(module_name):
            # Already recorded: skip the comparatively expensive lookup
            if module_name_part in missing_dependencies:
                continue
            if not self._is_module_importable(module_name_part):
                missing_dependencies.add(module_name_part)

    def _get_unresolved_imports(
        self, file_path: Path, project_file_paths: Union[Set[Path], None] = None
    ) -> Set[str]:
        """Parse the file using AST and return any missing dependencies
        in the current file

        Args:
            file_path (Path): The file path to parse
            project_file_paths Union[Set[Path], None]: A set of project file paths

        Returns:
            A set of missing dependencies
        """
        missing_dependencies: Set[str] = set()

        # Read the file and parse its content using ast
        with open(file_path, "r", encoding="utf-8") as file:
            parsed_content_ast_node: ast.Module = ast.parse(file.read())

        # Explore each node in the AST tree
        for node in ast.walk(parsed_content_ast_node):
            # Handling dependencies that starts with "import "
            # Example: import logging
            # Corresponding AST node will be:
            #   Import(names=[alias(name='logging')])
            if isinstance(node, ast.Import):
                for alias in node.names:
                    self._populate_missing_dependencies(
                        alias.name, missing_dependencies
                    )
                continue

            # Handling dependencies that starts with "from "
            # Example: from typing import Dict, Union
            # Corresponding AST node will be:
            #   ImportFrom(module='typing', names=[alias(name='Dict'),
            #                                      alias(name='Union')],
            #              level=0)
            if not isinstance(node, ast.ImportFrom):
                continue

            # Ignore relative imports like "from . import a" (no module) and
            # "from ..nodes import func_test" (level > 0)
            if not node.module or node.level != 0:
                continue

            # Ignore imports within the package
            # Example: "from demo_project.pipelines.reporting import test"
            if self._is_project_module(node.module, project_file_paths):
                continue

            # absolute modules in the env
            # Examples:
            #   from typing import Dict, Union
            #   from sklearn.linear_model import LinearRegression
            self._populate_missing_dependencies(node.module, missing_dependencies)

        return missing_dependencies

    def create_mock_modules(self, unresolved_imports: Set[str]) -> Dict[str, MagicMock]:
        """Creates mock modules for unresolved imports

        Args:
            unresolved_imports (Set[str]): A set of unresolved imports

        Returns:
            A dictionary of mocked modules for the unresolved imports
        """
        return {
            unresolved_import: MagicMock() for unresolved_import in unresolved_imports
        }

    def _collect_unresolved_imports(
        self, target_path: Path
    ) -> Union[Dict[str, Set[str]], None]:
        """Finds the unresolved imports of the file(s) in the target path

        Args:
            target_path (Path): The path to parse file(s)

        Returns:
            A dictionary of file(s) in the target path and a set of module
            names, or None when the target path does not exist
        """
        if not target_path.exists():
            logger.warning("Path `%s` does not exist", str(target_path))
            return None

        unresolved_imports: Dict[str, Set[str]] = {}

        # Load .gitignore patterns
        gitignore_spec = load_gitignore_patterns(target_path)

        if target_path.is_file():
            if is_file_ignored(target_path):
                return unresolved_imports

            try:
                missing_dependencies = self._get_unresolved_imports(target_path)
                if missing_dependencies:
                    unresolved_imports[str(target_path)] = missing_dependencies
            except Exception as exc:  # noqa: BLE001
                logger.error(
                    "An error occurred in LiteParser while mocking dependencies in %s : %s",
                    target_path,
                    exc,
                )
            return unresolved_imports

        # handling directories
        _project_file_paths = {
            file_path
            for file_path in target_path.rglob("*.py")
            if not is_file_ignored(file_path, target_path, gitignore_spec)
        }

        for file_path in _project_file_paths:
            # Ensure the package name is in the file path, we are only
            # mocking the dependencies inside the package
            if self._package_name and self._package_name not in file_path.parts:
                continue

            try:
                missing_dependencies = self._get_unresolved_imports(
                    file_path, _project_file_paths
                )
                if missing_dependencies:
                    unresolved_imports[str(file_path)] = missing_dependencies
            except Exception as exc:  # noqa: BLE001 # pragma: no cover
                # A single unreadable/unparsable file must not abort the scan
                logger.warning(
                    "An issue occurred in LiteParser while mocking dependencies in %s : %s",
                    file_path,
                    exc,
                )

        return unresolved_imports

    def parse(self, target_path: Path) -> Union[Dict[str, Set[str]], None]:
        """Parses the file(s) in the target path and returns
        any unresolved imports for all the dependency errors
        as a dictionary of file(s) in the target path and a set of module names

        Args:
            target_path (Path): The path to parse file(s)

        Returns:
            A dictionary of file(s) in the target path and a set of module names,
            or None when the target path does not exist
        """
        spinner = Spinner("Processing your project files...")
        spinner.start()
        try:
            return self._collect_unresolved_imports(target_path)
        finally:
            # Every start() needs a matching stop(), including on the early
            # returns (missing path, single file) and on unexpected errors.
            spinner.stop()