Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ignore parsing hidden files on kedro viz --lite #2271

Merged
merged 9 commits into from
Feb 27, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions RELEASE.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ Please follow the established format:

- Add ESM bundle for Kedro-Viz. (#2268)
- Fix `%run_viz` using old process in jupyter notebook. (#2267)
- Skip parsing of hidden files on `kedro viz --lite`. (#2271)
- Make Kedro-Viz compatible with the new `KedroDataCatalog`. (#2274)

## Community contributions
Expand Down
12 changes: 3 additions & 9 deletions package/kedro_viz/autoreload_file_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
from pathspec import GitIgnoreSpec
from watchfiles import Change, DefaultFilter

from kedro_viz.utils import load_gitignore_patterns

logger = logging.getLogger(__name__)


Expand All @@ -35,15 +37,7 @@ def __init__(self, base_path: Optional[Path] = None):
super().__init__()

# Load .gitignore patterns
gitignore_path = self.cwd / ".gitignore"
try:
with open(gitignore_path, "r", encoding="utf-8") as gitignore_file:
ignore_patterns = gitignore_file.read().splitlines()
self.gitignore_spec: Optional[GitIgnoreSpec] = GitIgnoreSpec.from_lines(
"gitwildmatch", ignore_patterns
)
except FileNotFoundError:
self.gitignore_spec = None
self.gitignore_spec = load_gitignore_patterns(self.cwd)

def __call__(self, change: Change, path: str) -> bool:
"""
Expand Down
21 changes: 18 additions & 3 deletions package/kedro_viz/integrations/kedro/lite_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
from typing import Dict, List, Set, Union
from unittest.mock import MagicMock

from kedro_viz.utils import Spinner, is_file_ignored, load_gitignore_patterns

logger = logging.getLogger(__name__)


Expand Down Expand Up @@ -232,14 +234,22 @@ def parse(self, target_path: Path) -> Union[Dict[str, Set[str]], None]:
Returns:
A dictionary of file(s) in the target path and a set of module names
"""
spinner = Spinner("Processing your project files...")
spinner.start()

if not target_path.exists():
logger.warning("Path `%s` does not exist", str(target_path))
return None

unresolved_imports: Dict[str, Set[str]] = {}

# Load .gitignore patterns
gitignore_spec = load_gitignore_patterns(target_path)

if target_path.is_file():
if is_file_ignored(target_path):
return unresolved_imports

try:
missing_dependencies = self._get_unresolved_imports(target_path)
if len(missing_dependencies) > 0:
Expand All @@ -254,7 +264,11 @@ def parse(self, target_path: Path) -> Union[Dict[str, Set[str]], None]:
return unresolved_imports

# handling directories
_project_file_paths = set(target_path.rglob("*.py"))
_project_file_paths = set(
file_path
for file_path in target_path.rglob("*.py")
if not is_file_ignored(file_path, target_path, gitignore_spec)
)

for file_path in _project_file_paths:
try:
Expand All @@ -270,11 +284,12 @@ def parse(self, target_path: Path) -> Union[Dict[str, Set[str]], None]:
if len(missing_dependencies) > 0:
unresolved_imports[str(file_path)] = missing_dependencies
except Exception as exc: # noqa: BLE001 # pragma: no cover
logger.error(
"An error occurred in LiteParser while mocking dependencies in %s : %s",
logger.warning(
"An issue occurred in LiteParser while mocking dependencies in %s : %s",
file_path,
exc,
)
continue

spinner.stop()
return unresolved_imports
35 changes: 34 additions & 1 deletion package/kedro_viz/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,10 @@
import threading
import time
from itertools import cycle
from typing import Any, Tuple
from pathlib import Path
from typing import Any, Optional, Tuple

from pathspec import GitIgnoreSpec

TRANSCODING_SEPARATOR = "@"

Expand Down Expand Up @@ -63,6 +66,36 @@ def is_dataset_param(dataset_name: str) -> bool:
return dataset_name.lower().startswith("params:") or dataset_name == "parameters"


def load_gitignore_patterns(project_path: Path) -> Optional[GitIgnoreSpec]:
    """Build a gitignore matcher from the ``.gitignore`` under ``project_path``.

    Args:
        project_path: Path expected to contain a ``.gitignore`` file.

    Returns:
        A ``GitIgnoreSpec`` compiled from the lines of ``.gitignore``, or
        ``None`` when no such file can be found under ``project_path``.
    """
    gitignore_path = project_path / ".gitignore"

    # Open the file directly rather than testing ``exists()`` first: the file
    # may disappear between the check and the read. ``NotADirectoryError``
    # covers the case where ``project_path`` itself is a regular file, which
    # the previous ``exists()`` check also reported as "no .gitignore".
    try:
        with open(gitignore_path, "r", encoding="utf-8") as gitignore_file:
            ignore_patterns = gitignore_file.read().splitlines()
    except (FileNotFoundError, NotADirectoryError):
        return None

    return GitIgnoreSpec.from_lines("gitwildmatch", ignore_patterns)


def is_file_ignored(
    file_path: Path,
    project_path: Optional[Path] = None,
    gitignore_spec: Optional[GitIgnoreSpec] = None,
) -> bool:
    """Return True if ``file_path`` should be ignored.

    A path is ignored when its final component starts with a dot, or when
    both ``project_path`` and ``gitignore_spec`` are supplied and the spec
    matches the path taken relative to ``project_path``.

    Args:
        file_path: Path to test.
        project_path: Root that the gitignore patterns are relative to.
        gitignore_spec: Compiled gitignore patterns, if any.

    Returns:
        True when the path is hidden or matched by ``gitignore_spec``,
        False otherwise.
    """
    # Only the last path component is inspected here, so a regular file that
    # lives inside a hidden directory is not caught by this check.
    if file_path.name.startswith("."):
        return True

    if not (gitignore_spec and project_path):
        return False

    try:
        relative_path = file_path.relative_to(project_path)
    except ValueError:
        # ``file_path`` is not located under ``project_path``, so patterns
        # anchored at ``project_path`` cannot apply to it.
        return False

    # gitignore patterns are written with forward slashes.
    return bool(gitignore_spec.match_file(relative_path.as_posix()))


def merge_dicts(dict_one: dict[str, Any], dict_two: dict[str, Any]) -> dict[str, Any]:
"""Utility to merge two dictionaries"""
import copy
Expand Down
27 changes: 26 additions & 1 deletion package/tests/test_integrations/test_lite_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,17 +11,34 @@ def sample_project_path(tmp_path):
# Create a sample directory structure
package_dir = tmp_path / "mock_spaceflights"
package_dir.mkdir()

venv_dir = package_dir / ".venv"
venv_dir.mkdir()

(package_dir / "__init__.py").touch()
(package_dir / "__init__.py").write_text(
"from mock_spaceflights import data_processing\n"
"from mock_spaceflights.data_processing import create_metrics"
)
(package_dir / ".test_hidden.py").write_text(
"import os\n"
"import nonexistentmodule\n"
"from . import test\n"
"from typing import Dict"
)
(venv_dir / "test_venv.py").write_text(
"import os\n"
"import nonexistentmodule\n"
"from . import test\n"
"from typing import Dict"
)
(package_dir / "data_processing.py").write_text(
"import os\n"
"import nonexistentmodule\n"
"from . import test\n"
"from typing import Dict"
)
(package_dir / ".gitignore").write_text("venv\n" ".venv")
return tmp_path


Expand Down Expand Up @@ -173,6 +190,12 @@ def test_parse_non_existent_path(self, lite_parser):
assert not lite_parser.parse(Path("non/existent/path"))
assert not lite_parser.parse(Path("non/existent/path/file.py"))

def test_parse_hidden_file_path(self, lite_parser, sample_project_path):
    """Parsing a dot-prefixed file directly returns an empty dict."""
    hidden_module = Path(sample_project_path / "mock_spaceflights/.test_hidden.py")

    assert lite_parser.parse(hidden_module) == {}

def test_file_parse(self, lite_parser, sample_project_path):
file_path = Path(sample_project_path / "mock_spaceflights/data_processing.py")
unresolved_imports = lite_parser.parse(file_path)
Expand All @@ -199,7 +222,9 @@ def test_parse_logs_error_on_exception(self, lite_parser, tmp_path, caplog):
)

def test_directory_parse(self, lite_parser, sample_project_path):
unresolved_imports = lite_parser.parse(sample_project_path)
unresolved_imports = lite_parser.parse(
sample_project_path / "mock_spaceflights"
)
expected_file_path = Path(
sample_project_path / "mock_spaceflights/data_processing.py"
)
Expand Down