Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

requirement, resolvelib: Hash package contents to verify requirement file hashes #462

Merged
merged 28 commits into from
Jan 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
62be1cb
requirement, resolvelib: Hash package contents to verify requirement …
tetsuo-cpp Jan 4, 2023
5680586
resolvelib: Use the `RequirementHashes` type
tetsuo-cpp Jan 4, 2023
c6a1621
requirement: Propagate error messages out of requirement source
tetsuo-cpp Jan 5, 2023
76648e7
resolvelib: Use project as candidate name so we're able to match
tetsuo-cpp Jan 5, 2023
698d897
_dependency_source: Create a `RequirementHashes` class
tetsuo-cpp Jan 5, 2023
630e203
resolvelib: Create more specific exception types for hash errors
tetsuo-cpp Jan 5, 2023
ec75e00
_dependency_source: Bring hash types into __init__
tetsuo-cpp Jan 5, 2023
31d7cff
resolvelib: Construct a provider every time
tetsuo-cpp Jan 15, 2023
d3d76d9
test: Get most tests passing
tetsuo-cpp Jan 16, 2023
0d2f415
test: Fix canonical name mismatch test
tetsuo-cpp Jan 16, 2023
f30487a
resolvelib: Pass in project name separately
tetsuo-cpp Jan 16, 2023
5b4f301
Revert "test: Fix canonical name mismatch test"
tetsuo-cpp Jan 16, 2023
13d4f13
test: Fix candidate constructor call
tetsuo-cpp Jan 16, 2023
c2c2cc1
test: Small PyPI provider refactor and fix tests
tetsuo-cpp Jan 16, 2023
b7d55d2
test: Mock the failure cases to ensure that they aren't failing due to
tetsuo-cpp Jan 16, 2023
437b208
test: Fill out some test coverage
tetsuo-cpp Jan 16, 2023
67fd643
lint: Fix type checking
tetsuo-cpp Jan 16, 2023
911cd0d
resolvelib: Check for non-guaranteed or unsupported hashes
tetsuo-cpp Jan 16, 2023
9eb6ecc
_dependency_source: Document `RequirementHashes` methods
tetsuo-cpp Jan 16, 2023
718a915
Merge branch 'main' into alex/third-party-index-hashing
tetsuo-cpp Jan 16, 2023
dabc42b
test: Add test for using an unsupported hash algorithm
tetsuo-cpp Jan 16, 2023
06cc9d1
test: Add unit tests for `RequirementHashes`
tetsuo-cpp Jan 16, 2023
37b128c
Merge branch 'main' into alex/third-party-index-hashing
tetsuo-cpp Jan 19, 2023
80043af
Merge branch 'main' into alex/third-party-index-hashing
woodruffw Jan 19, 2023
0701244
requirement: Remove assert
tetsuo-cpp Jan 25, 2023
aae5e80
resolvelib: Add comment explaining why we're keeping the session in
tetsuo-cpp Jan 25, 2023
152b57a
Merge remote-tracking branch 'origin/main' into alex/third-party-inde…
tetsuo-cpp Jan 25, 2023
6035fa9
CHANGELOG: Add changelog entry
tetsuo-cpp Jan 25, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,11 @@ All versions prior to 0.0.9 are untracked.

## [Unreleased]

### Fixed

* Fixed an issue where hash checking would fail when using third-party indices
([#462](https://github.com/pypa/pip-audit/pull/462))

## [2.4.14]

### Fixed
Expand Down
8 changes: 8 additions & 0 deletions pip_audit/_dependency_source/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@
DependencyResolverError,
DependencySource,
DependencySourceError,
HashMismatchError,
HashMissingError,
RequirementHashes,
UnsupportedHashAlgorithm,
)
from .pip import PipSource, PipSourceError
from .pyproject import PyProjectSource
Expand All @@ -21,9 +25,13 @@
"DependencyResolverError",
"DependencySource",
"DependencySourceError",
"HashMismatchError",
"HashMissingError",
"PipSource",
"PipSourceError",
"PyProjectSource",
"RequirementHashes",
"RequirementSource",
"ResolveLibResolver",
"UnsupportedHashAlgorithm",
]
104 changes: 101 additions & 3 deletions pip_audit/_dependency_source/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,102 @@ class DependencyFixError(Exception):
pass


class HashMissingError(Exception):
    """
    Indicates that `RequirementHashes` has no recorded hashes for a
    resolved dependency.
    """


class HashMismatchError(Exception):
    """
    Indicates that none of the hashes recorded in `RequirementHashes`
    matched the hash computed for a given requirement.
    """


class UnsupportedHashAlgorithm(Exception):
    """
    Indicates that a `DependencyResolver` was asked to verify against a
    hash algorithm (from the supplied `RequirementHashes`) that it does
    not support.
    """


class RequirementHashes:
    """
    Represents the hashes contained within a requirements file.

    Internally maps each requirement name to a mapping of hash algorithm
    names to the list of acceptable digests for that algorithm.
    """

    def __init__(self) -> None:
        """
        Create a new, empty `RequirementHashes`.
        """
        self.mapping: dict[str, dict[str, list[str]]] = {}

    def add_req(self, req_name: str, hash_options_mapping: dict[str, list[str]]) -> None:
        """
        Add a set of hashes for a given requirement.

        `req_name` is the name of the requirement to check.

        `hash_options_mapping` is a dictionary mapping from algorithm names to a list of potential
        hashes. Requirements files are allowed to specify multiple hashes of the same algorithm to
        account for different distribution types.
        """
        self.mapping[req_name] = hash_options_mapping

    def __bool__(self) -> bool:
        """
        Check whether any requirements have been added.
        """
        return bool(self.mapping)

    def __contains__(self, req_name: str) -> bool:
        """
        Check whether a given requirement exists in the set of hashes.

        `req_name` is the name of the requirement to check.
        """
        return req_name in self.mapping

    def match(self, req_name: str, dist_hashes: dict[str, str]) -> None:
        """
        Check whether any of the provided hashes match the hashes calculated by the dependency
        resolver.

        `req_name` is the name of the requirement to check.

        `dist_hashes` is a mapping of hash algorithms to calculated hashes.

        Raises `HashMissingError` if no hashes were recorded for `req_name`,
        and `HashMismatchError` if none of the recorded hashes match.
        """
        if req_name not in self.mapping:
            raise HashMissingError(f"No hashes found for {req_name}")

        for algorithm, hashes in self.mapping[req_name].items():
            # Use `.get` rather than indexing: if the resolver didn't compute
            # a digest for this algorithm, treat it as a non-match and fall
            # through to `HashMismatchError` instead of raising an opaque
            # `KeyError`.
            calculated = dist_hashes.get(algorithm)
            if calculated is not None and calculated in hashes:
                return
        raise HashMismatchError(
            f"Mismatching hash for {req_name}, none of the calculated hashes ({dist_hashes}) "
            f"matched expected ({self.mapping[req_name]})"
        )

    def supported_algorithms(self, req_name: str) -> list[str]:
        """
        Returns a list of hash algorithms that are supported for a given requirement.

        `req_name` is the name of the requirement to check.

        Returns an empty list if the requirement is unknown.
        """
        if req_name not in self.mapping:
            return []
        return list(self.mapping[req_name].keys())


class DependencyResolver(ABC):
"""
Represents an abstract resolver of Python dependencies that takes a single
Expand All @@ -69,14 +165,16 @@ class DependencyResolver(ABC):
"""

@abstractmethod
def resolve(self, req: Requirement) -> list[Dependency]: # pragma: no cover
def resolve(
self, req: Requirement, req_hashes: RequirementHashes
) -> list[Dependency]: # pragma: no cover
"""
Resolve a single `Requirement` into a list of `Dependency` instances.
"""
raise NotImplementedError

def resolve_all(
self, reqs: Iterator[Requirement]
self, reqs: Iterator[Requirement], req_hashes: RequirementHashes
) -> Iterator[tuple[Requirement, list[Dependency]]]:
"""
Resolve a collection of `Requirement`s into their respective `Dependency` sets.
Expand All @@ -85,7 +183,7 @@ def resolve_all(
a more optimized one.
"""
for req in reqs:
yield (req, self.resolve(req))
yield (req, self.resolve(req, req_hashes))


class DependencyResolverError(Exception):
Expand Down
4 changes: 3 additions & 1 deletion pip_audit/_dependency_source/pyproject.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
DependencyResolverError,
DependencySource,
DependencySourceError,
RequirementHashes,
)
from pip_audit._fix import ResolvedFixVersion
from pip_audit._service import Dependency, ResolvedDependency, SkippedDependency
Expand Down Expand Up @@ -74,8 +75,9 @@ def collect(self) -> Iterator[Dependency]:
return

reqs: list[Requirement] = [Requirement(dep) for dep in deps]
req_hashes = RequirementHashes()
try:
for _, deps in self.resolver.resolve_all(iter(reqs)):
for _, deps in self.resolver.resolve_all(iter(reqs), req_hashes):
for dep in deps:
# Don't allow duplicate dependencies to be returned
if dep in collected:
Expand Down
49 changes: 23 additions & 26 deletions pip_audit/_dependency_source/requirement.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
DependencyResolverError,
DependencySource,
DependencySourceError,
RequirementHashes,
)
from pip_audit._fix import ResolvedFixVersion
from pip_audit._service import Dependency
Expand Down Expand Up @@ -222,7 +223,7 @@ def _fix_file(self, filename: Path, fix_version: ResolvedFixVersion) -> None:
)
print(f"{fix_version.dep.canonical_name}=={fix_version.version}", file=f)
except DependencyResolverError as dre:
raise RequirementFixError from dre
raise RequirementFixError(str(dre)) from dre

def _recover_files(self, tmp_files: list[IO[str]]) -> None:
for (filename, tmp_file) in zip(self._filenames, tmp_files):
Expand All @@ -240,18 +241,11 @@ def _recover_files(self, tmp_files: list[IO[str]]) -> None:
def _collect_preresolved_deps(
self,
reqs: Iterator[InstallRequirement],
require_hashes: bool = False,
) -> Iterator[tuple[Requirement, Dependency]]:
"""
Collect pre-resolved (pinned) dependencies, optionally enforcing a
hash requirement policy.
Collect pre-resolved (pinned) dependencies.
"""
for req in reqs:
if require_hashes and not req.hash_options:
raise RequirementSourceError(
f"requirement {req.name} does not contain a hash {str(req)}"
)

# NOTE: URL dependencies cannot be pinned, so skipping them
# makes sense (under the same principle of skipping dependencies
# that can't be found on PyPI). This is also consistent with
Expand All @@ -268,13 +262,11 @@ def _collect_preresolved_deps(
pinned_specifier = PINNED_SPECIFIER_RE.match(str(req.specifier))
if pinned_specifier is None:
raise RequirementSourceError(
f"requirement {req.name} is not pinned: {str(req)}"
f"requirement {req.name} is not pinned to an exact version: {str(req)}"
)

yield req.req, ResolvedDependency(
req.name,
Version(pinned_specifier.group("version")),
self._build_hash_options_mapping(req.hash_options),
req.name, Version(pinned_specifier.group("version"))
)

def _build_hash_options_mapping(self, hash_options: list[str]) -> dict[str, list[str]]:
Expand Down Expand Up @@ -306,26 +298,31 @@ def _collect_cached_deps(

new_cached_deps_for_file: dict[Requirement, set[Dependency]] = dict()

# There are three cases where we skip dependency resolution:
#
# 1. The user has explicitly specified `--require-hashes`.
# 2. One or more parsed requirements has hashes specified, enabling
# hash checking for all requirements.
# 3. The user has explicitly specified `--no-deps`.
require_hashes = self._require_hashes or any(req.hash_options for req in reqs)
skip_deps = require_hashes or self._no_deps
if skip_deps:
for req, dep in self._collect_preresolved_deps(
iter(reqs), require_hashes=require_hashes
):
# Skip dependency resolution if the user has specified `--no-deps`
if self._no_deps:
for req, dep in self._collect_preresolved_deps(iter(reqs)):
if req not in new_cached_deps_for_file:
new_cached_deps_for_file[req] = set()
new_cached_deps_for_file[req].add(dep)
yield req, dep
else:
require_hashes = self._require_hashes or any(req.hash_options for req in reqs)
req_hashes = RequirementHashes()

# If we're requiring hashes, enforce that all requirements are hashed
if require_hashes:
for hash_req in reqs:
if not hash_req.hash_options:
raise RequirementSourceError(
f"requirement {hash_req.name} does not contain a hash {str(hash_req)}"
)
req_hashes.add_req(
hash_req.name, self._build_hash_options_mapping(hash_req.hash_options)
)

# Invoke the dependency resolver to turn requirements into dependencies
req_values: list[Requirement] = [r.req for r in reqs]
for req, resolved_deps in self._resolver.resolve_all(iter(req_values)):
for req, resolved_deps in self._resolver.resolve_all(iter(req_values), req_hashes):
for dep in resolved_deps:
if req not in new_cached_deps_for_file:
new_cached_deps_for_file[req] = set()
Expand Down
Loading