From 061c686069808aa07c7505137ef4ba3aec38e62e Mon Sep 17 00:00:00 2001 From: pushfoo <36696816+pushfoo@users.noreply.github.com> Date: Wed, 8 May 2024 00:38:23 -0400 Subject: [PATCH] Eliminate brittle branch parsing logic * Split run helpers into two functions * Rename run_and_regex to run_with_regex * Add strip_run_stdout * Use git status to get branch info instead of getting refs in log * Give up on branch inference when HEAD detached since we can use RTD's env vars --- docs/conf.py | 77 ++++++++++++++++++++++++++-------------------------- 1 file changed, 39 insertions(+), 38 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index d95c388..f4c8971 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -103,24 +103,37 @@ def attempt_to( # Some light typing to help things along T = TypeVar('T') R = TypeVar('R') -# Used to process the return values of the ugly function below +# Used to process the return values of functions Converter = Callable[[T], R] -def run_and_regex( +def strip_run_stdout(raw_output: str) -> str: + """Strip annoying cruft from current subprocess runs""" + return raw_output.strip(" \n\"") + + +def run_with_post( + command: str | Iterable[str], + converter: Converter = strip_run_stdout +) -> R: + raw = subprocess.run( + command, check=True, # Auto-raise on non-zero error codes + # Open stdout in text mode and decode the underlying stream as utf-8 + capture_output=True, encoding='utf-8', text=True) + converted = converter(raw.stdout) + return converted + + +def run_with_regex( command: str | Iterable[str], named_group_extractor: Pattern, # MUST use named groups! 
) -> dict[str, str]: - """Run console progams & extract data via regex""" + """Run console programs & extract data via regex""" if "(?P<" not in named_group_extractor.pattern: raise ValueError("This pattern MUST use named groups!") # Run & attempt to match with the extractor pattern - result = subprocess.run( - command, check=True, # Auto-raise on non-zero error codes - # Open stdout in text mode and decode the underlying stream as utf-8 - capture_output=True, encoding='utf-8', text=True) - cleaned = result.stdout.strip(" \n\"") + cleaned = run_with_post(command) info(f"Got cleaned info {cleaned!r}") match = named_group_extractor.match(cleaned) @@ -166,8 +179,6 @@ def convert( COMMIT_SIMPLE_REGEX = re.compile(r""" (?P<isodate>[^\s]+) # Any non-whitespace for date format info [ ]+ # Space between data fields -(?P<branch>\([^)]+\)) # Anything in parens (e.g. '(main -> HEAD)') -[ ]+ # Space between data fields (?P<full_hash>[a-fA-F0-9]+) # A hash output """, re.VERBOSE) # The full_hash is used to: @@ -182,28 +193,6 @@ def convert( # * Builds of historic / maintenance versions for comparison -# The raw branch output of git varies for unclear reasons. Figuring out -# why doesn't seem worth the time when more important things need to be -# fixed & finished. Trying patterns in series is faster than regex golf. 
-def extract_branch_name( - raw: str, - regexes: Iterable[Pattern] = tuple( - re.compile(p) for p in ( - # The readthedocs build runner shows git's branch like this - r'\(HEAD, origin\/(?P<branch>\w+), origin\/HEAD, (?P&branch)\)', - # Local dev machines seem to favor this form - r'\(HEAD -> (?P<branch>[a-zA-Z0-9_\-]+)(, \w+)*\)' - ) - ) -) -> str | None: - for pattern in regexes: - debug(f"Trying pattern r'{pattern.pattern}'") - match = pattern.match(raw) - if match: - return match.group('branch') - return None - - ######################################################################## # Start of Sphinx Configuration Actions & Data # ######################################################################## @@ -239,18 +228,30 @@ def extract_branch_name( 'https://pillow.readthedocs.io/en/stable', None)) -# -- Read git HEAD & pyproject.toml to start configuring the build -- +# -- Read git state & pyproject.toml to start configuring the build -- with attempt_to("read git HEAD"): - git_head = run_and_regex( - ['git', 'log', '-1', '--format="%aI %d %H"'], + git_head = run_with_regex( + ['git', 'log', '-1', '--format="%aI %H"'], COMMIT_SIMPLE_REGEX) - git_head_datetime: datetime = convert(git_head, 'isodate', datetime.fromisoformat) - branch = convert(git_head, 'branch', extract_branch_name) full_commit_hash = git_head['full_hash'] short_commit_hash = full_commit_hash[:8] - info(f"Detected branch {branch=}, {full_commit_hash=}") + + # -- Try to parse branch, but give up if in detached head state -- + raw_branch = run_with_post( + ['git', 'status', '-s', '-b'], + converter=lambda s: s.strip("\"").split("\n")[0]) + info(f"Got raw status 1st line: status={raw_branch!r}") + + # No need to guess since we can use the READTHEDOCS(_*)? 
env vars + if "no branch" in raw_branch: + branch = None + _branch_reported = 'detached HEAD state' + else: + branch = raw_branch[2:].strip().split('...')[0] # '## main...origin/main' -> 'main'; '...' keeps dotted branch names intact + _branch_reported = f"{branch=}" + info(f"Detected {_branch_reported}, {full_commit_hash=}") with attempt_to("read pyproject.toml for Sphinx config pre-reqs"):