Re-indent the contents of docstrings

This uses the PEP 257 algorithm for determining docstring indentation, and adjusts the contents of docstrings to match their new indentation after `black` is applied. A small normalization is needed in `assert_equivalent` because the two trees are technically no longer precisely equivalent -- the values of some constant strings have changed. When comparing two ASTs, whitespace after newlines within constant strings is therefore folded into a single space.
psf · Oct 10, 2019 · 3c9f709 · 3c9f709
1 parent d9e71a7
commit 3c9f709
Show file tree

Hide file tree

Showing 3 changed files with 121 additions and 1 deletion.
diff --git a/black.py b/black.py
@@ -1805,6 +1805,18 @@ def visit_STANDALONE_COMMENT(self, leaf: Leaf) -> Iterator[Line]:
             yield from self.line()
         yield from self.visit_default(leaf)
 
+    def visit_STRING(self, leaf: Leaf) -> Iterator[Line]:
+        """Re-indent a multiline docstring leaf, then visit it like any other leaf.
+
+        Leaves that fail the docstring check below are handed to
+        `visit_default` unchanged.
+        """
+        # Check if it's a docstring: the string's parent must be a simple_stmt
+        # directly preceded by INDENT and then NEWLINE, with nothing before
+        # that NEWLINE among the siblings.
+        if prev_siblings_are(
+            leaf.parent, [None, token.NEWLINE, token.INDENT, syms.simple_stmt]
+        ) and is_multiline_string(leaf):
+            # Four spaces per depth level of the line currently being built.
+            prefix = "    " * self.current_line.depth
+            # NOTE(review): the [3:-3] / [0:3] slices assume the literal starts
+            # and ends with a bare three-character quote.  A prefixed literal
+            # such as r"""...""" would be sliced at the wrong offsets -- confirm
+            # whether prefixed strings can reach this branch.
+            docstring = fix_docstring(leaf.value[3:-3], prefix)
+            leaf.value = leaf.value[0:3] + docstring + leaf.value[-3:]
+            normalize_string_quotes(leaf)
+
+        yield from self.visit_default(leaf)
+
     def __attrs_post_init__(self) -> None:
         """You are in a twisty little maze of passages."""
         v = self.visit_stmt
@@ -2086,6 +2098,22 @@ def preceding_leaf(node: Optional[LN]) -> Optional[Leaf]:
     return None
 
 
+def prev_siblings_are(node: Optional[LN], tokens: List[Optional[NodeType]]) -> bool:
+    """Return whether `node` and its previous siblings match the types in `tokens`.
+
+    The type of `node` is matched against the last element of `tokens`, the
+    type of its previous sibling against the element before that, and so on.
+    `None` can be used as the first element to declare that the start of the
+    list is anchored at the start of the parent's children, i.e. that no
+    further previous sibling exists.  An empty `tokens` list matches anything.
+    """
+    # Every expected type has been consumed: the whole pattern matched.
+    if not tokens:
+        return True
+    # A `None` entry matches only when the siblings are exhausted; any
+    # elements that precede it in `tokens` are never examined.
+    if tokens[-1] is None:
+        return node is None
+    # A concrete type is still expected but there is no node left to match.
+    if not node:
+        return False
+    if node.type != tokens[-1]:
+        return False
+    # Step one sibling to the left and match the remainder of the pattern.
+    return prev_siblings_are(node.prev_sibling, tokens[:-1])
+
+
 def child_towards(ancestor: Node, descendant: LN) -> Optional[LN]:
     """Return the child of `ancestor` that contains `descendant`."""
     node: Optional[LN] = descendant
@@ -3668,7 +3696,17 @@ def _v(node: Union[ast.AST, ast3.AST, ast27.AST], depth: int = 0) -> Iterator[st
                 yield from _v(value, depth + 2)
 
             else:
-                yield f"{'  ' * (depth+2)}{value!r},  # {value.__class__.__name__}"
+                # Constant strings may be indented across newlines, if they are
+                # docstrings; fold spaces after newlines when comparing
+                if (
+                    isinstance(node, ast.Constant)
+                    and field == "value"
+                    and isinstance(value, str)
+                ):
+                    normalized = re.sub(r"\n[ \t]+", "\n ", value)
+                else:
+                    normalized = value
+                yield f"{'  ' * (depth+2)}{normalized!r},  # {value.__class__.__name__}"
 
         yield f"{'  ' * depth})  # /{node.__class__.__name__}"
 
@@ -4054,5 +4092,32 @@ def patched_main() -> None:
     main()
 
 
+def fix_docstring(docstring: str, prefix: str) -> str:
+    """Return `docstring` re-indented so that continuation lines start with `prefix`.
+
+    `docstring` is the text between the quotes.  The first line is stripped
+    of surrounding whitespace.  For the remaining lines, the smallest
+    indentation found on a non-blank line is removed, trailing whitespace is
+    stripped, and `prefix` is prepended.  Blank lines become empty strings,
+    except the final line, which becomes `prefix` alone.  An empty
+    `docstring` yields an empty string.
+    """
+    # https://www.python.org/dev/peps/pep-0257/#handling-docstring-indentation
+    if not docstring:
+        return ""
+    # Convert tabs to spaces (following the normal Python rules)
+    # and split into a list of lines:
+    # NOTE(review): `splitlines()` drops a trailing line break, so a docstring
+    # ending in "\n" comes back without it -- confirm this is intended.
+    lines = docstring.expandtabs().splitlines()
+    # Determine minimum indentation (first line doesn't count):
+    indent = sys.maxsize
+    for line in lines[1:]:
+        stripped = line.lstrip()
+        if stripped:
+            indent = min(indent, len(line) - len(stripped))
+    # Remove indentation (first line is special):
+    trimmed = [lines[0].strip()]
+    # `indent` is still `sys.maxsize` when every line after the first is blank
+    # (or there is none); in that case only the stripped first line is returned.
+    if indent < sys.maxsize:
+        # `enumerate` below starts at 0 over `lines[1:]`, so the final line of
+        # the docstring has index `len(lines) - 2`.
+        last_line_idx = len(lines) - 2
+        for i, line in enumerate(lines[1:]):
+            stripped_line = line[indent:].rstrip()
+            # The final line receives the prefix even when blank; presumably
+            # this keeps closing quotes on their own line aligned -- TODO confirm.
+            if stripped_line or i == last_line_idx:
+                trimmed.append(prefix + stripped_line)
+            else:
+                trimmed.append("")
+    # Return a single string:
+    return "\n".join(trimmed)
+
+
 if __name__ == "__main__":
     patched_main()
diff --git a/tests/data/docstring.py b/tests/data/docstring.py
@@ -0,0 +1,47 @@
+def foo():
+  """This is a docstring with
+  some lines of text here"""
+  return
+
+
+def bar():
+  '''This is another docstring
+  with more lines of text'''
+  return
+
+
+def baz():
+  '''This is a string with some
+  embedded "quotes"'''
+  return
+
+
+def troz():
+	'''Indentation with tabs
+	is just as OK'''
+	return
+
+# output
+
+def foo():
+    """This is a docstring with
+    some lines of text here"""
+    return
+
+
+def bar():
+    """This is another docstring
+    with more lines of text"""
+    return
+
+
+def baz():
+    '''This is a string with some
+    embedded "quotes"'''
+    return
+
+
+def troz():
+    """Indentation with tabs
+    is just as OK"""
+    return
diff --git a/tests/test_black.py b/tests/test_black.py
@@ -365,6 +365,14 @@ def test_string_quotes(self) -> None:
         black.assert_equivalent(source, not_normalized)
         black.assert_stable(source, not_normalized, mode=mode)
 
+    @patch("black.dump_to_file", dump_to_stderr)
+    def test_docstring(self) -> None:
+        # Load the "docstring" data file as a (source, expected output) pair.
+        source, expected = read_data("docstring")
+        # `fs` is defined outside this hunk; presumably it formats a source
+        # string with black -- TODO confirm.
+        actual = fs(source)
+        self.assertFormatEqual(expected, actual)
+        # Run black's own equivalence and stability checks on the result.
+        black.assert_equivalent(source, actual)
+        black.assert_stable(source, actual, black.FileMode())
+
     @patch("black.dump_to_file", dump_to_stderr)
     def test_slices(self) -> None:
         source, expected = read_data("slices")