Skip to content

Commit

Permalink
Optimize performance of character class predicates
Browse files Browse the repository at this point in the history
Note that because letters are contiguous blocks in ASCII,
we can compare with lower and upper boundaries,
which is more efficient than checking containment in a set or string.
  • Loading branch information
Cito committed Dec 12, 2021
1 parent 9f35a7a commit ad8d62c
Show file tree
Hide file tree
Showing 5 changed files with 149 additions and 23 deletions.
3 changes: 2 additions & 1 deletion .coveragerc
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@ source = src
omit =
*/conftest.py
*/test_*_fuzz.py
*/assert_valid_name.py
*/cached_property.py
*/character_classes.py
*/is_iterable.py
*/assert_valid_name.py

[report]
exclude_lines =
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ a query language for APIs created by Facebook.

The current version 3.1.6 of GraphQL-core is up-to-date with GraphQL.js version 15.5.1.

An extensive test suite with nearly 2300 unit tests and 100% coverage comprises a
An extensive test suite with over 2300 unit tests and 100% coverage comprises a
replication of the complete test suite of GraphQL.js, making sure this port is
reliable and compatible with GraphQL.js.

Expand Down
75 changes: 55 additions & 20 deletions src/graphql/language/character_classes.py
Original file line number Diff line number Diff line change
@@ -1,33 +1,68 @@
__all__ = ["is_digit", "is_letter", "is_name_start", "is_name_continue"]

try:
"string".isascii()
except AttributeError: # Python < 3.7

def is_digit(char: str) -> bool:
"""Check whether char is a digit
def is_digit(char: str) -> bool:
"""Check whether char is a digit
For internal use by the lexer only.
"""
return "0" <= char <= "9"
For internal use by the lexer only.
"""
return "0" <= char <= "9"

def is_letter(char: str) -> bool:
"""Check whether char is a plain ASCII letter
def is_letter(char: str) -> bool:
    """Tell whether char lies in one of the ASCII ranges a-z or A-Z

    For internal use by the lexer only.
    """
    # Letters form two contiguous ranges, so boundary comparisons suffice.
    if "a" <= char <= "z":
        return True
    return "A" <= char <= "Z"

For internal use by the lexer only.
"""
return "A" <= char <= "Z" or "a" <= char <= "z"
def is_name_start(char: str) -> bool:
    """Tell whether char may open a GraphQL name (a-z, A-Z or underscore)

    For internal use by the lexer only.
    """
    if char == "_":
        return True
    lower = "a" <= char <= "z"
    upper = "A" <= char <= "Z"
    return lower or upper

def is_name_start(char: str) -> bool:
"""Check whether char is allowed at the beginning of a GraphQL name
def is_name_continue(char: str) -> bool:
"""Check whether char is allowed in the continuation of a GraphQL name
For internal use by the lexer only.
"""
return is_letter(char) or char == "_"
For internal use by the lexer only.
"""
return (
"a" <= char <= "z"
or "A" <= char <= "Z"
or "0" <= char <= "9"
or char == "_"
)

else:

def is_name_continue(char: str) -> bool:
"""Check whether char is allowed in the continuation of a GraphQL name
def is_digit(char: str) -> bool:
"""Check whether char is a digit
For internal use by the lexer only.
"""
return is_letter(char) or is_digit(char) or char == "_"
For internal use by the lexer only.
"""
return char.isascii() and char.isdigit()

def is_letter(char: str) -> bool:
    """Tell whether char is an alphabetic character from the ASCII range

    For internal use by the lexer only.
    """
    # str.isalpha() alone also accepts non-ASCII letters, so rule those out.
    if not char.isascii():
        return False
    return char.isalpha()

def is_name_start(char: str) -> bool:
    """Tell whether char may open a GraphQL name (ASCII letter or underscore)

    For internal use by the lexer only.
    """
    # str.isalpha() alone also accepts non-ASCII letters, so rule those out.
    if not char.isascii():
        return False
    if char == "_":
        return True
    return char.isalpha()

def is_name_continue(char: str) -> bool:
    """Tell whether char may follow the first character of a GraphQL name

    Accepted are ASCII letters, ASCII digits and the underscore.

    For internal use by the lexer only.
    """
    # str.isalnum() alone also accepts non-ASCII characters, so rule those out.
    if not char.isascii():
        return False
    if char == "_":
        return True
    return char.isalnum()
2 changes: 1 addition & 1 deletion src/graphql/language/lexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -524,7 +524,7 @@ def read_hex_digit(char: str) -> int:
return ord(char) - 48
elif "A" <= char <= "F":
return ord(char) - 55
elif "a" <= char <= "f": # a-f
elif "a" <= char <= "f":
return ord(char) - 87
return -1

Expand Down
90 changes: 90 additions & 0 deletions tests/language/test_character_classes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
from string import ascii_letters as letters, digits, punctuation

from graphql.language.character_classes import (
is_digit,
is_letter,
is_name_start,
is_name_continue,
)

# Characters outside the ASCII range, used by the "rejects_non_ascii" checks.
# The second character must be FULLWIDTH LOW LINE (U+FF3F) and not the ASCII
# underscore: an ASCII "_" is a valid name character, so it would make the
# is_name_start and is_name_continue rejection checks fail.
non_ascii = "¯＿±¹²³½£ºµÄäÖöØø×〇᧐〸αΑωΩ"


def describe_digit():
    """Checks for the is_digit predicate."""

    def accepts_digits():
        """Every ASCII digit is accepted."""
        assert all(map(is_digit, digits))

    def rejects_letters():
        """No ASCII letter is accepted."""
        assert not any(map(is_digit, letters))

    def rejects_underscore():
        """The underscore is not a digit."""
        assert not is_digit("_")

    def rejects_punctuation():
        """No ASCII punctuation character is accepted."""
        assert not any(map(is_digit, punctuation))

    def rejects_non_ascii():
        """No character of the non_ascii sample is accepted."""
        assert not any(map(is_digit, non_ascii))

    def rejects_empty_string():
        """The empty string is rejected."""
        assert not is_digit("")


def describe_letter():
    """Checks for the is_letter predicate."""

    def accepts_letters():
        """Every ASCII letter is accepted."""
        assert all(map(is_letter, letters))

    def rejects_digits():
        """No ASCII digit is accepted."""
        assert not any(map(is_letter, digits))

    def rejects_underscore():
        """The underscore is not a letter."""
        assert not is_letter("_")

    def rejects_punctuation():
        """No ASCII punctuation character is accepted."""
        assert not any(map(is_letter, punctuation))

    def rejects_non_ascii():
        """No character of the non_ascii sample is accepted."""
        assert not any(map(is_letter, non_ascii))

    def rejects_empty_string():
        """The empty string is rejected."""
        assert not is_letter("")


def describe_name_start():
    """Checks for the is_name_start predicate."""

    def accepts_letters():
        """Every ASCII letter is accepted."""
        assert all(map(is_name_start, letters))

    def accepts_underscore():
        """The underscore is accepted."""
        assert is_name_start("_")

    def rejects_digits():
        """No ASCII digit is accepted."""
        assert not any(map(is_name_start, digits))

    def rejects_punctuation():
        """No punctuation character other than the underscore is accepted."""
        assert not any(map(is_name_start, punctuation.replace("_", "")))

    def rejects_non_ascii():
        """No character of the non_ascii sample is accepted."""
        assert not any(map(is_name_start, non_ascii))

    def rejects_empty_string():
        """The empty string is rejected."""
        assert not is_name_start("")


def describe_name_continue():
    """Checks for the is_name_continue predicate."""

    def accepts_letters():
        """Every ASCII letter is accepted."""
        assert all(map(is_name_continue, letters))

    def accepts_digits():
        """Every ASCII digit is accepted."""
        assert all(map(is_name_continue, digits))

    def accepts_underscore():
        """The underscore is accepted."""
        assert is_name_continue("_")

    def rejects_punctuation():
        """No punctuation character other than the underscore is accepted."""
        assert not any(map(is_name_continue, punctuation.replace("_", "")))

    def rejects_non_ascii():
        """No character of the non_ascii sample is accepted."""
        assert not any(map(is_name_continue, non_ascii))

    def rejects_empty_string():
        """The empty string is rejected."""
        assert not is_name_continue("")

0 comments on commit ad8d62c

Please sign in to comment.