Skip to content

Commit

Permalink
Minor bug fixes and some regex preprocessing of input notations
Browse files Browse the repository at this point in the history
  • Loading branch information
Old-Shatterhand committed Feb 21, 2025
1 parent 5821183 commit c2d7e37
Show file tree
Hide file tree
Showing 5 changed files with 17 additions and 10 deletions.
6 changes: 3 additions & 3 deletions glyles/glycans/mono/reactor.py
Original file line number Diff line number Diff line change
Expand Up @@ -490,9 +490,9 @@ def set_fg(self, c_or_o, pos, bond_elem, name):
return True

# in case of a carbon chain
elif (len(name) >= 5 and name[1] == "C" and name[2:4].isnumeric()) or \
(len(name) >= 6 and name[1:3] in "aCiC" and name[3:5].isnumeric()) or \
(len(name) >= 7 and name[1:4] == "aiC" and name[4:6].isnumeric()):
elif len(name) > 1 and (name[1] == "C" and name[2:4].isnumeric()) or \
(name[1:3] in "aCiC" and name[3:5].isnumeric()) or \
(name[1:4] == "aiC" and name[4:6].isnumeric()):
if bond_elem == "P":
bond_elem = "OP(=O)(O)"
self.side_chains[pos][c_or_o] += bond_elem + self.parse_poly_carbon(name)
Expand Down
10 changes: 8 additions & 2 deletions glyles/glycans/poly/glycan.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import logging
from collections import Counter
import re

import networkx as nx
from glyles.iupac.IUPACParser import IUPACParser
Expand Down Expand Up @@ -98,6 +99,10 @@ def recipe_equality(
return False


def prepare(iupac):
    """
    Preprocess a glycan notation string by making the deoxy position of ``dTal`` explicit.

    Every occurrence of ``dTal`` that is not directly preceded by a digit is rewritten to ``6dTal``. Occurrences
    that already carry a leading digit (e.g. ``6dTal`` or ``4dTal``) are left untouched.

    Args:
        iupac: Notation string to preprocess

    Returns:
        A new string with the substitutions applied; the input itself is not modified
    """
    # the negative lookbehind skips names that already start with a position digit
    unnumbered_dtal = re.compile(r'(?<!\d)dTal')
    return unnumbered_dtal.sub('6dTal', iupac)


class Glycan:
"""
This class is like an interaction with the Parser for the IUPAC representation of the glycan. The grammar for
Expand Down Expand Up @@ -370,8 +375,9 @@ def parse(self):
# parse the remaining structure description following the grammar, also add the dummy characters
if not isinstance(self.iupac, str):
raise ParseError("Only string input can be parsed: " + str(self.iupac))
self.iupac = prepare('#' + self.iupac + '#')

stream = InputStream(data='#' + self.iupac + '#')
stream = InputStream(data=self.iupac)
lexer = IUPACLexer(stream)
token = CommonTokenStream(lexer)
parser = IUPACParser(token)
Expand Down Expand Up @@ -411,4 +417,4 @@ def parse(self):


if __name__ == "__main__":
print(Glycan("Fuc(a1-2)[GalNAc(a1-3)]Gal(b1-4)GlcNAc(b1-3)[Fuc(a1-2)[GalNAc(a1-3)]Gal(b1-4)GlcNAc(b1-6)]Gal(b1-3)[GlcNAc(a1-4)Gal(b1-4)GlcNAc6S(b1-6)]GalNAc", tree_only=True).summary())
print(Glycan("dTal").get_smiles())
3 changes: 2 additions & 1 deletion glyles/glycans/poly/gwb_glycan.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from glyles.glycans.utils import ParseError
from glyles.gwb.GWBLexer import GWBLexer
from glyles.gwb.GWBParser import GWBParser
from glyles.glycans.poly.glycan import Glycan
from glyles.glycans.poly.glycan import Glycan, prepare


def graph_to_string_int(graph: nx.DiGraph, node2label: callable) -> str:
Expand Down Expand Up @@ -78,6 +78,7 @@ def parse(self):
if "$" in self.iupac:
self.iupac = self.iupac[:self.iupac.index("$")]
self.iupac += "$"
# prepare() returns a new string (str is immutable), so the result has to be stored back;
# calling it without the assignment silently drops the preprocessing
self.iupac = prepare(self.iupac)
stream = InputStream(data=self.iupac)
lexer = GWBLexer(stream)
token = CommonTokenStream(lexer)
Expand Down
2 changes: 1 addition & 1 deletion tests/test_gwb.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def test_gwb_iupac(data):
@pytest.mark.fuzzy
@pytest.mark.parametrize("gwb", fuzzy_data)
def test_gwb_fuzzy(gwb):
if any(x in gwb for x in {"dTal", "End--??1P$", "End--??1S$", "[", "]"}): # skip specific stuff and repeats
if any(x in gwb for x in {"[", "]"}): # skip repeats
pytest.skip()
iupac = GWBGlycan(gwb).to_iupac()
assert iupac is not None
Expand Down
6 changes: 3 additions & 3 deletions tests/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,12 +78,12 @@ def test_extreme_2(mode):
def test_grammar_main(iupac, mode):
assert Glycan(reduce_notation(iupac, mode), tree_only=True).get_tree() is not None

def test_parse_1(self):
def test_parse_1():
    """Parse the bare name "Man" and validate the root of the resulting tree with check_initial."""
    tree = Glycan("Man").get_tree()

    # no anomeric configuration is given in the input, hence Config.UNDEF is expected
    check_initial(tree, "Man", 0, Config.UNDEF, lactole=Lactole.PYRANOSE)

def test_parse_1_2(self):
def test_parse_1_2():
g = Glycan("Manpa").get_tree()

check_initial(g, "Man", 0, Config.ALPHA, lactole=Lactole.PYRANOSE)
Expand Down Expand Up @@ -460,7 +460,7 @@ def test_parse_ternary_branching_2(mode):
check_child(g, id_child_1, id_child_22, "Glc", "(a1-4)", 0, Lactole.PYRANOSE)
check_child(g, id_child_1, id_child_23, "Gal", "(a1-6)", 0, Lactole.PYRANOSE)

def test_parsing_error(self, caplog):
def test_parsing_error(caplog):
iupac = "Alt(a1-2)[Glc(a1-4)][Gal(a1-6)]Gul(a1-4)M*#$s'\\d ;«]as;an" # Invalid IUPAC string!
g = Glycan(iupac).get_tree()
assert g is None
Expand Down

0 comments on commit c2d7e37

Please sign in to comment.