forked from attardi/wikiextractor
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfindBalanced.py
44 lines (43 loc) · 1.42 KB
/
findBalanced.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
#!/usr/bin/python
# -*- coding: utf-8 -*-
import re
def findBalanced(text, openDelim, closeDelim):
"""
Assuming that text contains a properly balanced expression using
:param openDelim: as opening delimiters and
:param closeDelim: as closing delimiters.
:return: an iterator producing pairs (start, end) of start and end
positions in text containing a balanced expression.
"""
openPat = '|'.join([re.escape(x) for x in openDelim])
# patter for delimiters expected after each opening delimiter
afterPat = { o:re.compile(openPat+'|'+c, re.DOTALL) for o,c in zip(openDelim, closeDelim)}
stack = []
start = 0
cur = 0
end = len(text)
startSet = False
startPat = re.compile(openPat)
nextPat = startPat
while True:
next = nextPat.search(text, cur)
if not next:
return
if not startSet:
start = next.start()
startSet = True
delim = next.group(0)
if delim in openDelim:
stack.append(delim)
nextPat = afterPat[delim]
else:
opening = stack.pop()
# assert opening == openDelim[closeDelim.index(next.group(0))]
if stack:
nextPat = afterPat[stack[-1]]
else:
yield start, next.end()
nextPat = startPat
start = next.end()
startSet = False
cur = next.end()