@@ -34,103 +34,143 @@ class Symbol(Token):
def __str__(self):
return self.value
class Asterisk(Token):
is_skippable = True
def __init__(self, content: Token):
self.content = content
def list_first(self):
yield from self.content.list_first()
def list_last(self):
yield from self.content.list_last()
def list_neighbours(self):
yield from self.content.list_neighbours()
for x in self.list_last():
for y in self.list_first():
yield (x, y)
return str(self.content) + "*"
class Chain(Token):
def __init__(self, content: list):
for token in self.content:
yield from token.list_first()
if not token.is_skippable:
break
for token in reversed(self.content):
yield from token.list_last()
previous = []
for t in previous:
for x in t.list_last():
for y in token.list_first():
yield from token.list_neighbours()
if token.is_skippable:
previous.append(token)
else:
previous = [token]
return "(" + "".join(str(x) for x in self.content) + ")"
def find_closing_parenthesis(pattern, k):
counter = 0
for (i, c) in enumerate(pattern[k:]):
if c == "(":
counter += 1
elif c == ")":
counter -= 1
if counter == 0:
return k+i
return None
def parse(pattern, offset=0):
res = []
i = 0
while i < len(pattern):
c = pattern[i]
j = find_closing_parenthesis(pattern, i)
inner_content = parse(pattern[i+1:j], offset+i+1)
res.append(inner_content)
i = j+1
elif c == "*":
token = res.pop()
res.append(Asterisk(token))
i += 1
res.append(Symbol(i+offset, c))
return Chain(res)
class Regexp:
def __init__(self, s):
(self.rules, self.end_states) = self._parse(s)
def _parse(self, s):
r = parse(s)
rules = dict()
for i in r.list_first():
c = s[i]
key = (-1, c)
if key not in rules:
rules[key] = set()
rules[key].add(i)
for (i, j) in r.list_neighbours():
c = s[j]
key = (i, c)
rules[key].add(j)
end_states = set(r.list_last())
return rules, end_states
def match(self, s):
current = {-1}
for c in s:
new_state = set()
for st in current:
key = (st, c)
if key in self.rules:
new_state.update(self.rules[key])
current = new_state
return any(st in self.end_states for st in current)
if __name__ == "__main__":
tests = ["a", "ab", "aabb", "abab", "abcd", "abcbcdbcd"]
for pattern in ["a*b*", "(ab)*", "a((bc)*d)*"]:
print("#", pattern)
p = parse(pattern)
print(p, list(p.list_first()), list(p.list_last()))
print(list(p.list_neighbours()))
for t in tests:
print(t, Regexp(pattern).match(t))
print()
Status change: