@@ -127,10 +127,50 @@ def parse(pattern, offset=0):
return Chain(res)
class Regexp:
def __init__(self, s):
(self.rules, self.end_states) = self._parse(s)
def _parse(self, s):
r = parse(s)
rules = dict()
for i in r.list_first():
c = s[i]
key = (-1, c)
if key not in rules:
rules[key] = set()
rules[key].add(i)
for (i, j) in r.list_neighbours():
c = s[j]
key = (i, c)
rules[key].add(j)
end_states = set(r.list_last())
return rules, end_states
def match(self, s):
current = {-1}
for c in s:
new_state = set()
for st in current:
key = (st, c)
if key in self.rules:
new_state.update(self.rules[key])
current = new_state
return any(st in self.end_states for st in current)
if __name__ == "__main__":
tests = ["a", "ab", "aabb", "abab", "abcd", "abcbcdbcd"]
for pattern in ["a*b*", "(ab)*", "a((bc)*d)*"]:
print("#", pattern)
p = parse(pattern)
print(p, list(p.list_first()), list(p.list_last()))
print(list(p.list_neighbours()))
for t in tests:
print(t, Regexp(pattern).match(t))
print()
Status change: