diff --git a/regexp.py b/regexp.py --- a/regexp.py +++ b/regexp.py @@ -127,10 +127,50 @@ def parse(pattern, offset=0): return Chain(res) +class Regexp: + def __init__(self, s): + (self.rules, self.end_states) = self._parse(s) + + def _parse(self, s): + r = parse(s) + rules = dict() + + for i in r.list_first(): + c = s[i] + key = (-1, c) + if key not in rules: + rules[key] = set() + rules[key].add(i) + + for (i, j) in r.list_neighbours(): + c = s[j] + key = (i, c) + if key not in rules: + rules[key] = set() + rules[key].add(j) + + end_states = set(r.list_last()) + + return rules, end_states + + def match(self, s): + current = {-1} + + for c in s: + new_state = set() + for st in current: + key = (st, c) + if key in self.rules: + new_state.update(self.rules[key]) + current = new_state + + return any(st in self.end_states for st in current) + + if __name__ == "__main__": + tests = ["a", "ab", "aabb", "abab", "abcd", "abcbcdbcd"] for pattern in ["a*b*", "(ab)*", "a((bc)*d)*"]: print("#", pattern) - p = parse(pattern) - print(p, list(p.list_first()), list(p.list_last())) - print(list(p.list_neighbours())) + for t in tests: + print(t, Regexp(pattern).match(t)) print()