diff --git a/regexp.py b/regexp.py --- a/regexp.py +++ b/regexp.py @@ -92,6 +92,10 @@ class Chain(Token): else: previous = [token] + @property + def is_skippable(self): + return all(x.is_skippable for x in self.content) + def __str__(self): return "(" + "".join(str(x) for x in self.content) + ")" @@ -159,6 +163,8 @@ class Regexp: rules[key].add(j) end_states = set(r.list_last()) + if r.is_skippable: + end_states.add(-1) return rules, end_states @@ -177,7 +183,7 @@ class Regexp: if __name__ == "__main__": - tests = ["a", "ab", "aabb", "abab", "abcd", "abcbcdbcd"] + tests = ["", "a", "ab", "aabb", "abab", "abcd", "abcbcdbcd"] for pattern in ["a*b*", "a+b+", "(ab)*", "(ab)+", "a((bc)*d)*"]: print("#", pattern) r = Regexp(pattern)