diff --git a/regexp.py b/regexp.py --- a/regexp.py +++ b/regexp.py @@ -271,9 +271,16 @@ class Regexp: class RegexpDFA: - def __init__(self, pattern): + def __init__(self, rules, end_states): + self.rules = rules + self.end_states = end_states + + @classmethod + def create(cls, pattern): r = Regexp(pattern) - (self.rules, self.end_states) = r.determinize() + (rules, end_states) = r.determinize() + + return cls(rules, end_states) def match(self, s): st = (-1,) @@ -287,13 +294,56 @@ class RegexpDFA: return st in self.end_states + def reduce(self): + equivalents = self._find_equivalent_states() + (rules, end_states) = self._collapse_states(equivalents) + + return RegexpDFA(rules, end_states) + + def _find_equivalent_states(self): + state_set = [(-2,), (-1,)] + sorted(set(self.rules.values())) + alphabet = {c for (st, c) in self.rules.keys()} + equivalents = {(s1, s2) for (i, s1) in enumerate(state_set) for s2 in state_set[i+1:]} + + for (s1, s2) in equivalents.copy(): + if (s1 in self.end_states and s2 not in self.end_states) or (s1 not in self.end_states and s2 in self.end_states): + equivalents.remove((s1, s2)) + + ctrl = True + while ctrl: + ctrl = False + for (s1, s2) in equivalents.copy(): + for c in alphabet: + t1 = self.rules.get((s1, c), (-2,)) + t2 = self.rules.get((s2, c), (-2,)) + key = (min(t1, t2), max(t1, t2)) + if t1 != t2 and key not in equivalents: + equivalents.remove((s1, s2)) + ctrl = True + break + + return equivalents + + def _collapse_states(self, equivalents): + rules = self.rules.items() + end_states = self.end_states.copy() + + for (s1, s2) in sorted(equivalents): + rules = map( + lambda item: (item[0], s1 if item[1] == s2 else item[1]), + filter(lambda item: item[0][0] != s2, rules) + ) + end_states.discard(s2) + + return (dict(rules), end_states) + if __name__ == "__main__": tests = ["", "a", "ab", "aabb", "abab", "abcd", "abcbcdbcd"] - for pattern in ["*", "((a)", "a)"]: + for pattern in ["a(b|c)", "a*b*", "(ab)*", "a((bc)*d)*"]: print("#", pattern) try: - r = RegexpDFA(pattern) + r = RegexpDFA.create(pattern).reduce() except ParsingError as e: print("Failed to parse the regexp:") print(e)