diff --git a/regexp.py b/regexp.py --- a/regexp.py +++ b/regexp.py @@ -332,10 +332,31 @@ class RegexpDFA: queue.append(sj) rules.extend(index[sj] for sj in row) - end_states = {index[si] for si in self.end_states} + end_states = {index[si] for si in self.end_states if si in index} return RegexpDFA(rules, end_states, self.alphabet_index) + def find_distinguishing_string(self, r): + if self.rules == r.rules and self.end_states == r.end_states: + return None + + product = self._build_product_automaton(r) + + n = len(product.alphabet_index) + reverse_alphabet_index = {v: k for (k, v) in product.alphabet_index.items()} + queue = deque([(0, "")]) + visited = {0} + while queue: + (state, acc) = queue.popleft() + if state in product.end_states: + return acc + for (i, target) in enumerate(product.rules[state*n:(state+1)*n]): + if target not in visited: + queue.append((target, acc+reverse_alphabet_index[i])) + visited.add(target) + + assert False + def _find_equivalent_states(self): n = len(self.alphabet_index) state_list = list(range(len(self.rules) // n)) @@ -380,6 +401,24 @@ class RegexpDFA: return (rules, end_states) + def _build_product_automaton(self, r): + n = len(self.alphabet_index) + m = len(r.rules) // n + k = len(self.rules) // n + + rules = [] + end_states = set() + + for s1 in range(k): + row1 = self.rules[s1*n:(s1+1)*n] + for s2 in range(m): + row2 = r.rules[s2*n:(s2+1)*n] + rules.extend([x*m + y for (x, y) in zip(row1, row2)]) + if (s1 in self.end_states) != (s2 in r.end_states): + end_states.add(s1*m + s2) + + return RegexpDFA(rules, end_states, self.alphabet_index).reduce().normalize() + def test(): tests = ["", "a", "ab", "aabb", "abab", "abcd", "abcbcdbcd"] @@ -408,6 +447,11 @@ if __name__ == "__main__": match_parser.add_argument("string") match_parser.set_defaults(name="match") + compare_parser = subparsers.add_parser("compare") + compare_parser.add_argument("pattern1") + compare_parser.add_argument("pattern2") + compare_parser.set_defaults(name="compare") + args = parser.parse_args() if args.name == "test": @@ -419,5 +463,9 @@ if __name__ == "__main__": print("Failed to parse the regexp:") print(e) print(r.match(args.string)) + elif args.name == "compare": + r1 = RegexpDFA.create(args.pattern1).reduce().normalize() + r2 = RegexpDFA.create(args.pattern2).reduce().normalize() + print(r1.find_distinguishing_string(r2)) else: - parser.print_help() \ No newline at end of file + parser.print_help()