# HG changeset patch # User Laman # Date 2024-06-29 20:53:59 # Node ID f0f9655e62ee1e688546dcbc3f8a51e3d88a65af # Parent f0b3b1bd87f4c39edd2db562c6d5e81b86e778be added a method for finding a string distinguishing between two regexps diff --git a/regexp.py b/regexp.py --- a/regexp.py +++ b/regexp.py @@ -332,10 +332,31 @@ class RegexpDFA: queue.append(sj) rules.extend(index[sj] for sj in row) - end_states = {index[si] for si in self.end_states} + end_states = {index[si] for si in self.end_states if si in index} return RegexpDFA(rules, end_states, self.alphabet_index) + def find_distinguishing_string(self, r): + if self.rules == r.rules and self.end_states == r.end_states: + return None + + product = self._build_product_automaton(r) + + n = len(product.alphabet_index) + reverse_alphabet_index = {v: k for (k, v) in product.alphabet_index.items()} + queue = deque([(0, "")]) + visited = {0} + while queue: + (state, acc) = queue.popleft() + if state in product.end_states: + return acc + for (i, target) in enumerate(product.rules[state*n:(state+1)*n]): + if target not in visited: + queue.append((target, acc+reverse_alphabet_index[i])) + visited.add(target) + + assert False + def _find_equivalent_states(self): n = len(self.alphabet_index) state_list = list(range(len(self.rules) // n)) @@ -380,6 +401,24 @@ class RegexpDFA: return (rules, end_states) + def _build_product_automaton(self, r): + n = len(self.alphabet_index) + m = len(r.rules) // n + k = len(self.rules) // n + + rules = [] + end_states = set() + + for s1 in range(k): + row1 = self.rules[s1*n:(s1+1)*n] + for s2 in range(m): + row2 = r.rules[s2*n:(s2+1)*n] + rules.extend([x*m + y for (x, y) in zip(row1, row2)]) + if (s1 in self.end_states) != (s2 in r.end_states): + end_states.add(s1*m + s2) + + return RegexpDFA(rules, end_states, self.alphabet_index).reduce().normalize() + def test(): tests = ["", "a", "ab", "aabb", "abab", "abcd", "abcbcdbcd"] @@ -408,6 +447,11 @@ if __name__ == "__main__": match_parser.add_argument("string") match_parser.set_defaults(name="match") + compare_parser = subparsers.add_parser("compare") + compare_parser.add_argument("pattern1") + compare_parser.add_argument("pattern2") + compare_parser.set_defaults(name="compare") + args = parser.parse_args() if args.name == "test": @@ -419,5 +463,9 @@ if __name__ == "__main__": print("Failed to parse the regexp:") print(e) print(r.match(args.string)) + elif args.name == "compare": + r1 = RegexpDFA.create(args.pattern1).reduce().normalize() + r2 = RegexpDFA.create(args.pattern2).reduce().normalize() + print(r1.find_distinguishing_string(r2)) else: - parser.print_help() \ No newline at end of file + parser.print_help() diff --git a/src/main.rs b/src/main.rs --- a/src/main.rs +++ b/src/main.rs @@ -31,7 +31,22 @@ fn main() { } }; println!("{}", r.eval(args[3].to_string())); - } + }, + "compare" => { + let r1 = match Regexp::new(&args[2].to_string()) { + Ok(r) => r.determinize().reduce().normalize(), + Err(e) => { + panic!("ERROR: {e}"); + } + }; + let r2 = match Regexp::new(&args[3].to_string()) { + Ok(r) => r.determinize().reduce().normalize(), + Err(e) => { + panic!("ERROR: {e}"); + } + }; + println!("{}", r1.find_distinguishing_string(&r2).unwrap_or("None".to_string())); + }, s => { println!("An unknown command: \"{s}\". Use \"match\" or \"test\".") } diff --git a/src/regexp.rs b/src/regexp.rs --- a/src/regexp.rs +++ b/src/regexp.rs @@ -201,6 +201,33 @@ impl RegexpDFA { return RegexpDFA{rules, end_states, alphabet_index: self.alphabet_index.clone()}; } + pub fn find_distinguishing_string(&self, other: &RegexpDFA) -> Option { + if self.rules == other.rules && self.end_states == other.end_states { + return None; + } + + let product = self.build_product_automaton(other); + let n = product.alphabet_index.len(); + let reverse_alphabet_index: HashMap = HashMap::from_iter(product.alphabet_index.iter().map(|(&k, &v)| (v, k))); + + let mut queue = VecDeque::from([(0, "".to_string())]); + let mut visited = HashSet::new(); + while !queue.is_empty() { + let (state, acc) = queue.pop_front().unwrap(); + if product.end_states.contains(&state) { + return Some(acc); + } + for (i, target) in product.rules[state*n..(state+1)*n].iter().enumerate() { + if !visited.contains(target) { + queue.push_back((*target, acc.clone()+&String::from(reverse_alphabet_index[&i]))); + visited.insert(target); + } + } + } + + panic!(); + } + fn find_equivalent_states(&self) -> Vec<(usize, usize)> { let n = self.alphabet_index.len(); let state_vec: Vec = (0..self.rules.len()/n).collect(); @@ -256,4 +283,26 @@ impl RegexpDFA { return RegexpDFA{rules, end_states, alphabet_index: self.alphabet_index.clone()}; } + + fn build_product_automaton(&self, other: &RegexpDFA) -> RegexpDFA { + let n = self.alphabet_index.len(); + let m = other.rules.len() / n; + let k = self.rules.len() / n; + + let mut rules = vec![]; + let mut end_states = HashSet::new(); + + for s1 in 0..k { + let row1 = &self.rules[s1*n..(s1+1)*n]; + for s2 in 0..m { + let row2 = &other.rules[s2*n..(s2+1)*n]; + rules.extend(row1.iter().zip(row2.iter()).map(|(x, y)| x*m + y)); + if (self.end_states.contains(&s1)) ^ (other.end_states.contains(&s2)) { + end_states.insert(s1*m + s2); + } + } + } + + return RegexpDFA{rules, end_states, alphabet_index: self.alphabet_index.clone()}.reduce().normalize(); + } }