Changeset - f0f9655e62ee
[Not reviewed]
default
0 3 0
Laman - 10 months ago 2024-06-29 20:53:59

added a method for finding a string distinguishing between two regexps
3 files changed with 115 insertions and 3 deletions:
0 comments (0 inline, 0 general)
regexp.py
Show inline comments
 
@@ -332,10 +332,31 @@ class RegexpDFA:
 
					queue.append(sj)
 
			rules.extend(index[sj] for sj in row)
 
		
 
		end_states = {index[si] for si in self.end_states}
 
		end_states = {index[si] for si in self.end_states if si in index}
 

	
 
		return RegexpDFA(rules, end_states, self.alphabet_index)
 

	
 
	def find_distinguishing_string(self, r):
 
		if self.rules == r.rules and self.end_states == r.end_states:
 
			return None
 

	
 
		product = self._build_product_automaton(r)
 

	
 
		n = len(product.alphabet_index)
 
		reverse_alphabet_index = {v: k for (k, v) in product.alphabet_index.items()}
 
		queue = deque([(0, "")])
 
		visited = {0}
 
		while queue:
 
			(state, acc) = queue.popleft()
 
			if state in product.end_states:
 
				return acc
 
			for (i, target) in enumerate(product.rules[state*n:(state+1)*n]):
 
				if target not in visited:
 
					queue.append((target, acc+reverse_alphabet_index[i]))
 
					visited.add(target)
 
		
 
		assert False
 

	
 
	def _find_equivalent_states(self):
 
		n = len(self.alphabet_index)
 
		state_list = list(range(len(self.rules) // n))
 
@@ -380,6 +401,24 @@ class RegexpDFA:
 
		
 
		return (rules, end_states)
 

	
 
	def _build_product_automaton(self, r):
 
		n = len(self.alphabet_index)
 
		m = len(r.rules) // n
 
		k = len(self.rules) // n
 

	
 
		rules = []
 
		end_states = set()
 

	
 
		for s1 in range(k):
 
			row1 = self.rules[s1*n:(s1+1)*n]
 
			for s2 in range(m):
 
				row2 = r.rules[s2*n:(s2+1)*n]
 
				rules.extend([x*m + y for (x, y) in zip(row1, row2)])
 
				if (s1 in self.end_states) != (s2 in r.end_states):
 
					end_states.add(s1*m + s2)
 

	
 
		return RegexpDFA(rules, end_states, self.alphabet_index).reduce().normalize()
 

	
 

	
 
def test():
 
	tests = ["", "a", "ab", "aabb", "abab", "abcd", "abcbcdbcd"]
 
@@ -408,6 +447,11 @@ if __name__ == "__main__":
 
	match_parser.add_argument("string")
 
	match_parser.set_defaults(name="match")
 

	
 
	compare_parser = subparsers.add_parser("compare")
 
	compare_parser.add_argument("pattern1")
 
	compare_parser.add_argument("pattern2")
 
	compare_parser.set_defaults(name="compare")
 

	
 
	args = parser.parse_args()
 

	
 
	if args.name == "test":
 
@@ -419,5 +463,9 @@ if __name__ == "__main__":
 
			print("Failed to parse the regexp:")
 
			print(e)
 
		print(r.match(args.string))
 
	elif args.name == "compare":
 
		r1 = RegexpDFA.create(args.pattern1).reduce().normalize()
 
		r2 = RegexpDFA.create(args.pattern2).reduce().normalize()
 
		print(r1.find_distinguishing_string(r2))
 
	else:
 
		parser.print_help()
 
\ No newline at end of file
 
		parser.print_help()
src/main.rs
Show inline comments
 
@@ -31,7 +31,22 @@ fn main() {
 
				}
 
			};
 
			println!("{}", r.eval(args[3].to_string()));
 
		}
 
		},
 
		"compare" => {
 
			let r1 = match Regexp::new(&args[2].to_string()) {
 
				Ok(r) => r.determinize().reduce().normalize(),
 
				Err(e) => {
 
					panic!("ERROR: {e}");
 
				}
 
			};
 
			let r2 = match Regexp::new(&args[3].to_string()) {
 
				Ok(r) => r.determinize().reduce().normalize(),
 
				Err(e) => {
 
					panic!("ERROR: {e}");
 
				}
 
			};
 
			println!("{}", r1.find_distinguishing_string(&r2).unwrap_or("None".to_string()));
 
		},
 
		s => {
 
			println!("An unknown command: \"{s}\". Use \"match\" or \"test\".")
 
		}
src/regexp.rs
Show inline comments
 
@@ -201,6 +201,33 @@ impl RegexpDFA {
 
		return RegexpDFA{rules, end_states, alphabet_index: self.alphabet_index.clone()};
 
	}
 

	
 
	pub fn find_distinguishing_string(&self, other: &RegexpDFA) -> Option<String> {
 
		if self.rules == other.rules && self.end_states == other.end_states {
 
			return None;
 
		}
 

	
 
		let product = self.build_product_automaton(other);
 
		let n = product.alphabet_index.len();
 
		let reverse_alphabet_index: HashMap<usize, char> = HashMap::from_iter(product.alphabet_index.iter().map(|(&k, &v)| (v, k)));
 

	
 
		let mut queue = VecDeque::from([(0, "".to_string())]);
 
		let mut visited = HashSet::new();
 
		while !queue.is_empty() {
 
			let (state, acc) = queue.pop_front().unwrap();
 
			if product.end_states.contains(&state) {
 
				return Some(acc);
 
			}
 
			for (i, target) in product.rules[state*n..(state+1)*n].iter().enumerate() {
 
				if !visited.contains(target) {
 
					queue.push_back((*target, acc.clone()+&String::from(reverse_alphabet_index[&i])));
 
					visited.insert(target);
 
				}
 
			}
 
		}
 

	
 
		panic!();
 
	}
 

	
 
	fn find_equivalent_states(&self) -> Vec<(usize, usize)> {
 
		let n = self.alphabet_index.len();
 
		let state_vec: Vec<usize> = (0..self.rules.len()/n).collect();
 
@@ -256,4 +283,26 @@ impl RegexpDFA {
 

	
 
		return RegexpDFA{rules, end_states, alphabet_index: self.alphabet_index.clone()};
 
	}
 

	
 
	fn build_product_automaton(&self, other: &RegexpDFA) -> RegexpDFA {
 
		let n = self.alphabet_index.len();
 
		let m = other.rules.len() / n;
 
		let k = self.rules.len() / n;
 

	
 
		let mut rules = vec![];
 
		let mut end_states = HashSet::new();
 

	
 
		for s1 in 0..k {
 
			let row1 = &self.rules[s1*n..(s1+1)*n];
 
			for s2 in 0..m {
 
				let row2 = &other.rules[s2*n..(s2+1)*n];
 
				rules.extend(row1.iter().zip(row2.iter()).map(|(x, y)| x*m + y));
 
				if (self.end_states.contains(&s1)) ^ (other.end_states.contains(&s2)) {
 
					end_states.insert(s1*m + s2);
 
				}
 
			}
 
		}
 

	
 
		return RegexpDFA{rules, end_states, alphabet_index: self.alphabet_index.clone()}.reduce().normalize();
 
	}
 
}
0 comments (0 inline, 0 general)