Regular-Expresso Changeset

Changeset - f0f9655e62ee

Parent rev.

Child rev.

[Not reviewed]

default

0 3 0

Laman - 11 months ago 2024-06-29 20:53:59

added a method for finding a string distinguishing between two regexps

3 files changed with 115 insertions and 3 deletions:

regexp.py

src/main.rs

src/regexp.rs

0 comments (0 inline, 0 general) First comment

regexp.py

➞

Show inline comments

@@ @@ -332,10 +332,31 @@ class RegexpDFA: @@
 					queue.append(sj)
 			rules.extend(index[sj] for sj in row)
 		end_states = {index[si] for si in self.end_states}
+		end_states = {index[si] for si in self.end_states if si in index}
 		return RegexpDFA(rules, end_states, self.alphabet_index)
 	def find_distinguishing_string(self, r):
 		if self.rules == r.rules and self.end_states == r.end_states:
 			return None
 		product = self._build_product_automaton(r)
 		n = len(product.alphabet_index)
 		reverse_alphabet_index = {v: k for (k, v) in product.alphabet_index.items()}
 		queue = deque([(0, "")])
 		visited = {0}
 		while queue:
 			(state, acc) = queue.popleft()
 			if state in product.end_states:
 				return acc
 			for (i, target) in enumerate(product.rules[state*n:(state+1)*n]):
 				if target not in visited:
 					queue.append((target, acc+reverse_alphabet_index[i]))
 					visited.add(target)
 		assert False
 	def _find_equivalent_states(self):
 		n = len(self.alphabet_index)
 		state_list = list(range(len(self.rules) // n))
@@ @@ -380,6 +401,24 @@ class RegexpDFA: @@
 		return (rules, end_states)
 	def _build_product_automaton(self, r):
 		n = len(self.alphabet_index)
 		m = len(r.rules) // n
 		k = len(self.rules) // n
 		rules = []
 		end_states = set()
 		for s1 in range(k):
 			row1 = self.rules[s1*n:(s1+1)*n]
 			for s2 in range(m):
 				row2 = r.rules[s2*n:(s2+1)*n]
 				rules.extend([x*m + y for (x, y) in zip(row1, row2)])
 				if (s1 in self.end_states) != (s2 in r.end_states):
 					end_states.add(s1*m + s2)
 		return RegexpDFA(rules, end_states, self.alphabet_index).reduce().normalize()
 def test():
 	tests = ["", "a", "ab", "aabb", "abab", "abcd", "abcbcdbcd"]
@@ @@ -408,6 +447,11 @@ if __name__ == "__main__": @@
 	match_parser.add_argument("string")
 	match_parser.set_defaults(name="match")
 	compare_parser = subparsers.add_parser("compare")
 	compare_parser.add_argument("pattern1")
 	compare_parser.add_argument("pattern2")
 	compare_parser.set_defaults(name="compare")
 	args = parser.parse_args()
 	if args.name == "test":
@@ @@ -419,5 +463,9 @@ if __name__ == "__main__": @@
 			print("Failed to parse the regexp:")
 			print(e)
 		print(r.match(args.string))
 	elif args.name == "compare":
 		r1 = RegexpDFA.create(args.pattern1).reduce().normalize()
 		r2 = RegexpDFA.create(args.pattern2).reduce().normalize()
 		print(r1.find_distinguishing_string(r2))
 	else:
 		parser.print_help()
@@ \ No newline at end of file @@
 		parser.print_help()

src/main.rs

➞

Show inline comments

@@ @@ -31,7 +31,22 @@ fn main() { @@
+				}
 			};
 			println!("{}", r.eval(args[3].to_string()));
+		}
 		},
 		"compare" => {
 			let r1 = match Regexp::new(&args[2].to_string()) {
 				Ok(r) => r.determinize().reduce().normalize(),
 				Err(e) => {
 					panic!("ERROR: {e}");
+				}
 			};
 			let r2 = match Regexp::new(&args[3].to_string()) {
 				Ok(r) => r.determinize().reduce().normalize(),
 				Err(e) => {
 					panic!("ERROR: {e}");
+				}
 			};
 			println!("{}", r1.find_distinguishing_string(&r2).unwrap_or("None".to_string()));
 		},
 		s => {
 			println!("An unknown command: \"{s}\". Use \"match\" or \"test\".")
+		}

src/regexp.rs

➞

Show inline comments

@@ @@ -201,6 +201,33 @@ impl RegexpDFA { @@
 		return RegexpDFA{rules, end_states, alphabet_index: self.alphabet_index.clone()};
+	}
 	pub fn find_distinguishing_string(&self, other: &RegexpDFA) -> Option<String> {
 		if self.rules == other.rules && self.end_states == other.end_states {
 			return None;
+		}
 		let product = self.build_product_automaton(other);
 		let n = product.alphabet_index.len();
 		let reverse_alphabet_index: HashMap<usize, char> = HashMap::from_iter(product.alphabet_index.iter().map(|(&k, &v)| (v, k)));
 		let mut queue = VecDeque::from([(0, "".to_string())]);
 		let mut visited = HashSet::new();
 		while !queue.is_empty() {
 			let (state, acc) = queue.pop_front().unwrap();
 			if product.end_states.contains(&state) {
 				return Some(acc);
+			}
 			for (i, target) in product.rules[state*n..(state+1)*n].iter().enumerate() {
 				if !visited.contains(target) {
 					queue.push_back((*target, acc.clone()+&String::from(reverse_alphabet_index[&i])));
 					visited.insert(target);
+				}
+			}
+		}
 		panic!();
+	}
 	fn find_equivalent_states(&self) -> Vec<(usize, usize)> {
 		let n = self.alphabet_index.len();
 		let state_vec: Vec<usize> = (0..self.rules.len()/n).collect();
@@ @@ -256,4 +283,26 @@ impl RegexpDFA { @@
 		return RegexpDFA{rules, end_states, alphabet_index: self.alphabet_index.clone()};
+	}
 	fn build_product_automaton(&self, other: &RegexpDFA) -> RegexpDFA {
 		let n = self.alphabet_index.len();
 		let m = other.rules.len() / n;
 		let k = self.rules.len() / n;
 		let mut rules = vec![];
 		let mut end_states = HashSet::new();
 		for s1 in 0..k {
 			let row1 = &self.rules[s1*n..(s1+1)*n];
 			for s2 in 0..m {
 				let row2 = &other.rules[s2*n..(s2+1)*n];
 				rules.extend(row1.iter().zip(row2.iter()).map(|(x, y)| x*m + y));
 				if (self.end_states.contains(&s1)) ^ (other.end_states.contains(&s2)) {
 					end_states.insert(s1*m + s2);
+				}
+			}
+		}
 		return RegexpDFA{rules, end_states, alphabet_index: self.alphabet_index.clone()}.reduce().normalize();
+	}
+}

0 comments (0 inline, 0 general) First comment