diff --git a/src/regexp.rs b/src/regexp.rs --- a/src/regexp.rs +++ b/src/regexp.rs @@ -1,4 +1,4 @@ -use std::collections::{HashMap, HashSet}; +use std::collections::{HashMap, HashSet, VecDeque}; mod token; pub use token::ParsingError; @@ -151,6 +151,28 @@ impl RegexpDFA { return self.collapse_states(equivalents); } + pub fn normalize(&self) -> RegexpDFA { + let mut index = HashMap::from([(START as u64, START as u64)]); + let mut queue = VecDeque::from([START as u64]); + + while !queue.is_empty() { + let state = queue.pop_front().unwrap(); + let mut edges: Vec<((u64, char), u64)> = self.rules.iter().filter(|((st, c), t)| *st == state).map(|((st, c), t)| ((*st, *c), *t)).collect(); + edges.sort(); + for ((_st, _c), t) in edges { + if !index.contains_key(&t) { + index.insert(t, index.len() as u64); + queue.push_back(t); + } + } + } + + let rules = self.rules.iter().map(|((st, c), t)| ((index[st as &u64], *c), index[t as &u64])).collect(); + let end_states = self.end_states.iter().map(|st| index[st]).collect(); + + return RegexpDFA{rules, end_states}; + } + fn find_equivalent_states(&self) -> Vec<(u64, u64)> { let state_set: HashSet = HashSet::from_iter(self.rules.values().copied()); let mut state_vec: Vec = Vec::from_iter(state_set.into_iter());