# HG changeset patch # User Laman # Date 2024-07-03 21:33:39 # Node ID d2dfbcc8bb96befbf8995cd94895ad44ff593531 # Parent f03565ba6137f91c9da3daf5756cb29248138f13 refactoring: determinization indexing states with vecs instead of string hashes diff --git a/src/regexp.rs b/src/regexp.rs --- a/src/regexp.rs +++ b/src/regexp.rs @@ -7,13 +7,6 @@ use token::parse; const START_NFA: usize = usize::MAX; const START_DFA: usize = 0; -fn encode_set(set: &HashSet) -> String { - let mut v = Vec::from_iter(set.iter()); - v.sort(); - let res: Vec = v.into_iter().map(|x| x.to_string()).collect(); - return res.join(","); -} - #[derive(Debug)] pub struct Regexp { rules: HashMap<(usize, char), HashSet>, @@ -86,18 +79,14 @@ impl Regexp { let mut end_states: HashSet = HashSet::new(); if self.end_states.contains(&START_NFA) {end_states.insert(START_DFA);} - // string hash -> single int DFA state - let mut index_new = HashMap::from([(START_NFA.to_string(), START_DFA)]); - // string hash -> HashSet NFA multistate - let mut index_multi = HashMap::from([(START_NFA.to_string(), HashSet::from([START_NFA]))]); - let mut stack = Vec::from([START_NFA.to_string()]); + let mut index = HashMap::from([(vec![START_NFA], START_DFA)]); + let mut stack = vec![vec![START_NFA]]; while !stack.is_empty() { - let state_hash = stack.pop().unwrap(); - let multistate = &index_multi[&state_hash]; + let multistate = stack.pop().unwrap(); let mut new_rules: HashMap> = HashMap::new(); - for key in self.rules.keys().filter(|(st, _c)| multistate.contains(st)) { + for key in self.rules.keys().filter(|(st, _c)| multistate.binary_search(st).is_ok()) { let (_st, c) = key; if !new_rules.contains_key(c) { new_rules.insert(*c, HashSet::new()); @@ -108,23 +97,23 @@ impl Regexp { } for (c, target_set) in new_rules.into_iter() { - let target_hash = encode_set(&target_set); - let is_end = target_set.iter().any(|st| self.end_states.contains(st)); - if !index_new.contains_key(&target_hash) { - let target_new = index_new.len(); - index_new.insert(target_hash.clone(), target_new); - index_multi.insert(target_hash.clone(), target_set); + let mut target_vec = Vec::from_iter(target_set.into_iter()); + target_vec.sort(); + let is_end = target_vec.iter().any(|st| self.end_states.contains(st)); + if !index.contains_key(&target_vec) { + let target_new = index.len(); + index.insert(target_vec.clone(), target_new); compact_rules.extend(iter::repeat(FAIL).take(n)); - stack.push(target_hash.clone()); + stack.push(target_vec.clone()); } - compact_rules[index_new[&state_hash]*n + alphabet_index[&c]] = index_new[&target_hash]; + compact_rules[index[&multistate]*n + alphabet_index[&c]] = index[&target_vec]; if is_end { - end_states.insert(index_new[&target_hash]); + end_states.insert(index[&target_vec]); } } } - let fail = index_new.len(); + let fail = index.len(); compact_rules = compact_rules.into_iter().map(|st| if st != FAIL {st} else {fail}).collect(); compact_rules.extend(iter::repeat(fail).take(n));