diff --git a/src/regexp.rs b/src/regexp.rs --- a/src/regexp.rs +++ b/src/regexp.rs @@ -7,28 +7,11 @@ use token::parse; const START: i32 = -1; const FAIL: i32 = -2; -fn encode_set(set: &HashSet) -> i32 { - let mut res = 0; - for x in set.iter() { - assert!(*x >= 0); - res ^= 1< HashSet { - if x == START {return HashSet::from([START]);} - - let mut x = x; - let mut res: HashSet = HashSet::new(); - - while x > 0 { - let y = x.trailing_zeros(); - res.insert(y as i32); - x ^= 1 << y; - } - - return res; +fn encode_set(set: &HashSet) -> String { + let mut v = Vec::from_iter(set.iter()); + v.sort(); + let res: Vec = v.into_iter().map(|x| x.to_string()).collect(); + return res.join(","); } #[derive(Debug)] @@ -93,11 +76,13 @@ impl Regexp { let mut end_states: HashSet = HashSet::new(); if self.end_states.contains(&START) {end_states.insert(START);} - let mut stack = Vec::from([START]); - let mut processed_states = HashSet::new(); + let mut index_new = HashMap::from([(START.to_string(), START)]); + let mut index_multi = HashMap::from([(START.to_string(), HashSet::from([START]))]); + let mut stack = Vec::from([START.to_string()]); + while !stack.is_empty() { - let state = stack.pop().unwrap(); - let multistate = decode_set(state); + let state_hash = stack.pop().unwrap(); + let multistate = &index_multi[&state_hash]; let mut new_rules: HashMap> = HashMap::new(); for key in self.rules.keys().filter(|key| multistate.contains(&key.0)) { @@ -111,14 +96,17 @@ impl Regexp { } for (c, target_set) in new_rules.into_iter() { - let encoded_target = encode_set(&target_set); - rules.insert((state, c), encoded_target); - if target_set.iter().any(|st| self.end_states.contains(st)) { - end_states.insert(encoded_target); + let target_hash = encode_set(&target_set); + let is_end = target_set.iter().any(|st| self.end_states.contains(st)); + if !index_new.contains_key(&target_hash) { + let target_new = index_new.len() as i32; + index_new.insert(target_hash.clone(), target_new); + index_multi.insert(target_hash.clone(), target_set); + stack.push(target_hash.clone()); } - if !processed_states.contains(&encoded_target) { - stack.push(encoded_target); - processed_states.insert(encoded_target); + rules.insert((index_new[&state_hash], c), index_new[&target_hash]); + if is_end { + end_states.insert(index_new[&target_hash]); } } }