diff --git a/src/regexp.rs b/src/regexp.rs --- a/src/regexp.rs +++ b/src/regexp.rs @@ -168,8 +168,8 @@ impl RegexpDFA { } pub fn reduce(&self) -> RegexpDFA { - let equivalents = self.find_equivalent_states(); - return self.collapse_states(equivalents); + let partition = self.find_equivalent_states(); + return self.collapse_states(partition); } pub fn normalize(&self) -> RegexpDFA { @@ -230,42 +230,74 @@ impl RegexpDFA { panic!(); } - fn find_equivalent_states(&self) -> Vec<(usize, usize)> { + fn find_equivalent_states(&self) -> Vec> { let n = self.alphabet_index.len(); - let state_vec: Vec = (0..self.rules.len()/n).collect(); - let mut equivalents = HashSet::new(); - state_vec.iter().enumerate().for_each(|(i, s1)| { - equivalents.extend( - state_vec[i+1..].iter() - .filter(|s2| !(self.end_states.contains(s1)^self.end_states.contains(s2))) - .map(|s2| (*s1, *s2)) - ); - }); + let m = self.rules.len() / n; + let mut inverse_rules = vec![HashSet::::new();m*n]; - let mut m = usize::MAX; - while equivalents.len() < m { - m = equivalents.len(); - equivalents = equivalents.iter().filter(|(s1, s2)| { - !(0..n).any(|ci| { - let t1 = self.rules[s1*n + ci]; - let t2 = self.rules[s2*n + ci]; - let key = (t1.min(t2), t2.max(t1)); - return t1 != t2 && !equivalents.contains(&key); - }) - }).copied().collect(); + for i in 0..m { + for j in 0..n { + let target = self.rules[i*n + j]; + inverse_rules[target*n + j].insert(i); + } } - return Vec::from_iter(equivalents.into_iter()); + let mut set_bag = vec![ + self.end_states.clone(), + HashSet::from_iter(0..m).difference(&self.end_states).copied().collect() + ]; + let mut res = HashSet::::from([0, 1]); + let mut work = HashSet::::from([0, 1]); + + while !work.is_empty() { + let key = *work.iter().next().unwrap(); + work.remove(&key); + let target_set = set_bag[key].clone(); + for j in 0..n { + let source_set = HashSet::::from_iter(target_set.iter().flat_map(|&t| inverse_rules[t*n+j].iter()).copied()); + for k in res.clone().into_iter() { + let part = &set_bag[k]; + let intersection = part.intersection(&source_set).copied().collect::>(); + let diff = part.difference(&source_set).copied().collect::>(); + if intersection.is_empty() || diff.is_empty() { + continue; + } + res.remove(&k); + let ki = set_bag.len(); + set_bag.push(intersection); + res.insert(ki); + let kd = set_bag.len(); + set_bag.push(diff); + res.insert(kd); + if work.contains(&k) { + work.remove(&k); + work.insert(ki); + work.insert(kd); + } else if set_bag[ki].len() < set_bag[kd].len() { + work.insert(ki); + } else { + work.insert(kd); + } + } + } + } + + return Vec::from_iter(res.into_iter().map(|k| std::mem::take(&mut set_bag[k]))); } - fn collapse_states(&self, equivalents: Vec<(usize, usize)>) -> RegexpDFA { + fn collapse_states(&self, partition: Vec>) -> RegexpDFA { let n = self.alphabet_index.len(); let m = self.rules.len()/n; - let mut rules = Vec::new(); + let mut rules = vec![]; let mut eq_mapping: Vec = ((0..m)).collect(); - for (s1, s2) in equivalents.into_iter() { - eq_mapping[s2] = eq_mapping[s2].min(s1); + for eq_set in partition.into_iter() { + let mut eq_vec = Vec::from_iter(eq_set.into_iter()); + eq_vec.sort(); + let label = eq_vec[0]; + for st in eq_vec.into_iter() { + eq_mapping[st] = label; + } } let mut discard_mapping: Vec = ((0..m)).collect();