diff --git a/src/regexp.rs b/src/regexp.rs --- a/src/regexp.rs +++ b/src/regexp.rs @@ -132,6 +132,7 @@ impl Regexp { } } +#[derive(Clone)] pub struct RegexpDFA { rules: Vec, end_states: HashSet, @@ -206,7 +207,9 @@ impl RegexpDFA { return None; } - let product = self.build_product_automaton(other); + let r1 = self.expand_alphabet(&other.alphabet_index); + let r2 = other.expand_alphabet(&self.alphabet_index); + let product = r1.build_product_automaton(&r2); let n = product.alphabet_index.len(); let reverse_alphabet_index: HashMap = HashMap::from_iter(product.alphabet_index.iter().map(|(&k, &v)| (v, k))); @@ -284,6 +287,34 @@ impl RegexpDFA { return RegexpDFA{rules, end_states, alphabet_index: self.alphabet_index.clone()}; } + fn expand_alphabet(&self, alphabet_index: &HashMap) -> RegexpDFA { + if *alphabet_index == self.alphabet_index { + return self.clone(); + } + + let n1 = self.alphabet_index.len(); + let m = self.rules.len() / n1; + + let combined_alphabet: HashSet = HashSet::from_iter(self.alphabet_index.keys().chain(alphabet_index.keys()).copied()); + let mut combined_vec = Vec::from_iter(combined_alphabet.into_iter()); + combined_vec.sort(); + let combined_index = HashMap::from_iter(combined_vec.iter().enumerate().map(|(i, c)| (*c, i))); + let conversion_index: HashMap = HashMap::from_iter(self.alphabet_index.iter().map(|(k, v)| (*v, combined_index[k]))); + let n2 = combined_vec.len(); + + let mut rules = vec![]; + for i in 0..m { + let mut row = vec![m;n2]; + for (j, st) in self.rules[i*n1..(i+1)*n1].iter().enumerate() { + row[conversion_index[&j]] = *st; + } + rules.append(&mut row); + } + rules.append(&mut vec![m;n2]); + + return RegexpDFA{rules, end_states: self.end_states.clone(), alphabet_index: combined_index}.reduce().normalize(); + } + fn build_product_automaton(&self, other: &RegexpDFA) -> RegexpDFA { let n = self.alphabet_index.len(); let m = other.rules.len() / n;