diff --git a/src/regexp.rs b/src/regexp.rs --- a/src/regexp.rs +++ b/src/regexp.rs @@ -124,7 +124,16 @@ impl RegexpNFA { compact_rules = compact_rules.into_iter().map(|st| if st != FAIL {st} else {fail}).collect(); compact_rules.extend(iter::repeat(fail).take(n)); - return RegexpDFA::new(compact_rules, end_states, alphabet_index); + if compact_rules.len() > 0 { + return RegexpDFA{rules: compact_rules, end_states, alphabet_index}; + } else { + // return a minimal non-empty DFA + return RegexpDFA{ + rules: vec![1, 1], + end_states, + alphabet_index: HashMap::from([('\0', 0)]) + }; + } } } @@ -138,18 +147,9 @@ pub struct RegexpDFA { } impl RegexpDFA { - /// Construct a DFA with the provided parameters, or a minimal DFA if the parameters are empty. - pub fn new(rules: Vec, end_states: HashSet, alphabet_index: HashMap) -> RegexpDFA { - if rules.len() > 0 { - return RegexpDFA{rules, end_states, alphabet_index}; - } else { - // this saves us checking for an empty `alphabet_index` in other methods. - return RegexpDFA{ - rules: vec![1, 1], - end_states, - alphabet_index: HashMap::from([('\0', 0)]) - }; - } + pub fn new(pattern: &String) -> Result { + let nfa = RegexpNFA::new(pattern)?; + return Ok(nfa.determinize().reduce().normalize()); } /// Decide if a string matches the regexp. @@ -205,7 +205,7 @@ impl RegexpDFA { } /// Find the shortest string that is accepted by self or `r`, but not both. - /// It is expected that the automatons are already reduced and normalized. + /// The input automatons have to be already reduced and normalized (they are in the `new` constructor). pub fn find_distinguishing_string(&self, other: &RegexpDFA) -> Option { if self.rules == other.rules && self.end_states == other.end_states { return None;