diff --git a/src/regexp.rs b/src/regexp.rs --- a/src/regexp.rs +++ b/src/regexp.rs @@ -6,7 +6,6 @@ use token::parse; const START_NFA: usize = usize::MAX; const START_DFA: usize = 0; -const FAIL: usize = usize::MAX>>1; fn encode_set(set: &HashSet) -> String { let mut v = Vec::from_iter(set.iter()); @@ -80,6 +79,7 @@ impl Regexp { } pub fn determinize(&self) -> RegexpDFA { + const FAIL: usize = usize::MAX; let alphabet_index: HashMap = self.alphabet.iter().enumerate().map(|(i, c)| (*c, i)).collect(); let n = alphabet_index.len(); let mut compact_rules = vec![FAIL; n]; @@ -124,6 +124,10 @@ impl Regexp { } } + let fail = index_new.len(); + compact_rules = compact_rules.into_iter().map(|st| if st != FAIL {st} else {fail}).collect(); + compact_rules.extend(iter::repeat(fail).take(n)); + return RegexpDFA{rules: compact_rules, end_states, alphabet_index}; } } @@ -137,6 +141,10 @@ pub struct RegexpDFA { impl RegexpDFA { pub fn eval(&self, s: String) -> bool { let n = self.alphabet_index.len(); + if n == 0 { + return s.len() == 0; + } + let fail = self.rules.len() / n; let mut state = START_DFA; for c in s.chars() { @@ -145,7 +153,7 @@ impl RegexpDFA { } else { return false; } - if state == FAIL { + if state == fail { return false; } } @@ -167,7 +175,8 @@ impl RegexpDFA { return RegexpDFA{rules: self.rules.clone(), end_states: self.end_states.clone(), alphabet_index: self.alphabet_index.clone()}; } let m = self.rules.len()/n; - let mut index: Vec = vec![FAIL;m]; + let fail = m; + let mut index: Vec = vec![fail;m]; index[0] = 0; let mut queue = VecDeque::from([START_DFA]); @@ -178,13 +187,13 @@ impl RegexpDFA { let si = queue.pop_front().unwrap(); let row = &self.rules[si*n..(si+1)*n]; for &sj in row { - if sj != FAIL && index[sj] == FAIL { + if sj != fail && index[sj] == fail { index[sj] = k; k += 1; queue.push_back(sj); } } - rules.extend(row.iter().map(|&st| if st != FAIL {index[st]} else {FAIL})); + rules.extend(row.iter().map(|&st| index[st])); } let end_states = self.end_states.iter().map(|st| index[*st]).collect(); @@ -239,10 +248,10 @@ impl RegexpDFA { continue; } discard_mapping[si] = si-discard_count; - rules.extend(self.rules[si*n..(si+1)*n].iter().map(|&st| if st!=FAIL {eq_mapping[st]} else {FAIL})); + rules.extend(self.rules[si*n..(si+1)*n].iter().map(|&st| eq_mapping[st])); } - rules = rules.into_iter().map(|st| if st!=FAIL {discard_mapping[st]} else {FAIL}).collect(); + rules = rules.into_iter().map(|st| discard_mapping[st]).collect(); let end_states = self.end_states.iter().map(|st| discard_mapping[eq_mapping[*st]]).collect(); return RegexpDFA{rules, end_states, alphabet_index: self.alphabet_index.clone()};