diff --git a/regexp.py b/regexp.py --- a/regexp.py +++ b/regexp.py @@ -275,6 +275,10 @@ class Regexp: compact_rules[index[multistate]*n + alphabet_index[c]] = index[target_tup] if any(st in self.end_states for st in target_set): end_states.add(index[target_tup]) + + fail = len(index) + compact_rules = [(st if st >= 0 else fail) for st in compact_rules] + compact_rules.extend([fail] * n) return (compact_rules, end_states, alphabet_index) @@ -295,9 +299,10 @@ class RegexpDFA: def match(self, s): st = 0 n = len(self.alphabet_index) + fail = len(self.rules) // n for c in s: - if c not in self.alphabet_index or st < 0: + if c not in self.alphabet_index or st == fail: return False key = (st*n + self.alphabet_index[c]) st = self.rules[key] @@ -312,7 +317,7 @@ class RegexpDFA: def normalize(self): n = len(self.alphabet_index) - index = {-1: -1, 0: 0} + index = {0: 0} queue = deque([0]) rules = [] @@ -322,7 +327,7 @@ class RegexpDFA: row = self.rules[si*n:(si+1)*n] for sj in row: if sj not in index: - index[sj] = len(index)-1 + index[sj] = len(index) queue.append(sj) rules.extend(index[sj] for sj in row) @@ -358,7 +363,7 @@ class RegexpDFA: for (s1, s2) in equivalents: eq_mapping[s2] = min(s1, eq_mapping.get(s2, math.inf)) - discard_mapping = {-1: -1} + discard_mapping = dict() discard_count = 0 for i in range(0, len(self.rules), n): diff --git a/src/regexp.rs b/src/regexp.rs --- a/src/regexp.rs +++ b/src/regexp.rs @@ -6,7 +6,6 @@ use token::parse; const START_NFA: usize = usize::MAX; const START_DFA: usize = 0; -const FAIL: usize = usize::MAX>>1; fn encode_set(set: &HashSet) -> String { let mut v = Vec::from_iter(set.iter()); @@ -80,6 +79,7 @@ impl Regexp { } pub fn determinize(&self) -> RegexpDFA { + const FAIL: usize = usize::MAX; let alphabet_index: HashMap = self.alphabet.iter().enumerate().map(|(i, c)| (*c, i)).collect(); let n = alphabet_index.len(); let mut compact_rules = vec![FAIL; n]; @@ -124,6 +124,10 @@ impl Regexp { } } + let fail = index_new.len(); + compact_rules = compact_rules.into_iter().map(|st| if st != FAIL {st} else {fail}).collect(); + compact_rules.extend(iter::repeat(fail).take(n)); + return RegexpDFA{rules: compact_rules, end_states, alphabet_index}; } } @@ -137,6 +141,10 @@ pub struct RegexpDFA { impl RegexpDFA { pub fn eval(&self, s: String) -> bool { let n = self.alphabet_index.len(); + if n == 0 { + return s.len() == 0; + } + let fail = self.rules.len() / n; let mut state = START_DFA; for c in s.chars() { @@ -145,7 +153,7 @@ impl RegexpDFA { } else { return false; } - if state == FAIL { + if state == fail { return false; } } @@ -167,7 +175,8 @@ impl RegexpDFA { return RegexpDFA{rules: self.rules.clone(), end_states: self.end_states.clone(), alphabet_index: self.alphabet_index.clone()}; } let m = self.rules.len()/n; - let mut index: Vec = vec![FAIL;m]; + let fail = m; + let mut index: Vec = vec![fail;m]; index[0] = 0; let mut queue = VecDeque::from([START_DFA]); @@ -178,13 +187,13 @@ impl RegexpDFA { let si = queue.pop_front().unwrap(); let row = &self.rules[si*n..(si+1)*n]; for &sj in row { - if sj != FAIL && index[sj] == FAIL { + if sj != fail && index[sj] == fail { index[sj] = k; k += 1; queue.push_back(sj); } } - rules.extend(row.iter().map(|&st| if st != FAIL {index[st]} else {FAIL})); + rules.extend(row.iter().map(|&st| index[st])); } let end_states = self.end_states.iter().map(|st| index[*st]).collect(); @@ -239,10 +248,10 @@ impl RegexpDFA { continue; } discard_mapping[si] = si-discard_count; - rules.extend(self.rules[si*n..(si+1)*n].iter().map(|&st| if st!=FAIL {eq_mapping[st]} else {FAIL})); + rules.extend(self.rules[si*n..(si+1)*n].iter().map(|&st| eq_mapping[st])); } - rules = rules.into_iter().map(|st| if st!=FAIL {discard_mapping[st]} else {FAIL}).collect(); + rules = rules.into_iter().map(|st| discard_mapping[st]).collect(); let end_states = self.end_states.iter().map(|st| discard_mapping[eq_mapping[*st]]).collect(); return RegexpDFA{rules, end_states, alphabet_index: self.alphabet_index.clone()};