diff --git a/regexp.py b/regexp.py --- a/regexp.py +++ b/regexp.py @@ -275,6 +275,10 @@ class Regexp: compact_rules[index[multistate]*n + alphabet_index[c]] = index[target_tup] if any(st in self.end_states for st in target_set): end_states.add(index[target_tup]) + + fail = len(index) + compact_rules = [(st if st >= 0 else fail) for st in compact_rules] + compact_rules.extend([fail] * n) return (compact_rules, end_states, alphabet_index) @@ -295,9 +299,10 @@ class RegexpDFA: def match(self, s): st = 0 n = len(self.alphabet_index) + fail = len(self.rules) // n for c in s: - if c not in self.alphabet_index or st < 0: + if c not in self.alphabet_index or st == fail: return False key = (st*n + self.alphabet_index[c]) st = self.rules[key] @@ -312,7 +317,7 @@ class RegexpDFA: def normalize(self): n = len(self.alphabet_index) - index = {-1: -1, 0: 0} + index = {0: 0} queue = deque([0]) rules = [] @@ -322,7 +327,7 @@ class RegexpDFA: row = self.rules[si*n:(si+1)*n] for sj in row: if sj not in index: - index[sj] = len(index)-1 + index[sj] = len(index) queue.append(sj) rules.extend(index[sj] for sj in row) @@ -358,7 +363,7 @@ class RegexpDFA: for (s1, s2) in equivalents: eq_mapping[s2] = min(s1, eq_mapping.get(s2, math.inf)) - discard_mapping = {-1: -1} + discard_mapping = dict() discard_count = 0 for i in range(0, len(self.rules), n):