diff --git a/regexp.py b/regexp.py --- a/regexp.py +++ b/regexp.py @@ -294,9 +294,9 @@ class Regexp: class RegexpDFA: def __init__(self, rules, end_states, alphabet_index): - self.rules = rules + self.rules = rules or [1, 1] self.end_states = end_states - self.alphabet_index = alphabet_index + self.alphabet_index = alphabet_index or {"": 0} @classmethod def create(cls, pattern): @@ -308,10 +308,9 @@ class RegexpDFA: def match(self, s): st = 0 n = len(self.alphabet_index) - fail = len(self.rules) // n for c in s: - if c not in self.alphabet_index or st == fail: + if c not in self.alphabet_index: return False key = (st*n + self.alphabet_index[c]) st = self.rules[key] diff --git a/src/regexp.rs b/src/regexp.rs --- a/src/regexp.rs +++ b/src/regexp.rs @@ -97,7 +97,7 @@ impl Regexp { let multistate = &index_multi[&state_hash]; let mut new_rules: HashMap> = HashMap::new(); - for key in self.rules.keys().filter(|key| multistate.contains(&key.0)) { + for key in self.rules.keys().filter(|(st, _c)| multistate.contains(st)) { let (_st, c) = key; if !new_rules.contains_key(c) { new_rules.insert(*c, HashSet::new()); @@ -127,8 +127,8 @@ impl Regexp { let fail = index_new.len(); compact_rules = compact_rules.into_iter().map(|st| if st != FAIL {st} else {fail}).collect(); compact_rules.extend(iter::repeat(fail).take(n)); - - return RegexpDFA{rules: compact_rules, end_states, alphabet_index}; + + return RegexpDFA::new(compact_rules, end_states, alphabet_index); } } @@ -140,12 +140,20 @@ pub struct RegexpDFA { } impl RegexpDFA { + pub fn new(rules: Vec, end_states: HashSet, alphabet_index: HashMap) -> RegexpDFA { + if rules.len() > 0 { + return RegexpDFA{rules, end_states, alphabet_index}; + } else { + return RegexpDFA{ + rules: vec![1, 1], + end_states, + alphabet_index: HashMap::from([('\0', 0)]) + }; + } + } + pub fn eval(&self, s: String) -> bool { let n = self.alphabet_index.len(); - if n == 0 { - return s.len() == 0; - } - let fail = self.rules.len() / n; let mut state = START_DFA; for c in s.chars() { @@ -154,27 +162,18 @@ impl RegexpDFA { } else { return false; } - if state == fail { - return false; - } } return self.end_states.contains(&state); } pub fn reduce(&self) -> RegexpDFA { - if self.alphabet_index.len() == 0 { - return RegexpDFA{rules: self.rules.clone(), end_states: self.end_states.clone(), alphabet_index: self.alphabet_index.clone()}; - } let equivalents = self.find_equivalent_states(); return self.collapse_states(equivalents); } pub fn normalize(&self) -> RegexpDFA { let n = self.alphabet_index.len(); - if n == 0 { - return RegexpDFA{rules: self.rules.clone(), end_states: self.end_states.clone(), alphabet_index: self.alphabet_index.clone()}; - } let m = self.rules.len()/n; let fail = m; let mut index: Vec = vec![fail;m];