Changeset - 1790bcb433e3
[Not reviewed]
default
0 2 0
Laman - 10 months ago 2024-06-30 17:02:12

fixes: never create an empty automaton, removed the erroneous early fail
2 files changed with 17 insertions and 19 deletions:
0 comments (0 inline, 0 general)
regexp.py
Show inline comments
 
@@ -294,9 +294,9 @@ class Regexp:
 

	
 
class RegexpDFA:
 
	def __init__(self, rules, end_states, alphabet_index):
 
		self.rules = rules
 
		self.rules = rules or [1, 1]
 
		self.end_states = end_states
 
		self.alphabet_index = alphabet_index
 
		self.alphabet_index = alphabet_index or {"": 0}
 

	
 
	@classmethod
 
	def create(cls, pattern):
 
@@ -308,10 +308,9 @@ class RegexpDFA:
 
	def match(self, s):
 
		st = 0
 
		n = len(self.alphabet_index)
 
		fail = len(self.rules) // n
 

	
 
		for c in s:
 
			if c not in self.alphabet_index or st == fail:
 
			if c not in self.alphabet_index:
 
				return False
 
			key = (st*n + self.alphabet_index[c])
 
			st = self.rules[key]
src/regexp.rs
Show inline comments
 
@@ -97,7 +97,7 @@ impl Regexp {
 
			let multistate = &index_multi[&state_hash];
 
			let mut new_rules: HashMap<char, HashSet<usize>> = HashMap::new();
 

	
 
			for key in self.rules.keys().filter(|key| multistate.contains(&key.0)) {
 
			for key in self.rules.keys().filter(|(st, _c)| multistate.contains(st)) {
 
				let (_st, c) = key;
 
				if !new_rules.contains_key(c) {
 
					new_rules.insert(*c, HashSet::new());
 
@@ -128,7 +128,7 @@ impl Regexp {
 
		compact_rules = compact_rules.into_iter().map(|st| if st != FAIL {st} else {fail}).collect();
 
		compact_rules.extend(iter::repeat(fail).take(n));
 

	
 
		return RegexpDFA{rules: compact_rules, end_states, alphabet_index};
 
		return RegexpDFA::new(compact_rules, end_states, alphabet_index);
 
	}
 
}
 

	
 
@@ -140,12 +140,20 @@ pub struct RegexpDFA {
 
}
 

	
 
impl RegexpDFA {
 
	/// Builds a `RegexpDFA` from a transition table, its accepting states and the symbol index.
	///
	/// A degenerate automaton is never stored. When `rules` is empty or `alphabet_index`
	/// is empty, the result uses the two-entry table `[1, 1]` over the single placeholder
	/// symbol `'\0'` instead; `end_states` is kept unchanged in every case.
	///
	/// Other methods of this type divide `rules.len()` by `alphabet_index.len()`, so an
	/// empty alphabet must not get past the constructor.
	pub fn new(rules: Vec<usize>, end_states: HashSet<usize>, alphabet_index: HashMap<char, usize>) -> RegexpDFA {
		if rules.is_empty() || alphabet_index.is_empty() {
			// Placeholder automaton: every entry of the table points to state 1.
			return RegexpDFA {
				rules: vec![1, 1],
				end_states,
				alphabet_index: HashMap::from([('\0', 0)]),
			};
		}
		return RegexpDFA{rules, end_states, alphabet_index};
	}
 

	
 
	pub fn eval(&self, s: String) -> bool {
 
		let n = self.alphabet_index.len();
 
		if n == 0 {
 
			return s.len() == 0;
 
		}
 
		let fail = self.rules.len() / n;
 
		let mut state = START_DFA;
 

	
 
		for c in s.chars() {
 
@@ -154,27 +162,18 @@ impl RegexpDFA {
 
			} else {
 
				return false;
 
			}
 
			if state == fail {
 
				return false;
 
			}
 
		}
 

	
 
		return self.end_states.contains(&state);
 
	}
 

	
 
	/// Returns a new automaton derived from this one; `self` is left untouched.
	///
	/// With a non-empty alphabet the result comes from `collapse_states`, fed with
	/// whatever `find_equivalent_states` reports. With an empty alphabet a plain
	/// field-by-field copy is returned instead.
	pub fn reduce(&self) -> RegexpDFA {
		if !self.alphabet_index.is_empty() {
			let equivalent_states = self.find_equivalent_states();
			return self.collapse_states(equivalent_states);
		}
		// Nothing to collapse without symbols: hand back an unchanged copy.
		let rules = self.rules.clone();
		let end_states = self.end_states.clone();
		let alphabet_index = self.alphabet_index.clone();
		return RegexpDFA{rules, end_states, alphabet_index};
	}
 

	
 
	pub fn normalize(&self) -> RegexpDFA {
 
		let n = self.alphabet_index.len();
 
		if n == 0 {
 
			return RegexpDFA{rules: self.rules.clone(), end_states: self.end_states.clone(), alphabet_index: self.alphabet_index.clone()}; 
 
		}
 
		let m = self.rules.len()/n;
 
		let fail = m;
 
		let mut index: Vec<usize> = vec![fail;m];
0 comments (0 inline, 0 general)