# HG changeset patch # User Laman # Date 2024-06-24 11:55:22 # Node ID 95db38ce684666a85779753b6c55a7fb8449866d # Parent 4f7b6352013d562a3a5b78fd4dd7cce1cb863015 refactoring: changed the states representation from usize and u64 to i32 diff --git a/src/regexp.rs b/src/regexp.rs --- a/src/regexp.rs +++ b/src/regexp.rs @@ -4,26 +4,27 @@ mod token; pub use token::ParsingError; use token::parse; -const START: usize = usize::MAX; -const FAIL: usize = START-1; +const START: i32 = -1; +const FAIL: i32 = -2; -fn encode_set(set: &HashSet) -> u64 { +fn encode_set(set: &HashSet) -> i32 { let mut res = 0; for x in set.iter() { + assert!(*x >= 0); res ^= 1<HashSet { - if x == START as u64 {return HashSet::from([START]);} +fn decode_set(x: i32) -> HashSet { + if x == START {return HashSet::from([START]);} let mut x = x; - let mut res: HashSet = HashSet::new(); + let mut res: HashSet = HashSet::new(); while x > 0 { let y = x.trailing_zeros(); - res.insert(y as usize); + res.insert(y as i32); x ^= 1 << y; } @@ -32,35 +33,35 @@ fn decode_set(x: u64) ->HashSet { #[derive(Debug)] pub struct Regexp { - rules: HashMap<(usize, char), HashSet>, - end_states: HashSet + rules: HashMap<(i32, char), HashSet>, + end_states: HashSet } impl Regexp { pub fn new(pattern: &String) -> Result { let r = parse(pattern, 0)?; let pattern_chars = Vec::from_iter(pattern.chars()); - let mut rules: HashMap<(usize, char), HashSet> = HashMap::new(); + let mut rules: HashMap<(i32, char), HashSet> = HashMap::new(); for i in r.list_first() { let c = pattern_chars[i]; let key = (START, c); match rules.get_mut(&key) { - Some(set) => {set.insert(i);}, - None => {rules.insert(key, HashSet::from([i]));} + Some(set) => {set.insert(i as i32);}, + None => {rules.insert(key, HashSet::from([i as i32]));} }; } for (i, j) in r.list_neighbours() { let c = pattern_chars[j]; - let key = (i, c); + let key = (i as i32, c); match rules.get_mut(&key) { - Some(set) => {set.insert(j);}, - None => {rules.insert(key, HashSet::from([j]));} + Some(set) => {set.insert(j as i32);}, + None => {rules.insert(key, HashSet::from([j as i32]));} }; } - let mut end_states = HashSet::from_iter(r.list_last().into_iter()); + let mut end_states = HashSet::from_iter(r.list_last().into_iter().map(|i| i as i32)); if r.is_skippable() { end_states.insert(START); } @@ -88,16 +89,16 @@ impl Regexp { } pub fn determinize(&self) -> RegexpDFA { - let mut rules: HashMap<(u64, char), u64> = HashMap::new(); - let mut end_states: HashSet = HashSet::new(); - if self.end_states.contains(&START) {end_states.insert(START as u64);} + let mut rules: HashMap<(i32, char), i32> = HashMap::new(); + let mut end_states: HashSet = HashSet::new(); + if self.end_states.contains(&START) {end_states.insert(START);} - let mut stack = Vec::from([START as u64]); + let mut stack = Vec::from([START]); let mut processed_states = HashSet::new(); while !stack.is_empty() { let state = stack.pop().unwrap(); let multistate = decode_set(state); - let mut new_rules: HashMap> = HashMap::new(); + let mut new_rules: HashMap> = HashMap::new(); for key in self.rules.keys().filter(|key| multistate.contains(&key.0)) { let (_st, c) = key; @@ -127,13 +128,13 @@ impl Regexp { } pub struct RegexpDFA { - rules: HashMap<(u64, char), u64>, - end_states: HashSet + rules: HashMap<(i32, char), i32>, + end_states: HashSet } impl RegexpDFA { pub fn eval(&self, s: String) -> bool { - let mut state = START as u64; + let mut state = START; for c in s.chars() { if let Some(x) = self.rules.get(&(state, c)) { @@ -152,34 +153,35 @@ impl RegexpDFA { } pub fn normalize(&self) -> RegexpDFA { - let mut index = HashMap::from([(START as u64, START as u64)]); - let mut queue = VecDeque::from([START as u64]); + let mut index = HashMap::from([(START, START)]); + let mut queue = VecDeque::from([START]); while !queue.is_empty() { let state = queue.pop_front().unwrap(); - let mut edges: Vec<((u64, char), u64)> = self.rules.iter().filter(|((st, c), t)| *st == state).map(|((st, c), t)| ((*st, *c), *t)).collect(); + let mut edges: Vec<((i32, char), i32)> = self.rules.iter() + .filter(|((st, c), t)| *st == state) + .map(|((st, c), t)| ((*st, *c), *t)).collect(); edges.sort(); for ((_st, _c), t) in edges { if !index.contains_key(&t) { - index.insert(t, index.len() as u64); + index.insert(t, index.len() as i32); queue.push_back(t); } } } - let rules = self.rules.iter().map(|((st, c), t)| ((index[st as &u64], *c), index[t as &u64])).collect(); + let rules = self.rules.iter().map(|((st, c), t)| ((index[st], *c), index[t])).collect(); let end_states = self.end_states.iter().map(|st| index[st]).collect(); return RegexpDFA{rules, end_states}; } - fn find_equivalent_states(&self) -> Vec<(u64, u64)> { - let state_set: HashSet = HashSet::from_iter(self.rules.values().copied()); - let mut state_vec: Vec = Vec::from_iter(state_set.into_iter()); - state_vec.push(START as u64); - state_vec.push(FAIL as u64); + fn find_equivalent_states(&self) -> Vec<(i32, i32)> { + let state_set: HashSet = HashSet::from_iter(self.rules.values().copied()); + let mut state_vec: Vec = Vec::from_iter(state_set.into_iter()); + state_vec.push(START); + state_vec.push(FAIL); state_vec.sort(); - state_vec.reverse(); let alphabet: HashSet = self.rules.keys().map(|(_st, c)| c).copied().collect(); let mut equivalents = HashSet::new(); @@ -196,8 +198,8 @@ impl RegexpDFA { n = equivalents.len(); equivalents = equivalents.iter().filter(|(s1, s2)| { !alphabet.iter().any(|c| { - let t1 = self.rules.get(&(*s1, *c)).unwrap_or(&(FAIL as u64)); - let t2 = self.rules.get(&(*s2, *c)).unwrap_or(&(FAIL as u64)); + let t1 = self.rules.get(&(*s1, *c)).unwrap_or(&FAIL); + let t2 = self.rules.get(&(*s2, *c)).unwrap_or(&FAIL); let key = (*t1.min(t2), *t1.max(t2)); return t1 != t2 && !equivalents.contains(&key); }) @@ -207,7 +209,7 @@ impl RegexpDFA { return Vec::from_iter(equivalents.into_iter()); } - fn collapse_states(&self, mut equivalents: Vec<(u64, u64)>) -> RegexpDFA { + fn collapse_states(&self, mut equivalents: Vec<(i32, i32)>) -> RegexpDFA { let mut rules = self.rules.clone(); let mut end_states = self.end_states.clone(); equivalents.sort();