use std::collections::{HashMap, HashSet}; const START: usize = usize::MAX; trait Token { fn is_skippable(&self) -> bool {false} fn list_first(&self) -> Vec<usize>; fn list_last(&self) -> Vec<usize>; fn list_neighbours(&self) -> Vec<(usize, usize)>; } struct Symbol { position: usize, value: char } struct Asterisk { content: Box<dyn Token> } struct Plus { content: Box<dyn Token> } struct Chain { content: Vec<Box<dyn Token>> } impl Token for Symbol { fn list_first(&self) -> Vec<usize> { return vec![self.position]; } fn list_last(&self) -> Vec<usize> { return vec![self.position]; } fn list_neighbours(&self) -> Vec<(usize, usize)> { return vec![]; } } impl Token for Asterisk { fn is_skippable(&self) -> bool {true} fn list_first(&self) -> Vec<usize> { return self.content.list_first(); } fn list_last(&self) -> Vec<usize> { return self.content.list_last(); } fn list_neighbours(&self) -> Vec<(usize, usize)> { let mut res = self.content.list_neighbours(); for x in self.list_last() { for y in self.list_first() { res.push((x, y)); } } return res; } } impl Token for Plus { fn list_first(&self) -> Vec<usize> { return self.content.list_first(); } fn list_last(&self) -> Vec<usize> { return self.content.list_last(); } fn list_neighbours(&self) -> Vec<(usize, usize)> { let mut res = self.content.list_neighbours(); for x in self.list_last() { for y in self.list_first() { res.push((x, y)); } } return res; } } impl Token for Chain { fn is_skippable(&self) -> bool { return self.content.iter().all(|x| x.is_skippable()); } fn list_first(&self) -> Vec<usize> { let mut res = Vec::new(); for token in self.content.iter() { res.append(&mut token.list_first()); if !token.is_skippable() {break;} } return res; } fn list_last(&self) -> Vec<usize> { let mut res = Vec::new(); for token in self.content.iter().rev() { res.append(&mut token.list_last()); if !token.is_skippable() {break;} } return res; } fn list_neighbours(&self) -> Vec<(usize, usize)> { let mut res = Vec::new(); let mut previous: Vec<&Box<dyn Token>> = Vec::new(); for token in self.content.iter() { for t in previous.iter() { for x in t.list_last() { for y in token.list_first() { res.push((x, y)); } } } res.append(&mut token.list_neighbours()); if token.is_skippable() { previous.push(token); } else { previous = vec![token]; } } return res; } } fn find_closing_parenthesis(s: &String) -> Option<usize> { let chars: Vec<char> = s.chars().collect(); let mut counter = 0; for (i, c) in chars.iter().enumerate() { if *c == '(' {counter += 1;} else if *c == ')' {counter -= 1;} if counter == 0 {return Some(i);} } return None; } fn parse(pattern: &String, offset: usize) -> Chain { let chars: Vec<char> = pattern.chars().collect(); let mut res: Vec<Box<dyn Token>> = Vec::new(); let mut i = 0; while i < pattern.len() { let c = chars[i]; match c { '(' => { let j = find_closing_parenthesis(&pattern[i..].to_string()).unwrap() + i; let inner_content = parse(&pattern[i+1..j].to_string(), offset+i+1); res.push(Box::new(inner_content)); i = j+1; } '*' => { let token = res.pop().unwrap(); res.push(Box::new(Asterisk{content: token})); i += 1; } '+' => { let token = res.pop().unwrap(); res.push(Box::new(Plus{content: token})); i += 1; } c => { res.push(Box::new(Symbol{position: i+offset, value: c})); i += 1; } } } return Chain{content: res}; } fn encode_set(set: &HashSet<usize>) -> u64 { let mut res = 0; for x in set.iter() { res ^= 1<<x; } return res; } fn decode_set(x: u64) ->HashSet<usize> { if x == START as u64 {return HashSet::from([START]);} let mut x = x; let mut res: HashSet<usize> = HashSet::new(); while x > 0 { let y = x.trailing_zeros(); res.insert(y as usize); x ^= 1 << y; } return res; } struct Regexp { rules: HashMap<(usize, char), HashSet<usize>>, end_states: HashSet<usize> } impl Regexp { fn new(pattern: &String) -> Regexp { let r = parse(pattern, 0); let pattern_chars = Vec::from_iter(pattern.chars()); let mut rules: HashMap<(usize, char), HashSet<usize>> = HashMap::new(); for i in r.list_first() { let c = pattern_chars[i]; let key = (START, c); match rules.get_mut(&key) { Some(set) => {set.insert(i);}, None => {rules.insert(key, HashSet::from([i]));} }; } for (i, j) in r.list_neighbours() { let c = pattern_chars[j]; let key = (i, c); match rules.get_mut(&key) { Some(set) => {set.insert(j);}, None => {rules.insert(key, HashSet::from([j]));} }; } let mut end_states = HashSet::from_iter(r.list_last().into_iter()); if r.is_skippable() { end_states.insert(START); } return Regexp{rules, end_states}; } pub fn eval(&self, s: String) -> bool { let mut multistate = HashSet::from([START]); for c in s.chars() { let mut new_multistate = HashSet::new(); for state in multistate { if let Some(x) = self.rules.get(&(state, c)) { new_multistate = new_multistate.union(&x).map(|&y| y).collect(); } else if let Some(x) = self.rules.get(&(state, '.')) { new_multistate = new_multistate.union(&x).map(|&y| y).collect(); } } multistate = new_multistate; } return multistate.iter().any(|x| self.end_states.contains(x)); } fn determinize(&self) -> RegexpDFA { let mut rules: HashMap<(u64, char), u64> = HashMap::new(); let mut end_states: HashSet<u64> = HashSet::new(); if self.end_states.contains(&START) {end_states.insert(START as u64);} let mut stack = Vec::from([START as u64]); let mut processed_states = HashSet::new(); while !stack.is_empty() { let state = stack.pop().unwrap(); let multistate = decode_set(state); let mut new_rules: HashMap<char, HashSet<usize>> = HashMap::new(); for key in self.rules.keys().filter(|key| multistate.contains(&key.0)) { let (_st, c) = key; if !new_rules.contains_key(c) { new_rules.insert(*c, HashSet::new()); } for target in &self.rules[key] { new_rules.get_mut(c).unwrap().insert(*target); } } for (c, target_set) in new_rules.into_iter() { let encoded_target = encode_set(&target_set); rules.insert((state, c), encoded_target); if target_set.iter().any(|st| self.end_states.contains(st)) { end_states.insert(encoded_target); } if !processed_states.contains(&encoded_target) { stack.push(encoded_target); processed_states.insert(encoded_target); } } } return RegexpDFA{rules, end_states}; } } struct RegexpDFA { rules: HashMap<(u64, char), u64>, end_states: HashSet<u64> } impl RegexpDFA { fn eval(&self, s: String) -> bool { let mut state = START as u64; for c in s.chars() { if let Some(x) = self.rules.get(&(state, c)) { state = *x; } else { return false; } } return self.end_states.contains(&state); } } fn main() { let tests = ["", "a", "ab", "aabb", "abab", "abcd", "abcbcdbcd"]; for pattern in ["a*b*", "a+b+", "(ab)*", "(ab)+", "a((bc)*d)*"] { println!("# {pattern}"); let r = Regexp::new(&pattern.to_string()).determinize(); for &t in tests.iter() { println!("{t} {}", r.eval(t.to_string())); } println!(); } }