use std::collections::{HashMap, HashSet}; const START: usize = usize::MAX; trait Token { fn is_skippable(&self) -> bool {false} fn list_first(&self) -> Vec; fn list_last(&self) -> Vec; fn list_neighbours(&self) -> Vec<(usize, usize)>; } struct Symbol { position: usize, value: char } struct Asterisk { content: Box } struct Plus { content: Box } struct Chain { content: Vec> } impl Token for Symbol { fn list_first(&self) -> Vec { return vec![self.position]; } fn list_last(&self) -> Vec { return vec![self.position]; } fn list_neighbours(&self) -> Vec<(usize, usize)> { return vec![]; } } impl Token for Asterisk { fn is_skippable(&self) -> bool {true} fn list_first(&self) -> Vec { return self.content.list_first(); } fn list_last(&self) -> Vec { return self.content.list_last(); } fn list_neighbours(&self) -> Vec<(usize, usize)> { let mut res = self.content.list_neighbours(); for x in self.list_last() { for y in self.list_first() { res.push((x, y)); } } return res; } } impl Token for Plus { fn list_first(&self) -> Vec { return self.content.list_first(); } fn list_last(&self) -> Vec { return self.content.list_last(); } fn list_neighbours(&self) -> Vec<(usize, usize)> { let mut res = self.content.list_neighbours(); for x in self.list_last() { for y in self.list_first() { res.push((x, y)); } } return res; } } impl Token for Chain { fn is_skippable(&self) -> bool { return self.content.iter().all(|x| x.is_skippable()); } fn list_first(&self) -> Vec { let mut res = Vec::new(); for token in self.content.iter() { res.append(&mut token.list_first()); if !token.is_skippable() {break;} } return res; } fn list_last(&self) -> Vec { let mut res = Vec::new(); for token in self.content.iter().rev() { res.append(&mut token.list_last()); if !token.is_skippable() {break;} } return res; } fn list_neighbours(&self) -> Vec<(usize, usize)> { let mut res = Vec::new(); let mut previous: Vec<&Box> = Vec::new(); for token in self.content.iter() { for t in previous.iter() { for x in t.list_last() { for y in token.list_first() { res.push((x, y)); } } } res.append(&mut token.list_neighbours()); if token.is_skippable() { previous.push(token); } else { previous = vec![token]; } } return res; } } fn find_closing_parenthesis(s: &String) -> Option { let chars: Vec = s.chars().collect(); let mut counter = 0; for (i, c) in chars.iter().enumerate() { if *c == '(' {counter += 1;} else if *c == ')' {counter -= 1;} if counter == 0 {return Some(i);} } return None; } fn parse(pattern: &String, offset: usize) -> Chain { let chars: Vec = pattern.chars().collect(); let mut res: Vec> = Vec::new(); let mut i = 0; while i < pattern.len() { let c = chars[i]; match c { '(' => { let j = find_closing_parenthesis(&pattern[i..].to_string()).unwrap() + i; let inner_content = parse(&pattern[i+1..j].to_string(), offset+i+1); res.push(Box::new(inner_content)); i = j+1; } '*' => { let token = res.pop().unwrap(); res.push(Box::new(Asterisk{content: token})); i += 1; } '+' => { let token = res.pop().unwrap(); res.push(Box::new(Plus{content: token})); i += 1; } c => { res.push(Box::new(Symbol{position: i+offset, value: c})); i += 1; } } } return Chain{content: res}; } struct Regexp { rules: HashMap<(usize, char), HashSet>, end_states: HashSet } impl Regexp { fn new(pattern: &String) -> Regexp { let r = parse(pattern, 0); let pattern_chars = Vec::from_iter(pattern.chars()); let mut rules: HashMap<(usize, char), HashSet> = HashMap::new(); for i in r.list_first() { let c = pattern_chars[i]; let key = (START, c); match rules.get_mut(&key) { Some(set) => {set.insert(i);}, None => {rules.insert(key, HashSet::from([i]));} }; } for (i, j) in r.list_neighbours() { let c = pattern_chars[j]; let key = (i, c); match rules.get_mut(&key) { Some(set) => {set.insert(j);}, None => {rules.insert(key, HashSet::from([j]));} }; } let mut end_states = HashSet::from_iter(r.list_last().into_iter()); if r.is_skippable() { end_states.insert(START); } return Regexp{rules, end_states}; } pub fn eval(&self, s: String) -> bool { let mut multistate = HashSet::from([START]); for c in s.chars() { let mut new_multistate = HashSet::new(); for state in multistate { if let Some(x) = self.rules.get(&(state, c)) { new_multistate = new_multistate.union(&x).map(|&y| y).collect(); } else if let Some(x) = self.rules.get(&(state, '.')) { new_multistate = new_multistate.union(&x).map(|&y| y).collect(); } } multistate = new_multistate; } return multistate.iter().any(|x| self.end_states.contains(x)); } } fn main() { let tests = ["", "a", "ab", "aabb", "abab", "abcd", "abcbcdbcd"]; for pattern in ["a*b*", "a+b+", "(ab)*", "(ab)+", "a((bc)*d)*"] { println!("# {pattern}"); let r = Regexp::new(&pattern.to_string()); for &t in tests.iter() { println!("{t} {}", r.eval(t.to_string())); } println!(); } }