diff --git a/src/main.rs b/src/main.rs --- a/src/main.rs +++ b/src/main.rs @@ -1,67 +1,184 @@ use std::collections::{HashMap, HashSet}; +trait Token { + fn is_skippable(&self) -> bool {false} + fn list_first(&self) -> Vec; + fn list_last(&self) -> Vec; + fn list_neighbours(&self) -> Vec<(usize, usize)>; +} + +struct Symbol { + position: usize, + value: char +} + +struct Asterisk { + content: Box +} + +struct Chain { + content: Vec> +} + +impl Token for Symbol { + fn list_first(&self) -> Vec { + return vec![self.position]; + } + + fn list_last(&self) -> Vec { + return vec![self.position]; + } + + fn list_neighbours(&self) -> Vec<(usize, usize)> { + return vec![]; + } +} + +impl Token for Asterisk { + fn is_skippable(&self) -> bool {true} + + fn list_first(&self) -> Vec { + return self.content.list_first(); + } + + fn list_last(&self) -> Vec { + return self.content.list_last(); + } + + fn list_neighbours(&self) -> Vec<(usize, usize)> { + let mut res = self.content.list_neighbours(); + + for x in self.list_last() { + for y in self.list_first() { + res.push((x, y)); + } + } + + return res; + } +} + +impl Token for Chain { + fn list_first(&self) -> Vec { + let mut res = Vec::new(); + for token in self.content.iter() { + res.append(&mut token.list_first()); + if !token.is_skippable() {break;} + } + + return res; + } + + fn list_last(&self) -> Vec { + let mut res = Vec::new(); + for token in self.content.iter().rev() { + res.append(&mut token.list_last()); + if !token.is_skippable() {break;} + } + + return res; + } + + fn list_neighbours(&self) -> Vec<(usize, usize)> { + let mut res = Vec::new(); + let mut previous: Vec<&Box> = Vec::new(); + for token in self.content.iter() { + for t in previous.iter() { + for x in t.list_last() { + for y in token.list_first() { + res.push((x, y)); + } + } + } + res.append(&mut token.list_neighbours()); + + if token.is_skippable() { + previous.push(token); + } else { + previous = vec![token]; + } + } + + return res; + } +} + +fn find_closing_parenthesis(s: &String) -> Option { + let chars: Vec = s.chars().collect(); + let mut counter = 0; + + for (i, c) in chars.iter().enumerate() { + if *c == '(' {counter += 1;} + else if *c == ')' {counter -= 1;} + if counter == 0 {return Some(i);} + } + + return None; +} + +fn parse(pattern: &String, offset: usize) -> Chain { + let chars: Vec = pattern.chars().collect(); + let mut res: Vec> = Vec::new(); + let mut i = 0; + while i < pattern.len() { + let c = chars[i]; + match c { + '(' => { + let j = find_closing_parenthesis(&pattern[i..].to_string()).unwrap() + i; + let inner_content = parse(&pattern[i+1..j].to_string(), offset+i+1); + res.push(Box::new(inner_content)); + i = j+1; + } + '*' => { + let segment = res.pop().unwrap(); + res.push(Box::new(Asterisk{content: segment})); + i += 1; + } + c => { + res.push(Box::new(Symbol{position: i+offset, value: c})); + i += 1; + } + } + } + + return Chain{content: res}; +} + struct Regexp { rules: HashMap<(usize, char), HashSet>, end_states: HashSet } impl Regexp { - pub fn new(pattern: String) -> Self { - println!("> {pattern}"); + fn new(pattern: &String) -> Regexp { + let r = parse(pattern, 0); let pattern_chars = Vec::from_iter(pattern.chars()); - let mut star = false; - let mut plus = false; - let n = pattern.len(); - let mut states = vec![HashSet::from([n])]; - let mut end_states = HashSet::new(); - let mut rules = HashMap::new(); - - for i in 0..n { - let j = n-i-1; - let c = pattern_chars[j]; - let k = states.len()-1; - if c == '*' { - star = true; - continue; - } - if c == '+' { - plus = true; - continue; - } - if star { - states[k].insert(j); - println!("{j}: {:?}", states[k]); - Regexp::update_rule(&mut rules, j, &pattern_chars, &states[k]); - if states[k].contains(&n) {end_states.insert(j);} - star = false; - } - else if plus { - states[k].insert(j); - println!("{j}: {:?}", states[k]); - Regexp::update_rule(&mut rules, j, &pattern_chars, &states[k]); - if states[k].contains(&n) {end_states.insert(j);} - states.push(HashSet::from([j])); - plus = false; - } - else { - println!("{j}: {:?}", states[k]); - Regexp::update_rule(&mut rules, j, &pattern_chars, &states[k]); - if states[k].contains(&n) {end_states.insert(j);} - states.push(HashSet::from([j])); - } - if j == 0 { - let m = states.len()-1; - println!("99: {:?} (start)", states[m]); - Regexp::update_rule(&mut rules, 99, &pattern_chars, &states[m]); - if states[m].contains(&n) {end_states.insert(j);} - } + let mut rules: HashMap<(usize, char), HashSet> = HashMap::new(); + + for i in r.list_first() { + let c = pattern_chars[i]; + let key = (99, c); + match rules.get_mut(&key) { + Some(set) => {set.insert(i);}, + None => {rules.insert(key, HashSet::from([i]));} + }; } - println!("{rules:?}"); - return Regexp {rules, end_states}; + for (i, j) in r.list_neighbours() { + let c = pattern_chars[j]; + let key = (i, c); + match rules.get_mut(&key) { + Some(set) => {set.insert(j);}, + None => {rules.insert(key, HashSet::from([j]));} + }; + } + + let end_states = HashSet::from_iter(r.list_last().into_iter()); + + return Regexp{rules, end_states}; } - pub fn eval(self, s: String) -> bool { + pub fn eval(&self, s: String) -> bool { let mut multistate = HashSet::from([99]); for c in s.chars() { @@ -79,70 +196,16 @@ impl Regexp { return multistate.iter().any(|x| self.end_states.contains(x)); } - - fn update_rule(rules: &mut HashMap<(usize, char), HashSet>, pos: usize, pattern: &Vec, values: &HashSet) { - let n = pattern.len(); - - for &target in values.iter() { - println!("pos: {pos}, target: {target}"); - if target == n {continue;} - let c = pattern[target]; - let key = (pos, c); - if !rules.contains_key(&key) {rules.insert(key, HashSet::new());} - match rules.get_mut(&key) { - Some(set) => {set.insert(target);}, - None => {rules.insert(key, HashSet::from([target]));} - }; - } - } - - fn find_previous_letter(s: &String, k: usize) -> Option { - return s[..k].rfind(char::is_alphanumeric); - } - - fn find_next_letter(s: &String, k: usize) -> Option { - return s[k+1..].find(char::is_alphanumeric); - } - - fn find_opening_parenthesis(s: &String, k: usize) -> Option { - let chars: Vec = s.chars().collect(); - let mut i = k; - let mut counter = 1; - - while counter > 0 { - match chars[i] { - '(' => counter -= 1, - ')' => counter += 1, - _ => {} - } - match i.checked_sub(1) { - Some(x) => i = x, - None => return None - } - } - return Some(i); - } -} - -struct Solution {} - -impl Solution { - pub fn is_match(s: String, p: String) -> bool { - let regexp = Regexp::new(p); - - return regexp.eval(s); - } } fn main() { - // println!("{}\n", Solution::is_match(String::from("aa"), String::from("a"))); - // println!("{}\n", Solution::is_match(String::from("aa"), String::from("a*"))); - // println!("{}\n", Solution::is_match(String::from("ab"), String::from(".*"))); - // println!("{}\n", Solution::is_match(String::from("aab"), String::from("a*ab"))); - // println!("{}\n", Solution::is_match(String::from("aab"), String::from("a*b*"))); - println!("{}\n", Solution::is_match(String::from("a"), String::from("a+"))); - println!("{}\n", Solution::is_match(String::from("aa"), String::from("a+"))); - println!("{}\n", Solution::is_match(String::from(""), String::from("a+"))); - println!("{}\n", Solution::is_match(String::from("bb"), String::from("ba+b"))); - println!("{}\n", Solution::is_match(String::from("baaaaab"), String::from("ba+b"))); + let tests = ["a", "ab", "aabb", "abab", "abcd", "abcbcdbcd"]; + for pattern in ["a*b*", "(ab)*", "a((bc)*d)*"] { + println!("# {pattern}"); + let r = Regexp::new(&pattern.to_string()); + for &t in tests.iter() { + println!("{t} {}", r.eval(t.to_string())); + } + println!(); + } }