Changeset - 1a11f659e7b5
[Not reviewed]
default
0 1 0
Laman - 11 months ago 2024-06-11 22:29:10

the prototype rewritten to Rust
1 file changed with 177 insertions and 114 deletions:
src/main.rs
177
114
0 comments (0 inline, 0 general)
src/main.rs
Show inline comments
 
use std::collections::{HashMap, HashSet};
 

	
 
trait Token {
 
	fn is_skippable(&self) -> bool {false}
 
	fn list_first(&self) -> Vec<usize>;
 
	fn list_last(&self) -> Vec<usize>;
 
	fn list_neighbours(&self) -> Vec<(usize, usize)>;
 
}
 

	
 
struct Symbol {
 
	position: usize,
 
	value: char
 
}
 

	
 
struct Asterisk {
 
	content: Box<dyn Token>
 
}
 

	
 
struct Chain {
 
	content: Vec<Box<dyn Token>>
 
}
 

	
 
impl Token for Symbol {
 
	fn list_first(&self) -> Vec<usize> {
 
		return vec![self.position];
 
	}
 

	
 
	fn list_last(&self) -> Vec<usize> {
 
		return vec![self.position];
 
	}
 

	
 
	fn list_neighbours(&self) -> Vec<(usize, usize)> {
 
		return vec![];
 
	}
 
}
 

	
 
impl Token for Asterisk {
 
	fn is_skippable(&self) -> bool {true}
 

	
 
	fn list_first(&self) -> Vec<usize> {
 
		return self.content.list_first();
 
	}
 

	
 
	fn list_last(&self) -> Vec<usize> {
 
		return self.content.list_last();
 
	}
 

	
 
	fn list_neighbours(&self) -> Vec<(usize, usize)> {
 
		let mut res = self.content.list_neighbours();
 

	
 
		for x in self.list_last() {
 
			for y in self.list_first() {
 
				res.push((x, y));
 
			}
 
		}
 

	
 
		return res;
 
	}
 
}
 

	
 
impl Token for Chain {
 
	fn list_first(&self) -> Vec<usize> {
 
		let mut res = Vec::new();
 
		for token in self.content.iter() {
 
			res.append(&mut token.list_first());
 
			if !token.is_skippable() {break;}
 
		}
 

	
 
		return res;
 
	}
 

	
 
	fn list_last(&self) -> Vec<usize> {
 
		let mut res = Vec::new();
 
		for token in self.content.iter().rev() {
 
			res.append(&mut token.list_last());
 
			if !token.is_skippable() {break;}
 
		}
 

	
 
		return res;
 
	}
 

	
 
	fn list_neighbours(&self) -> Vec<(usize, usize)> {
 
		let mut res = Vec::new();
 
		let mut previous: Vec<&Box<dyn Token>> = Vec::new();
 
		for token in self.content.iter() {
 
			for t in previous.iter() {
 
				for x in t.list_last() {
 
					for y in token.list_first() {
 
						res.push((x, y));
 
					}
 
				}
 
			}
 
			res.append(&mut token.list_neighbours());
 

	
 
			if token.is_skippable() {
 
				previous.push(token);
 
			} else {
 
				previous = vec![token];
 
			}
 
		}
 

	
 
		return res;
 
	}
 
}
 

	
 
fn find_closing_parenthesis(s: &String) -> Option<usize> {
 
	let chars: Vec<char> = s.chars().collect();
 
	let mut counter = 0;
 

	
 
	for (i, c) in chars.iter().enumerate() {
 
		if *c == '(' {counter += 1;}
 
		else if *c == ')' {counter -= 1;}
 
		if counter == 0 {return Some(i);}
 
	}
 

	
 
	return None;
 
}
 

	
 
fn parse(pattern: &String, offset: usize) -> Chain {
 
	let chars: Vec<char> = pattern.chars().collect();
 
	let mut res: Vec<Box<dyn Token>> = Vec::new();
 
	let mut i = 0;
 
	while i < pattern.len() {
 
		let c = chars[i];
 
		match c {
 
			'(' => {
 
				let j = find_closing_parenthesis(&pattern[i..].to_string()).unwrap() + i;
 
				let inner_content = parse(&pattern[i+1..j].to_string(), offset+i+1);
 
				res.push(Box::new(inner_content));
 
				i = j+1;
 
			}
 
			'*' => {
 
				let segment = res.pop().unwrap();
 
				res.push(Box::new(Asterisk{content: segment}));
 
				i += 1;
 
			}
 
			c => {
 
				res.push(Box::new(Symbol{position: i+offset, value: c}));
 
				i += 1;
 
			}
 
		}
 
	}
 

	
 
	return Chain{content: res};
 
}
 

	
 
struct Regexp {
 
	rules: HashMap<(usize, char), HashSet<usize>>,
 
	end_states: HashSet<usize>
 
}
 

	
 
impl Regexp {
 
	pub fn new(pattern: String) -> Self {
 
		println!("> {pattern}");
 
	fn new(pattern: &String) -> Regexp {
 
		let r = parse(pattern, 0);
 
		let pattern_chars = Vec::from_iter(pattern.chars());
 
		let mut star = false;
 
		let mut plus = false;
 
		let n = pattern.len();
 
		let mut states = vec![HashSet::from([n])];
 
		let mut end_states = HashSet::new();
 
		let mut rules = HashMap::new();
 

	
 
		for i in 0..n {
 
			let j = n-i-1;
 
			let c = pattern_chars[j];
 
			let k = states.len()-1;
 
			if c == '*' {
 
				star = true;
 
				continue;
 
			}
 
			if c == '+' {
 
				plus = true;
 
				continue;
 
			}
 
			if star {
 
				states[k].insert(j);
 
				println!("{j}: {:?}", states[k]);
 
				Regexp::update_rule(&mut rules, j, &pattern_chars, &states[k]);
 
				if states[k].contains(&n) {end_states.insert(j);}
 
				star = false;
 
			}
 
			else if plus {
 
				states[k].insert(j);
 
				println!("{j}: {:?}", states[k]);
 
				Regexp::update_rule(&mut rules, j, &pattern_chars, &states[k]);
 
				if states[k].contains(&n) {end_states.insert(j);}
 
				states.push(HashSet::from([j]));
 
				plus = false;
 
			}
 
			else {
 
				println!("{j}: {:?}", states[k]);
 
				Regexp::update_rule(&mut rules, j, &pattern_chars, &states[k]);
 
				if states[k].contains(&n) {end_states.insert(j);}
 
				states.push(HashSet::from([j]));
 
			}
 
			if j == 0 {
 
				let m = states.len()-1;
 
				println!("99: {:?} (start)", states[m]);
 
				Regexp::update_rule(&mut rules, 99, &pattern_chars, &states[m]);
 
				if states[m].contains(&n) {end_states.insert(j);}
 
			}
 
		let mut rules: HashMap<(usize, char), HashSet<usize>> = HashMap::new();
 
		
 
		for i in r.list_first() {
 
			let c = pattern_chars[i];
 
			let key = (99, c);
 
			match rules.get_mut(&key) {
 
				Some(set) => {set.insert(i);},
 
				None => {rules.insert(key, HashSet::from([i]));}
 
			};
 
		}
 

	
 
		println!("{rules:?}");
 
		return Regexp {rules, end_states};
 
		for (i, j) in r.list_neighbours() {
 
			let c = pattern_chars[j];
 
			let key = (i, c);
 
			match rules.get_mut(&key) {
 
				Some(set) => {set.insert(j);},
 
				None => {rules.insert(key, HashSet::from([j]));}
 
			};
 
		}
 

	
 
		let end_states = HashSet::from_iter(r.list_last().into_iter());
 

	
 
		return Regexp{rules, end_states};
 
	}
 

	
 
	pub fn eval(self, s: String) -> bool {
 
	pub fn eval(&self, s: String) -> bool {
 
		let mut multistate = HashSet::from([99]);
 

	
 
		for c in s.chars() {
 
@@ -79,70 +196,16 @@ impl Regexp {
 

	
 
		return multistate.iter().any(|x| self.end_states.contains(x));
 
	}
 

	
 
	fn update_rule(rules: &mut HashMap<(usize, char), HashSet<usize>>, pos: usize, pattern: &Vec<char>, values: &HashSet<usize>) {
 
		let n = pattern.len();
 

	
 
		for &target in values.iter() {
 
			println!("pos: {pos}, target: {target}");
 
			if target == n {continue;}
 
			let c = pattern[target];
 
			let key = (pos, c);
 
			if !rules.contains_key(&key) {rules.insert(key, HashSet::new());}
 
			match rules.get_mut(&key) {
 
				Some(set) => {set.insert(target);},
 
				None => {rules.insert(key, HashSet::from([target]));}
 
			};
 
		}
 
	}
 

	
 
	fn find_previous_letter(s: &String, k: usize) -> Option<usize> {
 
		return s[..k].rfind(char::is_alphanumeric);
 
	}
 

	
 
	fn find_next_letter(s: &String, k: usize) -> Option<usize> {
 
		return s[k+1..].find(char::is_alphanumeric);
 
	}
 

	
 
	fn find_opening_parenthesis(s: &String, k: usize) -> Option<usize> {
 
		let chars: Vec<char> = s.chars().collect();
 
		let mut i = k;
 
		let mut counter = 1;
 
		
 
		while counter > 0 {
 
			match chars[i] {
 
				'(' => counter -= 1,
 
				')' => counter += 1,
 
				_ => {}
 
			}
 
			match i.checked_sub(1) {
 
				Some(x) => i = x,
 
				None => return None
 
			}
 
		}
 
		return Some(i);
 
	}
 
}
 

	
 
struct Solution {}
 

	
 
impl Solution {
 
	pub fn is_match(s: String, p: String) -> bool {
 
		let regexp = Regexp::new(p);
 

	
 
		return regexp.eval(s);
 
	}
 
}
 

	
 
fn main() {
 
	// println!("{}\n", Solution::is_match(String::from("aa"), String::from("a")));
 
	// println!("{}\n", Solution::is_match(String::from("aa"), String::from("a*")));
 
	// println!("{}\n", Solution::is_match(String::from("ab"), String::from(".*")));
 
	// println!("{}\n", Solution::is_match(String::from("aab"), String::from("a*ab")));
 
	// println!("{}\n", Solution::is_match(String::from("aab"), String::from("a*b*")));
 
	println!("{}\n", Solution::is_match(String::from("a"), String::from("a+")));
 
	println!("{}\n", Solution::is_match(String::from("aa"), String::from("a+")));
 
	println!("{}\n", Solution::is_match(String::from(""), String::from("a+")));
 
	println!("{}\n", Solution::is_match(String::from("bb"), String::from("ba+b")));
 
	println!("{}\n", Solution::is_match(String::from("baaaaab"), String::from("ba+b")));
 
	let tests = ["a", "ab", "aabb", "abab", "abcd", "abcbcdbcd"];
 
	for pattern in ["a*b*", "(ab)*", "a((bc)*d)*"] {
 
		println!("# {pattern}");
 
		let r = Regexp::new(&pattern.to_string());
 
		for &t in tests.iter() {
 
			println!("{t} {}", r.eval(t.to_string()));
 
		}
 
		println!();
 
	}
 
}
0 comments (0 inline, 0 general)