use std::{borrow::Borrow, fmt}; #[derive(Debug, Clone)] pub enum ParsingError { Asterisk {s: String, pos: usize}, OpeningParenthesis {s: String, pos: usize}, ClosingParenthesis {s: String, pos: usize}, EmptyAlternativeVariant } impl fmt::Display for ParsingError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { ParsingError::Asterisk {s, pos} => { write!(f, "The asterisk operator is missing an argument. Pattern \"{s}\", position {pos}") }, ParsingError::OpeningParenthesis {s, pos} => { write!(f, "An opening parenthesis not found. Pattern \"{s}\", position {pos}") }, ParsingError::ClosingParenthesis {s, pos} => { write!(f, "An closing parenthesis not found. Pattern \"{s}\", position {pos}") }, ParsingError::EmptyAlternativeVariant => { write!(f, "Found an empty Alternative variant.") } } } } pub struct Symbol { position: usize } pub struct Asterisk { content: Box } pub struct Alternative { content: Vec> } pub struct Chain { content: Vec> } pub enum Token { Lambda, Symbol(Symbol), Asterisk(Asterisk), Alternative(Alternative), AlternativeSeparator, Chain(Chain) } impl Symbol { fn list_first(&self) -> Vec { return vec![self.position]; } fn list_last(&self) -> Vec { return vec![self.position]; } fn list_neighbours(&self) -> Vec<(usize, usize)> { return vec![]; } } impl Asterisk { fn list_first(&self) -> Vec { return self.content.list_first(); } fn list_last(&self) -> Vec { return self.content.list_last(); } fn list_neighbours(&self) -> Vec<(usize, usize)> { let mut res = self.content.list_neighbours(); for x in self.list_last() { for y in self.list_first() { res.push((x, y)); } } return res; } } impl Alternative { fn new(content: Vec>) -> Result { let mut variants: Vec>> = vec![Vec::new()]; content.into_iter().for_each(|x| { if matches!(x.borrow(), Token::AlternativeSeparator) { variants.push(Vec::new()); } else { variants.last_mut().unwrap().push(x); } }); if variants.iter().any(|x| x.is_empty()) { return Err(ParsingError::EmptyAlternativeVariant); } return Ok(Alternative{ content: variants.into_iter().map( |x| Box::new(Token::Chain(Chain{content: x})) ).collect() }); } fn is_skippable(&self) -> bool { return self.content.iter().any(|x| x.is_skippable()); } fn list_first(&self) -> Vec { let mut res = Vec::new(); for token in self.content.iter() { res.append(&mut token.list_first()); } return res; } fn list_last(&self) -> Vec { let mut res = Vec::new(); for token in self.content.iter() { res.append(&mut token.list_last()); } return res; } fn list_neighbours(&self) -> Vec<(usize, usize)> { let mut res = Vec::new(); for token in self.content.iter() { res.append(&mut token.list_neighbours()); } return res; } } impl Chain { fn is_skippable(&self) -> bool { return self.content.iter().all(|x| x.is_skippable()); } fn list_first(&self) -> Vec { let mut res = Vec::new(); for token in self.content.iter() { res.append(&mut token.list_first()); if !token.is_skippable() {break;} } return res; } fn list_last(&self) -> Vec { let mut res = Vec::new(); for token in self.content.iter().rev() { res.append(&mut token.list_last()); if !token.is_skippable() {break;} } return res; } fn list_neighbours(&self) -> Vec<(usize, usize)> { let mut res = Vec::new(); let mut previous: Vec<&Box> = Vec::new(); for token in self.content.iter() { for t in previous.iter() { for x in t.list_last() { for y in token.list_first() { res.push((x, y)); } } } res.append(&mut token.list_neighbours()); if token.is_skippable() { previous.push(token); } else { previous = vec![token]; } } return res; } } impl Token { pub fn is_skippable(&self) -> bool { match self { Token::Lambda => true, Token::Symbol(_) => false, Token::Asterisk(_) => true, Token::Alternative(t) => t.is_skippable(), Token::AlternativeSeparator => panic!(), Token::Chain(t) => t.is_skippable() } } pub fn list_first(&self) -> Vec { match self { Token::Lambda => vec![], Token::Symbol(t) => t.list_first(), Token::Asterisk(t) => t.list_first(), Token::Alternative(t) => t.list_first(), Token::AlternativeSeparator => panic!(), Token::Chain(t) => t.list_first() } } pub fn list_last(&self) -> Vec { match self { Token::Lambda => vec![], Token::Symbol(t) => t.list_last(), Token::Asterisk(t) => t.list_last(), Token::Alternative(t) => t.list_last(), Token::AlternativeSeparator => panic!(), Token::Chain(t) => t.list_last() } } pub fn list_neighbours(&self) -> Vec<(usize, usize)> { match self { Token::Lambda => vec![], Token::Symbol(t) => t.list_neighbours(), Token::Asterisk(t) => t.list_neighbours(), Token::Alternative(t) => t.list_neighbours(), Token::AlternativeSeparator => panic!(), Token::Chain(t) => t.list_neighbours() } } } fn find_closing_parenthesis(s: &String) -> Option { let chars: Vec = s.chars().collect(); let mut counter = 0; for (i, c) in chars.iter().enumerate() { if *c == '(' {counter += 1;} else if *c == ')' {counter -= 1;} if counter == 0 {return Some(i);} } return None; } pub fn parse(pattern: &String, offset: usize) -> Result { let chars: Vec = pattern.chars().collect(); let mut res: Vec> = Vec::new(); let mut is_alternative = false; let mut i = 0; while i < pattern.len() { let c = chars[i]; match c { '(' => { let j = find_closing_parenthesis(&pattern[i..].to_string()).ok_or(ParsingError::ClosingParenthesis {s: pattern.clone(), pos: i})? + i; let inner_content = parse(&pattern[i+1..j].to_string(), offset+i+1)?; res.push(Box::new(inner_content)); i = j+1; } '*' => { let token = res.pop().ok_or(ParsingError::Asterisk{s: pattern.clone(), pos: i})?; res.push(Box::new(Token::Asterisk(Asterisk{content: token}))); i += 1; } ')' => { return Err(ParsingError::OpeningParenthesis {s: pattern.clone(), pos: i}); } '|' | '+' => { is_alternative = true; res.push(Box::new(Token::AlternativeSeparator)); i += 1; } '_' => { res.push(Box::new(Token::Lambda)); i += 1; } _c => { res.push(Box::new(Token::Symbol(Symbol{position: i+offset}))); i += 1; } } } if is_alternative { return Ok(Token::Alternative(Alternative::new(res)?)); } else { return Ok(Token::Chain(Chain{content: res})); } } mod test { use super::*; #[test] fn test_closing_parenthesis() { let s = "()"; assert_eq!(find_closing_parenthesis(&s.to_string()), Some(1)); } }