diff --git a/src/regexp/token.rs b/src/regexp/token.rs --- a/src/regexp/token.rs +++ b/src/regexp/token.rs @@ -1,11 +1,12 @@ -use std::fmt; +use std::{borrow::Borrow, fmt}; #[derive(Debug, Clone)] pub enum ParsingError { Asterisk {s: String, pos: usize}, Plus {s: String, pos: usize}, OpeningParenthesis {s: String, pos: usize}, - ClosingParenthesis {s: String, pos: usize} + ClosingParenthesis {s: String, pos: usize}, + EmptyAlternativeVariant } impl fmt::Display for ParsingError { @@ -22,6 +23,9 @@ impl fmt::Display for ParsingError { }, ParsingError::ClosingParenthesis {s, pos} => { write!(f, "An closing parenthesis not found. Pattern \"{s}\", position {pos}") + }, + ParsingError::EmptyAlternativeVariant => { + write!(f, "Found an empty Alternative variant.") } } } @@ -39,6 +43,10 @@ pub struct Plus { content: Box } +pub struct Alternative { + content: Vec> +} + pub struct Chain { content: Vec> } @@ -47,6 +55,8 @@ pub enum Token { Symbol(Symbol), Asterisk(Asterisk), Plus(Plus), + Alternative(Alternative), + AlternativeSeparator, Chain(Chain) } @@ -108,6 +118,61 @@ impl Plus { } } +impl Alternative { + fn new(content: Vec>) -> Result { + let mut variants: Vec>> = vec![Vec::new()]; + + content.into_iter().for_each(|x| { + if matches!(x.borrow(), Token::AlternativeSeparator) { + variants.push(Vec::new()); + } else { + variants.last_mut().unwrap().push(x); + } + }); + + if variants.iter().any(|x| x.is_empty()) { + return Err(ParsingError::EmptyAlternativeVariant); + } + + return Ok(Alternative{ + content: variants.into_iter().map( + |x| Box::new(Token::Chain(Chain{content: x})) + ).collect() + }); + } + + fn is_skippable(&self) -> bool { + return self.content.iter().any(|x| x.is_skippable()); + } + + fn list_first(&self) -> Vec { + let mut res = Vec::new(); + for token in self.content.iter() { + res.append(&mut token.list_first()); + } + + return res; + } + + fn list_last(&self) -> Vec { + let mut res = Vec::new(); + for token in self.content.iter() { + res.append(&mut token.list_last()); + } + + return res; + } + + fn list_neighbours(&self) -> Vec<(usize, usize)> { + let mut res = Vec::new(); + for token in self.content.iter() { + res.append(&mut token.list_neighbours()); + } + + return res; + } +} + impl Chain { fn is_skippable(&self) -> bool { return self.content.iter().all(|x| x.is_skippable()); @@ -163,6 +228,8 @@ impl Token { Token::Symbol(_) => false, Token::Asterisk(_) => true, Token::Plus(_) => false, + Token::Alternative(t) => t.is_skippable(), + Token::AlternativeSeparator => panic!(), Token::Chain(t) => t.is_skippable() } } @@ -172,6 +239,8 @@ impl Token { Token::Symbol(t) => t.list_first(), Token::Asterisk(t) => t.list_first(), Token::Plus(t) => t.list_first(), + Token::Alternative(t) => t.list_first(), + Token::AlternativeSeparator => panic!(), Token::Chain(t) => t.list_first() } } @@ -181,6 +250,8 @@ impl Token { Token::Symbol(t) => t.list_last(), Token::Asterisk(t) => t.list_last(), Token::Plus(t) => t.list_last(), + Token::Alternative(t) => t.list_last(), + Token::AlternativeSeparator => panic!(), Token::Chain(t) => t.list_last() } } @@ -190,6 +261,8 @@ impl Token { Token::Symbol(t) => t.list_neighbours(), Token::Asterisk(t) => t.list_neighbours(), Token::Plus(t) => t.list_neighbours(), + Token::Alternative(t) => t.list_neighbours(), + Token::AlternativeSeparator => panic!(), Token::Chain(t) => t.list_neighbours() } } @@ -211,6 +284,7 @@ fn find_closing_parenthesis(s: &String) pub fn parse(pattern: &String, offset: usize) -> Result { let chars: Vec = pattern.chars().collect(); let mut res: Vec> = Vec::new(); + let mut is_alternative = false; let mut i = 0; while i < pattern.len() { let c = chars[i]; @@ -234,6 +308,11 @@ pub fn parse(pattern: &String, offset: u ')' => { return Err(ParsingError::OpeningParenthesis {s: pattern.clone(), pos: i}); } + '|' => { + is_alternative = true; + res.push(Box::new(Token::AlternativeSeparator)); + i += 1; + } _c => { res.push(Box::new(Token::Symbol(Symbol{position: i+offset}))); i += 1; @@ -241,7 +320,11 @@ pub fn parse(pattern: &String, offset: u } } - return Ok(Token::Chain(Chain{content: res})); + if is_alternative { + return Ok(Token::Alternative(Alternative::new(res)?)); + } else { + return Ok(Token::Chain(Chain{content: res})); + } } mod test {