diff --git a/src/regexp/token.rs b/src/regexp/token.rs --- a/src/regexp/token.rs +++ b/src/regexp/token.rs @@ -18,7 +18,7 @@ impl fmt::Display for ParsingError { write!(f, "An opening parenthesis not found. Pattern \"{s}\", position {pos}") }, ParsingError::ClosingParenthesis {s, pos} => { - write!(f, "An closing parenthesis not found. Pattern \"{s}\", position {pos}") + write!(f, "A closing parenthesis not found. Pattern \"{s}\", position {pos}") }, ParsingError::EmptyAlternativeVariant => { write!(f, "Found an empty Alternative variant.") @@ -27,27 +27,34 @@ impl fmt::Display for ParsingError { } } +/// A single letter or other alphabet symbol. pub struct Symbol { + /// Symbol position in the regular expression. position: usize } +/// A unary operator specifying its content to occur zero or more times. pub struct Asterisk { content: Box } +/// An operator with a variable number of arguments, specifying exchangeable alternatives. pub struct Alternative { content: Vec> } +/// An operator expressing a concatenation of its content Tokens. pub struct Chain { content: Vec> } +/// Enum encapsulating possible items of a regular expression. pub enum Token { - Lambda, + Lambda, // An empty string, useful as an `Alternative`. Symbol(Symbol), Asterisk(Asterisk), Alternative(Alternative), + // A special token to temporarily separate Alternative variants. Removed in the Alternative constructor. AlternativeSeparator, Chain(Chain) } @@ -89,6 +96,8 @@ impl Asterisk { } impl Alternative { + /// Split a sequence of `Tokens` by `AlternativeSeparator` into alternative variants and return the result. + /// If any variant is empty, return an `Err``. fn new(content: Vec>) -> Result { let mut variants: Vec>> = vec![Vec::new()]; @@ -112,7 +121,7 @@ impl Alternative { } fn is_skippable(&self) -> bool { - return self.content.iter().any(|x| x.is_skippable()); + return self.content.iter().any(|x| x.is_skippable()); } fn list_first(&self) -> Vec { @@ -193,51 +202,57 @@ impl Chain { } impl Token { + /// Decide whether the `Token` has to contain some `Symbols`, + /// or whether it can be stepped over when looking for first, last and neighbouring items. pub fn is_skippable(&self) -> bool { match self { Token::Lambda => true, Token::Symbol(_) => false, Token::Asterisk(_) => true, Token::Alternative(t) => t.is_skippable(), - Token::AlternativeSeparator => panic!(), + Token::AlternativeSeparator => panic!("Separators must be already removed at this stage"), Token::Chain(t) => t.is_skippable() } } + /// List all possible string positions the token can start with. pub fn list_first(&self) -> Vec { match self { Token::Lambda => vec![], Token::Symbol(t) => t.list_first(), Token::Asterisk(t) => t.list_first(), Token::Alternative(t) => t.list_first(), - Token::AlternativeSeparator => panic!(), + Token::AlternativeSeparator => panic!("Separators must be already removed at this stage"), Token::Chain(t) => t.list_first() } } + /// List all possible string positions the token can end with. pub fn list_last(&self) -> Vec { match self { Token::Lambda => vec![], Token::Symbol(t) => t.list_last(), Token::Asterisk(t) => t.list_last(), Token::Alternative(t) => t.list_last(), - Token::AlternativeSeparator => panic!(), + Token::AlternativeSeparator => panic!("Separators must be already removed at this stage"), Token::Chain(t) => t.list_last() } } + /// List positions of all possibly neighbouring subtokens. pub fn list_neighbours(&self) -> Vec<(usize, usize)> { match self { Token::Lambda => vec![], Token::Symbol(t) => t.list_neighbours(), Token::Asterisk(t) => t.list_neighbours(), Token::Alternative(t) => t.list_neighbours(), - Token::AlternativeSeparator => panic!(), + Token::AlternativeSeparator => panic!("Separators must be already removed at this stage"), Token::Chain(t) => t.list_neighbours() } } } +/// For a string starting with a parenthesis, find its matching closing parenthesis, or return None. fn find_closing_parenthesis(s: &String) -> Option { let chars: Vec = s.chars().collect(); let mut counter = 0; @@ -251,6 +266,10 @@ fn find_closing_parenthesis(s: &String) return None; } +/// Recursively parse the pattern into a `Token` tree. +/// +/// The `offset` defines where the `pattern` starts relative to the original pattern, +/// to record correct global token positions in the subcalls pub fn parse(pattern: &String, offset: usize) -> Result { let chars: Vec = pattern.chars().collect(); let mut res: Vec> = Vec::new();