Files @ bac2e7f972b5
Branch filter:

Location: Regular-Expresso/src/regexp/token.rs

bac2e7f972b5 6.8 KiB application/rls-services+xml Show Annotation Show as Raw Download as Raw
Laman
added a license
use std::{borrow::Borrow, fmt};

#[derive(Debug, Clone)]
pub enum ParsingError {
	Asterisk {s: String, pos: usize},
	OpeningParenthesis {s: String, pos: usize},
	ClosingParenthesis {s: String, pos: usize},
	EmptyAlternativeVariant
}

impl fmt::Display for ParsingError {
	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
		match self {
			ParsingError::Asterisk {s, pos} => {
				write!(f, "The asterisk operator is missing an argument. Pattern \"{s}\", position {pos}")
			},
			ParsingError::OpeningParenthesis {s, pos} => {
				write!(f, "An opening parenthesis not found. Pattern \"{s}\", position {pos}")
			},
			ParsingError::ClosingParenthesis {s, pos} => {
				write!(f, "An closing parenthesis not found. Pattern \"{s}\", position {pos}")
			},
			ParsingError::EmptyAlternativeVariant => {
				write!(f, "Found an empty Alternative variant.")
			}
		}
	}
}

pub struct Symbol {
	position: usize
}

pub struct Asterisk {
	content: Box<Token>
}

pub struct Alternative {
	content: Vec<Box<Token>>
}

pub struct Chain {
	content: Vec<Box<Token>>
}

pub enum Token {
	Lambda,
	Symbol(Symbol),
	Asterisk(Asterisk),
	Alternative(Alternative),
	AlternativeSeparator,
	Chain(Chain)
}

impl Symbol {
	fn list_first(&self) -> Vec<usize> {
		return vec![self.position];
	}

	fn list_last(&self) -> Vec<usize> {
		return vec![self.position];
	}

	fn list_neighbours(&self) -> Vec<(usize, usize)> {
		return vec![];
	}
}

impl Asterisk {
	fn list_first(&self) -> Vec<usize> {
		return self.content.list_first();
	}

	fn list_last(&self) -> Vec<usize> {
		return self.content.list_last();
	}

	fn list_neighbours(&self) -> Vec<(usize, usize)> {
		let mut res = self.content.list_neighbours();

		for x in self.list_last() {
			for y in self.list_first() {
				res.push((x, y));
			}
		}

		return res;
	}
}

impl Alternative {
	fn new(content: Vec<Box<Token>>) -> Result<Alternative, ParsingError> {
		let mut variants: Vec<Vec<Box<Token>>> = vec![Vec::new()];

		content.into_iter().for_each(|x| {
			if matches!(x.borrow(), Token::AlternativeSeparator) {
				variants.push(Vec::new());
			} else {
				variants.last_mut().unwrap().push(x);
			}
		});

		if variants.iter().any(|x| x.is_empty()) {
			return Err(ParsingError::EmptyAlternativeVariant);
		}

		return Ok(Alternative{
			content: variants.into_iter().map(
				|x| Box::new(Token::Chain(Chain{content: x}))
			).collect()
		});
	}

	fn is_skippable(&self) -> bool {
			return self.content.iter().any(|x| x.is_skippable());
	}

	fn list_first(&self) -> Vec<usize> {
		let mut res = Vec::new();
		for token in self.content.iter() {
			res.append(&mut token.list_first());
		}

		return res;
	}

	fn list_last(&self) -> Vec<usize> {
		let mut res = Vec::new();
		for token in self.content.iter() {
			res.append(&mut token.list_last());
		}

		return res;
	}

	fn list_neighbours(&self) -> Vec<(usize, usize)> {
		let mut res = Vec::new();
		for token in self.content.iter() {
			res.append(&mut token.list_neighbours());
		}

		return res;
	}
}

impl Chain {
	fn is_skippable(&self) -> bool {
		return self.content.iter().all(|x| x.is_skippable());
	}

	fn list_first(&self) -> Vec<usize> {
		let mut res = Vec::new();
		for token in self.content.iter() {
			res.append(&mut token.list_first());
			if !token.is_skippable() {break;}
		}

		return res;
	}

	fn list_last(&self) -> Vec<usize> {
		let mut res = Vec::new();
		for token in self.content.iter().rev() {
			res.append(&mut token.list_last());
			if !token.is_skippable() {break;}
		}

		return res;
	}

	fn list_neighbours(&self) -> Vec<(usize, usize)> {
		let mut res = Vec::new();
		let mut previous: Vec<&Box<Token>> = Vec::new();
		for token in self.content.iter() {
			for t in previous.iter() {
				for x in t.list_last() {
					for y in token.list_first() {
						res.push((x, y));
					}
				}
			}
			res.append(&mut token.list_neighbours());

			if token.is_skippable() {
				previous.push(token);
			} else {
				previous = vec![token];
			}
		}

		return res;
	}
}

impl Token {
	pub fn is_skippable(&self) -> bool {
		match self {
			Token::Lambda => true,
			Token::Symbol(_) => false,
			Token::Asterisk(_) => true,
			Token::Alternative(t) => t.is_skippable(),
			Token::AlternativeSeparator => panic!(),
			Token::Chain(t) => t.is_skippable()
		}
	}

	pub fn list_first(&self) -> Vec<usize> {
		match self {
			Token::Lambda => vec![],
			Token::Symbol(t) => t.list_first(),
			Token::Asterisk(t) => t.list_first(),
			Token::Alternative(t) => t.list_first(),
			Token::AlternativeSeparator => panic!(),
			Token::Chain(t) => t.list_first()
		}
	}

	pub fn list_last(&self) -> Vec<usize> {
		match self {
			Token::Lambda => vec![],
			Token::Symbol(t) => t.list_last(),
			Token::Asterisk(t) => t.list_last(),
			Token::Alternative(t) => t.list_last(),
			Token::AlternativeSeparator => panic!(),
			Token::Chain(t) => t.list_last()
		}
	}

	pub fn list_neighbours(&self) -> Vec<(usize, usize)> {
		match self {
			Token::Lambda => vec![],
			Token::Symbol(t) => t.list_neighbours(),
			Token::Asterisk(t) => t.list_neighbours(),
			Token::Alternative(t) => t.list_neighbours(),
			Token::AlternativeSeparator => panic!(),
			Token::Chain(t) => t.list_neighbours()
		}
	}
}

fn find_closing_parenthesis(s: &String) -> Option<usize> {
	let chars: Vec<char> = s.chars().collect();
	let mut counter = 0;

	for (i, c) in chars.iter().enumerate() {
		if *c == '(' {counter += 1;}
		else if *c == ')' {counter -= 1;}
		if counter == 0 {return Some(i);}
	}

	return None;
}

pub fn parse(pattern: &String, offset: usize) -> Result<Token, ParsingError> {
	let chars: Vec<char> = pattern.chars().collect();
	let mut res: Vec<Box<Token>> = Vec::new();
	let mut is_alternative = false;
	let mut i = 0;
	while i < pattern.len() {
		let c = chars[i];
		match c {
			'(' => {
				let j = find_closing_parenthesis(&pattern[i..].to_string()).ok_or(ParsingError::ClosingParenthesis {s: pattern.clone(), pos: i})? + i;
				let inner_content = parse(&pattern[i+1..j].to_string(), offset+i+1)?;
				res.push(Box::new(inner_content));
				i = j+1;
			}
			'*' => {
				let token = res.pop().ok_or(ParsingError::Asterisk{s: pattern.clone(), pos: i})?;
				res.push(Box::new(Token::Asterisk(Asterisk{content: token})));
				i += 1;
			}
			')' => {
				return Err(ParsingError::OpeningParenthesis {s: pattern.clone(), pos: i});
			}
			'|' | '+' => {
				is_alternative = true;
				res.push(Box::new(Token::AlternativeSeparator));
				i += 1;
			}
			'_' => {
				res.push(Box::new(Token::Lambda));
				i += 1;
			}
			_c => {
				res.push(Box::new(Token::Symbol(Symbol{position: i+offset})));
				i += 1;
			}
		}
	}

	if is_alternative {
		return Ok(Token::Alternative(Alternative::new(res)?));
	} else {
		return Ok(Token::Chain(Chain{content: res}));
	}
}

mod test {
	use super::*;

	#[test]
	fn test_closing_parenthesis() {
		let s = "()";
		assert_eq!(find_closing_parenthesis(&s.to_string()), Some(1));
	}
}