Files @ bac2e7f972b5
Branch filter:

Location: Regular-Expresso/src/regexp/token.rs - annotation

bac2e7f972b5 6.8 KiB application/rls-services+xml Show Source Show as Raw Download as Raw
Laman
added a license
e9496b21cf64
7e640b0cffa7
7e640b0cffa7
7e640b0cffa7
7e640b0cffa7
7e640b0cffa7
e9496b21cf64
e9496b21cf64
7e640b0cffa7
7e640b0cffa7
7e640b0cffa7
7e640b0cffa7
7e640b0cffa7
7e640b0cffa7
7e640b0cffa7
7e640b0cffa7
7e640b0cffa7
7e640b0cffa7
7e640b0cffa7
7e640b0cffa7
7e640b0cffa7
e9496b21cf64
e9496b21cf64
e9496b21cf64
7e640b0cffa7
7e640b0cffa7
7e640b0cffa7
7e640b0cffa7
7e640b0cffa7
e93b264ec5cc
4eed8a486a3b
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
3cdbf505e6f8
e93b264ec5cc
e93b264ec5cc
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e93b264ec5cc
3cdbf505e6f8
e93b264ec5cc
e93b264ec5cc
3cdbf505e6f8
68c16b6d84f3
3cdbf505e6f8
3cdbf505e6f8
e9496b21cf64
e9496b21cf64
3cdbf505e6f8
3cdbf505e6f8
3cdbf505e6f8
3cdbf505e6f8
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
3cdbf505e6f8
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
3cdbf505e6f8
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
3cdbf505e6f8
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
3cdbf505e6f8
3cdbf505e6f8
3cdbf505e6f8
68c16b6d84f3
3cdbf505e6f8
3cdbf505e6f8
e9496b21cf64
e9496b21cf64
3cdbf505e6f8
3cdbf505e6f8
3cdbf505e6f8
3cdbf505e6f8
3cdbf505e6f8
3cdbf505e6f8
68c16b6d84f3
3cdbf505e6f8
3cdbf505e6f8
e9496b21cf64
e9496b21cf64
3cdbf505e6f8
3cdbf505e6f8
3cdbf505e6f8
3cdbf505e6f8
3cdbf505e6f8
3cdbf505e6f8
68c16b6d84f3
3cdbf505e6f8
3cdbf505e6f8
e9496b21cf64
e9496b21cf64
3cdbf505e6f8
3cdbf505e6f8
3cdbf505e6f8
3cdbf505e6f8
3cdbf505e6f8
3cdbf505e6f8
68c16b6d84f3
3cdbf505e6f8
3cdbf505e6f8
e9496b21cf64
e9496b21cf64
3cdbf505e6f8
3cdbf505e6f8
3cdbf505e6f8
3cdbf505e6f8
3cdbf505e6f8
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
3cdbf505e6f8
e93b264ec5cc
3cdbf505e6f8
e9496b21cf64
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
7e640b0cffa7
7e640b0cffa7
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
7e640b0cffa7
3cdbf505e6f8
e93b264ec5cc
e93b264ec5cc
7e640b0cffa7
7e640b0cffa7
7e640b0cffa7
9e303d5ff83e
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
68c16b6d84f3
68c16b6d84f3
68c16b6d84f3
68c16b6d84f3
3cdbf505e6f8
3cdbf505e6f8
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
use std::{borrow::Borrow, fmt};

#[derive(Debug, Clone)]
pub enum ParsingError {
	Asterisk {s: String, pos: usize},
	OpeningParenthesis {s: String, pos: usize},
	ClosingParenthesis {s: String, pos: usize},
	EmptyAlternativeVariant
}

impl fmt::Display for ParsingError {
	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
		match self {
			ParsingError::Asterisk {s, pos} => {
				write!(f, "The asterisk operator is missing an argument. Pattern \"{s}\", position {pos}")
			},
			ParsingError::OpeningParenthesis {s, pos} => {
				write!(f, "An opening parenthesis not found. Pattern \"{s}\", position {pos}")
			},
			ParsingError::ClosingParenthesis {s, pos} => {
				write!(f, "An closing parenthesis not found. Pattern \"{s}\", position {pos}")
			},
			ParsingError::EmptyAlternativeVariant => {
				write!(f, "Found an empty Alternative variant.")
			}
		}
	}
}

pub struct Symbol {
	position: usize
}

pub struct Asterisk {
	content: Box<Token>
}

pub struct Alternative {
	content: Vec<Box<Token>>
}

pub struct Chain {
	content: Vec<Box<Token>>
}

pub enum Token {
	Lambda,
	Symbol(Symbol),
	Asterisk(Asterisk),
	Alternative(Alternative),
	AlternativeSeparator,
	Chain(Chain)
}

impl Symbol {
	fn list_first(&self) -> Vec<usize> {
		return vec![self.position];
	}

	fn list_last(&self) -> Vec<usize> {
		return vec![self.position];
	}

	fn list_neighbours(&self) -> Vec<(usize, usize)> {
		return vec![];
	}
}

impl Asterisk {
	fn list_first(&self) -> Vec<usize> {
		return self.content.list_first();
	}

	fn list_last(&self) -> Vec<usize> {
		return self.content.list_last();
	}

	fn list_neighbours(&self) -> Vec<(usize, usize)> {
		let mut res = self.content.list_neighbours();

		for x in self.list_last() {
			for y in self.list_first() {
				res.push((x, y));
			}
		}

		return res;
	}
}

impl Alternative {
	fn new(content: Vec<Box<Token>>) -> Result<Alternative, ParsingError> {
		let mut variants: Vec<Vec<Box<Token>>> = vec![Vec::new()];

		content.into_iter().for_each(|x| {
			if matches!(x.borrow(), Token::AlternativeSeparator) {
				variants.push(Vec::new());
			} else {
				variants.last_mut().unwrap().push(x);
			}
		});

		if variants.iter().any(|x| x.is_empty()) {
			return Err(ParsingError::EmptyAlternativeVariant);
		}

		return Ok(Alternative{
			content: variants.into_iter().map(
				|x| Box::new(Token::Chain(Chain{content: x}))
			).collect()
		});
	}

	fn is_skippable(&self) -> bool {
			return self.content.iter().any(|x| x.is_skippable());
	}

	fn list_first(&self) -> Vec<usize> {
		let mut res = Vec::new();
		for token in self.content.iter() {
			res.append(&mut token.list_first());
		}

		return res;
	}

	fn list_last(&self) -> Vec<usize> {
		let mut res = Vec::new();
		for token in self.content.iter() {
			res.append(&mut token.list_last());
		}

		return res;
	}

	fn list_neighbours(&self) -> Vec<(usize, usize)> {
		let mut res = Vec::new();
		for token in self.content.iter() {
			res.append(&mut token.list_neighbours());
		}

		return res;
	}
}

impl Chain {
	fn is_skippable(&self) -> bool {
		return self.content.iter().all(|x| x.is_skippable());
	}

	fn list_first(&self) -> Vec<usize> {
		let mut res = Vec::new();
		for token in self.content.iter() {
			res.append(&mut token.list_first());
			if !token.is_skippable() {break;}
		}

		return res;
	}

	fn list_last(&self) -> Vec<usize> {
		let mut res = Vec::new();
		for token in self.content.iter().rev() {
			res.append(&mut token.list_last());
			if !token.is_skippable() {break;}
		}

		return res;
	}

	fn list_neighbours(&self) -> Vec<(usize, usize)> {
		let mut res = Vec::new();
		let mut previous: Vec<&Box<Token>> = Vec::new();
		for token in self.content.iter() {
			for t in previous.iter() {
				for x in t.list_last() {
					for y in token.list_first() {
						res.push((x, y));
					}
				}
			}
			res.append(&mut token.list_neighbours());

			if token.is_skippable() {
				previous.push(token);
			} else {
				previous = vec![token];
			}
		}

		return res;
	}
}

impl Token {
	pub fn is_skippable(&self) -> bool {
		match self {
			Token::Lambda => true,
			Token::Symbol(_) => false,
			Token::Asterisk(_) => true,
			Token::Alternative(t) => t.is_skippable(),
			Token::AlternativeSeparator => panic!(),
			Token::Chain(t) => t.is_skippable()
		}
	}

	pub fn list_first(&self) -> Vec<usize> {
		match self {
			Token::Lambda => vec![],
			Token::Symbol(t) => t.list_first(),
			Token::Asterisk(t) => t.list_first(),
			Token::Alternative(t) => t.list_first(),
			Token::AlternativeSeparator => panic!(),
			Token::Chain(t) => t.list_first()
		}
	}

	pub fn list_last(&self) -> Vec<usize> {
		match self {
			Token::Lambda => vec![],
			Token::Symbol(t) => t.list_last(),
			Token::Asterisk(t) => t.list_last(),
			Token::Alternative(t) => t.list_last(),
			Token::AlternativeSeparator => panic!(),
			Token::Chain(t) => t.list_last()
		}
	}

	pub fn list_neighbours(&self) -> Vec<(usize, usize)> {
		match self {
			Token::Lambda => vec![],
			Token::Symbol(t) => t.list_neighbours(),
			Token::Asterisk(t) => t.list_neighbours(),
			Token::Alternative(t) => t.list_neighbours(),
			Token::AlternativeSeparator => panic!(),
			Token::Chain(t) => t.list_neighbours()
		}
	}
}

fn find_closing_parenthesis(s: &String) -> Option<usize> {
	let chars: Vec<char> = s.chars().collect();
	let mut counter = 0;

	for (i, c) in chars.iter().enumerate() {
		if *c == '(' {counter += 1;}
		else if *c == ')' {counter -= 1;}
		if counter == 0 {return Some(i);}
	}

	return None;
}

pub fn parse(pattern: &String, offset: usize) -> Result<Token, ParsingError> {
	let chars: Vec<char> = pattern.chars().collect();
	let mut res: Vec<Box<Token>> = Vec::new();
	let mut is_alternative = false;
	let mut i = 0;
	while i < pattern.len() {
		let c = chars[i];
		match c {
			'(' => {
				let j = find_closing_parenthesis(&pattern[i..].to_string()).ok_or(ParsingError::ClosingParenthesis {s: pattern.clone(), pos: i})? + i;
				let inner_content = parse(&pattern[i+1..j].to_string(), offset+i+1)?;
				res.push(Box::new(inner_content));
				i = j+1;
			}
			'*' => {
				let token = res.pop().ok_or(ParsingError::Asterisk{s: pattern.clone(), pos: i})?;
				res.push(Box::new(Token::Asterisk(Asterisk{content: token})));
				i += 1;
			}
			')' => {
				return Err(ParsingError::OpeningParenthesis {s: pattern.clone(), pos: i});
			}
			'|' | '+' => {
				is_alternative = true;
				res.push(Box::new(Token::AlternativeSeparator));
				i += 1;
			}
			'_' => {
				res.push(Box::new(Token::Lambda));
				i += 1;
			}
			_c => {
				res.push(Box::new(Token::Symbol(Symbol{position: i+offset})));
				i += 1;
			}
		}
	}

	if is_alternative {
		return Ok(Token::Alternative(Alternative::new(res)?));
	} else {
		return Ok(Token::Chain(Chain{content: res}));
	}
}

mod test {
	use super::*;

	#[test]
	fn test_closing_parenthesis() {
		let s = "()";
		assert_eq!(find_closing_parenthesis(&s.to_string()), Some(1));
	}
}