Files @ 68c16b6d84f3
Branch filter:

Location: Regular-Expresso/src/regexp/token.rs - annotation

68c16b6d84f3 7.7 KiB application/rls-services+xml Show Source Show as Raw Download as Raw
Laman
added the lambda transition
e9496b21cf64
7e640b0cffa7
7e640b0cffa7
7e640b0cffa7
7e640b0cffa7
7e640b0cffa7
7e640b0cffa7
e9496b21cf64
e9496b21cf64
7e640b0cffa7
7e640b0cffa7
7e640b0cffa7
7e640b0cffa7
7e640b0cffa7
7e640b0cffa7
7e640b0cffa7
7e640b0cffa7
7e640b0cffa7
7e640b0cffa7
7e640b0cffa7
7e640b0cffa7
7e640b0cffa7
7e640b0cffa7
7e640b0cffa7
7e640b0cffa7
e9496b21cf64
e9496b21cf64
e9496b21cf64
7e640b0cffa7
7e640b0cffa7
7e640b0cffa7
7e640b0cffa7
7e640b0cffa7
e93b264ec5cc
4eed8a486a3b
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
3cdbf505e6f8
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
3cdbf505e6f8
e93b264ec5cc
e93b264ec5cc
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e93b264ec5cc
3cdbf505e6f8
e93b264ec5cc
e93b264ec5cc
3cdbf505e6f8
68c16b6d84f3
3cdbf505e6f8
3cdbf505e6f8
3cdbf505e6f8
e9496b21cf64
e9496b21cf64
3cdbf505e6f8
3cdbf505e6f8
3cdbf505e6f8
3cdbf505e6f8
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
3cdbf505e6f8
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
3cdbf505e6f8
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
3cdbf505e6f8
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
3cdbf505e6f8
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
3cdbf505e6f8
3cdbf505e6f8
3cdbf505e6f8
68c16b6d84f3
3cdbf505e6f8
3cdbf505e6f8
3cdbf505e6f8
e9496b21cf64
e9496b21cf64
3cdbf505e6f8
3cdbf505e6f8
3cdbf505e6f8
3cdbf505e6f8
3cdbf505e6f8
3cdbf505e6f8
68c16b6d84f3
3cdbf505e6f8
3cdbf505e6f8
3cdbf505e6f8
e9496b21cf64
e9496b21cf64
3cdbf505e6f8
3cdbf505e6f8
3cdbf505e6f8
3cdbf505e6f8
3cdbf505e6f8
3cdbf505e6f8
68c16b6d84f3
3cdbf505e6f8
3cdbf505e6f8
3cdbf505e6f8
e9496b21cf64
e9496b21cf64
3cdbf505e6f8
3cdbf505e6f8
3cdbf505e6f8
3cdbf505e6f8
3cdbf505e6f8
3cdbf505e6f8
68c16b6d84f3
3cdbf505e6f8
3cdbf505e6f8
3cdbf505e6f8
e9496b21cf64
e9496b21cf64
3cdbf505e6f8
3cdbf505e6f8
3cdbf505e6f8
3cdbf505e6f8
3cdbf505e6f8
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
3cdbf505e6f8
e93b264ec5cc
3cdbf505e6f8
e9496b21cf64
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
7e640b0cffa7
7e640b0cffa7
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
7e640b0cffa7
3cdbf505e6f8
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
7e640b0cffa7
3cdbf505e6f8
e93b264ec5cc
e93b264ec5cc
7e640b0cffa7
7e640b0cffa7
7e640b0cffa7
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
68c16b6d84f3
68c16b6d84f3
68c16b6d84f3
68c16b6d84f3
3cdbf505e6f8
3cdbf505e6f8
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e9496b21cf64
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
use std::{borrow::Borrow, fmt};

#[derive(Debug, Clone)]
pub enum ParsingError {
	Asterisk {s: String, pos: usize},
	Plus {s: String, pos: usize},
	OpeningParenthesis {s: String, pos: usize},
	ClosingParenthesis {s: String, pos: usize},
	EmptyAlternativeVariant
}

impl fmt::Display for ParsingError {
	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
		match self {
			ParsingError::Asterisk {s, pos} => {
				write!(f, "The asterisk operator is missing an argument. Pattern \"{s}\", position {pos}")
			},
			ParsingError::Plus {s, pos} => {
				write!(f, "The plus operator is missing an argument. Pattern \"{s}\", position {pos}")
			},
			ParsingError::OpeningParenthesis {s, pos} => {
				write!(f, "An opening parenthesis not found. Pattern \"{s}\", position {pos}")
			},
			ParsingError::ClosingParenthesis {s, pos} => {
				write!(f, "An closing parenthesis not found. Pattern \"{s}\", position {pos}")
			},
			ParsingError::EmptyAlternativeVariant => {
				write!(f, "Found an empty Alternative variant.")
			}
		}
	}
}

pub struct Symbol {
	position: usize
}

pub struct Asterisk {
	content: Box<Token>
}

pub struct Plus {
	content: Box<Token>
}

pub struct Alternative {
	content: Vec<Box<Token>>
}

pub struct Chain {
	content: Vec<Box<Token>>
}

pub enum Token {
	Lambda,
	Symbol(Symbol),
	Asterisk(Asterisk),
	Plus(Plus),
	Alternative(Alternative),
	AlternativeSeparator,
	Chain(Chain)
}

impl Symbol {
	fn list_first(&self) -> Vec<usize> {
		return vec![self.position];
	}

	fn list_last(&self) -> Vec<usize> {
		return vec![self.position];
	}

	fn list_neighbours(&self) -> Vec<(usize, usize)> {
		return vec![];
	}
}

impl Asterisk {
	fn list_first(&self) -> Vec<usize> {
		return self.content.list_first();
	}

	fn list_last(&self) -> Vec<usize> {
		return self.content.list_last();
	}

	fn list_neighbours(&self) -> Vec<(usize, usize)> {
		let mut res = self.content.list_neighbours();

		for x in self.list_last() {
			for y in self.list_first() {
				res.push((x, y));
			}
		}

		return res;
	}
}

impl Plus {
	fn list_first(&self) -> Vec<usize> {
		return self.content.list_first();
	}

	fn list_last(&self) -> Vec<usize> {
		return self.content.list_last();
	}

	fn list_neighbours(&self) -> Vec<(usize, usize)> {
		let mut res = self.content.list_neighbours();

		for x in self.list_last() {
			for y in self.list_first() {
				res.push((x, y));
			}
		}

		return res;
	}
}

impl Alternative {
	fn new(content: Vec<Box<Token>>) -> Result<Alternative, ParsingError> {
		let mut variants: Vec<Vec<Box<Token>>> = vec![Vec::new()];

		content.into_iter().for_each(|x| {
			if matches!(x.borrow(), Token::AlternativeSeparator) {
				variants.push(Vec::new());
			} else {
				variants.last_mut().unwrap().push(x);
			}
		});

		if variants.iter().any(|x| x.is_empty()) {
			return Err(ParsingError::EmptyAlternativeVariant);
		}

		return Ok(Alternative{
			content: variants.into_iter().map(
				|x| Box::new(Token::Chain(Chain{content: x}))
			).collect()
		});
	}

	fn is_skippable(&self) -> bool {
			return self.content.iter().any(|x| x.is_skippable());
	}

	fn list_first(&self) -> Vec<usize> {
		let mut res = Vec::new();
		for token in self.content.iter() {
			res.append(&mut token.list_first());
		}

		return res;
	}

	fn list_last(&self) -> Vec<usize> {
		let mut res = Vec::new();
		for token in self.content.iter() {
			res.append(&mut token.list_last());
		}

		return res;
	}

	fn list_neighbours(&self) -> Vec<(usize, usize)> {
		let mut res = Vec::new();
		for token in self.content.iter() {
			res.append(&mut token.list_neighbours());
		}

		return res;
	}
}

impl Chain {
	fn is_skippable(&self) -> bool {
		return self.content.iter().all(|x| x.is_skippable());
	}

	fn list_first(&self) -> Vec<usize> {
		let mut res = Vec::new();
		for token in self.content.iter() {
			res.append(&mut token.list_first());
			if !token.is_skippable() {break;}
		}

		return res;
	}

	fn list_last(&self) -> Vec<usize> {
		let mut res = Vec::new();
		for token in self.content.iter().rev() {
			res.append(&mut token.list_last());
			if !token.is_skippable() {break;}
		}

		return res;
	}

	fn list_neighbours(&self) -> Vec<(usize, usize)> {
		let mut res = Vec::new();
		let mut previous: Vec<&Box<Token>> = Vec::new();
		for token in self.content.iter() {
			for t in previous.iter() {
				for x in t.list_last() {
					for y in token.list_first() {
						res.push((x, y));
					}
				}
			}
			res.append(&mut token.list_neighbours());

			if token.is_skippable() {
				previous.push(token);
			} else {
				previous = vec![token];
			}
		}

		return res;
	}
}

impl Token {
	pub fn is_skippable(&self) -> bool {
		match self {
			Token::Lambda => true,
			Token::Symbol(_) => false,
			Token::Asterisk(_) => true,
			Token::Plus(_) => false,
			Token::Alternative(t) => t.is_skippable(),
			Token::AlternativeSeparator => panic!(),
			Token::Chain(t) => t.is_skippable()
		}
	}

	pub fn list_first(&self) -> Vec<usize> {
		match self {
			Token::Lambda => vec![],
			Token::Symbol(t) => t.list_first(),
			Token::Asterisk(t) => t.list_first(),
			Token::Plus(t) => t.list_first(),
			Token::Alternative(t) => t.list_first(),
			Token::AlternativeSeparator => panic!(),
			Token::Chain(t) => t.list_first()
		}
	}

	pub fn list_last(&self) -> Vec<usize> {
		match self {
			Token::Lambda => vec![],
			Token::Symbol(t) => t.list_last(),
			Token::Asterisk(t) => t.list_last(),
			Token::Plus(t) => t.list_last(),
			Token::Alternative(t) => t.list_last(),
			Token::AlternativeSeparator => panic!(),
			Token::Chain(t) => t.list_last()
		}
	}

	pub fn list_neighbours(&self) -> Vec<(usize, usize)> {
		match self {
			Token::Lambda => vec![],
			Token::Symbol(t) => t.list_neighbours(),
			Token::Asterisk(t) => t.list_neighbours(),
			Token::Plus(t) => t.list_neighbours(),
			Token::Alternative(t) => t.list_neighbours(),
			Token::AlternativeSeparator => panic!(),
			Token::Chain(t) => t.list_neighbours()
		}
	}
}

fn find_closing_parenthesis(s: &String) -> Option<usize> {
	let chars: Vec<char> = s.chars().collect();
	let mut counter = 0;

	for (i, c) in chars.iter().enumerate() {
		if *c == '(' {counter += 1;}
		else if *c == ')' {counter -= 1;}
		if counter == 0 {return Some(i);}
	}

	return None;
}

pub fn parse(pattern: &String, offset: usize) -> Result<Token, ParsingError> {
	let chars: Vec<char> = pattern.chars().collect();
	let mut res: Vec<Box<Token>> = Vec::new();
	let mut is_alternative = false;
	let mut i = 0;
	while i < pattern.len() {
		let c = chars[i];
		match c {
			'(' => {
				let j = find_closing_parenthesis(&pattern[i..].to_string()).ok_or(ParsingError::ClosingParenthesis {s: pattern.clone(), pos: i})? + i;
				let inner_content = parse(&pattern[i+1..j].to_string(), offset+i+1)?;
				res.push(Box::new(inner_content));
				i = j+1;
			}
			'*' => {
				let token = res.pop().ok_or(ParsingError::Asterisk{s: pattern.clone(), pos: i})?;
				res.push(Box::new(Token::Asterisk(Asterisk{content: token})));
				i += 1;
			}
			'+' => {
				let token = res.pop().ok_or(ParsingError::Plus{s: pattern.clone(), pos: i})?;
				res.push(Box::new(Token::Plus(Plus{content: token})));
				i += 1;
			}
			')' => {
				return Err(ParsingError::OpeningParenthesis {s: pattern.clone(), pos: i});
			}
			'|' => {
				is_alternative = true;
				res.push(Box::new(Token::AlternativeSeparator));
				i += 1;
			}
			'_' => {
				res.push(Box::new(Token::Lambda));
				i += 1;
			}
			_c => {
				res.push(Box::new(Token::Symbol(Symbol{position: i+offset})));
				i += 1;
			}
		}
	}

	if is_alternative {
		return Ok(Token::Alternative(Alternative::new(res)?));
	} else {
		return Ok(Token::Chain(Chain{content: res}));
	}
}

mod test {
	use super::*;

	#[test]
	fn test_closing_parenthesis() {
		let s = "()";
		assert_eq!(find_closing_parenthesis(&s.to_string()), Some(1));
	}
}