Files @ 7e640b0cffa7
Branch filter:

Location: Regular-Expresso/src/regexp/token.rs - annotation

7e640b0cffa7 4.8 KiB application/rls-services+xml Show Source Show as Raw Download as Raw
Laman
handling unparsable patterns
7e640b0cffa7
7e640b0cffa7
7e640b0cffa7
7e640b0cffa7
7e640b0cffa7
7e640b0cffa7
7e640b0cffa7
7e640b0cffa7
7e640b0cffa7
7e640b0cffa7
7e640b0cffa7
7e640b0cffa7
7e640b0cffa7
7e640b0cffa7
7e640b0cffa7
7e640b0cffa7
7e640b0cffa7
7e640b0cffa7
7e640b0cffa7
7e640b0cffa7
7e640b0cffa7
7e640b0cffa7
7e640b0cffa7
7e640b0cffa7
7e640b0cffa7
7e640b0cffa7
7e640b0cffa7
7e640b0cffa7
7e640b0cffa7
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
7e640b0cffa7
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
7e640b0cffa7
7e640b0cffa7
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
7e640b0cffa7
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
7e640b0cffa7
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
7e640b0cffa7
7e640b0cffa7
7e640b0cffa7
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
7e640b0cffa7
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
e93b264ec5cc
use std::fmt;

/// Reasons why a pattern could not be turned into tokens.
///
/// Every variant records the pattern text that was being parsed (`s`)
/// and the position inside that text the error refers to (`pos`).
#[derive(Debug, Clone)]
pub enum ParsingError {
	/// A `*` operator had no preceding token to apply to.
	Asterisk {
		s: String,
		pos: usize,
	},
	/// A `+` operator had no preceding token to apply to.
	Plus {
		s: String,
		pos: usize,
	},
	/// A `)` was met without a matching `(`.
	OpeningParenthesis {
		s: String,
		pos: usize,
	},
	/// A `(` was met without a matching `)`.
	ClosingParenthesis {
		s: String,
		pos: usize,
	},
}

/// Human-readable rendering of a parsing error: a sentence describing the
/// problem followed by the offending pattern and position.
impl fmt::Display for ParsingError {
	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
		// All variants share the same payload, so only the leading sentence differs.
		let (problem, s, pos) = match self {
			ParsingError::Asterisk {s, pos} => {
				("The asterisk operator is missing an argument.", s, pos)
			},
			ParsingError::Plus {s, pos} => {
				("The plus operator is missing an argument.", s, pos)
			},
			ParsingError::OpeningParenthesis {s, pos} => {
				("An opening parenthesis not found.", s, pos)
			},
			ParsingError::ClosingParenthesis {s, pos} => {
				("A closing parenthesis not found.", s, pos)
			}
		};

		write!(f, "{problem} Pattern \"{s}\", position {pos}")
	}
}

/// A node of a parsed pattern.
///
/// Positions are the `usize` indices assigned to symbols when the pattern
/// is parsed.
pub trait Token {
	/// Whether the token may be passed over when collecting first/last
	/// positions of a sequence. Tokens are not skippable unless they
	/// override this method.
	fn is_skippable(&self) -> bool {
		false
	}

	/// Positions of the symbols the token can start with.
	fn list_first(&self) -> Vec<usize>;

	/// Positions of the symbols the token can end with.
	fn list_last(&self) -> Vec<usize>;

	/// Ordered pairs `(x, y)` of positions where `y` can directly follow `x`
	/// inside the token.
	fn list_neighbours(&self) -> Vec<(usize, usize)>;
}

/// A single literal character of the pattern.
pub struct Symbol {
	/// Position assigned to the symbol when the pattern is parsed.
	position: usize,
	/// The character itself.
	value: char,
}

/// A token followed by the `*` operator.
pub struct Asterisk {
	/// The token the operator applies to.
	content: Box<dyn Token>,
}

/// A token followed by the `+` operator.
pub struct Plus {
	/// The token the operator applies to.
	content: Box<dyn Token>,
}

/// A sequence of tokens, in pattern order.
pub struct Chain {
	/// The tokens making up the sequence.
	content: Vec<Box<dyn Token>>,
}

impl Token for Symbol {
	/// A symbol starts with itself: the result holds only its own position.
	fn list_first(&self) -> Vec<usize> {
		vec![self.position]
	}

	/// A symbol ends with itself: the result holds only its own position.
	fn list_last(&self) -> Vec<usize> {
		vec![self.position]
	}

	/// A single symbol contains no neighbouring pairs.
	fn list_neighbours(&self) -> Vec<(usize, usize)> {
		Vec::new()
	}
}

impl Token for Asterisk {
	/// A starred token is always skippable.
	fn is_skippable(&self) -> bool {true}

	/// Same first positions as the wrapped token.
	fn list_first(&self) -> Vec<usize> {
		self.content.list_first()
	}

	/// Same last positions as the wrapped token.
	fn list_last(&self) -> Vec<usize> {
		self.content.list_last()
	}

	/// Neighbours of the wrapped token, followed by every pair
	/// `(last, first)` that arises when the content is repeated.
	fn list_neighbours(&self) -> Vec<(usize, usize)> {
		let mut res = self.content.list_neighbours();

		// The first positions do not depend on `x`; compute them once
		// instead of walking the content again for every last position.
		let firsts = self.list_first();
		for x in self.list_last() {
			res.extend(firsts.iter().map(|&y| (x, y)));
		}

		res
	}
}

impl Token for Plus {
	/// One-or-more repetitions can be skipped exactly when a single
	/// occurrence of the content can be skipped (e.g. `(a*)+`).
	fn is_skippable(&self) -> bool {
		self.content.is_skippable()
	}

	/// Same first positions as the wrapped token.
	fn list_first(&self) -> Vec<usize> {
		self.content.list_first()
	}

	/// Same last positions as the wrapped token.
	fn list_last(&self) -> Vec<usize> {
		self.content.list_last()
	}

	/// Neighbours of the wrapped token, followed by every pair
	/// `(last, first)` that arises when the content is repeated.
	fn list_neighbours(&self) -> Vec<(usize, usize)> {
		let mut res = self.content.list_neighbours();

		// The first positions do not depend on `x`; compute them once
		// instead of walking the content again for every last position.
		let firsts = self.list_first();
		for x in self.list_last() {
			res.extend(firsts.iter().map(|&y| (x, y)));
		}

		res
	}
}

impl Token for Chain {
	/// A chain is skippable when every token in it is skippable
	/// (an empty chain therefore is skippable).
	fn is_skippable(&self) -> bool {
		self.content.iter().all(|x| x.is_skippable())
	}

	/// First positions of the leading tokens, up to and including the
	/// first token that is not skippable.
	fn list_first(&self) -> Vec<usize> {
		let mut res = Vec::new();
		for token in &self.content {
			res.extend(token.list_first());
			if !token.is_skippable() {break;}
		}

		res
	}

	/// Last positions of the trailing tokens, walking backwards up to and
	/// including the first token that is not skippable.
	fn list_last(&self) -> Vec<usize> {
		let mut res = Vec::new();
		for token in self.content.iter().rev() {
			res.extend(token.list_last());
			if !token.is_skippable() {break;}
		}

		res
	}

	/// Neighbour pairs inside each token plus the pairs that connect a
	/// token to the tokens that can directly precede it.
	fn list_neighbours(&self) -> Vec<(usize, usize)> {
		let mut res = Vec::new();
		// Tokens that may directly precede the current one: the latest
		// non-skippable token and all skippable tokens after it.
		let mut previous: Vec<&dyn Token> = Vec::new();
		for token in &self.content {
			// Invariant for the loops below; computed once per token
			// rather than once per predecessor last-position.
			let firsts = token.list_first();
			for t in &previous {
				for x in t.list_last() {
					res.extend(firsts.iter().map(|&y| (x, y)));
				}
			}
			res.extend(token.list_neighbours());

			// A non-skippable token hides everything before it.
			if !token.is_skippable() {
				previous.clear();
			}
			previous.push(token.as_ref());
		}

		res
	}
}

/// Scans `s` from the start while keeping a running balance of `(` (+1)
/// and `)` (-1), and returns the character index (not byte index) of the
/// first character after which the balance is zero.
///
/// For a string that begins with `(` this is the index of the matching `)`.
/// Returns `None` when the balance never reaches zero, including for an
/// empty string. A string whose first character is not a parenthesis
/// yields `Some(0)`, because the balance is already zero after it.
fn find_closing_parenthesis(s: &String) -> Option<usize> {
	let mut depth: i32 = 0;

	s.chars().position(|c| {
		match c {
			'(' => depth += 1,
			')' => depth -= 1,
			_ => {}
		}
		depth == 0
	})
}

/// Parses `pattern` into a [`Chain`] of tokens.
///
/// * `(`…`)` groups are parsed recursively into a nested chain,
/// * `*` and `+` wrap the token directly before them,
/// * any other character becomes a [`Symbol`].
///
/// `offset` is added to the character index of every symbol to form its
/// position; recursive calls pass the offset of the group's content so
/// positions stay relative to the outermost pattern.
///
/// # Errors
///
/// Returns a [`ParsingError`] when `*` or `+` has no preceding token, when
/// a `(` has no matching `)`, or when a `)` appears without a matching `(`.
/// The reported `s` and `pos` refer to the (sub)pattern being parsed by the
/// call that detected the problem.
pub fn parse(pattern: &String, offset: usize) -> Result<Chain, ParsingError> {
	// Everything below indexes by character. Byte offsets (`pattern.len()`,
	// `pattern[i..]`) disagree with `chars` as soon as the pattern contains
	// a multi-byte character and would panic.
	let chars: Vec<char> = pattern.chars().collect();
	let mut res: Vec<Box<dyn Token>> = Vec::new();
	let mut i = 0;
	while i < chars.len() {
		let c = chars[i];
		match c {
			'(' => {
				let rest: String = chars[i..].iter().collect();
				// `rest` starts with '(', so the returned index is that of
				// the matching ')' relative to `i`.
				let j = i + find_closing_parenthesis(&rest)
					.ok_or_else(|| ParsingError::ClosingParenthesis {s: pattern.clone(), pos: i})?;
				let inner: String = chars[i+1..j].iter().collect();
				let inner_content = parse(&inner, offset+i+1)?;
				res.push(Box::new(inner_content));
				i = j+1;
			}
			'*' => {
				let token = res.pop()
					.ok_or_else(|| ParsingError::Asterisk {s: pattern.clone(), pos: i})?;
				res.push(Box::new(Asterisk{content: token}));
				i += 1;
			}
			'+' => {
				let token = res.pop()
					.ok_or_else(|| ParsingError::Plus {s: pattern.clone(), pos: i})?;
				res.push(Box::new(Plus{content: token}));
				i += 1;
			}
			')' => {
				// Matched closing parentheses are consumed by the '(' arm,
				// so one seen here has no opening counterpart.
				return Err(ParsingError::OpeningParenthesis {s: pattern.clone(), pos: i});
			}
			c => {
				res.push(Box::new(Symbol{position: i+offset, value: c}));
				i += 1;
			}
		}
	}

	Ok(Chain{content: res})
}

// Compiled only for test builds so regular builds neither carry the module
// nor warn about the otherwise unused glob import.
#[cfg(test)]
mod test {
	use super::*;

	#[test]
	fn test_closing_parenthesis() {
		let s = "()";
		assert_eq!(find_closing_parenthesis(&s.to_string()), Some(1));
	}

	#[test]
	fn test_closing_parenthesis_nested() {
		let s = "(a(b)c)d";
		assert_eq!(find_closing_parenthesis(&s.to_string()), Some(6));
	}

	#[test]
	fn test_closing_parenthesis_missing() {
		let s = "((a)";
		assert_eq!(find_closing_parenthesis(&s.to_string()), None);
	}
}