Changeset 08f1519c859e on branch default [Not reviewed]
Laman - 2024-07-09 13:05:02

added Rust documentation
6 files changed with 94 insertions and 46 deletions:
regexp.py
 
@@ -11,7 +11,7 @@ class ParsingError(Exception):
 

	
 

	
 
class Token:
 
	"""Abstract base class representing a single item of a regular expressions."""
 
	"""Abstract base class representing a single item of a regular expression."""
 
	# is the Token mandatory, or can it be omitted
 
	is_skippable = False
 

	
src/lib.rs
 
mod regexp;
 
pub use regexp::{Regexp, ParsingError};
 
pub use regexp::{RegexpNFA, ParsingError};
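With this change the crate's public surface is RegexpNFA and ParsingError. A minimal downstream sketch of the renamed API, mirroring the determinize/reduce/normalize pipeline used in src/main.rs below; the pattern and the matched string here are only illustrative:

use regexp::RegexpNFA;

fn main() {
	// parse the pattern into an NFA, then turn it into a minimal, normalized DFA
	let r = match RegexpNFA::new(&String::from("a(b|c)*")) {
		Ok(nfa) => nfa.determinize().reduce().normalize(),
		Err(e) => panic!("ERROR: {e}"),
	};
	// eval decides whether the whole string matches the pattern
	println!("{}", r.eval(String::from("abcb"))); // prints "true"
}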
src/main.rs
 
use std::env;
 
use regexp::Regexp;
 
use regexp::RegexpNFA;
 

	
 
fn test() {
 
	let tests = ["", "a", "ab", "aabb", "abab", "abcd", "abcbcdbcd"];
 
	for pattern in ["a(b|c)", "a*b*", "(ab)*", "a((bc)*d)*", "(a|b)*a(a|b)(a|b)(a|b)", "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"] {
 
		println!("# {pattern}");
 
		let r = match Regexp::new(&pattern.to_string()) {
 
		let r = match RegexpNFA::new(&pattern.to_string()) {
 
			Ok(r1) => r1.determinize().reduce().normalize(),
 
			Err(e) => {
 
				println!("{e}");
 
@@ -24,7 +24,7 @@ fn main() {
 
	match args[1].as_str() {
 
		"test" => test(),
 
		"match" => {
 
			let r = match Regexp::new(&args[2].to_string()) {
 
			let r = match RegexpNFA::new(&args[2].to_string()) {
 
				Ok(r1) => r1.determinize().reduce().normalize(),
 
				Err(e) => {
 
					panic!("ERROR: {e}");
 
@@ -33,13 +33,13 @@ fn main() {
 
			println!("{}", r.eval(args[3].to_string()));
 
		},
 
		"compare" => {
 
			let r1 = match Regexp::new(&args[2].to_string()) {
 
			let r1 = match RegexpNFA::new(&args[2].to_string()) {
 
				Ok(r) => r.determinize().reduce().normalize(),
 
				Err(e) => {
 
					panic!("ERROR: {e}");
 
				}
 
			};
 
			let r2 = match Regexp::new(&args[3].to_string()) {
 
			let r2 = match RegexpNFA::new(&args[3].to_string()) {
 
				Ok(r) => r.determinize().reduce().normalize(),
 
				Err(e) => {
 
					panic!("ERROR: {e}");
 
@@ -48,7 +48,7 @@ fn main() {
 
			println!("{}", r1.find_distinguishing_string(&r2).unwrap_or("None".to_string()));
 
		},
 
		s => {
 
			println!("An unknown command: \"{s}\". Use \"match\" or \"test\".")
 
			println!("An unknown command: \"{s}\". Use \"match\", \"compare\" or \"test\".")
 
		}
 
	}
 
}
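For reference, the "compare" branch above can also be driven as a library call. A small sketch follows; the helper name compare_patterns is mine and not part of the crate, and it simply prints a shortest distinguishing string or "None" when the patterns are equivalent:

use regexp::RegexpNFA;

// hypothetical helper mirroring the "compare" subcommand above
fn compare_patterns(p1: &str, p2: &str) -> Option<String> {
	let r1 = RegexpNFA::new(&p1.to_string()).unwrap().determinize().reduce().normalize();
	let r2 = RegexpNFA::new(&p2.to_string()).unwrap().determinize().reduce().normalize();
	r1.find_distinguishing_string(&r2)
}

fn main() {
	// "a" is accepted by "a*b*" but not by "(ab)*", so a one-letter witness is reported
	println!("{}", compare_patterns("a*b*", "(ab)*").unwrap_or("None".to_string()));
	// a pattern is equivalent to itself, so this prints "None"
	println!("{}", compare_patterns("(ab)*", "(ab)*").unwrap_or("None".to_string()));
}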
src/regexp.rs
 
@@ -7,15 +7,16 @@ use token::parse;
 
const START_NFA: usize = usize::MAX;
 
const START_DFA: usize = 0;
 

	
 
/// A regular expression implemented as a non-deterministic finite automaton.
 
#[derive(Debug)]
 
pub struct Regexp {
 
pub struct RegexpNFA {
 
	rules: HashMap<(usize, char), HashSet<usize>>,
 
	end_states: HashSet<usize>,
 
	alphabet: Vec<char>
 
}
 

	
 
impl Regexp {
 
	pub fn new(pattern: &String) -> Result<Regexp, ParsingError> {
 
impl RegexpNFA {
 
	pub fn new(pattern: &String) -> Result<RegexpNFA, ParsingError> {
 
		let r = parse(pattern, 0)?;
 
		let pattern_chars = Vec::from_iter(pattern.chars());
 
		let mut rules: HashMap<(usize, char), HashSet<usize>> = HashMap::new();
 
@@ -49,9 +50,10 @@ impl Regexp {
 
		let mut alphabet_vec = Vec::from_iter(alphabet.into_iter());
 
		alphabet_vec.sort();
 

	
 
		return Ok(Regexp{rules, end_states, alphabet: alphabet_vec});
 
		return Ok(RegexpNFA{rules, end_states, alphabet: alphabet_vec});
 
	}
 

	
 
	/// Decide if a string matches the regexp.
 
	pub fn eval(&self, s: String) -> bool {
 
		let mut multistate = HashSet::from([START_NFA]);
 

	
 
@@ -71,6 +73,7 @@ impl Regexp {
 
		return multistate.iter().any(|x| self.end_states.contains(x));
 
	}
 

	
 
	/// Convert the non-deterministic finite automaton into a deterministic one.
 
	pub fn determinize(&self) -> RegexpDFA {
 
		const FAIL: usize = usize::MAX;
 
		let alphabet_index: HashMap<char, usize> = self.alphabet.iter().enumerate().map(|(i, c)| (*c, i)).collect();
 
@@ -79,6 +82,7 @@ impl Regexp {
 
		let mut end_states: HashSet<usize> = HashSet::new();
 
		if self.end_states.contains(&START_NFA) {end_states.insert(START_DFA);}
 

	
 
		// mapping the NFA state subsets -> DFA states
 
		let mut index = HashMap::from([(vec![START_NFA], START_DFA)]);
 
		let mut stack = vec![vec![START_NFA]];
 

	
 
@@ -86,6 +90,7 @@ impl Regexp {
 
			let multistate = stack.pop().unwrap();
 
			let mut new_rules: HashMap<char, HashSet<usize>> = HashMap::new();
 

	
 
			// collect all possible destination states from the multistate
 
			for key in self.rules.keys().filter(|(st, _c)| multistate.binary_search(st).is_ok()) {
 
				let (_st, c) = key;
 
				if !new_rules.contains_key(c) {
 
@@ -96,6 +101,7 @@ impl Regexp {
 
				}
 
			}
 

	
 
			// build a row for the DFA transition function table
 
			for (c, target_set) in new_rules.into_iter() {
 
				let mut target_vec = Vec::from_iter(target_set.into_iter());
 
				target_vec.sort();
 
@@ -113,6 +119,7 @@ impl Regexp {
 
			}
 
		}
 

	
 
		// add a fail state, so the transition function is complete
 
		let fail = index.len();
 
		compact_rules = compact_rules.into_iter().map(|st| if st != FAIL {st} else {fail}).collect();
 
		compact_rules.extend(iter::repeat(fail).take(n));
 
@@ -121,6 +128,8 @@ impl Regexp {
 
	}
 
}
 

	
 
/// A regular expression implemented as a deterministic finite automaton.
 
/// This simplifies support for more features compared to the NFA-based RegexpNFA.
 
#[derive(Clone)]
 
pub struct RegexpDFA {
 
	rules: Vec<usize>,
 
@@ -129,10 +138,12 @@ pub struct RegexpDFA {
 
}
 

	
 
impl RegexpDFA {
 
	/// Construct a DFA with the provided parameters, or a minimal DFA if the parameters are empty.
 
	pub fn new(rules: Vec<usize>, end_states: HashSet<usize>, alphabet_index: HashMap<char, usize>) -> RegexpDFA {
 
		if rules.len() > 0 {
 
			return RegexpDFA{rules, end_states, alphabet_index};
 
		} else {
 
			// this saves us checking for an empty `alphabet_index` in other methods.
 
			return RegexpDFA{
 
				rules: vec![1, 1],
 
				end_states,
 
@@ -141,6 +152,7 @@ impl RegexpDFA {
 
		}
 
	}
 

	
 
	/// Decide if a string matches the regexp.
 
	pub fn eval(&self, s: String) -> bool {
 
		let n = self.alphabet_index.len();
 
		let mut state = START_DFA;
 
@@ -156,11 +168,13 @@ impl RegexpDFA {
 
		return self.end_states.contains(&state);
 
	}
 

	
 
	/// Minimize the automaton by collapsing equivalent states.
 
	pub fn reduce(&self) -> RegexpDFA {
 
		let partition = self.find_equivalent_states();
 
		return self.collapse_states(partition);
 
	}
 

	
 
	/// Normalize the automaton by relabeling the states in breadth-first search order and removing unreachable states.
 
	pub fn normalize(&self) -> RegexpDFA {
 
		let n = self.alphabet_index.len();
 
		let m = self.rules.len()/n;
 
@@ -190,6 +204,8 @@ impl RegexpDFA {
 
		return RegexpDFA{rules, end_states, alphabet_index: self.alphabet_index.clone()};
 
	}
 

	
 
	/// Find the shortest string that is accepted by `self` or `other`, but not both.
 
	/// It is expected that the automatons are already reduced and normalized.
 
	pub fn find_distinguishing_string(&self, other: &RegexpDFA) -> Option<String> {
 
		if self.rules == other.rules && self.end_states == other.end_states {
 
			return None;
 
@@ -199,8 +215,10 @@ impl RegexpDFA {
 
		let r2 = other.expand_alphabet(&self.alphabet_index);
 
		let product = r1.build_product_automaton(&r2);
 
		let n = product.alphabet_index.len();
 
		let reverse_alphabet_index: HashMap<usize, char> = HashMap::from_iter(product.alphabet_index.iter().map(|(&k, &v)| (v, k)));
 
		// mapping letter keys -> letters
 
		let inverse_alphabet_index: HashMap<usize, char> = HashMap::from_iter(product.alphabet_index.iter().map(|(&k, &v)| (v, k)));
 

	
 
		// look for an accepting state with a BFS
 
		let mut queue = VecDeque::from([(0, "".to_string())]);
 
		let mut visited = HashSet::new();
 
		while !queue.is_empty() {
 
@@ -210,15 +228,16 @@ impl RegexpDFA {
 
			}
 
			for (i, target) in product.rules[state*n..(state+1)*n].iter().enumerate() {
 
				if !visited.contains(target) {
 
					queue.push_back((*target, acc.clone()+&String::from(reverse_alphabet_index[&i])));
 
					queue.push_back((*target, acc.clone()+&String::from(inverse_alphabet_index[&i])));
 
					visited.insert(target);
 
				}
 
			}
 
		}
 

	
 
		panic!();
 
		panic!("Failed to detect the Regexps as equivalent and failed to find a distinguishing string.");
 
	}
 

	
 
	/// Partition states into their equivalence classes with Hopcroft's algorithm.
 
	fn find_equivalent_states(&self) -> Vec<HashSet<usize>> {
 
		let n = self.alphabet_index.len();
 
		let m = self.rules.len() / n;
 
@@ -231,6 +250,7 @@ impl RegexpDFA {
 
			}
 
		}
 

	
 
		// store state subsets here so we can just pass around their indices
 
		let mut set_bag = vec![
 
			self.end_states.clone(),
 
			HashSet::from_iter(0..m).difference(&self.end_states).copied().collect()
 
@@ -274,11 +294,13 @@ impl RegexpDFA {
 
		return Vec::from_iter(res.into_iter().map(|k| std::mem::take(&mut set_bag[k])));
 
	}
 

	
 
	/// Collapse equivalent states from each partition class into a single state.
 
	fn collapse_states(&self, partition: Vec<HashSet<usize>>) -> RegexpDFA {
 
		let n = self.alphabet_index.len();
 
		let m = self.rules.len()/n;
 
		let mut rules = vec![];
 

	
 
		// states mapping due to the equivalents collapse
 
		let mut eq_mapping: Vec<usize> = ((0..m)).collect();
 
		for eq_set in partition.into_iter() {
 
			let mut eq_vec = Vec::from_iter(eq_set.into_iter());
 
@@ -289,6 +311,7 @@ impl RegexpDFA {
 
			}
 
		}
 

	
 
		// states mapping to keep the rules list compact after the equivalents collapse
 
		let mut discard_mapping: Vec<usize> = ((0..m)).collect();
 
		let mut discard_count = 0;
 

	
 
@@ -307,6 +330,7 @@ impl RegexpDFA {
 
		return RegexpDFA{rules, end_states, alphabet_index: self.alphabet_index.clone()};
 
	}
 

	
 
	/// Expand the automaton to accommodate the union of `self.alphabet_index` and the provided `alphabet_index`.
 
	fn expand_alphabet(&self, alphabet_index: &HashMap<char, usize>) -> RegexpDFA {
 
		if *alphabet_index == self.alphabet_index {
 
			return self.clone();
 
@@ -318,10 +342,13 @@ impl RegexpDFA {
 
		let combined_alphabet: HashSet<char> = HashSet::from_iter(self.alphabet_index.keys().chain(alphabet_index.keys()).copied());
 
		let mut combined_vec = Vec::from_iter(combined_alphabet.into_iter());
 
		combined_vec.sort();
 
		// a new alphabet_index
 
		let combined_index = HashMap::from_iter(combined_vec.iter().enumerate().map(|(i, c)| (*c, i)));
 
		// a new letter key -> the old letter key
 
		let conversion_index: HashMap<usize, usize> = HashMap::from_iter(self.alphabet_index.iter().map(|(k, v)| (combined_index[k], *v)));
 
		let n2 = combined_vec.len();
 

	
 
		// rewrite the rules into a new table, filling blanks with a new fail state
 
		let rules: Vec<usize> = (0..m*n2).map(
 
			|i| {
 
				let (j, k) = (i/n2, i%n2);
 
@@ -334,6 +361,7 @@ impl RegexpDFA {
 
		return RegexpDFA{rules, end_states: self.end_states.clone(), alphabet_index: combined_index}.reduce().normalize();
 
	}
 

	
 
	/// Create a new automaton with the Cartesian product of the arguments' states and corresponding transition rules.
 
	fn build_product_automaton(&self, other: &RegexpDFA) -> RegexpDFA {
 
		let n = self.alphabet_index.len();
 
		let m = other.rules.len() / n;
 
@@ -342,6 +370,7 @@ impl RegexpDFA {
 
		let mut rules = vec![];
 
		let mut end_states = HashSet::new();
 

	
 
		// expand each self state into m new states, one for each of the `other` states
 
		for s1 in 0..k {
 
			let row1 = &self.rules[s1*n..(s1+1)*n];
 
			for s2 in 0..m {
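A note on the data layout documented above: RegexpDFA keeps its transition function as a flat Vec<usize> in row-major order, which is why the code slices it as rules[state*n..(state+1)*n] and addresses letters through alphabet_index. A self-contained sketch of that lookup with made-up data; the two-state table below is illustrative and not taken from the crate:

use std::collections::HashMap;

fn main() {
	// illustrative two-state DFA over {a, b} that accepts strings ending in 'b'
	let alphabet_index: HashMap<char, usize> = HashMap::from([('a', 0), ('b', 1)]);
	let n = alphabet_index.len();
	// row-major table: rules[state*n + letter] is the target state
	// state 0: a -> 0, b -> 1;  state 1: a -> 0, b -> 1
	let rules: Vec<usize> = vec![0, 1, 0, 1];
	let end_states = [1usize];

	let mut state = 0;
	for c in "aab".chars() {
		state = rules[state * n + alphabet_index[&c]];
	}
	println!("{}", end_states.contains(&state)); // prints "true"
}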
src/regexp/token.rs
 
@@ -18,7 +18,7 @@ impl fmt::Display for ParsingError {
 
				write!(f, "An opening parenthesis not found. Pattern \"{s}\", position {pos}")
 
			},
 
			ParsingError::ClosingParenthesis {s, pos} => {
 
				write!(f, "An closing parenthesis not found. Pattern \"{s}\", position {pos}")
 
				write!(f, "A closing parenthesis not found. Pattern \"{s}\", position {pos}")
 
			},
 
			ParsingError::EmptyAlternativeVariant => {
 
				write!(f, "Found an empty Alternative variant.")
 
@@ -27,27 +27,34 @@ impl fmt::Display for ParsingError {
 
	}
 
}
 

	
 
/// A single letter or other alphabet symbol.
 
pub struct Symbol {
 
	/// Symbol position in the regular expression.
 
	position: usize
 
}
 

	
 
/// A unary operator specifying its content to occur zero or more times.
 
pub struct Asterisk {
 
	content: Box<Token>
 
}
 

	
 
/// An operator with a variable number of arguments, specifying exchangeable alternatives.
 
pub struct Alternative {
 
	content: Vec<Box<Token>>
 
}
 

	
 
/// An operator expressing a concatenation of its content Tokens.
 
pub struct Chain {
 
	content: Vec<Box<Token>>
 
}
 

	
 
/// Enum encapsulating possible items of a regular expression.
 
pub enum Token {
 
	Lambda,
 
	Lambda, // An empty string, useful as an `Alternative`.
 
	Symbol(Symbol),
 
	Asterisk(Asterisk),
 
	Alternative(Alternative),
 
	// A special token to temporarily separate Alternative variants. Removed in the Alternative constructor.
 
	AlternativeSeparator,
 
	Chain(Chain)
 
}
 
@@ -89,6 +96,8 @@ impl Asterisk {
 
}
 

	
 
impl Alternative {
 
	/// Split a sequence of `Tokens` by `AlternativeSeparator` into alternative variants and return the result.
 
	/// If any variant is empty, return an `Err`.
 
	fn new(content: Vec<Box<Token>>) -> Result<Alternative, ParsingError> {
 
		let mut variants: Vec<Vec<Box<Token>>> = vec![Vec::new()];
 

	
 
@@ -112,7 +121,7 @@ impl Alternative {
 
	}
 

	
 
	fn is_skippable(&self) -> bool {
 
			return self.content.iter().any(|x| x.is_skippable());
 
		return self.content.iter().any(|x| x.is_skippable());
 
	}
 

	
 
	fn list_first(&self) -> Vec<usize> {
 
@@ -193,51 +202,57 @@ impl Chain {
 
}
 

	
 
impl Token {
 
	/// Decide whether the `Token` has to contain some `Symbols`,
 
	/// or whether it can be stepped over when looking for first, last and neighbouring items.
 
	pub fn is_skippable(&self) -> bool {
 
		match self {
 
			Token::Lambda => true,
 
			Token::Symbol(_) => false,
 
			Token::Asterisk(_) => true,
 
			Token::Alternative(t) => t.is_skippable(),
 
			Token::AlternativeSeparator => panic!(),
 
			Token::AlternativeSeparator => panic!("Separators must be already removed at this stage"),
 
			Token::Chain(t) => t.is_skippable()
 
		}
 
	}
 

	
 
	/// List all possible string positions the token can start with.
 
	pub fn list_first(&self) -> Vec<usize> {
 
		match self {
 
			Token::Lambda => vec![],
 
			Token::Symbol(t) => t.list_first(),
 
			Token::Asterisk(t) => t.list_first(),
 
			Token::Alternative(t) => t.list_first(),
 
			Token::AlternativeSeparator => panic!(),
 
			Token::AlternativeSeparator => panic!("Separators must be already removed at this stage"),
 
			Token::Chain(t) => t.list_first()
 
		}
 
	}
 

	
 
	/// List all possible string positions the token can end with.
 
	pub fn list_last(&self) -> Vec<usize> {
 
		match self {
 
			Token::Lambda => vec![],
 
			Token::Symbol(t) => t.list_last(),
 
			Token::Asterisk(t) => t.list_last(),
 
			Token::Alternative(t) => t.list_last(),
 
			Token::AlternativeSeparator => panic!(),
 
			Token::AlternativeSeparator => panic!("Separators must be already removed at this stage"),
 
			Token::Chain(t) => t.list_last()
 
		}
 
	}
 

	
 
	/// List positions of all possibly neighbouring subtokens.
 
	pub fn list_neighbours(&self) -> Vec<(usize, usize)> {
 
		match self {
 
			Token::Lambda => vec![],
 
			Token::Symbol(t) => t.list_neighbours(),
 
			Token::Asterisk(t) => t.list_neighbours(),
 
			Token::Alternative(t) => t.list_neighbours(),
 
			Token::AlternativeSeparator => panic!(),
 
			Token::AlternativeSeparator => panic!("Separators must be already removed at this stage"),
 
			Token::Chain(t) => t.list_neighbours()
 
		}
 
	}
 
}
 

	
 
/// For a string starting with a parenthesis, find its matching closing parenthesis, or return None.
 
fn find_closing_parenthesis(s: &String) -> Option<usize> {
 
	let chars: Vec<char> = s.chars().collect();
 
	let mut counter = 0;
 
@@ -251,6 +266,10 @@ fn find_closing_parenthesis(s: &String) 
 
	return None;
 
}
 

	
 
/// Recursively parse the pattern into a `Token` tree.
 
/// 
 
/// The `offset` defines where the `pattern` starts relative to the original pattern,
 
/// to record correct global token positions in the subcalls.
 
pub fn parse(pattern: &String, offset: usize) -> Result<Token, ParsingError> {
 
	let chars: Vec<char> = pattern.chars().collect();
 
	let mut res: Vec<Box<Token>> = Vec::new();
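Since ParsingError is re-exported from src/lib.rs and the tests below match on its variants, callers can handle specific parse failures directly. A short sketch, with illustrative handling text:

use regexp::{RegexpNFA, ParsingError};

fn main() {
	// "(a" is missing its closing parenthesis, so parsing fails
	match RegexpNFA::new(&String::from("(a")) {
		Ok(_) => println!("parsed"),
		Err(ParsingError::ClosingParenthesis {s, pos}) => {
			println!("no closing parenthesis: pattern \"{s}\", position {pos}");
		},
		// every variant also implements Display, see the fmt::Display impl above
		Err(e) => println!("{e}"),
	}
}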
tests/test_regexp.rs
 
use regexp::Regexp;
 
use regexp::RegexpNFA;
 
use regexp::ParsingError;
 

	
 
#[test]
 
fn test_eval_basic_nfa() {
 
	let r = Regexp::new(&String::from("abc")).unwrap();
 
	let r = RegexpNFA::new(&String::from("abc")).unwrap();
 
	assert!(r.eval(String::from("abc")));
 
	assert!(!r.eval(String::from("ab")));
 
	assert!(!r.eval(String::from("abcd")));
 
@@ -11,7 +11,7 @@ fn test_eval_basic_nfa() {
 

	
 
#[test]
 
fn test_eval_basic_dfa() {
 
	let r = Regexp::new(&String::from("abc")).unwrap().determinize().reduce().normalize();
 
	let r = RegexpNFA::new(&String::from("abc")).unwrap().determinize().reduce().normalize();
 
	assert!(r.eval(String::from("abc")));
 
	assert!(!r.eval(String::from("ab")));
 
	assert!(!r.eval(String::from("abcd")));
 
@@ -19,22 +19,22 @@ fn test_eval_basic_dfa() {
 

	
 
#[test]
 
fn test_eval_empty_nfa() {
 
	assert!(Regexp::new(&String::from("a*")).unwrap().eval(String::from("")));
 
	assert!(Regexp::new(&String::from("")).unwrap().eval(String::from("")));
 
	assert!(!Regexp::new(&String::from("")).unwrap().eval(String::from("a")));
 
	assert!(!Regexp::new(&String::from("a")).unwrap().eval(String::from("")));
 
	assert!(RegexpNFA::new(&String::from("a*")).unwrap().eval(String::from("")));
 
	assert!(RegexpNFA::new(&String::from("")).unwrap().eval(String::from("")));
 
	assert!(!RegexpNFA::new(&String::from("")).unwrap().eval(String::from("a")));
 
	assert!(!RegexpNFA::new(&String::from("a")).unwrap().eval(String::from("")));
 
}
 

	
 
#[test]
 
fn test_eval_empty_dfa() {
 
	assert!(Regexp::new(&String::from("a*")).unwrap().determinize().reduce().normalize().eval(String::from("")));
 
	assert!(Regexp::new(&String::from("")).unwrap().determinize().reduce().normalize().eval(String::from("")));
 
	assert!(!Regexp::new(&String::from("")).unwrap().determinize().reduce().normalize().eval(String::from("a")));
 
	assert!(RegexpNFA::new(&String::from("a*")).unwrap().determinize().reduce().normalize().eval(String::from("")));
 
	assert!(RegexpNFA::new(&String::from("")).unwrap().determinize().reduce().normalize().eval(String::from("")));
 
	assert!(!RegexpNFA::new(&String::from("")).unwrap().determinize().reduce().normalize().eval(String::from("a")));
 
}
 

	
 
#[test]
 
fn test_eval_asterisk_nfa() {
 
	let r = Regexp::new(&String::from("a*b*")).unwrap();
 
	let r = RegexpNFA::new(&String::from("a*b*")).unwrap();
 
	assert!(r.eval(String::from("a")));
 
	assert!(r.eval(String::from("ab")));
 
	assert!(r.eval(String::from("aabb")));
 
@@ -43,7 +43,7 @@ fn test_eval_asterisk_nfa() {
 

	
 
#[test]
 
fn test_eval_asterisk_dfa() {
 
	let r = Regexp::new(&String::from("a*b*")).unwrap().determinize().normalize().reduce();
 
	let r = RegexpNFA::new(&String::from("a*b*")).unwrap().determinize().normalize().reduce();
 
	assert!(r.eval(String::from("a")));
 
	assert!(r.eval(String::from("ab")));
 
	assert!(r.eval(String::from("aabb")));
 
@@ -52,7 +52,7 @@ fn test_eval_asterisk_dfa() {
 

	
 
#[test]
 
fn test_eval_alternative_nfa() {
 
	let r = Regexp::new(&String::from("a|b|c")).unwrap();
 
	let r = RegexpNFA::new(&String::from("a|b|c")).unwrap();
 
	assert!(r.eval(String::from("a")));
 
	assert!(r.eval(String::from("b")));
 
	assert!(r.eval(String::from("c")));
 
@@ -62,7 +62,7 @@ fn test_eval_alternative_nfa() {
 

	
 
#[test]
 
fn test_eval_alternative_dfa() {
 
	let r = Regexp::new(&String::from("a|b|c")).unwrap().determinize().reduce().normalize();
 
	let r = RegexpNFA::new(&String::from("a|b|c")).unwrap().determinize().reduce().normalize();
 
	assert!(r.eval(String::from("a")));
 
	assert!(r.eval(String::from("b")));
 
	assert!(r.eval(String::from("c")));
 
@@ -72,12 +72,12 @@ fn test_eval_alternative_dfa() {
 

	
 
#[test]
 
fn test_eval_lambda_nfa() {
 
	let r = Regexp::new(&String::from("a_")).unwrap();
 
	let r = RegexpNFA::new(&String::from("a_")).unwrap();
 
	assert!(r.eval(String::from("a")));
 
	assert!(!r.eval(String::from("")));
 
	assert!(!r.eval(String::from("ab")));
 

	
 
	let r = Regexp::new(&String::from("a|_")).unwrap();
 
	let r = RegexpNFA::new(&String::from("a|_")).unwrap();
 
	assert!(r.eval(String::from("a")));
 
	assert!(r.eval(String::from("")));
 
	assert!(!r.eval(String::from("b")));
 
@@ -85,12 +85,12 @@ fn test_eval_lambda_nfa() {
 

	
 
#[test]
 
fn test_eval_lambda_dfa() {
 
	let r = Regexp::new(&String::from("a_")).unwrap().determinize().reduce().normalize();
 
	let r = RegexpNFA::new(&String::from("a_")).unwrap().determinize().reduce().normalize();
 
	assert!(r.eval(String::from("a")));
 
	assert!(!r.eval(String::from("")));
 
	assert!(!r.eval(String::from("ab")));
 

	
 
	let r = Regexp::new(&String::from("a|_")).unwrap().determinize().reduce().normalize();
 
	let r = RegexpNFA::new(&String::from("a|_")).unwrap().determinize().reduce().normalize();
 
	assert!(r.eval(String::from("a")));
 
	assert!(r.eval(String::from("")));
 
	assert!(!r.eval(String::from("b")));
 
@@ -98,36 +98,36 @@ fn test_eval_lambda_dfa() {
 

	
 
#[test]
 
fn test_invalid_asterisk() {
 
	let x = Regexp::new(&String::from("*"));
 
	let x = RegexpNFA::new(&String::from("*"));
 
	assert!(matches!(x, Err(ParsingError::Asterisk{s: _, pos: 0})));
 
}
 

	
 
#[test]
 
fn test_invalid_closing_parenthesis() {
 
	let x = Regexp::new(&String::from("(a"));
 
	let x = RegexpNFA::new(&String::from("(a"));
 
	assert!(matches!(x, Err(ParsingError::ClosingParenthesis{s: _, pos: 0})));
 
}
 

	
 
#[test]
 
fn test_invalid_opening_parenthesis() {
 
	let x = Regexp::new(&String::from("a)"));
 
	let x = RegexpNFA::new(&String::from("a)"));
 
	assert!(matches!(x, Err(ParsingError::OpeningParenthesis{s: _, pos: 1})));
 
}
 

	
 
#[test]
 
fn test_invalid_empty_variant_start() {
 
	let x = Regexp::new(&String::from("a(|b)"));
 
	let x = RegexpNFA::new(&String::from("a(|b)"));
 
	assert!(matches!(x, Err(ParsingError::EmptyAlternativeVariant)));
 
}
 

	
 
#[test]
 
fn test_invalid_empty_variant_end() {
 
	let x = Regexp::new(&String::from("a|"));
 
	let x = RegexpNFA::new(&String::from("a|"));
 
	assert!(matches!(x, Err(ParsingError::EmptyAlternativeVariant)));
 
}
 

	
 
#[test]
 
fn test_invalid_empty_variant_middle() {
 
	let x = Regexp::new(&String::from("a||b"));
 
	let x = RegexpNFA::new(&String::from("a||b"));
 
	assert!(matches!(x, Err(ParsingError::EmptyAlternativeVariant)));
 
}
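A possible additional integration test for the equivalence checking behind the "compare" command; this is a sketch and not part of the changeset, and the witness assertion only checks that some single-letter string is returned, since the exact choice depends on the search order:

#[test]
fn test_find_distinguishing_string() {
	let r1 = RegexpNFA::new(&String::from("(a|b)*")).unwrap().determinize().reduce().normalize();
	let r2 = RegexpNFA::new(&String::from("(b|a)*")).unwrap().determinize().reduce().normalize();
	// equivalent patterns reduce and normalize to identical automata
	assert!(r1.find_distinguishing_string(&r2).is_none());

	let r3 = RegexpNFA::new(&String::from("(ab)*")).unwrap().determinize().reduce().normalize();
	// "(a|b)*" and "(ab)*" already differ on a single letter
	let witness = r1.find_distinguishing_string(&r3);
	assert!(matches!(witness.as_deref(), Some("a") | Some("b")));
}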