Changeset - ac6c57072747
[Not reviewed]
default
0 4 0
Laman - 9 months ago 2024-07-09 13:39:46

refactoring: reduction and normalization included in the DFA constructor
4 files changed with 49 insertions and 46 deletions:
0 comments (0 inline, 0 general)
src/lib.rs
Show inline comments
 
mod regexp;
 
pub use regexp::{RegexpNFA, ParsingError};
 
pub use regexp::{RegexpNFA, RegexpDFA, ParsingError};
src/main.rs
Show inline comments
 
use std::env;
 
use regexp::RegexpNFA;
 
use regexp::RegexpDFA;
 

	
 
fn test() {
 
	let tests = ["", "a", "ab", "aabb", "abab", "abcd", "abcbcdbcd"];
 
	for pattern in ["a(b|c)", "a*b*", "(ab)*", "a((bc)*d)*", "(a|b)*a(a|b)(a|b)(a|b)", "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"] {
 
		println!("# {pattern}");
 
		let r = match RegexpNFA::new(&pattern.to_string()) {
 
			Ok(r1) => r1.determinize().reduce().normalize(),
 
		match RegexpDFA::new(&pattern.to_string()) {
 
			Ok(r) => {
 
				for &t in tests.iter() {
 
					println!("{t} {}", r.eval(t.to_string()));
 
				}
 
				println!();
 
			},
 
			Err(e) => {
 
				println!("{e}");
 
				continue;
 
			}
 
		};
 
		for &t in tests.iter() {
 
			println!("{t} {}", r.eval(t.to_string()));
 
		}
 
		println!();
 
	}
 
}
 

	
 
@@ -24,27 +25,16 @@ fn main() {
 
	match args[1].as_str() {
 
		"test" => test(),
 
		"match" => {
 
			let r = match RegexpNFA::new(&args[2].to_string()) {
 
				Ok(r1) => r1.determinize().reduce().normalize(),
 
			match RegexpDFA::new(&args[2].to_string()) {
 
				Ok(r) => println!("{}", r.eval(args[3].to_string())),
 
				Err(e) => {
 
					panic!("ERROR: {e}");
 
				}
 
			};
 
			println!("{}", r.eval(args[3].to_string()));
 
			}
 
		},
 
		"compare" => {
 
			let r1 = match RegexpNFA::new(&args[2].to_string()) {
 
				Ok(r) => r.determinize().reduce().normalize(),
 
				Err(e) => {
 
					panic!("ERROR: {e}");
 
				}
 
			};
 
			let r2 = match RegexpNFA::new(&args[3].to_string()) {
 
				Ok(r) => r.determinize().reduce().normalize(),
 
				Err(e) => {
 
					panic!("ERROR: {e}");
 
				}
 
			};
 
			let r1 = RegexpDFA::new(&args[2].to_string()).unwrap_or_else(|e| panic!("ERROR: {e}"));
 
			let r2 = RegexpDFA::new(&args[3].to_string()).unwrap_or_else(|e| panic!("ERROR: {e}"));
 
			println!("{}", r1.find_distinguishing_string(&r2).unwrap_or("None".to_string()));
 
		},
 
		s => {
src/regexp.rs
Show inline comments
 
@@ -124,7 +124,16 @@ impl RegexpNFA {
 
		compact_rules = compact_rules.into_iter().map(|st| if st != FAIL {st} else {fail}).collect();
 
		compact_rules.extend(iter::repeat(fail).take(n));
 
		
 
		return RegexpDFA::new(compact_rules, end_states, alphabet_index);
 
		if compact_rules.len() > 0 {
 
			return RegexpDFA{rules: compact_rules, end_states, alphabet_index};
 
		} else {
 
			// return a minimal non-empty DFA
 
			return RegexpDFA{
 
				rules: vec![1, 1],
 
				end_states,
 
				alphabet_index: HashMap::from([('\0', 0)])
 
			};
 
		}
 
	}
 
}
 

	
 
@@ -138,18 +147,9 @@ pub struct RegexpDFA {
 
}
 

	
 
impl RegexpDFA {
 
	/// Construct a DFA with the provided parameters, or a minimal DFA if the parameters are empty.
 
	pub fn new(rules: Vec<usize>, end_states: HashSet<usize>, alphabet_index: HashMap<char, usize>) -> RegexpDFA {
 
		if rules.len() > 0 {
 
			return RegexpDFA{rules, end_states, alphabet_index};
 
		} else {
 
			// this saves us checking for an empty `alphabet_index` in other methods.
 
			return RegexpDFA{
 
				rules: vec![1, 1],
 
				end_states,
 
				alphabet_index: HashMap::from([('\0', 0)])
 
			};
 
		}
 
	pub fn new(pattern: &String) -> Result<Self, ParsingError> {
 
		let nfa = RegexpNFA::new(pattern)?;
 
		return Ok(nfa.determinize().reduce().normalize());
 
	}
 

	
 
	/// Decide if a string matches the regexp.
 
@@ -205,7 +205,7 @@ impl RegexpDFA {
 
	}
 

	
 
	/// Find the shortest string that is accepted by self or `r`, but not both.
 
	/// It is expected that the automatons are already reduced and normalized.
 
	/// The input automatons have to be already reduced and normalized (they are in the `new` constructor).
 
	pub fn find_distinguishing_string(&self, other: &RegexpDFA) -> Option<String> {
 
		if self.rules == other.rules && self.end_states == other.end_states {
 
			return None;
tests/test_regexp.rs
Show inline comments
 
use regexp::RegexpNFA;
 
use regexp::RegexpDFA;
 
use regexp::ParsingError;
 

	
 
#[test]
 
@@ -11,7 +12,7 @@ fn test_eval_basic_nfa() {
 

	
 
#[test]
 
fn test_eval_basic_dfa() {
 
	let r = RegexpNFA::new(&String::from("abc")).unwrap().determinize().reduce().normalize();
 
	let r = RegexpDFA::new(&String::from("abc")).unwrap();
 
	assert!(r.eval(String::from("abc")));
 
	assert!(!r.eval(String::from("ab")));
 
	assert!(!r.eval(String::from("abcd")));
 
@@ -27,9 +28,9 @@ fn test_eval_empty_nfa() {
 

	
 
#[test]
 
fn test_eval_empty_dfa() {
 
	assert!(RegexpNFA::new(&String::from("a*")).unwrap().determinize().reduce().normalize().eval(String::from("")));
 
	assert!(RegexpNFA::new(&String::from("")).unwrap().determinize().reduce().normalize().eval(String::from("")));
 
	assert!(!RegexpNFA::new(&String::from("")).unwrap().determinize().reduce().normalize().eval(String::from("a")));
 
	assert!(RegexpDFA::new(&String::from("a*")).unwrap().eval(String::from("")));
 
	assert!(RegexpDFA::new(&String::from("")).unwrap().eval(String::from("")));
 
	assert!(!RegexpDFA::new(&String::from("")).unwrap().eval(String::from("a")));
 
}
 

	
 
#[test]
 
@@ -43,7 +44,7 @@ fn test_eval_asterisk_nfa() {
 

	
 
#[test]
 
fn test_eval_asterisk_dfa() {
 
	let r = RegexpNFA::new(&String::from("a*b*")).unwrap().determinize().normalize().reduce();
 
	let r = RegexpDFA::new(&String::from("a*b*")).unwrap();
 
	assert!(r.eval(String::from("a")));
 
	assert!(r.eval(String::from("ab")));
 
	assert!(r.eval(String::from("aabb")));
 
@@ -62,7 +63,7 @@ fn test_eval_alternative_nfa() {
 

	
 
#[test]
 
fn test_eval_alternative_dfa() {
 
	let r = RegexpNFA::new(&String::from("a|b|c")).unwrap().determinize().reduce().normalize();
 
	let r = RegexpDFA::new(&String::from("a|b|c")).unwrap();
 
	assert!(r.eval(String::from("a")));
 
	assert!(r.eval(String::from("b")));
 
	assert!(r.eval(String::from("c")));
 
@@ -85,12 +86,12 @@ fn test_eval_lambda_nfa() {
 

	
 
#[test]
 
fn test_eval_lambda_dfa() {
 
	let r = RegexpNFA::new(&String::from("a_")).unwrap().determinize().reduce().normalize();
 
	let r = RegexpDFA::new(&String::from("a_")).unwrap();
 
	assert!(r.eval(String::from("a")));
 
	assert!(!r.eval(String::from("")));
 
	assert!(!r.eval(String::from("ab")));
 

	
 
	let r = RegexpNFA::new(&String::from("a|_")).unwrap().determinize().reduce().normalize();
 
	let r = RegexpDFA::new(&String::from("a|_")).unwrap();
 
	assert!(r.eval(String::from("a")));
 
	assert!(r.eval(String::from("")));
 
	assert!(!r.eval(String::from("b")));
 
@@ -100,34 +101,46 @@ fn test_eval_lambda_dfa() {
 
fn test_invalid_asterisk() {
 
	let x = RegexpNFA::new(&String::from("*"));
 
	assert!(matches!(x, Err(ParsingError::Asterisk{s: _, pos: 0})));
 
	let x = RegexpDFA::new(&String::from("*"));
 
	assert!(matches!(x, Err(ParsingError::Asterisk{s: _, pos: 0})));
 
}
 

	
 
#[test]
 
fn test_invalid_closing_parenthesis() {
 
	let x = RegexpNFA::new(&String::from("(a"));
 
	assert!(matches!(x, Err(ParsingError::ClosingParenthesis{s: _, pos: 0})));
 
	let x = RegexpDFA::new(&String::from("(a"));
 
	assert!(matches!(x, Err(ParsingError::ClosingParenthesis{s: _, pos: 0})));
 
}
 

	
 
#[test]
 
fn test_invalid_opening_parenthesis() {
 
	let x = RegexpNFA::new(&String::from("a)"));
 
	assert!(matches!(x, Err(ParsingError::OpeningParenthesis{s: _, pos: 1})));
 
	let x = RegexpDFA::new(&String::from("a)"));
 
	assert!(matches!(x, Err(ParsingError::OpeningParenthesis{s: _, pos: 1})));
 
}
 

	
 
#[test]
 
fn test_invalid_empty_variant_start() {
 
	let x = RegexpNFA::new(&String::from("a(|b)"));
 
	assert!(matches!(x, Err(ParsingError::EmptyAlternativeVariant)));
 
	let x = RegexpDFA::new(&String::from("a(|b)"));
 
	assert!(matches!(x, Err(ParsingError::EmptyAlternativeVariant)));
 
}
 

	
 
#[test]
 
fn test_invalid_empty_variant_end() {
 
	let x = RegexpNFA::new(&String::from("a|"));
 
	assert!(matches!(x, Err(ParsingError::EmptyAlternativeVariant)));
 
	let x = RegexpDFA::new(&String::from("a|"));
 
	assert!(matches!(x, Err(ParsingError::EmptyAlternativeVariant)));
 
}
 

	
 
#[test]
 
fn test_invalid_empty_variant_middle() {
 
	let x = RegexpNFA::new(&String::from("a||b"));
 
	assert!(matches!(x, Err(ParsingError::EmptyAlternativeVariant)));
 
	let x = RegexpDFA::new(&String::from("a||b"));
 
	assert!(matches!(x, Err(ParsingError::EmptyAlternativeVariant)));
 
}
0 comments (0 inline, 0 general)