diff --git a/src/lib.rs b/src/lib.rs --- a/src/lib.rs +++ b/src/lib.rs @@ -1,2 +1,2 @@ mod regexp; -pub use regexp::{RegexpNFA, ParsingError}; +pub use regexp::{RegexpNFA, RegexpDFA, ParsingError}; diff --git a/src/main.rs b/src/main.rs --- a/src/main.rs +++ b/src/main.rs @@ -1,21 +1,22 @@ use std::env; -use regexp::RegexpNFA; +use regexp::RegexpDFA; fn test() { let tests = ["", "a", "ab", "aabb", "abab", "abcd", "abcbcdbcd"]; for pattern in ["a(b|c)", "a*b*", "(ab)*", "a((bc)*d)*", "(a|b)*a(a|b)(a|b)(a|b)", "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"] { println!("# {pattern}"); - let r = match RegexpNFA::new(&pattern.to_string()) { - Ok(r1) => r1.determinize().reduce().normalize(), + match RegexpDFA::new(&pattern.to_string()) { + Ok(r) => { + for &t in tests.iter() { + println!("{t} {}", r.eval(t.to_string())); + } + println!(); + }, Err(e) => { println!("{e}"); continue; } - }; - for &t in tests.iter() { - println!("{t} {}", r.eval(t.to_string())); } - println!(); } } @@ -24,27 +25,16 @@ fn main() { match args[1].as_str() { "test" => test(), "match" => { - let r = match RegexpNFA::new(&args[2].to_string()) { - Ok(r1) => r1.determinize().reduce().normalize(), + match RegexpDFA::new(&args[2].to_string()) { + Ok(r) => println!("{}", r.eval(args[3].to_string())), Err(e) => { panic!("ERROR: {e}"); } - }; - println!("{}", r.eval(args[3].to_string())); + } }, "compare" => { - let r1 = match RegexpNFA::new(&args[2].to_string()) { - Ok(r) => r.determinize().reduce().normalize(), - Err(e) => { - panic!("ERROR: {e}"); - } - }; - let r2 = match RegexpNFA::new(&args[3].to_string()) { - Ok(r) => r.determinize().reduce().normalize(), - Err(e) => { - panic!("ERROR: {e}"); - } - }; + let r1 = RegexpDFA::new(&args[2].to_string()).unwrap_or_else(|e| panic!("ERROR: {e}")); + let r2 = RegexpDFA::new(&args[3].to_string()).unwrap_or_else(|e| panic!("ERROR: {e}")); println!("{}", r1.find_distinguishing_string(&r2).unwrap_or("None".to_string())); }, s => { diff --git a/src/regexp.rs b/src/regexp.rs --- a/src/regexp.rs +++ b/src/regexp.rs @@ -124,7 +124,16 @@ impl RegexpNFA { compact_rules = compact_rules.into_iter().map(|st| if st != FAIL {st} else {fail}).collect(); compact_rules.extend(iter::repeat(fail).take(n)); - return RegexpDFA::new(compact_rules, end_states, alphabet_index); + if compact_rules.len() > 0 { + return RegexpDFA{rules: compact_rules, end_states, alphabet_index}; + } else { + // return a minimal non-empty DFA + return RegexpDFA{ + rules: vec![1, 1], + end_states, + alphabet_index: HashMap::from([('\0', 0)]) + }; + } } } @@ -138,18 +147,9 @@ pub struct RegexpDFA { } impl RegexpDFA { - /// Construct a DFA with the provided parameters, or a minimal DFA if the parameters are empty. - pub fn new(rules: Vec, end_states: HashSet, alphabet_index: HashMap) -> RegexpDFA { - if rules.len() > 0 { - return RegexpDFA{rules, end_states, alphabet_index}; - } else { - // this saves us checking for an empty `alphabet_index` in other methods. - return RegexpDFA{ - rules: vec![1, 1], - end_states, - alphabet_index: HashMap::from([('\0', 0)]) - }; - } + pub fn new(pattern: &String) -> Result { + let nfa = RegexpNFA::new(pattern)?; + return Ok(nfa.determinize().reduce().normalize()); } /// Decide if a string matches the regexp. @@ -205,7 +205,7 @@ impl RegexpDFA { } /// Find the shortest string that is accepted by self or `r`, but not both. - /// It is expected that the automatons are already reduced and normalized. + /// The input automatons have to be already reduced and normalized (they are in the `new` constructor). pub fn find_distinguishing_string(&self, other: &RegexpDFA) -> Option { if self.rules == other.rules && self.end_states == other.end_states { return None; diff --git a/tests/test_regexp.rs b/tests/test_regexp.rs --- a/tests/test_regexp.rs +++ b/tests/test_regexp.rs @@ -1,4 +1,5 @@ use regexp::RegexpNFA; +use regexp::RegexpDFA; use regexp::ParsingError; #[test] @@ -11,7 +12,7 @@ fn test_eval_basic_nfa() { #[test] fn test_eval_basic_dfa() { - let r = RegexpNFA::new(&String::from("abc")).unwrap().determinize().reduce().normalize(); + let r = RegexpDFA::new(&String::from("abc")).unwrap(); assert!(r.eval(String::from("abc"))); assert!(!r.eval(String::from("ab"))); assert!(!r.eval(String::from("abcd"))); @@ -27,9 +28,9 @@ fn test_eval_empty_nfa() { #[test] fn test_eval_empty_dfa() { - assert!(RegexpNFA::new(&String::from("a*")).unwrap().determinize().reduce().normalize().eval(String::from(""))); - assert!(RegexpNFA::new(&String::from("")).unwrap().determinize().reduce().normalize().eval(String::from(""))); - assert!(!RegexpNFA::new(&String::from("")).unwrap().determinize().reduce().normalize().eval(String::from("a"))); + assert!(RegexpDFA::new(&String::from("a*")).unwrap().eval(String::from(""))); + assert!(RegexpDFA::new(&String::from("")).unwrap().eval(String::from(""))); + assert!(!RegexpDFA::new(&String::from("")).unwrap().eval(String::from("a"))); } #[test] @@ -43,7 +44,7 @@ fn test_eval_asterisk_nfa() { #[test] fn test_eval_asterisk_dfa() { - let r = RegexpNFA::new(&String::from("a*b*")).unwrap().determinize().normalize().reduce(); + let r = RegexpDFA::new(&String::from("a*b*")).unwrap(); assert!(r.eval(String::from("a"))); assert!(r.eval(String::from("ab"))); assert!(r.eval(String::from("aabb"))); @@ -62,7 +63,7 @@ fn test_eval_alternative_nfa() { #[test] fn test_eval_alternative_dfa() { - let r = RegexpNFA::new(&String::from("a|b|c")).unwrap().determinize().reduce().normalize(); + let r = RegexpDFA::new(&String::from("a|b|c")).unwrap(); assert!(r.eval(String::from("a"))); assert!(r.eval(String::from("b"))); assert!(r.eval(String::from("c"))); @@ -85,12 +86,12 @@ fn test_eval_lambda_nfa() { #[test] fn test_eval_lambda_dfa() { - let r = RegexpNFA::new(&String::from("a_")).unwrap().determinize().reduce().normalize(); + let r = RegexpDFA::new(&String::from("a_")).unwrap(); assert!(r.eval(String::from("a"))); assert!(!r.eval(String::from(""))); assert!(!r.eval(String::from("ab"))); - let r = RegexpNFA::new(&String::from("a|_")).unwrap().determinize().reduce().normalize(); + let r = RegexpDFA::new(&String::from("a|_")).unwrap(); assert!(r.eval(String::from("a"))); assert!(r.eval(String::from(""))); assert!(!r.eval(String::from("b"))); @@ -100,34 +101,46 @@ fn test_eval_lambda_dfa() { fn test_invalid_asterisk() { let x = RegexpNFA::new(&String::from("*")); assert!(matches!(x, Err(ParsingError::Asterisk{s: _, pos: 0}))); + let x = RegexpDFA::new(&String::from("*")); + assert!(matches!(x, Err(ParsingError::Asterisk{s: _, pos: 0}))); } #[test] fn test_invalid_closing_parenthesis() { let x = RegexpNFA::new(&String::from("(a")); assert!(matches!(x, Err(ParsingError::ClosingParenthesis{s: _, pos: 0}))); + let x = RegexpDFA::new(&String::from("(a")); + assert!(matches!(x, Err(ParsingError::ClosingParenthesis{s: _, pos: 0}))); } #[test] fn test_invalid_opening_parenthesis() { let x = RegexpNFA::new(&String::from("a)")); assert!(matches!(x, Err(ParsingError::OpeningParenthesis{s: _, pos: 1}))); + let x = RegexpDFA::new(&String::from("a)")); + assert!(matches!(x, Err(ParsingError::OpeningParenthesis{s: _, pos: 1}))); } #[test] fn test_invalid_empty_variant_start() { let x = RegexpNFA::new(&String::from("a(|b)")); assert!(matches!(x, Err(ParsingError::EmptyAlternativeVariant))); + let x = RegexpDFA::new(&String::from("a(|b)")); + assert!(matches!(x, Err(ParsingError::EmptyAlternativeVariant))); } #[test] fn test_invalid_empty_variant_end() { let x = RegexpNFA::new(&String::from("a|")); assert!(matches!(x, Err(ParsingError::EmptyAlternativeVariant))); + let x = RegexpDFA::new(&String::from("a|")); + assert!(matches!(x, Err(ParsingError::EmptyAlternativeVariant))); } #[test] fn test_invalid_empty_variant_middle() { let x = RegexpNFA::new(&String::from("a||b")); assert!(matches!(x, Err(ParsingError::EmptyAlternativeVariant))); + let x = RegexpDFA::new(&String::from("a||b")); + assert!(matches!(x, Err(ParsingError::EmptyAlternativeVariant))); }