# HG changeset patch # User Laman # Date 2024-06-20 13:49:15 # Node ID 68c16b6d84f39bf6f5ea3b3be16ccc5037abf813 # Parent e9496b21cf64964a03ba21a77aa7f051be0c683d added the lambda transition diff --git a/regexp.py b/regexp.py --- a/regexp.py +++ b/regexp.py @@ -21,6 +21,19 @@ class Token: pass +class Lambda(Token): + is_skippable = True + + def list_first(self): + yield from [] + + def list_last(self): + yield from [] + + def list_neighbours(self): + yield from [] + + class Symbol(Token): def __init__(self, position, value): self.position = position @@ -188,6 +201,9 @@ def parse(pattern, offset=0): is_alternative = True res.append(AlternativeSeparator()) i += 1 + elif c == "_": + res.append(Lambda()) + i += 1 else: res.append(Symbol(i+offset, c)) i += 1 diff --git a/src/regexp/token.rs b/src/regexp/token.rs --- a/src/regexp/token.rs +++ b/src/regexp/token.rs @@ -52,6 +52,7 @@ pub struct Chain { } pub enum Token { + Lambda, Symbol(Symbol), Asterisk(Asterisk), Plus(Plus), @@ -225,6 +226,7 @@ impl Chain { impl Token { pub fn is_skippable(&self) -> bool { match self { + Token::Lambda => true, Token::Symbol(_) => false, Token::Asterisk(_) => true, Token::Plus(_) => false, @@ -236,6 +238,7 @@ impl Token { pub fn list_first(&self) -> Vec { match self { + Token::Lambda => vec![], Token::Symbol(t) => t.list_first(), Token::Asterisk(t) => t.list_first(), Token::Plus(t) => t.list_first(), @@ -247,6 +250,7 @@ impl Token { pub fn list_last(&self) -> Vec { match self { + Token::Lambda => vec![], Token::Symbol(t) => t.list_last(), Token::Asterisk(t) => t.list_last(), Token::Plus(t) => t.list_last(), @@ -258,6 +262,7 @@ impl Token { pub fn list_neighbours(&self) -> Vec<(usize, usize)> { match self { + Token::Lambda => vec![], Token::Symbol(t) => t.list_neighbours(), Token::Asterisk(t) => t.list_neighbours(), Token::Plus(t) => t.list_neighbours(), @@ -313,6 +318,10 @@ pub fn parse(pattern: &String, offset: u res.push(Box::new(Token::AlternativeSeparator)); i += 1; } + '_' => { + res.push(Box::new(Token::Lambda)); + i += 1; + } _c => { res.push(Box::new(Token::Symbol(Symbol{position: i+offset}))); i += 1; diff --git a/tests/test_regexp.rs b/tests/test_regexp.rs --- a/tests/test_regexp.rs +++ b/tests/test_regexp.rs @@ -89,6 +89,32 @@ fn test_eval_alternative_dfa() { } #[test] +fn test_eval_lambda_nfa() { + let r = Regexp::new(&String::from("a_")).unwrap(); + assert!(r.eval(String::from("a"))); + assert!(!r.eval(String::from(""))); + assert!(!r.eval(String::from("ab"))); + + let r = Regexp::new(&String::from("a|_")).unwrap(); + assert!(r.eval(String::from("a"))); + assert!(r.eval(String::from(""))); + assert!(!r.eval(String::from("b"))); +} + +#[test] +fn test_eval_lambda_dfa() { + let r = Regexp::new(&String::from("a_")).unwrap().determinize(); + assert!(r.eval(String::from("a"))); + assert!(!r.eval(String::from(""))); + assert!(!r.eval(String::from("ab"))); + + let r = Regexp::new(&String::from("a|_")).unwrap().determinize(); + assert!(r.eval(String::from("a"))); + assert!(r.eval(String::from(""))); + assert!(!r.eval(String::from("b"))); +} + +#[test] fn test_invalid_asterisk() { let x = Regexp::new(&String::from("*")); assert!(matches!(x, Err(ParsingError::Asterisk{s: _, pos: 0})));