# HG changeset patch # User Laman # Date 2024-06-12 23:27:53 # Node ID a86ff65ec3210aaea7ba84221872d0f705778249 # Parent a522e81d4f37d303b78410afe3c64f146b0e4f9f fixed the negative match for an empty string diff --git a/regexp.py b/regexp.py --- a/regexp.py +++ b/regexp.py @@ -92,6 +92,10 @@ class Chain(Token): else: previous = [token] + @property + def is_skippable(self): + return all(x.is_skippable for x in self.content) + def __str__(self): return "(" + "".join(str(x) for x in self.content) + ")" @@ -159,6 +163,8 @@ class Regexp: rules[key].add(j) end_states = set(r.list_last()) + if r.is_skippable: + end_states.add(-1) return rules, end_states @@ -177,7 +183,7 @@ class Regexp: if __name__ == "__main__": - tests = ["a", "ab", "aabb", "abab", "abcd", "abcbcdbcd"] + tests = ["", "a", "ab", "aabb", "abab", "abcd", "abcbcdbcd"] for pattern in ["a*b*", "a+b+", "(ab)*", "(ab)+", "a((bc)*d)*"]: print("#", pattern) r = Regexp(pattern) diff --git a/src/main.rs b/src/main.rs --- a/src/main.rs +++ b/src/main.rs @@ -87,6 +87,10 @@ impl Token for Plus { } impl Token for Chain { + fn is_skippable(&self) -> bool { + return self.content.iter().all(|x| x.is_skippable()); + } + fn list_first(&self) -> Vec { let mut res = Vec::new(); for token in self.content.iter() { @@ -206,7 +210,10 @@ impl Regexp { }; } - let end_states = HashSet::from_iter(r.list_last().into_iter()); + let mut end_states = HashSet::from_iter(r.list_last().into_iter()); + if r.is_skippable() { + end_states.insert(START); + } return Regexp{rules, end_states}; } @@ -232,7 +239,7 @@ impl Regexp { } fn main() { - let tests = ["a", "ab", "aabb", "abab", "abcd", "abcbcdbcd"]; + let tests = ["", "a", "ab", "aabb", "abab", "abcd", "abcbcdbcd"]; for pattern in ["a*b*", "a+b+", "(ab)*", "(ab)+", "a((bc)*d)*"] { println!("# {pattern}"); let r = Regexp::new(&pattern.to_string());