Changeset - c316bec3c0cd
[Not reviewed]
default
0 1 0
Laman - 10 months ago 2024-06-10 12:23:06

finished the prototype
1 file changed with 43 insertions and 3 deletions:
regexp.py
43
3
0 comments (0 inline, 0 general)
regexp.py
Show inline comments
 
@@ -82,55 +82,95 @@ class Chain(Token):
 
						yield (x, y)
 
			yield from token.list_neighbours()
 

	
 
			if token.is_skippable:
 
				previous.append(token)
 
			else:
 
				previous = [token]
 

	
 
	def __str__(self):
 
		return "(" + "".join(str(x) for x in self.content) + ")"
 

	
 

	
 
def find_closing_parenthesis(pattern, k):
 
	counter = 0
 

	
 
	for (i, c) in enumerate(pattern[k:]):
 
		if c == "(":
 
			counter += 1
 
		elif c == ")":
 
			counter -= 1
 
		if counter == 0:
 
			return k+i
 

	
 
	return None
 

	
 

	
 
def parse(pattern, offset=0):
 
	res = []
 

	
 
	i = 0
 
	while i < len(pattern):
 
		c = pattern[i]
 
		if c == "(":
 
			j = find_closing_parenthesis(pattern, i)
 
			inner_content = parse(pattern[i+1:j], offset+i+1)
 
			res.append(inner_content)
 
			i = j+1
 
		elif c == "*":
 
			token = res.pop()
 
			res.append(Asterisk(token))
 
			i += 1
 
		else:
 
			res.append(Symbol(i+offset, c))
 
			i += 1
 

	
 
	return Chain(res)
 

	
 

	
 
class Regexp:
 
	def __init__(self, s):
 
		(self.rules, self.end_states) = self._parse(s)
 

	
 
	def _parse(self, s):
 
		r = parse(s)
 
		rules = dict()
 

	
 
		for i in r.list_first():
 
			c = s[i]
 
			key = (-1, c)
 
			if key not in rules:
 
				rules[key] = set()
 
			rules[key].add(i)
 

	
 
		for (i, j) in r.list_neighbours():
 
			c = s[j]
 
			key = (i, c)
 
			if key not in rules:
 
				rules[key] = set()
 
			rules[key].add(j)
 

	
 
		end_states = set(r.list_last())
 

	
 
		return rules, end_states
 

	
 
	def match(self, s):
 
		current = {-1}
 

	
 
		for c in s:
 
			new_state = set()
 
			for st in current:
 
				key = (st, c)
 
				if key in self.rules:
 
					new_state.update(self.rules[key])
 
			current = new_state
 

	
 
		return any(st in self.end_states for st in current)
 

	
 

	
 
if __name__ == "__main__":
 
	tests = ["a", "ab", "aabb", "abab", "abcd", "abcbcdbcd"]
 
	for pattern in ["a*b*", "(ab)*", "a((bc)*d)*"]:
 
		print("#", pattern)
 
		p = parse(pattern)
 
		print(p, list(p.list_first()), list(p.list_last()))
 
		print(list(p.list_neighbours()))
 
		for t in tests:
 
			print(t, Regexp(pattern).match(t))
 
		print()
0 comments (0 inline, 0 general)