Changeset - 171b6c89df1d
[Not reviewed]
default
0 0 1
Laman - 10 months ago 2024-06-09 18:36:48

a Python prototype
1 file changed with 136 insertions and 0 deletions:
regexp.py
136
0 comments (0 inline, 0 general)
regexp.py
Show inline comments
 
new file 100644
 
from abc import abstractmethod
 

	
 

	
 
class Token:
 
	is_skippable = False
 

	
 
	@abstractmethod
 
	def list_first(self):
 
		pass
 

	
 
	@abstractmethod
 
	def list_last(self):
 
		pass
 

	
 
	@abstractmethod
 
	def list_neighbours(self):
 
		pass
 

	
 

	
 
class Symbol(Token):
 
	def __init__(self, position, value):
 
		self.position = position
 
		self.value = value
 

	
 
	def list_first(self):
 
		yield self.position
 

	
 
	def list_last(self):
 
		yield self.position
 

	
 
	def list_neighbours(self):
 
		yield from []
 

	
 
	def __str__(self):
 
		return self.value
 

	
 

	
 
class Asterisk(Token):
 
	is_skippable = True
 

	
 
	def __init__(self, content: Token):
 
		self.content = content
 

	
 
	def list_first(self):
 
		yield from self.content.list_first()
 

	
 
	def list_last(self):
 
		yield from self.content.list_last()
 

	
 
	def list_neighbours(self):
 
		yield from self.content.list_neighbours()
 
		for x in self.list_last():
 
			for y in self.list_first():
 
				yield (x, y)
 

	
 
	def __str__(self):
 
		return str(self.content) + "*"
 

	
 

	
 
class Chain(Token):
 
	def __init__(self, content: list):
 
		self.content = content
 

	
 
	def list_first(self):
 
		for token in self.content:
 
			yield from token.list_first()
 
			if not token.is_skippable:
 
				break
 

	
 
	def list_last(self):
 
		for token in reversed(self.content):
 
			yield from token.list_last()
 
			if not token.is_skippable:
 
				break
 

	
 
	def list_neighbours(self):
 
		previous = []
 
		for token in self.content:
 
			for t in previous:
 
				for x in t.list_last():
 
					for y in token.list_first():
 
						yield (x, y)
 
			yield from token.list_neighbours()
 

	
 
			if token.is_skippable:
 
				previous.append(token)
 
			else:
 
				previous = [token]
 

	
 
	def __str__(self):
 
		return "(" + "".join(str(x) for x in self.content) + ")"
 

	
 

	
 
def find_closing_parenthesis(pattern, k):
 
	counter = 0
 

	
 
	for (i, c) in enumerate(pattern[k:]):
 
		if c == "(":
 
			counter += 1
 
		elif c == ")":
 
			counter -= 1
 
		if counter == 0:
 
			return k+i
 

	
 
	return None
 

	
 

	
 
def parse(pattern, offset=0):
 
	res = []
 

	
 
	i = 0
 
	while i < len(pattern):
 
		c = pattern[i]
 
		if c == "(":
 
			j = find_closing_parenthesis(pattern, i)
 
			inner_content = parse(pattern[i+1:j], offset+i+1)
 
			res.append(inner_content)
 
			i = j+1
 
		elif c == "*":
 
			token = res.pop()
 
			res.append(Asterisk(token))
 
			i += 1
 
		else:
 
			res.append(Symbol(i+offset, c))
 
			i += 1
 

	
 
	return Chain(res)
 

	
 

	
 
if __name__ == "__main__":
 
	for pattern in ["a*b*", "(ab)*", "a((bc)*d)*"]:
 
		print("#", pattern)
 
		p = parse(pattern)
 
		print(p, list(p.list_first()), list(p.list_last()))
 
		print(list(p.list_neighbours()))
 
		print()
0 comments (0 inline, 0 general)