Files @ 171b6c89df1d
Branch filter:

Location: Regular-Expresso/regexp.py - annotation

Laman
a Python prototype
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
171b6c89df1d
from abc import abstractmethod


class Token:
	is_skippable = False

	@abstractmethod
	def list_first(self):
		pass

	@abstractmethod
	def list_last(self):
		pass

	@abstractmethod
	def list_neighbours(self):
		pass


class Symbol(Token):
	def __init__(self, position, value):
		self.position = position
		self.value = value

	def list_first(self):
		yield self.position

	def list_last(self):
		yield self.position

	def list_neighbours(self):
		yield from []

	def __str__(self):
		return self.value


class Asterisk(Token):
	is_skippable = True

	def __init__(self, content: Token):
		self.content = content

	def list_first(self):
		yield from self.content.list_first()

	def list_last(self):
		yield from self.content.list_last()

	def list_neighbours(self):
		yield from self.content.list_neighbours()
		for x in self.list_last():
			for y in self.list_first():
				yield (x, y)

	def __str__(self):
		return str(self.content) + "*"


class Chain(Token):
	def __init__(self, content: list):
		self.content = content

	def list_first(self):
		for token in self.content:
			yield from token.list_first()
			if not token.is_skippable:
				break

	def list_last(self):
		for token in reversed(self.content):
			yield from token.list_last()
			if not token.is_skippable:
				break

	def list_neighbours(self):
		previous = []
		for token in self.content:
			for t in previous:
				for x in t.list_last():
					for y in token.list_first():
						yield (x, y)
			yield from token.list_neighbours()

			if token.is_skippable:
				previous.append(token)
			else:
				previous = [token]

	def __str__(self):
		return "(" + "".join(str(x) for x in self.content) + ")"


def find_closing_parenthesis(pattern, k):
	counter = 0

	for (i, c) in enumerate(pattern[k:]):
		if c == "(":
			counter += 1
		elif c == ")":
			counter -= 1
		if counter == 0:
			return k+i

	return None


def parse(pattern, offset=0):
	res = []

	i = 0
	while i < len(pattern):
		c = pattern[i]
		if c == "(":
			j = find_closing_parenthesis(pattern, i)
			inner_content = parse(pattern[i+1:j], offset+i+1)
			res.append(inner_content)
			i = j+1
		elif c == "*":
			token = res.pop()
			res.append(Asterisk(token))
			i += 1
		else:
			res.append(Symbol(i+offset, c))
			i += 1

	return Chain(res)


if __name__ == "__main__":
	for pattern in ["a*b*", "(ab)*", "a((bc)*d)*"]:
		print("#", pattern)
		p = parse(pattern)
		print(p, list(p.list_first()), list(p.list_last()))
		print(list(p.list_neighbours()))
		print()